<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Combine and Condense Duplicate Records in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939770#M368959</link>
    <description>&lt;P&gt;Assuming that no store_id occurs more than once per member_id (so that there won't be a false overlap inferred), then a DATA step passing through each ID twice will do.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It uses a 2-way array indexed by YEAR and MONTH to track overlaps.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*Combine and Condense Duplicate Records*/
%let HLB=2019;  /* History lower bound */
%let HUB=2021;  /* History upper bound */

data have;
infile cards expandtabs truncover;
input Obs_Num	Store_ID $	Member_ID $	Member_From :mmddyy10.	Member_To  :mmddyy10.	Member_Type $;
format Member_From Member_To mmddyy10.;
cards;
1	AA	123	01/01/2020	12/31/2021	Full
2	BB	123	06/01/2021	12/31/2999	Full
3	AA	456	01/01/2020	12/31/2021	Full
4	BB	456	09/01/2021	10/31/2021	Part
5	AA	789	01/01/2020	12/31/2021	Full
6	BB	911	06/01/2021	10/01/2021	Part
7	AA	411	01/01/2020	12/31/2022	Full
8	BB	411	09/01/2021	12/31/2099	Part
9	CC	411	12/01/2021	12/31/2022	Full
;

data want (drop=_:);
  /** Do a first pass through a single ID **/
  set have;
  by member_id notsorted;

  array mfreq{&amp;amp;HLB:&amp;amp;HUB,12} _temporary_  /*Frequency by year and month */;
  retain mnths_overlaps_till2021 num_stores_overlap type_overlap overlap_from overlap_to;
  length type_overlap $20;
  format overlap_from overlap_to mmddyy10. ;

  if first.member_id then do;
    call missing(of mfreq{*},of mnths_overlaps_till2021 -- overlap_to);
    mnths_overlaps_till2021 = 0;
  end;

  num_stores_overlap + not(first.member_id=1 and last.member_id=1);

  type_overlap=catx('/',type_overlap,member_type);

  _mdate=member_from;
  do while (_mdate&amp;lt; min(member_to,"31dec&amp;amp;HUB"d));
    _y=year(_mdate);
    _m=month(_mdate);
    mfreq{_y,_m}+1;
    
    if mfreq{_y,_m}=2 then do;
      overlap_from=min(overlap_from,mdy(_m,1,_y));
      overlap_to=max(overlap_to,intnx('month',mdy(_m,1,_y),0,'end'));
      mnths_overlaps_till2021 = intck('month',overlap_from,overlap_to)+1;
    end;
    _mdate=intnx('month',_mdate,1);
  end;

  /* Do a second pass through the same ID, and output */
  if last.member_id then do _i=1 to coalesce(dif(_n_),_n_);
    set have;
    output;
  end;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also, this assumes that all overlaps are nested (e.g. for 3 stores A,B, and C:&amp;nbsp; A/B overlap for 4 months, and&amp;nbsp; &amp;nbsp;A/C and B/C overlap 1 month within that 4 month A/B overlap).&amp;nbsp; And you want the longest span of those nested overlaps.&lt;/P&gt;</description>
    <pubDate>Sun, 18 Aug 2024 15:43:01 GMT</pubDate>
    <dc:creator>mkeintz</dc:creator>
    <dc:date>2024-08-18T15:43:01Z</dc:date>
    <item>
      <title>Combine and Condense Duplicate Records</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939698#M368939</link>
      <description>&lt;P&gt;Hello Community. Bringing this back up after 2 years.&lt;/P&gt;&lt;P&gt;I have the following data that I need to use to measure the periods of membership overlap (if any) for the same individual but in multiple stores:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;Obs_Num&lt;/TD&gt;&lt;TD&gt;Store_ID&lt;/TD&gt;&lt;TD&gt;Member_ID&lt;/TD&gt;&lt;TD&gt;Member_From&lt;/TD&gt;&lt;TD&gt;Member_To&lt;/TD&gt;&lt;TD&gt;Member_Type&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;123&lt;/TD&gt;&lt;TD&gt;01/01/2020&lt;/TD&gt;&lt;TD&gt;12/31/2021&lt;/TD&gt;&lt;TD&gt;Full&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;BB&lt;/TD&gt;&lt;TD&gt;123&lt;/TD&gt;&lt;TD&gt;06/01/2021&lt;/TD&gt;&lt;TD&gt;12/31/2999&lt;/TD&gt;&lt;TD&gt;Full&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;3&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;456&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;4&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;456&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;10/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;5&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;789&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;6&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;911&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;06/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;10/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;7&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;12/31/2022&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;8&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;12/31/2099&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;9&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;CC&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;12/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;12/31/2022&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#339966"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Few notes (the colors are to clarify the overlapping):&lt;/P&gt;&lt;P&gt;- Observations 1&amp;amp;2 are for the same ID in 2 different stores&lt;/P&gt;&lt;P&gt;- Observations 5&amp;amp; 6 are for distinct ID and for only one stores (no overlap)&lt;/P&gt;&lt;P&gt;- Observations 7-8-9 are for the same ID in 3 different stores&lt;/P&gt;&lt;P&gt;- The new dataset (below) should have 5 new columns to measure overlap from start UNTILL the end of 2021.&lt;/P&gt;&lt;P&gt;- The new column "Type_Overlap" is tricky in that it has to start with that store's membership type first. For example, Obs 7/8/9 are similar in 4 of the new 5 columns EXCEPT the type_overlap.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The new columns to be added to the data should be as follows:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;Obs_Num&lt;/TD&gt;&lt;TD&gt;Store_ID&lt;/TD&gt;&lt;TD&gt;Member_ID&lt;/TD&gt;&lt;TD&gt;Member_From&lt;/TD&gt;&lt;TD&gt;Member_To&lt;/TD&gt;&lt;TD&gt;Member_Type&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Mnths_Overlaps_till2021&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Num_Stores_Overlap&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Type_Overlap&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Overlap_From&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Overlap_To&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;123&lt;/TD&gt;&lt;TD&gt;01/01/2020&lt;/TD&gt;&lt;TD&gt;12/31/2021&lt;/TD&gt;&lt;TD&gt;Full&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Full&lt;/STRONG&gt;/Full&lt;/TD&gt;&lt;TD&gt;06/01/2021&lt;/TD&gt;&lt;TD&gt;12/31/2021&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;BB&lt;/TD&gt;&lt;TD&gt;123&lt;/TD&gt;&lt;TD&gt;06/01/2021&lt;/TD&gt;&lt;TD&gt;12/31/2999&lt;/TD&gt;&lt;TD&gt;Full&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Full&lt;/STRONG&gt;/Full&lt;/TD&gt;&lt;TD&gt;06/01/2021&lt;/TD&gt;&lt;TD&gt;12/31/2021&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;3&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;456&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;2&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;2&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;&lt;STRONG&gt;Full&lt;/STRONG&gt;/Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;10/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;4&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;456&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;10/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;2&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;2&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;&lt;STRONG&gt;Part&lt;/STRONG&gt;/Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#0000FF"&gt;10/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;5&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;789&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;0&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;0&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;6&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;911&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;06/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;10/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;0&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;0&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#FF0000"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;7&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;AA&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;01/01/2020&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2022&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;4&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;3&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;&lt;STRONG&gt;Full&lt;/STRONG&gt;/Part/Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;8&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;BB&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2099&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;4&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;3&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;&lt;STRONG&gt;Part&lt;/STRONG&gt;/Full/Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;9&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;CC&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;411&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2022&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;Full&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;4&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;3&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;&lt;STRONG&gt;Full&lt;/STRONG&gt;/Full/Part&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;09/01/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;TD&gt;&lt;FONT color="#008000"&gt;12/31/2021&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Is there a way to do this in SAS in a meaningful code? My data is a lot much larger than the above, but I hope I captured all the scenarios.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Aug 2024 20:20:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939698#M368939</guid>
      <dc:creator>altijani</dc:creator>
      <dc:date>2024-08-16T20:20:03Z</dc:date>
    </item>
    <item>
      <title>Re: Combine and Condense Duplicate Records</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939705#M368940</link>
      <description>&lt;P&gt;What if a member has periods at the same store that do not over lap at all what would the output look like:&lt;/P&gt;
&lt;PRE&gt;5	AA	789	01/01/2020	12/31/2021	Full
6	AA	789	01/01/2023	12/31/2023	Full&lt;/PRE&gt;
&lt;P&gt;Or ta different stores&lt;/P&gt;
&lt;PRE&gt;5	AA	789	01/01/2020	12/31/2021	Full
6	BB	789	01/01/2023	12/31/2023	Full&lt;/PRE&gt;
&lt;P&gt;Same store with gap and overlap by different store&lt;/P&gt;
&lt;PRE&gt;5	AA	789	01/01/2020	12/31/2021	Full
6	AA	789	01/01/2023	12/31/2024	Part
7	BB	789	06/01/2021	07/31/2023	Full&lt;/PRE&gt;
&lt;P&gt;Multiple stores with gaps but no overlap as well for the same ID.&lt;/P&gt;
&lt;P&gt;Should sequential non-overlapping dates be treated differently:&lt;/P&gt;
&lt;PRE&gt;5	AA	789	01/01/2020	12/31/2021	Full
6	AA	789	01/01/2022	12/31/2022	Full
7	AA	789	01/01/2023	12/31/2023	Full&lt;/PRE&gt;
&lt;P&gt;You will likely get better responses if you provide some example data in the form of a working data step.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am also interested to a minor degree in exactly how the output data set is to be used.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Do you have any idea what the maximum number of values might be involved with this Type_overlap? If you data has enough time involved that has the potential of having dozens if not more values in one variable an it is seldom a good idea to ever have more than one value in a single variable. &lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Aug 2024 21:09:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939705#M368940</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2024-08-16T21:09:25Z</dc:date>
    </item>
    <item>
      <title>Re: Combine and Condense Duplicate Records</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939718#M368942</link>
      <description>&lt;P&gt;Your overlap period only considers months through 31dec2021.&amp;nbsp; &amp;nbsp;I get that.&amp;nbsp; But why is the overlap count for the three observations for ID 411 set to 4?&amp;nbsp; The date ranges are&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;01jan2020:31dec2022&lt;/LI&gt;
&lt;LI&gt;01sep2021:31dec2099&lt;BR /&gt;and&lt;/LI&gt;
&lt;LI&gt;01dec2021:31dec2022&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;Yes, the first two obs overlap for 4 months, but the last one only overlaps for 1, yet you assign a vale of 4 to all three obs.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What if the dates were&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;01jan2020:31dec2021&lt;/LI&gt;
&lt;LI&gt;01jan2020:30jun2020&lt;BR /&gt;and&lt;/LI&gt;
&lt;LI&gt;01jan2021:30apr2021&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;How many overlap months would those 3 ranges have?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What is your rule for measuring overlap?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also&amp;nbsp;&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;Do you always have 1 obs for each member_id/store_id combination?&lt;/LI&gt;
&lt;LI&gt;Are the data always sorted by &lt;STRIKE&gt;member_id/member_from&lt;/STRIKE&gt;&amp;nbsp; member_from within each member_id?&lt;/LI&gt;
&lt;/OL&gt;</description>
      <pubDate>Sun, 18 Aug 2024 15:37:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939718#M368942</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2024-08-18T15:37:44Z</dc:date>
    </item>
    <item>
      <title>Re: Combine and Condense Duplicate Records</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939728#M368945</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile cards expandtabs truncover;
input Obs_Num	Store_ID $	Member_ID $	Member_From :mmddyy10.	Member_To  :mmddyy10.	Member_Type $;
format Member_From Member_To mmddyy10.;
cards;
1	AA	123	01/01/2020	12/31/2021	Full
2	BB	123	06/01/2021	12/31/2999	Full
3	AA	456	01/01/2020	12/31/2021	Full
4	BB	456	09/01/2021	10/31/2021	Part
5	AA	789	01/01/2020	12/31/2021	Full
6	BB	911	06/01/2021	10/01/2021	Part
7	AA	411	01/01/2020	12/31/2022	Full
8	BB	411	09/01/2021	12/31/2099	Part
9	CC	411	12/01/2021	12/31/2022	Full
;
data have2;
 set have;
 if Member_To&amp;gt;'31dec2021'd then Member_To='31dec2021'd;
run;
data temp;
 set have2;
 do date= Member_From to Member_To;
   output;
 end;
 format date mmddyy10.;
 drop  Member_From Member_To;
run;
proc sort data=temp out=temp2 nodupkey;
by Member_ID date  Obs_Num	Store_ID;
run;
data temp3;
do until(last.date);
 set temp2;
 by Member_ID date ; 
 length _Obs_Num _Store_ID _Member_Type $ 200;
 _Obs_Num=catx('|',_Obs_Num,Obs_Num);
 _Store_ID=catx('|',_Store_ID,Store_ID);
 _Member_Type=catx('|',_Member_Type,Member_Type);
end;
if findc(_Obs_Num,'|') then output;
 drop  Obs_Num	Store_ID Member_Type;
run;
data temp4;
 set temp3;
 by Member_ID date ; 
 if first.Member_ID or dif(date) ne 1 then group+1;
run;
proc sql;
create table want1 as
select group,Member_ID,min(date) as Overlap_From format=mmddyy10.,max(date) as Overlap_To format=mmddyy10.,
intck('month',calculated Overlap_From,calculated Overlap_To,'c')+1 as Mnths_Overlaps_till2021,
max(countw(_Store_ID,'|')) as Num_Stores_Overlap
 from temp4
  group by group,Member_ID;

create table want2 as
select a.*,group,Overlap_From,Overlap_To,
coalesce(Mnths_Overlaps_till2021,0) as Mnths_Overlaps_till2021,
coalesce(Num_Stores_Overlap,0) as Num_Stores_Overlap
 from have as a left join want1 as b
  on a.Member_ID=b.Member_ID
   order by Obs_Num;

create table temp5 as
select distinct group,Member_ID,_Obs_Num,_Member_Type
 from temp4
  group by group,Member_ID
   having countw(_Obs_Num,'|')=max(countw(_Obs_Num,'|'));

create table want3 as
select a.*,_Obs_Num,coalescec(_Member_Type,Member_Type) as _Member_Type
 from want2 as a left join temp5 as b
  on a.group=b.group and a.Member_ID=b.Member_ID
   order by Obs_Num;
quit;

data want4;
 set want3;
 length Type_Overlap $ 200;
 if missing(_Obs_Num) or input(scan(_Obs_Num,1,'|'),best.)=Obs_Num then Type_Overlap=_Member_Type;
  else do;
    p=findw(_Obs_Num,strip(Obs_Num),'|','eit');
    Type_Overlap=scan(_Member_Type,p,'|');
	do i=1 to countw(_Member_Type,'|');
      if i ne p then Type_Overlap=catx('|',Type_Overlap,scan(_Member_Type,i,'|'));
	end;
  end;
drop p i  _Obs_Num _Member_Type;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 17 Aug 2024 09:49:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939728#M368945</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2024-08-17T09:49:15Z</dc:date>
    </item>
    <item>
      <title>Re: Combine and Condense Duplicate Records</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939770#M368959</link>
      <description>&lt;P&gt;Assuming that no store_id occurs more than once per member_id (so that there won't be a false overlap inferred), then a DATA step passing through each ID twice will do.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It uses a 2-way array indexed by YEAR and MONTH to track overlaps.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*Combine and Condense Duplicate Records*/
%let HLB=2019;  /* History lower bound */
%let HUB=2021;  /* History upper bound */

data have;
infile cards expandtabs truncover;
input Obs_Num	Store_ID $	Member_ID $	Member_From :mmddyy10.	Member_To  :mmddyy10.	Member_Type $;
format Member_From Member_To mmddyy10.;
cards;
1	AA	123	01/01/2020	12/31/2021	Full
2	BB	123	06/01/2021	12/31/2999	Full
3	AA	456	01/01/2020	12/31/2021	Full
4	BB	456	09/01/2021	10/31/2021	Part
5	AA	789	01/01/2020	12/31/2021	Full
6	BB	911	06/01/2021	10/01/2021	Part
7	AA	411	01/01/2020	12/31/2022	Full
8	BB	411	09/01/2021	12/31/2099	Part
9	CC	411	12/01/2021	12/31/2022	Full
;

data want (drop=_:);
  /** Do a first pass through a single ID **/
  set have;
  by member_id notsorted;

  array mfreq{&amp;amp;HLB:&amp;amp;HUB,12} _temporary_  /*Frequency by year and month */;
  retain mnths_overlaps_till2021 num_stores_overlap type_overlap overlap_from overlap_to;
  length type_overlap $20;
  format overlap_from overlap_to mmddyy10. ;

  if first.member_id then do;
    call missing(of mfreq{*},of mnths_overlaps_till2021 -- overlap_to);
    mnths_overlaps_till2021 = 0;
  end;

  num_stores_overlap + not(first.member_id=1 and last.member_id=1);

  type_overlap=catx('/',type_overlap,member_type);

  _mdate=member_from;
  do while (_mdate&amp;lt; min(member_to,"31dec&amp;amp;HUB"d));
    _y=year(_mdate);
    _m=month(_mdate);
    mfreq{_y,_m}+1;
    
    if mfreq{_y,_m}=2 then do;
      overlap_from=min(overlap_from,mdy(_m,1,_y));
      overlap_to=max(overlap_to,intnx('month',mdy(_m,1,_y),0,'end'));
      mnths_overlaps_till2021 = intck('month',overlap_from,overlap_to)+1;
    end;
    _mdate=intnx('month',_mdate,1);
  end;

  /* Do a second pass through the same ID, and output */
  if last.member_id then do _i=1 to coalesce(dif(_n_),_n_);
    set have;
    output;
  end;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also, this assumes that all overlaps are nested (e.g. for 3 stores A,B, and C:&amp;nbsp; A/B overlap for 4 months, and&amp;nbsp; &amp;nbsp;A/C and B/C overlap 1 month within that 4 month A/B overlap).&amp;nbsp; And you want the longest span of those nested overlaps.&lt;/P&gt;</description>
      <pubDate>Sun, 18 Aug 2024 15:43:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Combine-and-Condense-Duplicate-Records/m-p/939770#M368959</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2024-08-18T15:43:01Z</dc:date>
    </item>
  </channel>
</rss>

