<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Deduplication across rows - extension in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488784#M127439</link>
    <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set Hierarchy;
/*ARRAY to group the variables together*/
  array lhs(8) L1-L8;
  array lhsn(8) L1_name L2_name L3_name L4_name L5_name L6_name L7_name L8_name;
  do i=1 to 8;
    do j=1 to 8;
/*IF logic to determine if the values are the same and if so, set
  the repeat value to missing*/
      if i ne j and lhs(j)=lhs(i) then lhs(j)=' ';
        if i ne j and lhsn(j)=lhsn(i) then lhsn(j)=' ';
    end;
  end;
/*this section of the code checks to see if a value is missing and if so,
  shifts the non-missing values to the left*/
/*if the values are not to be shifted over, then leave this section of the
  out*/
  do k=1 to 8;
    if lhs[k] = '' then do;
      do l= k+1 to 8;
        if lhs[l] ne '' then do;
          lhs[k] = lhs[l];
          lhs[l] = '';
          leave;
        end;
        end;
    end;
  end;
    do k=1 to 8;
    if lhsn[k] = '' then do;
      do l= k+1 to 8;
        if lhsn[l] ne '' then do;
          lhsn[k] = lhsn[l];
          lhsn[l] = '';
          leave;
        end;
        end;
    end;
  end;
  drop i j k l;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Wed, 22 Aug 2018 07:02:15 GMT</pubDate>
    <dc:creator>Jagadishkatam</dc:creator>
    <dc:date>2018-08-22T07:02:15Z</dc:date>
    <item>
      <title>Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488782#M127437</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This is an extension to a question i posted earlier under the topic - Deduplication across rows. This time, the data set contains both levels and names as below:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screen Shot 2018-08-22 at 07.17.43.png" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/22678i4F3B3399BA775E42/image-size/large?v=v2&amp;amp;px=999" role="button" title="Screen Shot 2018-08-22 at 07.17.43.png" alt="Screen Shot 2018-08-22 at 07.17.43.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;This can be created with the data step:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length CustID $10 L1 $20 L1_Name $20 L2 $20 L2_Name $20 L3 $20 L3_Name $20 L4 $20 L4_Name $20 L5 $20 L5_Name $20 L6 $20 L6_Name $20 L7 $20 L7_Name $20 L8 $20 L8_Name $20;
input CustID  $ L1 $ L1_Name $ L2 $ L2_Name $ L3 $ L3_Name $ L4 $ L4_Name $ L5 $ L5_Name $ L6 $ L6_Name $ L7 $ L7_Name $ L8 $ L8_Name $;
cards;
1 A20 James A20 James A40 Peter A50 Andrew A60 Bartholomew A70 Thomas A80 Judas
2 A40 Peter A70 Thomas A80 Judas A80 Judas
3 A70 Thomas A80 Judas A90 Steve A100 Luke
4 A30 John A60 Bartholomew A70 Thomas
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Notice:&lt;/P&gt;&lt;P&gt;You may notice duplicates across rows. e.g on row 1, the values of L1 and L1_name equals the values of L2 and L2_names respectively. Also, on row 2, the values of L3 and L3_Name equals those on L4 and L4_name respectively.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;OBJECTIVE:&lt;/P&gt;&lt;P&gt;The objective is to:&lt;/P&gt;&lt;P&gt;1. Delete duplicate values completely e.g delete values in L2 and L2_Names since they are duplicates&lt;/P&gt;&lt;P&gt;2. Ensure blanks are only at the left end of each cell. E.g we don't want blanks in the L2 and L2_Names (see point 3)&lt;/P&gt;&lt;P&gt;3. To deal with blanks, we want to shift cells leftwards where a blank has been formed due to us deleting duplicates.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;TARGET DATASET:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screen Shot 2018-08-22 at 07.30.21.png" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/22679i44AB1DF438F1ED8F/image-size/large?v=v2&amp;amp;px=999" role="button" title="Screen Shot 2018-08-22 at 07.30.21.png" alt="Screen Shot 2018-08-22 at 07.30.21.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 22 Aug 2018 06:31:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488782#M127437</guid>
      <dc:creator>frupaul</dc:creator>
      <dc:date>2018-08-22T06:31:11Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488784#M127439</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set Hierarchy;
/*ARRAY to group the variables together*/
  array lhs(8) L1-L8;
  array lhsn(8) L1_name L2_name L3_name L4_name L5_name L6_name L7_name L8_name;
  do i=1 to 8;
    do j=1 to 8;
/*IF logic to determine if the values are the same and if so, set
  the repeat value to missing*/
      if i ne j and lhs(j)=lhs(i) then lhs(j)=' ';
        if i ne j and lhsn(j)=lhsn(i) then lhsn(j)=' ';
    end;
  end;
/*this section of the code checks to see if a value is missing and if so,
  shifts the non-missing values to the left*/
/*if the values are not to be shifted over, then leave this section of the
  out*/
  do k=1 to 8;
    if lhs[k] = '' then do;
      do l= k+1 to 8;
        if lhs[l] ne '' then do;
          lhs[k] = lhs[l];
          lhs[l] = '';
          leave;
        end;
        end;
    end;
  end;
    do k=1 to 8;
    if lhsn[k] = '' then do;
      do l= k+1 to 8;
        if lhsn[l] ne '' then do;
          lhsn[k] = lhsn[l];
          lhsn[l] = '';
          leave;
        end;
        end;
    end;
  end;
  drop i j k l;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 22 Aug 2018 07:02:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488784#M127439</guid>
      <dc:creator>Jagadishkatam</dc:creator>
      <dc:date>2018-08-22T07:02:15Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488786#M127441</link>
      <description>&lt;P&gt;Once again, the approach with a long dataset format:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length CustID $10 L1 $20 L1_Name $20 L2 $20 L2_Name $20 L3 $20 L3_Name $20 L4 $20 L4_Name $20 L5 $20 L5_Name $20 L6 $20 L6_Name $20 L7 $20 L7_Name $20 L8 $20 L8_Name $20;
input CustID  $ L1 $ L1_Name $ L2 $ L2_Name $ L3 $ L3_Name $ L4 $ L4_Name $ L5 $ L5_Name $ L6 $ L6_Name $ L7 $ L7_Name $ L8 $ L8_Name $;
cards;
1 A20 James A20 James A40 Peter A50 Andrew A60 Bartholomew A70 Thomas A80 Judas
2 A40 Peter A70 Thomas A80 Judas A80 Judas
3 A70 Thomas A80 Judas A90 Steve A100 Luke
4 A30 John A60 Bartholomew A70 Thomas
;
run;

proc transpose
  data=hierarchy
  out=trans1
;
by custid;
var l:;
run;

data trans2;
set trans1;
by custid;
retain count;
if first.custid
then count = 1;
else count + 1;
if count ge 2 and lag2(col1) = col1 then delete;
drop count;
run;

data trans3;
set trans2;
by custid;
retain count;
if first.custid then count = 1;
substr(_name_,2,1) = put(count,1.);
if index(_name_,'Name') then count + 1;
drop count;
run;

proc transpose
  data=trans3
  out=trans4 (drop=_name_)
;
by custid;
id _name_;
var col1;
run;

data want;
set
  hierarchy (obs=0)
  trans4
;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;If you kept your data in a sensible format from the beginning:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length
  CustID $10
  sequence 3
  L $20
  L_Name $20
;
input CustID sequence $ L $ L_Name;
cards;
1 1 A20 James
1 2 A20 James
1 3 A40 Peter
1 4 A50 Andrew
1 5 A60 Bartholomew
1 6 A70 Thomas
1 7 A80 Judas
2 1 A40 Peter
2 2 A70 Thomas
2 3 A80 Judas
2 4 A80 Judas
3 1 A70 Thomas
3 2 A80 Judas
3 3 A90 Steve
3 4 A100 Luke
4 1 A30 John
4 2 A60 Bartholomew
4 3 A70 Thomas
;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;coding against it would be MUCH easier:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
format custid sequence l l_name;
set hierarchy (drop=sequence);
by custid;
l_l = lag(l);
if first.custid
then sequence = 1;
else do;
  if l_l = l
  then delete;
  else sequence + 1;
end;
drop l_l;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 22 Aug 2018 07:34:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488786#M127441</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2018-08-22T07:34:06Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488924#M127517</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length CustID $10 L1 $20 L1_Name $20 L2 $20 L2_Name $20 L3 $20 L3_Name $20 L4 $20 L4_Name $20 L5 $20 L5_Name $20 L6 $20 L6_Name $20 L7 $20 L7_Name $20 L8 $20 L8_Name $20;
input CustID  $ L1 $ L1_Name $ L2 $ L2_Name $ L3 $ L3_Name $ L4 $ L4_Name $ L5 $ L5_Name $ L6 $ L6_Name $ L7 $ L7_Name $ L8 $ L8_Name $;
cards;
1 A20 James A20 James A40 Peter A50 Andrew A60 Bartholomew A70 Thomas A80 Judas
2 A40 Peter A70 Thomas A80 Judas A80 Judas
3 A70 Thomas A80 Judas A90 Steve A100 Luke
4 A30 John A60 Bartholomew A70 Thomas
;

data want;
 set Hierarchy;
 array x1{*} $ L1-L8;
 array x2{*} $ L1_name L2_name L3_name L4_name L5_name L6_name L7_name L8_name;

 array y{9999} $ 100 _temporary_;
 call missing(of y{*});
 j=0;
 do i=1 to dim(x1);
   temp=catx(' ',x1{i},x2{i});
   if temp not in y then do;j+1;y{j}=temp;end;
 end;
 do i=1 to dim(x1);
   x1{i}=scan(y{i},1);   x2{i}=scan(y{i},2);
 end;
 drop i j;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 22 Aug 2018 15:18:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488924#M127517</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2018-08-22T15:18:43Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488928#M127518</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Your solutions works but I got lost in the arrays.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can you recommend any material on arrays please?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Wed, 22 Aug 2018 15:22:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488928#M127518</guid>
      <dc:creator>frupaul</dc:creator>
      <dc:date>2018-08-22T15:22:20Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488933#M127520</link>
      <description>&lt;P&gt;Sorry I am always late, but fun:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length CustID $10 L1 $20 L1_Name $20 L2 $20 L2_Name $20 L3 $20 L3_Name $20 L4 $20 L4_Name $20 L5 $20 L5_Name $20 L6 $20 L6_Name $20 L7 $20 L7_Name $20 L8 $20 L8_Name $20;
input CustID  $ L1 $ L1_Name $ L2 $ L2_Name $ L3 $ L3_Name $ L4 $ L4_Name $ L5 $ L5_Name $ L6 $ L6_Name $ L7 $ L7_Name $ L8 $ L8_Name $;
cards;
1 A20 James A20 James A40 Peter A50 Andrew A60 Bartholomew A70 Thomas A80 Judas
2 A40 Peter A70 Thomas A80 Judas A80 Judas
3 A70 Thomas A80 Judas A90 Steve A100 Luke
4 A30 John A60 Bartholomew A70 Thomas
;


data want;
if _n_=1 then do;
 dcl hash H (ordered:'y') ;
 length kn $20 k_name $20;
   h.definekey  ('i') ;
   h.definedata ('kn',"k_name") ;
   h.definedone () ;
   call missing(kn,k_name);
end;
set Hierarchy end=z;
array t(*) l:;
do i=1 to dim(t) by 2;
rc=h.find(key:i-2);
if  t(i) ne kn then rc=h.add(key:i,data:t(i),data:t(i+1));
end;
dcl hiter it('h');
rc = it.first();
do i=1 by 2 while (rc = 0);
t(i)=kn; t(i+1)=k_name;
rc = it.next();
end;
h.clear();
keep custid l:;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 22 Aug 2018 15:28:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488933#M127520</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-08-22T15:28:29Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488947#M127523</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Hierarchy;
infile datalines dlm='' truncover;
length CustID $10 L1 $20 L1_Name $20 L2 $20 L2_Name $20 L3 $20 L3_Name $20 L4 $20 L4_Name $20 L5 $20 L5_Name $20 L6 $20 L6_Name $20 L7 $20 L7_Name $20 L8 $20 L8_Name $20;
input CustID  $ L1 $ L1_Name $ L2 $ L2_Name $ L3 $ L3_Name $ L4 $ L4_Name $ L5 $ L5_Name $ L6 $ L6_Name $ L7 $ L7_Name $ L8 $ L8_Name $;
cards;
1 A20 James A20 James A40 Peter A50 Andrew A60 Bartholomew A70 Thomas A80 Judas
2 A40 Peter A70 Thomas A80 Judas A80 Judas
3 A70 Thomas A80 Judas A90 Steve A100 Luke
4 A30 John A60 Bartholomew A70 Thomas
;


data temp;
set Hierarchy;
array t(*) l:;
do i=1 to dim(t) by 2;
kn=t(i);
k_name=t(i+1);
output;
end;
keep custid kn k_name;
run;

data want;
set temp;
by custid kn k_name notsorted;
if 0 then set Hierarchy;
array t(*) l1--l8_name;
retain t;
if first.custid then do; n=1;call missing(of t(*));end;
else if first.kn then n+2;
t(n)=kn;
t(n+1)=k_name;
if last.custid;
keep custid l:;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 22 Aug 2018 15:47:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/488947#M127523</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-08-22T15:47:54Z</dc:date>
    </item>
    <item>
      <title>Re: Deduplication across rows - extension</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/489057#M127572</link>
      <description>&lt;P&gt;HI&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/134464"&gt;@frupaul&lt;/a&gt;&amp;nbsp; For some reason, when i tested the solutions with million + records the two step solution previous apparently is the fastest(sighs). I was intrigued to test as&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/32733"&gt;@FreelanceReinh&lt;/a&gt;&amp;nbsp;diligently pointed out a similar two step solution in the previous thread of yours was much faster and of course he knows i try to pay attention to those finer details to learn more. On my college lab machine, that holds true and in other machines it could be otherwise. FWIW, that was fun testing.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 22 Aug 2018 21:28:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduplication-across-rows-extension/m-p/489057#M127572</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-08-22T21:28:50Z</dc:date>
    </item>
  </channel>
</rss>

