<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Remove similarities in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606901#M176319</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/301053"&gt;@Nicholasamh&lt;/a&gt;&amp;nbsp;Welcome to the SAS Community! &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID Transport $ 3-103;
infile datalines truncover;
datalines;
1 Singapore Car A
1 Car A
1 UK
1 UK Van CA
2 Airplane GA
2 Japan Airplane GA
;

data want(drop=i j a);
    array _{9999} $ 100 _temporary_;
    
    do i=1 by 1 until (last.id);
        set have;
        by id;
        _[i]=Transport;
    end;

    do until (last.id);
        set have;
        by id;
        a=0;
        do j=1 to dim(_);
           a=max(a, (find(_[j], transport, 'it') &amp;amp; _[j] ne transport));
           if j=dim(_) &amp;amp; a=0 then output;
        end;
    end;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Result:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;ID  Transport
1   Singapore Car A
1   UK Van CA
2   Japan Airplane GA
&lt;/PRE&gt;</description>
    <pubDate>Mon, 25 Nov 2019 11:32:37 GMT</pubDate>
    <dc:creator>PeterClemmensen</dc:creator>
    <dc:date>2019-11-25T11:32:37Z</dc:date>
    <item>
      <title>Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606884#M176308</link>
      <description>Hi, I am new to SAS code. I have like to remove those data that are consist of the same words to each other based on ID. Really appreciate someone can help.&lt;BR /&gt;Example:&lt;BR /&gt;ID Transport&lt;BR /&gt;1 Singapore Car A&lt;BR /&gt;1 Car A&lt;BR /&gt;1 UK&lt;BR /&gt;1 UK Van CA&lt;BR /&gt;2 Japan Airplane GA&lt;BR /&gt;2 Airplane GA&lt;BR /&gt;.......&lt;BR /&gt;Expected outcome:&lt;BR /&gt;ID Transport&lt;BR /&gt;1 Singapore Car A&lt;BR /&gt;1 UK Van CA&lt;BR /&gt;2 Japan Airplane GA</description>
      <pubDate>Mon, 25 Nov 2019 07:35:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606884#M176308</guid>
      <dc:creator>Nicholasamh</dc:creator>
      <dc:date>2019-11-25T07:35:07Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606894#M176315</link>
      <description>&lt;P&gt;Welcome to the SAS communities.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Your problem is not trivial and requires a bit more than novice level coding. I couldn't think of something simpler than below two coding options (one datastep, one SQL).&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines truncover;
  input Group_ID Transport $40.;
  datalines;
1 Singapore Car A
1 Car A
1 UK
1 UK Van CA
2 Japan Airplane GA
2 Airplane GA
;

/* SAS datastep using Hash lookup */
data want(drop=_:);
  if _n_=1 then
    do;
      if 0 then set have(rename=(Transport=_Transport));
      dcl hash h1(dataset:'have(rename=(Transport=_Transport))', multidata:'y');
      h1.defineKey('Group_ID');
      h1.defineData('_Transport');
      h1.defineDone();
    end;
  set have;

  do while(h1.do_over() = 0);
    if Transport=_Transport then continue;
    if find(_Transport,Transport,'it')&amp;gt;0 then
      do;
        delete;
      end;
  end;
run;

proc print data=want;
run;

/* SQL using EXIST clause */
proc sql feedback;
/*  create table want2 as*/
    select 
      o.*
    from have o
    where not exists
      (
        select *
        from have i
        where
          i.group_id=o.group_id
          and i.transport ne o.transport
          and find(i.Transport,o.Transport,'it')&amp;gt;0
      )
    ;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;If you don't understand the code then search in the SAS docu what you find about the Hash object and the SQL EXIST clause.&lt;/P&gt;
&lt;P&gt;If you're new to SQL then not sure which of the two options is easier to understand for you. If you've got already some familiarity with SQL then it's likely the EXIST clause.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Nov 2019 10:26:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606894#M176315</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2019-11-25T10:26:55Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606901#M176319</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/301053"&gt;@Nicholasamh&lt;/a&gt;&amp;nbsp;Welcome to the SAS Community! &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID Transport $ 3-103;
infile datalines truncover;
datalines;
1 Singapore Car A
1 Car A
1 UK
1 UK Van CA
2 Airplane GA
2 Japan Airplane GA
;

data want(drop=i j a);
    array _{9999} $ 100 _temporary_;
    
    do i=1 by 1 until (last.id);
        set have;
        by id;
        _[i]=Transport;
    end;

    do until (last.id);
        set have;
        by id;
        a=0;
        do j=1 to dim(_);
           a=max(a, (find(_[j], transport, 'it') &amp;amp; _[j] ne transport));
           if j=dim(_) &amp;amp; a=0 then output;
        end;
    end;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Result:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;ID  Transport
1   Singapore Car A
1   UK Van CA
2   Japan Airplane GA
&lt;/PRE&gt;</description>
      <pubDate>Mon, 25 Nov 2019 11:32:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606901#M176319</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2019-11-25T11:32:37Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606911#M176327</link>
      <description>&lt;P&gt;The simplest solution is probably something like this:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create table want as 
  select * from have base where not exists(
    select * from have where id=base.id and Transport ne base.Transport 
    and indexw(Transport,trim(base.transport))&amp;gt;0&lt;BR /&gt;    )
  ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 25 Nov 2019 10:10:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606911#M176327</guid>
      <dc:creator>s_lassen</dc:creator>
      <dc:date>2019-11-25T10:10:13Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606929#M176336</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID Transport $ 3-103;
infile datalines truncover;
datalines;
1 Singapore Car A
1 Car A
1 UK
1 UK Van CA
2 Airplane GA
2 Japan Airplane GA
;
proc sql;
create table want as
select distinct a.*
 from have as a,have as b
  where a.id=b.id and a.Transport ne b.Transport 
and a.Transport contains strip(b.Transport);
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 25 Nov 2019 11:31:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/606929#M176336</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2019-11-25T11:31:36Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similarities</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/607203#M176439</link>
      <description>Thanks for all the great solutions</description>
      <pubDate>Tue, 26 Nov 2019 05:36:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similarities/m-p/607203#M176439</guid>
      <dc:creator>Nicholasamh</dc:creator>
      <dc:date>2019-11-26T05:36:31Z</dc:date>
    </item>
  </channel>
</rss>

