<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: duplicate values in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491919#M129168</link>
    <description>Sorry &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;BR /&gt;I edited the post</description>
    <pubDate>Sun, 02 Sep 2018 19:03:40 GMT</pubDate>
    <dc:creator>Carmel</dc:creator>
    <dc:date>2018-09-02T19:03:40Z</dc:date>
    <item>
      <title>duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491917#M129166</link>
      <description>&lt;P&gt;Hi!&lt;/P&gt;&lt;P&gt;I'm working on SAS Studio and I have a big data of&lt;SPAN&gt;&amp;nbsp;almost 700,000 observations, and it looks something like that:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;_____________________________&lt;/P&gt;&lt;P&gt;ID&amp;nbsp; &amp;nbsp; a1&amp;nbsp; &amp;nbsp; a2&amp;nbsp; &amp;nbsp; a3&amp;nbsp; &amp;nbsp; a4&amp;nbsp; &amp;nbsp; ...&amp;nbsp; &amp;nbsp;a20&lt;/P&gt;&lt;P&gt;1&amp;nbsp; &amp;nbsp; &amp;nbsp; 11&amp;nbsp; &amp;nbsp; 12&amp;nbsp; &amp;nbsp; 14&amp;nbsp; &amp;nbsp; 15&amp;nbsp; &amp;nbsp; &amp;nbsp;...&lt;/P&gt;&lt;P&gt;2&amp;nbsp; &amp;nbsp; &amp;nbsp; 11&amp;nbsp; &amp;nbsp; 13&amp;nbsp; &amp;nbsp; &amp;nbsp; .&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;.&amp;nbsp; &amp;nbsp; &amp;nbsp; ...&lt;/P&gt;&lt;P&gt;3&amp;nbsp; &amp;nbsp; &amp;nbsp; 12&amp;nbsp; &amp;nbsp; 13&amp;nbsp; &amp;nbsp; 14&amp;nbsp; &amp;nbsp; &amp;nbsp; .&amp;nbsp; &amp;nbsp; &amp;nbsp; ...&lt;/P&gt;&lt;P&gt;4&amp;nbsp; &amp;nbsp; &amp;nbsp; 12&amp;nbsp; &amp;nbsp; 14&amp;nbsp; &amp;nbsp; 15&amp;nbsp; &amp;nbsp; &amp;nbsp; .&amp;nbsp; &amp;nbsp; &amp;nbsp; ...&lt;/P&gt;&lt;P&gt;...&lt;/P&gt;&lt;P&gt;700000&amp;nbsp; ....&lt;/P&gt;&lt;P&gt;_____________________________&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;(The numbers in the a's can be everything, but always a1&amp;lt;a2&amp;lt;a3&amp;lt;a4 etc.)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;as you can see, ID 1, ID 3 and ID 4&amp;nbsp;have the numbers 12 and 14 (not in the same variables),and ID 1 and 4 have 12 and 15 as well as 14 and 15.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;So the data I want is:&lt;/P&gt;&lt;P&gt;___________&lt;/P&gt;&lt;P&gt;A&amp;nbsp; &amp;nbsp; &amp;nbsp; B&amp;nbsp; &amp;nbsp; freq&lt;/P&gt;&lt;P&gt;12&amp;nbsp; &amp;nbsp; 14&amp;nbsp; &amp;nbsp; &amp;nbsp;3&lt;/P&gt;&lt;P&gt;12&amp;nbsp; &amp;nbsp; 15&amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/P&gt;&lt;P&gt;14&amp;nbsp; &amp;nbsp; 15&amp;nbsp; &amp;nbsp; 
&amp;nbsp;2&lt;/P&gt;&lt;P&gt;___________&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;(I want to know which pairs of numbers repeat in the data, and how often each pair occurs)&lt;/P&gt;&lt;P&gt;How can I do that? Thank you very much!&lt;/P&gt;</description>
      <pubDate>Sun, 02 Sep 2018 19:01:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491917#M129166</guid>
      <dc:creator>Carmel</dc:creator>
      <dc:date>2018-09-02T19:01:58Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491918#M129167</link>
      <description>&lt;P&gt;So the fact that 12 and 13 occur in rows 3 and 4 is irrelevant? Could you explain further the logic as to why 12 and 13 don't appear in your output?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is it only pairs of numbers you are interested in and not triples or quadruples or higher?&lt;/P&gt;</description>
      <pubDate>Sun, 02 Sep 2018 18:57:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491918#M129167</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2018-09-02T18:57:28Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491919#M129168</link>
      <description>Sorry &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;BR /&gt;I edited the post</description>
      <pubDate>Sun, 02 Sep 2018 19:03:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491919#M129168</guid>
      <dc:creator>Carmel</dc:creator>
      <dc:date>2018-09-02T19:03:40Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491920#M129169</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Sample input: one row per ID, up to four values, "." marks a missing value. */
data have;
  input ID a1 a2 a3 a4;
  cards;
1      11    12    14    15     ...
2      11    13      .       .      ...
3      12    13    14      .      ...
4      12    14    15      .      ...
;

/* Write one row to TEMP for every pair (v1, v2) of non-missing values */
/* that appear together on the same row of HAVE.                       */
data temp;
  set have;
  array vals(*) a1-a4;
  do lo = 1 to dim(vals) - 1;
    do hi = lo + 1 to dim(vals);
      v1 = vals(lo);
      v2 = vals(hi);
      /* skip the pair when either member is missing */
      if nmiss(v1, v2) = 0 then output;
    end;
  end;
  keep v1 v2;
run;

/* Count the occurrences of each pair and keep the pairs seen more than once. */
proc sql;
  create table want as
    select v1, v2, count(*) as freq
      from temp
      group by v1, v2
      having freq&amp;gt;1;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 02 Sep 2018 19:53:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491920#M129169</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-09-02T19:53:28Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491934#M129179</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/230109"&gt;@Carmel&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;Sorry &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;BR /&gt;I edited the post&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;You may have edited the post, but I don't see where you have answered the question about 12 and 13.&lt;/P&gt;</description>
      <pubDate>Sun, 02 Sep 2018 22:14:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491934#M129179</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2018-09-02T22:14:45Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491939#M129181</link>
      <description>13 no longer exists in row 4. Check the data and output again.</description>
      <pubDate>Sun, 02 Sep 2018 23:26:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491939#M129181</guid>
      <dc:creator>Carmel</dc:creator>
      <dc:date>2018-09-02T23:26:48Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491945#M129184</link>
      <description>&lt;P&gt;Like this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE;
input ID    a1    a2    a3    a4;
cards;
1      11    12    14    15     
2      11    13      .       .  
3      12    13    14      .    
4      12    14    15      .    
run;

/* Build PAIRS: one row per value pair (A,B) that occurs on more than one row of HAVE, */
/* with FRQ holding the number of occurrences. Counting is done in a hash object.      */
data PAIRS; 
  keep A B FRQ;
  * 1- Define the hash table. 
  *    FRQ is incremented when keys are accessed;
  if _N_=1 then do;
    dcl hash H(suminc:'FRQ',ordered:'a');
    H.definekey('A','B');
    H.definedone();
    dcl hiter HI('H');
    call missing(A,B); 
    /* FRQ is the SUMINC variable: its value (1) is the amount added to a key summary on each access */
    FRQ=1;
  end;
  * 2- Fill the hash table. ;
  *    Methods check() or add() increment FRQ;
  /* NOTE(review): a variable list (A1-A4) combined with _temporary_ is unusual - confirm this ARRAY statement compiles and still maps to the columns read from HAVE */
  array VALUES(*) A1-A4 _temporary_;
  /* All of HAVE is read inside this loop, within a single iteration of the DATA step */
  do while(^LASTOBS);
    set HAVE end=LASTOBS;
    do I=1 to dim(VALUES)-1;
      do J=I+1 to dim(VALUES);
        A=VALUES[I];
        B=VALUES[J];
        RC=H.check() ;
        /* Add the pair only when check() did not find it (RC nonzero) and both values are    */
        /* non-missing, non-zero and different; note that a value of 0 is skipped by this test */
        if A &amp;amp; B &amp;amp; A ne B &amp;amp; RC then RC=H.add();  
      end;
    end;
  end;      
  * 3- Read the hash table and fetch FRQ for each row;
  *    Output if frequency &amp;gt; 1;
  RC=HI.first();  
  do while(^RC);
    /* sum() stores the accumulated summary for the current key (A,B) into FRQ */
    RC=H.sum(sum:FRQ);
    if FRQ &amp;gt; 1 then output;
    RC=HI.next();     
  end;     
  /* The input was consumed by the inner SET loop, so end the step explicitly */
  stop;
run;  
             

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;TABLE style="border-collapse: collapse; width: 144pt;" border="0" width="192" cellspacing="0" cellpadding="0"&gt;
&lt;TBODY&gt;
&lt;TR style="height: 15.0pt;"&gt;
&lt;TD width="64" height="20" style="height: 15.0pt; width: 48pt;"&gt;A&lt;/TD&gt;
&lt;TD width="64" style="width: 48pt;"&gt;B&lt;/TD&gt;
&lt;TD width="64" style="width: 48pt;"&gt;FRQ&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR style="height: 15.0pt;"&gt;
&lt;TD height="20" align="right" style="height: 15.0pt;"&gt;12&lt;/TD&gt;
&lt;TD align="right"&gt;14&lt;/TD&gt;
&lt;TD align="right"&gt;3&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR style="height: 15.0pt;"&gt;
&lt;TD height="20" align="right" style="height: 15.0pt;"&gt;12&lt;/TD&gt;
&lt;TD align="right"&gt;15&lt;/TD&gt;
&lt;TD align="right"&gt;2&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR style="height: 15.0pt;"&gt;
&lt;TD height="20" align="right" style="height: 15.0pt;"&gt;14&lt;/TD&gt;
&lt;TD align="right"&gt;15&lt;/TD&gt;
&lt;TD align="right"&gt;2&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Sep 2018 02:16:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491945#M129184</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2018-09-03T02:16:31Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491957#M129191</link>
      <description>You want all two way frequencies? Is there a fixed range of values that can be present in the variables? If so you want a distance type matrix, that shows all two way combinations. One way is to make all combinations and count them, shown by others. Another is to create a dummy variable framework and use a PROC CORR.</description>
      <pubDate>Mon, 03 Sep 2018 01:56:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491957#M129191</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-09-03T01:56:58Z</dc:date>
    </item>
    <item>
      <title>Re: duplicate values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491965#M129193</link>
      <description>&lt;P&gt;another hash:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Tally every pair of non-missing values found on the same row of HAVE in a hash object, */
/* then write the pairs that occur more than once to WANT.                                */
data _null_;
  if _N_ = 1 then do;
    declare hash pairs(ordered:'a');
    pairs.definekey('v1','v2');
    pairs.definedata('v1','v2','freq');
    pairs.definedone();
    call missing(v1, v2);
  end;

  set have end=last_row;
  array vals(*) a1-a4;

  do lo = 1 to dim(vals) - 1;
    do hi = lo + 1 to dim(vals);
      v1 = vals(lo);
      v2 = vals(hi);
      if nmiss(v1, v2) = 0 then do;
        /* find() returns 0 and loads the stored freq when the pair is already in the table */
        found = pairs.find();
        if found = 0 then freq = sum(freq, 1);
        else freq = 1;
        /* replace() stores the new count, adding the pair if it was not there yet */
        pairs.replace();
      end;
    end;
  end;

  /* after the last input row, dump the table, keeping only repeated pairs */
  if last_row then pairs.output(dataset:'want(where=(freq&amp;gt;1))');
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 03 Sep 2018 03:10:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/duplicate-values/m-p/491965#M129193</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-09-03T03:10:51Z</dc:date>
    </item>
  </channel>
</rss>

