<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Compare values across multiple variables and flag duplicates in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376110#M90225</link>
    <description>&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;ID&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;cell(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;home(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;work(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Dup_phone&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;All_same&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Dup_from&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;CELL_HOME&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;CELL_WORK&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;HOME_WORK&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;ALL_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;NO_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;missing&lt;/TD&gt;&lt;TD&gt;missing&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;NO_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have above dataset with ID, cell, home, work variables. For each ID, I need to look if we have any duplicate phone information and and also identify which ones are duplicates. I need to be able to create last three columns of above data. MIssing values shouldnt be accounted for equality. Any quick logic using data step?&amp;nbsp; Thanks!&lt;/P&gt;</description>
    <pubDate>Fri, 14 Jul 2017 17:13:24 GMT</pubDate>
    <dc:creator>sivakoya</dc:creator>
    <dc:date>2017-07-14T17:13:24Z</dc:date>
    <item>
      <title>Compare values across multiple variables and flag duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376110#M90225</link>
      <description>&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;ID&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;cell(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;home(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;work(char-variable)&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Dup_phone&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;All_same&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Dup_from&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;CELL_HOME&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;CELL_WORK&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;HOME_WORK&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;Y&lt;/TD&gt;&lt;TD&gt;ALL_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;(YYY)YYY-YYYY&lt;/TD&gt;&lt;TD&gt;(ZZZ)ZZZ-ZZZZ&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;NO_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;(XXX)XXX-XXXX&lt;/TD&gt;&lt;TD&gt;missing&lt;/TD&gt;&lt;TD&gt;missing&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;N&lt;/TD&gt;&lt;TD&gt;NO_DUP&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have above dataset with ID, cell, home, work variables. For each ID, I need to look if we have any duplicate phone information and and also identify which ones are duplicates. I need to be able to create last three columns of above data. MIssing values shouldnt be accounted for equality. Any quick logic using data step?&amp;nbsp; Thanks!&lt;/P&gt;</description>
      <pubDate>Fri, 14 Jul 2017 17:13:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376110#M90225</guid>
      <dc:creator>sivakoya</dc:creator>
      <dc:date>2017-07-14T17:13:24Z</dc:date>
    </item>
    <item>
      <title>Re: Compare values across multiple variables and flag duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376132#M90239</link>
      <description>&lt;P&gt;something like this&lt;/P&gt;
&lt;PRE&gt;&lt;FONT face="Courier New" size="3"&gt;data have;
infile datalines truncover ;
informat id cell $12. home $12. work $12.;;
input 
ID cell $ home $ work $ ;
datalines;
1 (XXX)XXX-XXXX (XXX)XXX-XXXX (YYY)YYY-YYYY 
2 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
3 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (YYY)YYY-YYYY 
4 (XXX)XXX-XXXX (XXX)XXX-XXXX (XXX)XXX-XXXX 
5 (XXX)XXX-XXXX (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
6 (XXX)XXX-XXXX
;



proc sql;
select  cell, 
        home ,
         work,
		case when ((cell = home) and (cell is not missing and home is not missing))or 
          			((home =work) and (cell is not missing and home is not missing)) or
         			 ((cell =work)and (cell is not missing and home is not missing)) then 'Y'
        	else 'N'
		end as dup_phone,
		case when cell = home and home =work and cell is not missing and home is not missing and work is not missing then 'Y'
   			 else 'N'
		end as all_same, 
		case 
			when  cell = home and home =work and cell is not missing and home is not missing and work is not missing then 'ALL_DUP'
            when ((cell = home) and (cell is not missing and home is not missing)) then 'CELL_HOME'
            when((home =work) and (cell is not missing and home is not missing)) then 'HOME_WORK'
            when ((cell =work)and (cell is not missing and home is not missing)) then 'CELL_WORK'
            
			else 'NO_DUP'
       end as Dup_from 
	from  have;
&lt;/FONT&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 14 Jul 2017 18:31:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376132#M90239</guid>
      <dc:creator>kiranv_</dc:creator>
      <dc:date>2017-07-14T18:31:10Z</dc:date>
    </item>
    <item>
      <title>Re: Compare values across multiple variables and flag duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376228#M90277</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines truncover ;
informat id cell $12. home $12. work $12.;;
input 
ID cell $ home $ work $ ;
datalines;
1 (XXX)XXX-XXXX (XXX)XXX-XXXX (YYY)YYY-YYYY 
2 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
3 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (YYY)YYY-YYYY 
4 (XXX)XXX-XXXX (XXX)XXX-XXXX (XXX)XXX-XXXX 
5 (XXX)XXX-XXXX (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
6 (XXX)XXX-XXXX
;
run;

data want;
 if _n_=1 then do;
  length k $ 100;
  declare hash h();
  h.definekey('k');
  h.definedone();
 end;
set have;
length dup_phone all_same $ 1 dup_from  temp $ 200;
array x{*} $ cell home work;
do i=1 to dim(x);
  k=x{i};
  h.replace();
end;

if h.num_items=1 then do;
  if cmiss(of x{*})=0 then do;dup_phone='Y';dup_from='All_dup';end;
   else do;dup_phone='N';dup_from='All_missing';end;
  all_same='Y';
end;
 else do;
        dup_phone='N';dup_from='No_dup';
              do i=1 to dim(x)-1;
			   do j=i+1 to dim(x);
                 if not missing(x{i}) and not missing(x{j}) and x{i}=x{j} then do; 
                    yes=1; dup_phone='Y';temp=catx('|',temp,cats(vname(x{i}),'_',vname(x{j})));
                 end;
			   end;
			  end; 
        if yes then dup_from=temp; 
        all_same='N';
	  end;
h.clear();
drop i j k temp yes;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 15 Jul 2017 13:29:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-values-across-multiple-variables-and-flag-duplicates/m-p/376228#M90277</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-07-15T13:29:54Z</dc:date>
    </item>
  </channel>
</rss>

