BookmarkSubscribeRSS Feed
sivakoya
Obsidian | Level 7
IDcell(char-variable)home(char-variable)work(char-variable)Dup_phoneAll_sameDup_from
1(XXX)XXX-XXXX(XXX)XXX-XXXX(YYY)YYY-YYYYYNCELL_HOME
2(ZZZ)ZZZ-ZZZZ(YYY)YYY-YYYY(ZZZ)ZZZ-ZZZZYNCELL_WORK
3(ZZZ)ZZZ-ZZZZ(YYY)YYY-YYYY(YYY)YYY-YYYYYNHOME_WORK
4(XXX)XXX-XXXX(XXX)XXX-XXXX(XXX)XXX-XXXXYYALL_DUP
5(XXX)XXX-XXXX(YYY)YYY-YYYY(ZZZ)ZZZ-ZZZZNNNO_DUP
6(XXX)XXX-XXXXmissingmissingNNNO_DUP

 

 

I have above dataset with ID, cell, home, work variables. For each ID, I need to look if we have any duplicate phone information and and also identify which ones are duplicates. I need to be able to create last three columns of above data. MIssing values shouldnt be accounted for equality. Any quick logic using data step?  Thanks!

2 REPLIES 2
kiranv_
Rhodochrosite | Level 12

something like this

data have;
infile datalines truncover ;
informat id cell $12. home $12. work $12.;;
input 
ID cell $ home $ work $ ;
datalines;
1 (XXX)XXX-XXXX (XXX)XXX-XXXX (YYY)YYY-YYYY 
2 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
3 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (YYY)YYY-YYYY 
4 (XXX)XXX-XXXX (XXX)XXX-XXXX (XXX)XXX-XXXX 
5 (XXX)XXX-XXXX (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
6 (XXX)XXX-XXXX
;



proc sql;
select  cell, 
        home ,
         work,
		case when ((cell = home) and (cell is not missing and home is not missing))or 
          			((home =work) and (cell is not missing and home is not missing)) or
         			 ((cell =work)and (cell is not missing and home is not missing)) then 'Y'
        	else 'N'
		end as dup_phone,
		case when cell = home and home =work and cell is not missing and home is not missing and work is not missing then 'Y'
   			 else 'N'
		end as all_same, 
		case 
			when  cell = home and home =work and cell is not missing and home is not missing and work is not missing then 'ALL_DUP'
            when ((cell = home) and (cell is not missing and home is not missing)) then 'CELL_HOME'
            when((home =work) and (cell is not missing and home is not missing)) then 'HOME_WORK'
            when ((cell =work)and (cell is not missing and home is not missing)) then 'CELL_WORK'
            
			else 'NO_DUP'
       end as Dup_from 
	from  have;
Ksharp
Super User
data have;
infile datalines truncover ;
informat id cell $12. home $12. work $12.;;
input 
ID cell $ home $ work $ ;
datalines;
1 (XXX)XXX-XXXX (XXX)XXX-XXXX (YYY)YYY-YYYY 
2 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
3 (ZZZ)ZZZ-ZZZZ (YYY)YYY-YYYY (YYY)YYY-YYYY 
4 (XXX)XXX-XXXX (XXX)XXX-XXXX (XXX)XXX-XXXX 
5 (XXX)XXX-XXXX (YYY)YYY-YYYY (ZZZ)ZZZ-ZZZZ 
6 (XXX)XXX-XXXX
;
run;

data want;
 if _n_=1 then do;
  length k $ 100;
  declare hash h();
  h.definekey('k');
  h.definedone();
 end;
set have;
length dup_phone all_same $ 1 dup_from  temp $ 200;
array x{*} $ cell home work;
do i=1 to dim(x);
  k=x{i};
  h.replace();
end;

if h.num_items=1 then do;
  if cmiss(of x{*})=0 then do;dup_phone='Y';dup_from='All_dup';end;
   else do;dup_phone='N';dup_from='All_missing';end;
  all_same='Y';
end;
 else do;
        dup_phone='N';dup_from='No_dup';
              do i=1 to dim(x)-1;
			   do j=i+1 to dim(x);
                 if not missing(x{i}) and not missing(x{j}) and x{i}=x{j} then do; 
                    yes=1; dup_phone='Y';temp=catx('|',temp,cats(vname(x{i}),'_',vname(x{j})));
                 end;
			   end;
			  end; 
        if yes then dup_from=temp; 
        all_same='N';
	  end;
h.clear();
drop i j k temp yes;
run;

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 2 replies
  • 2922 views
  • 0 likes
  • 3 in conversation