Hi i am having table numbers i am having 3 variables id1,id2,id3 i want to check the data if id1 and id2 and id3 repeating
any where in the table having different clu should have the same clu number.
data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
output
a| | |1|1|24feb2012
|a| |2|1|24feb2012
| |a|3|1|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|5|24feb2012
Based on the main table i want to create new table clu_change table
SAS_ID|OLD_CLUSTER_ID|FROM_DATE|NEW_CLUSTER_ID|CHANGE_DATE
clu_change Table
2|2|24feb2012|1|29feb2012
3|3|24feb2012|1|29feb2012
6|6|24feb2012|5|29feb2012
In this table it should give the sasid that are changing and from which cluster they are in past and what are now in presend
and from date is the run_date of the main table and change_date is todays date.
My final dataset is not 'want' ,but 'clu_change' ;
in 'clu_change', NEW_CLUSTER_ID is clu which you mean.
You can rename it on your own, if you want.
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data clu_change(drop= household); set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; run;
Ksharp
If I don't make a mistake, I remembered I have coded it for you at last year.
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data temp; set want; by household; if not (first.household and last.household); run; data clu_change(drop=pan: household); set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; if not first.household then output; run;
Ksharp
ksharp actually you should modify in the variable clu but you have given in new variable household,if there is not change the clu variable should be as it is.
The output should be like this with out variable household can you do for it
Obs pan1 pan2 pan3 sas_id clu Run_date
1 a 1 1 24FEB2012
2 a 2 1 24FEB2012
3 a 3 1 24FEB2012
4 b b b 4 4 24FEB2012
5 e e 5 5 24FEB2012
6 x y e 6 5 24FEB2012
That would be more simple.
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data clu_change(drop= household); set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; run;
Ksharp
Hi ksharp i am attaching the file i want the output of the tables in that manner can you help me in it .
Be honest. I really not know what you want.
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data temp; set want; by household; if not (first.household and last.household); run; data clu_change(drop=pan: household); set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; if not first.household then output; run; title 'Output for Main Table'; title2 '----------------------'; proc print data=numbers;run; title 'Output for clu_change Table'; title2 '---------------------------'; proc print data=clu_change;run;
Ksharp
Hi ksharp the process for the clu_change table is correct but the output for the main table is wrong again ,for your refernce i am sending the text file please check it thqs for your respocene
OMG. Could you rename it by yourself?
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data temp; set want; by household; if not (first.household and last.household); run; data clu_change(drop=pan: household); set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; if not first.household then output; run; title 'Output for Main Table'; title2 '----------------------'; proc print data=numbers(rename=(id1-id3=pan1-pan3));run; title 'Output for clu_change Table'; title2 '---------------------------'; proc print data=clu_change;run;
Ksharp
ksharp actually my problem is not with the rename can you check the variables clu and household in the dataset want actually i dont want to change the clu variable unless there is any change in variables id1 id2 and id3.
Actually in want dataset the clu variable output should be
clu
1
1
1
4
5
5
I am asking about this i this there is a confusion so that is the reason i have send that text file to you...
Thqs for your help.
My final dataset is not 'want' ,but 'clu_change' ;
in 'clu_change', NEW_CLUSTER_ID is clu which you mean.
You can rename it on your own, if you want.
data Numbers; infile cards dsd missover dlm="|"; input id1$ id2$ id3$ sas_id clu Run_date; format Run_date date9.; informat Run_date date9.; cards; a| | |1|1|24feb2012 |a| |2|2|24feb2012 | |a|3|3|24feb2012 b|b|b|4|4|24feb2012 e|e| |5|5|24feb2012 x|y|e|6|6|24feb2012 ; run; data test; set numbers; rename id1=pan1 id2=pan2 id3=pan3 ; run; options compress=yes; data want(keep=pan1 pan2 pan3 sas_id clu Run_date household); declare hash ha(hashexp : 20,ordered : 'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date'); ha.definedone(); declare hash _ha(hashexp: 20,ordered : 'a'); _ha.definekey('key'); _ha.definedone(); do until(last); set test end=last; if cmiss(pan1,pan2,pan3) lt 3 then do; count+1; ha.add(); end; end; length key $ 40; array h{3} $ 40 pan1 pan2 pan3 ; _rc=hi.first(); do while(_rc eq 0); household+1; do i=1 to 3; if not missing(h{i}) then do; key=h{i}; _ha.replace();end; end; do until(x=1); x=1; rc=hi.first(); do while(rc=0); found=0; do j=1 to 3; key=h{j};rcc=_ha.check(); if rcc =0 then found=1; end; if found then do; do k=1 to 3; if not missing(h{k}) then do; key=h{k};_ha.replace();end; end; output;x=0; _count=count; end; rc=hi.next(); if found then rx=ha.remove(key : _count); end; end; _ha.clear(); _rc=hi.first(); end; run; data clu_change(drop= household); set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE)); by household; length NEW_CLUSTER_ID $ 40; retain NEW_CLUSTER_ID; retain CHANGE_DATE "&sysdate"d; format CHANGE_DATE date9.; if first.household then NEW_CLUSTER_ID= OLD_CLUSTER_ID; run;
Ksharp
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.