BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
sas_Forum
Calcite | Level 5

Hi i am having table numbers i am having 3 variables id1,id2,id3 i want to check the data if id1 and id2 and id3 repeating
any where in the table having different clu should have the same clu number.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;


output

a| | |1|1|24feb2012
|a| |2|1|24feb2012
| |a|3|1|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|5|24feb2012

Based on the main table i want to create new table clu_change table

SAS_ID|OLD_CLUSTER_ID|FROM_DATE|NEW_CLUSTER_ID|CHANGE_DATE

clu_change Table


2|2|24feb2012|1|29feb2012
3|3|24feb2012|1|29feb2012
6|6|24feb2012|5|29feb2012

In this table it should give the sasid that are changing and from which cluster they are in past and what are now in presend
and from date is the run_date of the main table and change_date is todays date.

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

My final dataset is not 'want' ,but 'clu_change' ;

in 'clu_change', NEW_CLUSTER_ID is clu which you mean.

You can rename it on your own, if you want.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;

data clu_change(drop= household);
 set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
run;

Ksharp

View solution in original post

9 REPLIES 9
Ksharp
Super User

If I don't make a mistake, I remembered I have coded it for you at last year.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;
data temp;
 set want;
 by household;
 if not (first.household and last.household);
run;
data clu_change(drop=pan: household);
 set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
 if not first.household then output;
run;



Ksharp

sas_Forum
Calcite | Level 5

ksharp actually you should modify in the variable clu but you have given in new variable household,if there is not change the clu variable should be as it is.

The output should be like this with out variable household can you do for it


                               Obs    pan1    pan2    pan3    sas_id    clu     Run_date   

                                1      a                         1       1     24FEB2012          
                                2              a                 2       1     24FEB2012         
                                3                      a         3       1     24FEB2012          
                                4      b       b       b         4       4     24FEB2012         
                                5      e       e                 5       5     24FEB2012          
                                6      x       y       e         6       5     24FEB2012        

Ksharp
Super User

That would be more simple.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;

data clu_change(drop= household);
 set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
run;


Ksharp

sas_Forum
Calcite | Level 5

Hi ksharp i am attaching the file i want the output of the tables in that manner can you help me in it .

Ksharp
Super User

Be honest. I really not know what you want.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;
data temp;
 set want;
 by household;
 if not (first.household and last.household);
run;
data clu_change(drop=pan: household);
 set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
 if not first.household then output;
run;

title 'Output for Main Table';
title2 '----------------------';
proc print data=numbers;run;
title 'Output for clu_change Table';
title2 '---------------------------';
proc print data=clu_change;run;

Ksharp

sas_Forum
Calcite | Level 5

Hi ksharp the process for the clu_change table is correct but the output for the main table is wrong again ,for your refernce i am sending the text file please check it thqs for your respocene

Ksharp
Super User

OMG. Could you rename it by yourself?

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;
data temp;
 set want;
 by household;
 if not (first.household and last.household);
run;
data clu_change(drop=pan: household);
 set temp(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
 if not first.household then output;
run;

title 'Output for Main Table';
title2 '----------------------';
proc print data=numbers(rename=(id1-id3=pan1-pan3));run;
title 'Output for clu_change Table';
title2 '---------------------------';
proc print data=clu_change;run;


Ksharp

sas_Forum
Calcite | Level 5

ksharp actually my problem is not with the rename can you check the variables  clu and household in the dataset want actually i dont want to change the clu variable unless there is any change in  variables id1 id2 and id3.

Actually in want dataset the clu variable output should be

clu

1

1

1

4

5

5

I am asking about this i this there is a confusion so that is the reason i have send that text file to you...

Thqs for your help.

Ksharp
Super User

My final dataset is not 'want' ,but 'clu_change' ;

in 'clu_change', NEW_CLUSTER_ID is clu which you mean.

You can rename it on your own, if you want.

data Numbers;
infile cards dsd missover dlm="|";
input id1$ id2$ id3$ sas_id clu Run_date;
format Run_date date9.;
informat Run_date date9.;
cards;
a| | |1|1|24feb2012
|a| |2|2|24feb2012
| |a|3|3|24feb2012
b|b|b|4|4|24feb2012
e|e| |5|5|24feb2012
x|y|e|6|6|24feb2012
;
run;
data test;
 set numbers;
 rename id1=pan1 id2=pan2 id3=pan3 ;
run;

 

options compress=yes;
data want(keep=pan1 pan2 pan3 sas_id clu Run_date household);
 declare hash ha(hashexp : 20,ordered : 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','sas_id', 'clu', 'Run_date');
  ha.definedone();
 declare hash _ha(hashexp: 20,ordered : 'a');
  _ha.definekey('key');
  _ha.definedone();

do until(last);
 set test end=last; 
 if cmiss(pan1,pan2,pan3) lt 3 then do;
                                           count+1;
                                           ha.add();
                                         end;
end;

length key $ 40;
array h{3} $ 40 pan1 pan2 pan3 ;
_rc=hi.first();
do while(_rc eq 0);
 household+1;
 do i=1 to 3;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      found=0;
      do j=1 to 3;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 3;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;
                    end;
      rc=hi.next();
      if found then rx=ha.remove(key : _count);
     end;
 end;
 _ha.clear();
_rc=hi.first();
end; 
run;

data clu_change(drop= household);
 set want(rename=(clu=OLD_CLUSTER_ID run_date=FROM_DATE));
 by  household;
 length NEW_CLUSTER_ID $ 40;
 retain  NEW_CLUSTER_ID;
 retain CHANGE_DATE "&sysdate"d;
 format CHANGE_DATE date9.;
 if first.household then  NEW_CLUSTER_ID= OLD_CLUSTER_ID;
run;

Ksharp

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 9 replies
  • 1285 views
  • 0 likes
  • 2 in conversation