Hello ,
As per below data table 01 would like to create two data sets (table 02 and 03)
In one data set need to unique values if any location is selected then location_name values will be <var names> like in below table 02 , if any multiple locations marked then location_name have value = MULTIPLE
for another data set need if any location_name have value = MULTIPLE then those multiple location number with chronological order like table 03
Note : Locations number will give chronological order like NY=1 , NJ=2 , CA=3 .....
Table 01:
SUBNUM | VISNAME | TEST_DEC | TESTDAT | NY | NJ | CA | CH | DH | HY | MU | LV | BJ | MS | LN |
001 | VSIST1 | X | 16Apr2019 | X | ||||||||||
002 | VSIST1 | X | 06May2019 | X | ||||||||||
002 | VSIST2 | X | 13Jun2019 | X | ||||||||||
003 | VSIST1 | X | 22Apr2019 | X | ||||||||||
004 | VSIST1 | X | 11Sep2019 | |||||||||||
004 | VSIST2 | X | 05Nov2019 | X | X | |||||||||
004 | VSIST2 | X | 03Dec2019 | |||||||||||
005 | VSIST1 | X | 12Dec2019 | X | X | X | ||||||||
006 | VSIST1 | X | 30Oct2019 | |||||||||||
007 | VSIST1 | X | 23Oct2019 | X | ||||||||||
008 | VSIST1 | X | 02Jan2020 | X |
Data set 01/Table 02 :
SUBNUM | VISNAME | TEST_DEC | TESTDAT | LOCATION_NAME | YES/NO |
001 | VSIST1 | YES | 16Apr2019 | CA | YES |
002 | VSIST1 | YES | 06May2019 | HY | YES |
002 | VSIST2 | YES | 13Jun2019 | CH | YES |
003 | VSIST1 | YES | 22Apr2019 | CH | YES |
004 | VSIST1 | YES | 11Sep2019 | MISSING | NO |
004 | VSIST2 | YES | 05Nov2019 | MULTIPLE | YES |
004 | VSIST2 | YES | 03Dec2019 | MISSING | NO |
005 | VSIST1 | YES | 12Dec2019 | MULTIPLE | YES |
006 | VSIST1 | YES | 30Oct2019 | MS | YES |
007 | VSIST1 | YES | 23Oct2019 | CA | YES |
008 | VSIST1 | YES | 02Jan2020 | BJ | YES |
Data set 03 /Table 03:
SUBNUM | VISNAME | TEST_DEC | TESTDAT | LOCATION_NAME | YES/NO |
004 | VSIST2 | YES | 05Nov2019 | LOC_4 | YES |
004 | VSIST2 | YES | 05Nov2019 | LOC_6 | YES |
005 | VSIST1 | X | 12Dec2019 | LOC_5 | YES |
005 | VSIST1 | X | 12Dec2019 | LOC_8 | YES |
005 | VSIST1 | X | 12Dec2019 | LOC_10 | YES |
Here is a proposal to achieve this.
Let me know if that does meet your expectations!
Best,
data dataset1;
infile datalines dlm="09"x;
input SUBNUM VISNAME $ TEST_DEC $ TESTDAT:date9. NY $ NJ $ CA $ CH $ DH $ HY $ MU $ LV $ BJ $ MS $ LN $;
datalines;
001 VSIST1 X 16Apr2019 X
002 VSIST1 X 06May2019 X
002 VSIST2 X 13Jun2019 X
003 VSIST1 X 22Apr2019 X
004 VSIST1 X 11Sep2019
004 VSIST2 X 05Nov2019 X X
004 VSIST2 X 03Dec2019
005 VSIST1 X 12Dec2019 X X X
006 VSIST1 X 30Oct2019
007 VSIST1 X 23Oct2019 X
008 VSIST1 X 02Jan2020 X
;
run;
/*DATASET 2 */
data dataset2_temp1;
set dataset1;
by SUBNUM VISNAME TEST_DEC TESTDAT;
array _location (*) $ NY NJ CA CH DH HY MU LV BJ MS LN;
do i=1 to dim(_location);
if _location(i) ne '' then do;
LOCATION_NAME = vname(_location(i));
output;
end;
end;
if "X" not in _location then do;
LOCATION_NAME = 'MISSING';
output;
end;
drop i NY NJ CA CH DH HY MU LV BJ MS LN;
run;
proc transpose data=dataset2_temp1 out=dataset2_temp2 (drop= _name_);
var LOCATION_NAME;
by SUBNUM VISNAME TEST_DEC TESTDAT;
run;
data dataset2;
set dataset2_temp2;
format TESTDAT date9.;
length LOCATION_NAME $ 20;
LOCATION_NAME = catx(" ",of col:);
if countw(LOCATION_NAME) > 1 then LOCATION_NAME = 'MULTIPLE';
YES_NO = "YES";
if LOCATION_NAME = 'MISSING' then YES_NO = "NO";
drop col:;
run;
/*DATASET 3 */
data dataset3_temp1;
set dataset1;
format TESTDAT date9.;
array _loc (*) $ NY NJ CA CH DH HY MU LV BJ MS LN;
do i=1 to dim(_loc);
if _loc(i) ne '' then do;
LOCATION_NAME = compress("LOC_"||i);
YES_NO = "YES";
output;
end;
end;
drop NY NJ CA CH DH HY MU LV BJ MS LN;
run;
proc sql;
create table dataset3 as
select SUBNUM, VISNAME, TEST_DEC, TESTDAT, LOCATION_NAME, YES_NO
from dataset3_temp1
group by SUBNUM, VISNAME, TEST_DEC, TESTDAT
having count(*)> 1
order by SUBNUM, VISNAME, TEST_DEC, TESTDAT, i;
quit;
Here is a proposal to achieve this.
Let me know if that does meet your expectations!
Best,
data dataset1;
infile datalines dlm="09"x;
input SUBNUM VISNAME $ TEST_DEC $ TESTDAT:date9. NY $ NJ $ CA $ CH $ DH $ HY $ MU $ LV $ BJ $ MS $ LN $;
datalines;
001 VSIST1 X 16Apr2019 X
002 VSIST1 X 06May2019 X
002 VSIST2 X 13Jun2019 X
003 VSIST1 X 22Apr2019 X
004 VSIST1 X 11Sep2019
004 VSIST2 X 05Nov2019 X X
004 VSIST2 X 03Dec2019
005 VSIST1 X 12Dec2019 X X X
006 VSIST1 X 30Oct2019
007 VSIST1 X 23Oct2019 X
008 VSIST1 X 02Jan2020 X
;
run;
/*DATASET 2 */
data dataset2_temp1;
set dataset1;
by SUBNUM VISNAME TEST_DEC TESTDAT;
array _location (*) $ NY NJ CA CH DH HY MU LV BJ MS LN;
do i=1 to dim(_location);
if _location(i) ne '' then do;
LOCATION_NAME = vname(_location(i));
output;
end;
end;
if "X" not in _location then do;
LOCATION_NAME = 'MISSING';
output;
end;
drop i NY NJ CA CH DH HY MU LV BJ MS LN;
run;
proc transpose data=dataset2_temp1 out=dataset2_temp2 (drop= _name_);
var LOCATION_NAME;
by SUBNUM VISNAME TEST_DEC TESTDAT;
run;
data dataset2;
set dataset2_temp2;
format TESTDAT date9.;
length LOCATION_NAME $ 20;
LOCATION_NAME = catx(" ",of col:);
if countw(LOCATION_NAME) > 1 then LOCATION_NAME = 'MULTIPLE';
YES_NO = "YES";
if LOCATION_NAME = 'MISSING' then YES_NO = "NO";
drop col:;
run;
/*DATASET 3 */
data dataset3_temp1;
set dataset1;
format TESTDAT date9.;
array _loc (*) $ NY NJ CA CH DH HY MU LV BJ MS LN;
do i=1 to dim(_loc);
if _loc(i) ne '' then do;
LOCATION_NAME = compress("LOC_"||i);
YES_NO = "YES";
output;
end;
end;
drop NY NJ CA CH DH HY MU LV BJ MS LN;
run;
proc sql;
create table dataset3 as
select SUBNUM, VISNAME, TEST_DEC, TESTDAT, LOCATION_NAME, YES_NO
from dataset3_temp1
group by SUBNUM, VISNAME, TEST_DEC, TESTDAT
having count(*)> 1
order by SUBNUM, VISNAME, TEST_DEC, TESTDAT, i;
quit;
Thank you
Yes it's worked perfectly
Awesome!
Thank you @raja777pharma
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.