Hi all,
I have 36 (3 codes per month) enrollment type codes and wanted to generate flags per month to show if one person had a particular enrollment type. The array code I use as below is so long. Is there anyway to simplify/shorten it?
/* Dummy data I have */
data have;
infile datalines truncover dsd;
input DummyID $ Enroll_TYPE_CD_01_01 Enroll_TYPE_CD_01_02 Enroll_TYPE_CD_01_03 Enroll_TYPE_CD_01_04 Enroll_TYPE_CD_01_05 Enroll_TYPE_CD_01_06
Enroll_TYPE_CD_01_07 Enroll_TYPE_CD_01_02 Enroll_TYPE_CD_01_09 Enroll_TYPE_CD_01_10 Enroll_TYPE_CD_01_11 Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 Enroll_TYPE_CD_02_02 Enroll_TYPE_CD_02_03 Enroll_TYPE_CD_02_04 Enroll_TYPE_CD_02_05 Enroll_TYPE_CD_02_06
Enroll_TYPE_CD_02_07 Enroll_TYPE_CD_02_02 Enroll_TYPE_CD_02_09 Enroll_TYPE_CD_02_10 Enroll_TYPE_CD_02_11 Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 Enroll_TYPE_CD_03_02 Enroll_TYPE_CD_03_03 Enroll_TYPE_CD_03_04 Enroll_TYPE_CD_03_05 Enroll_TYPE_CD_03_06
Enroll_TYPE_CD_03_07 Enroll_TYPE_CD_03_02 Enroll_TYPE_CD_03_09 Enroll_TYPE_CD_03_10 Enroll_TYPE_CD_03_11 Enroll_TYPE_CD_03_12
;
datalines;
Person01,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person02,1,1,1,1,1,1,2,1,1,1,1,1,,,,,,,3,,,,,,,,,,,,,,,,,
Person03,2,2,2,2,2,2,2,2,2,2,2,2,,,,,,,,,,,,,,,,,,,,,,,,
Person04,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Person05,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person06,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person07,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,
Person08,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person09,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person10,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,,,,,,,,,,,,,,,,,,,,,
Person11,2,2,2,2,2,2,2,2,2,2,2,2,,,,,,,,,,,,,,,,,,,,,,,,
Person12,,,,,,,2,2,2,2,2,2,,,,,,,,,,,,,,,,,,,,,,,,
Person13,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person14,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person15,2,2,2,2,2,2,2,2,2,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person16,1,1,1,1,1,1,2,2,2,1,1,1,,,,,,,1,,,,,,,,,,,,,,,,,
Person17,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,
Person18,2,2,2,2,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Person19,3,3,,,,,3,,,,,,2,2,3,3,3,3,,,,,,,,,2,2,2,2,3,,,,,
Person20,4,4,4,4,4,4,4,4,4,4,4,,,,,,,,,,,,,,,,,,,,,,,,,
;
run;
/* Code I use */
data have1;
set have;
array Enrl_tp_01 Enroll_TYPE_CD_01_01 Enroll_TYPE_CD_02_01 Enroll_TYPE_CD_03_01;
array Enrl_tp_02 Enroll_TYPE_CD_01_02 Enroll_TYPE_CD_02_02 Enroll_TYPE_CD_03_02;
array Enrl_tp_03 Enroll_TYPE_CD_01_03 Enroll_TYPE_CD_02_03 Enroll_TYPE_CD_03_03;
array Enrl_tp_04 Enroll_TYPE_CD_01_04 Enroll_TYPE_CD_02_04 Enroll_TYPE_CD_03_04;
array Enrl_tp_05 Enroll_TYPE_CD_01_05 Enroll_TYPE_CD_02_05 Enroll_TYPE_CD_03_05;
array Enrl_tp_06 Enroll_TYPE_CD_01_06 Enroll_TYPE_CD_02_06 Enroll_TYPE_CD_03_06;
array Enrl_tp_07 Enroll_TYPE_CD_01_07 Enroll_TYPE_CD_02_07 Enroll_TYPE_CD_03_07;
array Enrl_tp_08 Enroll_TYPE_CD_01_08 Enroll_TYPE_CD_02_08 Enroll_TYPE_CD_03_08;
array Enrl_tp_09 Enroll_TYPE_CD_01_09 Enroll_TYPE_CD_02_09 Enroll_TYPE_CD_03_09;
array Enrl_tp_10 Enroll_TYPE_CD_01_10 Enroll_TYPE_CD_02_10 Enroll_TYPE_CD_03_10;
array Enrl_tp_11 Enroll_TYPE_CD_01_11 Enroll_TYPE_CD_02_11 Enroll_TYPE_CD_03_11;
array Enrl_tp_12 Enroll_TYPE_CD_01_12 Enroll_TYPE_CD_02_12 Enroll_TYPE_CD_03_12;
array Grp_flg_01 flag_Enrlcat1_01_01 flag_Enrlcat1_02_01 flag_Enrlcat1_03_01;
array Grp_flg_02 flag_Enrlcat1_01_02 flag_Enrlcat1_02_02 flag_Enrlcat1_03_02;
array Grp_flg_03 flag_Enrlcat1_01_03 flag_Enrlcat1_02_03 flag_Enrlcat1_03_03;
array Grp_flg_04 flag_Enrlcat1_01_04 flag_Enrlcat1_02_04 flag_Enrlcat1_03_04;
array Grp_flg_05 flag_Enrlcat1_01_05 flag_Enrlcat1_02_05 flag_Enrlcat1_03_05;
array Grp_flg_06 flag_Enrlcat1_01_06 flag_Enrlcat1_02_06 flag_Enrlcat1_03_06;
array Grp_flg_07 flag_Enrlcat1_01_07 flag_Enrlcat1_02_07 flag_Enrlcat1_03_07;
array Grp_flg_08 flag_Enrlcat1_01_08 flag_Enrlcat1_02_08 flag_Enrlcat1_03_08;
array Grp_flg_09 flag_Enrlcat1_01_09 flag_Enrlcat1_02_09 flag_Enrlcat1_03_09;
array Grp_flg_10 flag_Enrlcat1_01_10 flag_Enrlcat1_02_10 flag_Enrlcat1_03_10;
array Grp_flg_11 flag_Enrlcat1_01_11 flag_Enrlcat1_02_11 flag_Enrlcat1_03_11;
array Grp_flg_12 flag_Enrlcat1_01_12 flag_Enrlcat1_02_12 flag_Enrlcat1_03_12;
do over Enrl_tp_01;
if Enrl_tp_01=1 then Grp_flg_01=1;
else Grp_flg_01=0;
end;
flag_Enrlcat1_201801=max(flag_Enrlcat1_01_01, flag_Enrlcat1_02_01, flag_Enrlcat1_03_01);
do over Enrl_tp_02;
if Enrl_tp_02=1 then Grp_flg_02=1;
else Grp_flg_02=0;
end;
flag_Enrlcat1_201802=max(flag_Enrlcat1_01_02, flag_Enrlcat1_02_02, flag_Enrlcat1_03_02);
do over Enrl_tp_03;
if Enrl_tp_03=1 then Grp_flg_03=1;
else Grp_flg_03=0;
end;
flag_Enrlcat1_201803=max(flag_Enrlcat1_01_03, flag_Enrlcat1_02_03, flag_Enrlcat1_03_03);
do over Enrl_tp_04;
if Enrl_tp_04=1 then Grp_flg_04=1;
else Grp_flg_04=0;
end;
flag_Enrlcat1_201804=max(flag_Enrlcat1_01_04, flag_Enrlcat1_02_04, flag_Enrlcat1_03_04);
do over Enrl_tp_05;
if Enrl_tp_05=1 then Grp_flg_05=1;
else Grp_flg_05=0;
end;
flag_Enrlcat1_201805=max(flag_Enrlcat1_01_05, flag_Enrlcat1_02_05, flag_Enrlcat1_03_05);
do over Enrl_tp_06;
if Enrl_tp_06=1 then Grp_flg_06=1;
else Grp_flg_06=0;
end;
flag_Enrlcat1_201806=max(flag_Enrlcat1_01_06, flag_Enrlcat1_02_06, flag_Enrlcat1_03_06);
do over Enrl_tp_07;
if Enrl_tp_07=1 then Grp_flg_07=1;
else Grp_flg_07=0;
end;
flag_Enrlcat1_201807=max(flag_Enrlcat1_01_07, flag_Enrlcat1_02_07, flag_Enrlcat1_03_07);
do over Enrl_tp_08;
if Enrl_tp_08=1 then Grp_flg_08=1;
else Grp_flg_08=0;
end;
flag_Enrlcat1_201808=max(flag_Enrlcat1_01_08, flag_Enrlcat1_02_08, flag_Enrlcat1_03_08);
do over Enrl_tp_09;
if Enrl_tp_09=1 then Grp_flg_09=1;
else Grp_flg_09=0;
end;
flag_Enrlcat1_201809=max(flag_Enrlcat1_01_09, flag_Enrlcat1_02_09, flag_Enrlcat1_03_09);
do over Enrl_tp_10;
if Enrl_tp_10=1 then Grp_flg_10=1;
else Grp_flg_10=0;
end;
flag_Enrlcat1_201810=max(flag_Enrlcat1_01_10, flag_Enrlcat1_02_10, flag_Enrlcat1_03_10);
do over Enrl_tp_11;
if Enrl_tp_11=1 then Grp_flg_11=1;
else Grp_flg_11=0;
end;
flag_Enrlcat1_201811=max(flag_Enrlcat1_01_11, flag_Enrlcat1_02_11, flag_Enrlcat1_03_11);
do over Enrl_tp_12;
if Enrl_tp_12=1 then Grp_flg_12=1;
else Grp_flg_12=0;
end;
flag_Enrlcat1_201812=max(flag_Enrlcat1_01_12, flag_Enrlcat1_02_12, flag_Enrlcat1_03_12);
run;
/* Dummy data I want to generate */
proc sql;
create table want as
select distinct DummyID
,flag_Enrlcat1_201801
,flag_Enrlcat1_201802
,flag_Enrlcat1_201803
,flag_Enrlcat1_201804
,flag_Enrlcat1_201805
,flag_Enrlcat1_201806
,flag_Enrlcat1_201807
,flag_Enrlcat1_201808
,flag_Enrlcat1_201809
,flag_Enrlcat1_201810
,flag_Enrlcat1_201811
,flag_Enrlcat1_201812
from have1
;quit;
Thank you!
Just use periods to mark the missing values and the data step to create the example data is much easier to write (and read).
data have;
input DummyID $
Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
datalines;
Person01 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person02 1 1 1 1 1 1 2 1 1 1 1 1 . . . . . . 3 . . . . . . . . . . . . . . . . .
Person03 2 2 2 2 2 2 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person04 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Person05 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person06 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person07 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . .
Person08 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person09 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person10 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 . . . . . . . . . . . . . . . . . . . . .
Person11 2 2 2 2 2 2 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person12 . . . . . . 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person13 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person14 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person15 2 2 2 2 2 2 2 2 2 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person16 1 1 1 1 1 1 2 2 2 1 1 1 . . . . . . 1 . . . . . . . . . . . . . . . . .
Person17 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . .
Person18 2 2 2 2 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person19 3 3 . . . . 3 . . . . . 2 2 3 3 3 3 . . . . . . . . 2 2 2 2 3 . . . . .
Person20 4 4 4 4 4 4 4 4 4 4 4 . . . . . . . . . . . . . . . . . . . . . . . . .
;
Since you seem to only want those FLAG_.... variables this should do it much easier (and clearer). You just need two arrays. One for the input variables and one for the output variables.
data want;
set have;
array enroll [3,12]
Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
array flag [12] flag_Enrlcat1_201801 - flag_Enrlcat1_201812 ;
do month=1 to 12;
do type=1 to 3 ;
flag[month]=max(0,flag[month],enroll[type,month]=1 );
end;
end;
keep dummyid flag_: ;
run;
f f f f f f f f f f f f l l l l l l l l l l l l a a a a a a a a a a a a g g g g g g g g g g g g _ _ _ _ _ _ _ _ _ _ _ _ E E E E E E E E E E E E n n n n n n n n n n n n r r r r r r r r r r r r l l l l l l l l l l l l c c c c c c c c c c c c a a a a a a a a a a a a t t t t t t t t t t t t 1 1 1 1 1 1 1 1 1 1 1 1 D _ _ _ _ _ _ _ _ _ _ _ _ u 2 2 2 2 2 2 2 2 2 2 2 2 m 0 0 0 0 0 0 0 0 0 0 0 0 m 1 1 1 1 1 1 1 1 1 1 1 1 O y 8 8 8 8 8 8 8 8 8 8 8 8 b I 0 0 0 0 0 0 0 0 0 1 1 1 s D 1 2 3 4 5 6 7 8 9 0 1 2 1 Person01 1 1 1 1 1 1 1 1 1 1 1 1 2 Person02 1 1 1 1 1 1 0 1 1 1 1 1 3 Person03 0 0 0 0 0 0 0 0 0 0 0 0 4 Person04 1 1 1 1 1 1 1 1 0 0 0 0 5 Person05 1 1 1 1 1 1 1 1 1 1 1 1 6 Person06 1 1 1 1 1 1 1 1 1 1 1 1 7 Person07 1 1 1 1 1 1 1 1 1 1 0 0 8 Person08 1 1 1 1 1 1 1 1 1 1 1 1 9 Person09 1 1 1 1 1 1 1 1 1 1 1 1 10 Person10 1 1 1 0 0 0 0 0 0 0 0 0 11 Person11 0 0 0 0 0 0 0 0 0 0 0 0 12 Person12 0 0 0 0 0 0 0 0 0 0 0 0 13 Person13 1 1 1 1 1 1 1 1 1 1 1 1 14 Person14 1 1 1 1 1 1 1 1 1 1 1 1 15 Person15 0 0 0 0 0 0 0 0 0 1 1 1 16 Person16 1 1 1 1 1 1 1 0 0 1 1 1 17 Person17 1 1 1 1 1 1 1 1 1 1 1 1 18 Person18 0 0 0 0 1 1 1 1 1 1 1 1 19 Person19 0 0 0 0 0 0 0 0 0 0 0 0 20 Person20 0 0 0 0 0 0 0 0 0 0 0 0
1st thing. I think you have a typo in your have data set. The variable Enroll_TYPE_CD_01_02 appears twice. One of them should be Enroll_TYPE_CD_01_08 right?
I simplified the code that creates the have data set a bit. Is the below data correct?
data have;
infile datalines truncover dsd;
input DummyID $ Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
datalines;
Person01,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person02,1,1,1,1,1,1,2,1,1,1,1,1, , , , , , ,3, , , , , , , , , , , , , , , , ,
Person03,2,2,2,2,2,2,2,2,2,2,2,2, , , , , , , , , , , , , , , , , , , , , , , ,
Person04,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , , , , , ,
Person05,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person06,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person07,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , , , ,
Person08,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person09,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person10,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1, , , , , , , , , , , , , , , , , , , , ,
Person11,2,2,2,2,2,2,2,2,2,2,2,2, , , , , , , , , , , , , , , , , , , , , , , ,
Person12, , , , , , ,2,2,2,2,2,2, , , , , , , , , , , , , , , , , , , , , , , ,
Person13,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person14,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person15,2,2,2,2,2,2,2,2,2,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person16,1,1,1,1,1,1,2,2,2,1,1,1, , , , , , ,1, , , , , , , , , , , , , , , , ,
Person17,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, , , , , , , , , , , ,
Person18,2,2,2,2,1,1,1,1,1,1,1,1, , , , , , , , , , , , , , , , , , , , , , , ,
Person19,3,3, , , , ,3, , , , , ,2,2,3,3,3,3, , , , , , , , ,2,2,2,2,3, , , , ,
Person20,4,4,4,4,4,4,4,4,4,4,4, , , , , , , , , , , , , , , , , , , , , , , , ,
;
Ok. This gives you the same result as your posted code
data want(drop = enroll: v);
set have;
array enroll Enroll_TYPE_CD_01_01 -- Enroll_TYPE_CD_03_12;
array flag{*} flag_Enrlcat1_201801 - flag_Enrlcat1_201812 (12 * 0);
call stdize('replace', 'mult=', 0, of flag[*], _N_);
do over enroll;
v = input(substr(vname(enroll), length(vname(enroll)) - 1), 8.);
if enroll = 1 then flag[v] = 1;
end;
run;
Result:
DummyID flag_Enrlcat1_201801 ... flag_Enrlcat1_201812 Person01 1 1 1 1 1 1 1 1 1 1 1 1 Person02 1 1 1 1 1 1 0 1 1 1 1 1 Person03 0 0 0 0 0 0 0 0 0 0 0 0 Person04 1 1 1 1 1 1 1 1 0 0 0 0 Person05 1 1 1 1 1 1 1 1 1 1 1 1 Person06 1 1 1 1 1 1 1 1 1 1 1 1 Person07 1 1 1 1 1 1 1 1 1 1 0 0 Person08 1 1 1 1 1 1 1 1 1 1 1 1 Person09 1 1 1 1 1 1 1 1 1 1 1 1 Person10 1 1 1 0 0 0 0 0 0 0 0 0 Person11 0 0 0 0 0 0 0 0 0 0 0 0 Person12 0 0 0 0 0 0 0 0 0 0 0 0 Person13 1 1 1 1 1 1 1 1 1 1 1 1 Person14 1 1 1 1 1 1 1 1 1 1 1 1 Person15 0 0 0 0 0 0 0 0 0 1 1 1 Person16 1 1 1 1 1 1 1 0 0 1 1 1 Person17 1 1 1 1 1 1 1 1 1 1 1 1 Person18 0 0 0 0 1 1 1 1 1 1 1 1 Person19 0 0 0 0 0 0 0 0 0 0 0 0 Person20 0 0 0 0 0 0 0 0 0 0 0 0
If you are going to use DO OVER why go the whole way and use implicit indexing for both arrays?
data do_over;
set have;
array enroll Enroll_TYPE_CD_01_01 -- Enroll_TYPE_CD_03_12;
array flag (month) flag_Enrlcat1_201801 - flag_Enrlcat1_201812 (12 * 0);
call stdize('replace', 'mult=', 0, of flag_: , _N_);
do over enroll;
month = input(substr(vname(enroll), length(vname(enroll)) - 1), 8.);
if enroll = 1 then flag = 1;
end;
drop enroll: month;
run;
@Tom Good question. Just a preference thing I guess. I like the implicit array in many situations. However I don't like when I'm forced to specify the index variable instead of _I_ explicitly. To me, It's not as slick anymore. Also, in this situation, I think it makes it more clear that flag is not a variable, but an array.
Speaking of slick, the 2d array is probably the way to go here. Crossed my mind, but I couldn't quite get there 🙂 One of the few good real-life uses of the 2d array.
Just use periods to mark the missing values and the data step to create the example data is much easier to write (and read).
data have;
input DummyID $
Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
datalines;
Person01 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person02 1 1 1 1 1 1 2 1 1 1 1 1 . . . . . . 3 . . . . . . . . . . . . . . . . .
Person03 2 2 2 2 2 2 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person04 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Person05 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person06 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person07 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . .
Person08 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person09 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person10 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 . . . . . . . . . . . . . . . . . . . . .
Person11 2 2 2 2 2 2 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person12 . . . . . . 2 2 2 2 2 2 . . . . . . . . . . . . . . . . . . . . . . . .
Person13 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person14 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person15 2 2 2 2 2 2 2 2 2 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person16 1 1 1 1 1 1 2 2 2 1 1 1 . . . . . . 1 . . . . . . . . . . . . . . . . .
Person17 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 . . . . . . . . . . . .
Person18 2 2 2 2 1 1 1 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . .
Person19 3 3 . . . . 3 . . . . . 2 2 3 3 3 3 . . . . . . . . 2 2 2 2 3 . . . . .
Person20 4 4 4 4 4 4 4 4 4 4 4 . . . . . . . . . . . . . . . . . . . . . . . . .
;
Since you seem to only want those FLAG_.... variables this should do it much easier (and clearer). You just need two arrays. One for the input variables and one for the output variables.
data want;
set have;
array enroll [3,12]
Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
array flag [12] flag_Enrlcat1_201801 - flag_Enrlcat1_201812 ;
do month=1 to 12;
do type=1 to 3 ;
flag[month]=max(0,flag[month],enroll[type,month]=1 );
end;
end;
keep dummyid flag_: ;
run;
f f f f f f f f f f f f l l l l l l l l l l l l a a a a a a a a a a a a g g g g g g g g g g g g _ _ _ _ _ _ _ _ _ _ _ _ E E E E E E E E E E E E n n n n n n n n n n n n r r r r r r r r r r r r l l l l l l l l l l l l c c c c c c c c c c c c a a a a a a a a a a a a t t t t t t t t t t t t 1 1 1 1 1 1 1 1 1 1 1 1 D _ _ _ _ _ _ _ _ _ _ _ _ u 2 2 2 2 2 2 2 2 2 2 2 2 m 0 0 0 0 0 0 0 0 0 0 0 0 m 1 1 1 1 1 1 1 1 1 1 1 1 O y 8 8 8 8 8 8 8 8 8 8 8 8 b I 0 0 0 0 0 0 0 0 0 1 1 1 s D 1 2 3 4 5 6 7 8 9 0 1 2 1 Person01 1 1 1 1 1 1 1 1 1 1 1 1 2 Person02 1 1 1 1 1 1 0 1 1 1 1 1 3 Person03 0 0 0 0 0 0 0 0 0 0 0 0 4 Person04 1 1 1 1 1 1 1 1 0 0 0 0 5 Person05 1 1 1 1 1 1 1 1 1 1 1 1 6 Person06 1 1 1 1 1 1 1 1 1 1 1 1 7 Person07 1 1 1 1 1 1 1 1 1 1 0 0 8 Person08 1 1 1 1 1 1 1 1 1 1 1 1 9 Person09 1 1 1 1 1 1 1 1 1 1 1 1 10 Person10 1 1 1 0 0 0 0 0 0 0 0 0 11 Person11 0 0 0 0 0 0 0 0 0 0 0 0 12 Person12 0 0 0 0 0 0 0 0 0 0 0 0 13 Person13 1 1 1 1 1 1 1 1 1 1 1 1 14 Person14 1 1 1 1 1 1 1 1 1 1 1 1 15 Person15 0 0 0 0 0 0 0 0 0 1 1 1 16 Person16 1 1 1 1 1 1 1 0 0 1 1 1 17 Person17 1 1 1 1 1 1 1 1 1 1 1 1 18 Person18 0 0 0 0 1 1 1 1 1 1 1 1 19 Person19 0 0 0 0 0 0 0 0 0 0 0 0 20 Person20 0 0 0 0 0 0 0 0 0 0 0 0
Hi,
This sort of problem where you use arrays to loop over codes and months can often be made much easier if you transpose your data into a vertical format, rather than wide. In this case, I would transpose the data to make each ID have 36 records (12 months * 3 codes per month). You could do that like:
data tran (keep=dummyid month code EnrollmentType) ;
set have ;
array codes{3,12}
Enroll_TYPE_CD_01_01 - Enroll_TYPE_CD_01_12
Enroll_TYPE_CD_02_01 - Enroll_TYPE_CD_02_12
Enroll_TYPE_CD_03_01 - Enroll_TYPE_CD_03_12
;
do month=1 to 12 ;
do code=1 to 3 ;
EnrollmentType=codes(code,month) ;
output ;
end ;
end ;
run ;
Once you have data in that format, it's straight forward to summarize by month, or code, or whatever you want. I think what you want is:
proc sql ;
select dummyid, month, max((EnrollmentType=1)) as EnrollmentCat1
from tran
group by dummyid,month
;
quit ;
Note that since your variable is named EnrollmentCat1, this suggests you have other enrollment categories of interest. So in you may want to transpose your data to have one record per dummyid, enrollmentCat, month, code.
Early in my career I was a fan of using arrays to loop over data like this. Then I was lucky to have a boss who pointed out that often if you restructure your data, the code to analyze the data becomes much easier.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.