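I ended up using a variation of this approach. Because the final dataset is subject-level (one row per participant), the data cannot stay in long format, so the visits are split out, de-duplicated, and then collapsed back into a single string per ID.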
/* Build the sample dataset HAVE: one row per participant, with comma-separated
   visit lists stored as character strings. PPCENSVSx is the unsorted list that
   may contain repeats; PPCENSVS is the expected result (sorted, no repeats). */
data have;
/* Fields are separated by '*'. TRUNCOVER keeps INPUT from jumping to the next
   record when a line has fewer fields than variables (see the short records
   for IDs 4-6); the unread variables are left missing instead. */
infile datalines dlm= '*' truncover;
label
ID = "Participant ID"
AVSLST = "All Visits Completed"
PPCENSVSx = "Censored Visits (have)"
PPCENSVS = "Censored Visits (want)"
;
/* Declared before INPUT so the list variables are 30 bytes wide rather than
   the default character length; the longest list below is exactly 30 bytes. */
length AVSLST PPCENSVSx PPCENSVS $30.;
/* ID is read with modified list input (:$6.); the remaining fields use plain
   list input and take their lengths from the LENGTH statement above. */
input ID :$6. AVSLST $ PPCENSVSx $ PPCENSVS $;
/* NOTE(review): IDs 4-6 look like deliberate edge cases (no lists, one list,
   a blank-only field followed by a value) - confirm how the blank-only field
   on the last record is assigned before relying on it. */
datalines;
1 * 01, 02, 03, 04, 05, 06, 07, 08 * 06, 12, 10 * 06, 10, 12
2 * 01, 02, 03, 04, 05, 06, 07 * 04, 05, 06, 07, 04, 02 * 02, 04, 05, 06, 07
3 * 01, 02, 03, 05, 06, 07, 08 * 07, 08, 03, 07, 04 * 03, 04, 07, 08
4
5 *01
6 * *01
;
/* Explode PPCENSVSx into long format: one output row per listed visit.
   Every variable from HAVE is carried along, plus the loop counter i and
   the extracted two-character visit code VISNO. */
data long;
    set have;
    length VISNO $2.;

    /* Number of items = number of commas + 1. A blank list therefore still
       yields a single row with a blank VISNO, which keeps participants that
       have no censored visits in the output. */
    do i = 1 to (countc(PPCENSVSx, ',') + 1);
        /* SCAN with its default delimiters picks out the i-th visit code. */
        VISNO = scan(PPCENSVSx, i);
        output;
    end;
run;
/* Order the visits within each participant and keep only the first row for
   each ID/VISNO pair, so repeated visits are removed. Result goes to LONG2. */
proc sort data=long
          out=long2
          nodupkey;
    by ID VISNO;
run;
/* Collapse LONG2 back to subject level: one row per ID whose PPCENSVS holds
   the sorted, de-duplicated visit codes joined with ", ".
   Input : LONG2, sorted by ID VISNO with duplicate keys removed.
   Output: WANT with variables ID and PPCENSVS. */
data want;
    /* 54 bytes holds up to 14 two-character codes with ", " separators. */
    length PPCENSVS $54.;

    /* DOW loop: read every row of one ID within a single DATA step iteration
       so the accumulator can grow across that ID's rows. */
    do until (last.ID);
        /* Read only ID and VISNO. LONG2 still carries the original PPCENSVS
           column from HAVE; reading it here would overwrite the accumulator
           on every SET and leave "stored value + last VISNO" as the result. */
        set long2 (keep= ID VISNO);
        by ID;
        /* CATX skips blank arguments, so the first code gets no leading
           separator and blank VISNO rows contribute nothing. */
        PPCENSVS = catx(', ', PPCENSVS, VISNO);
    end;

    /* The implicit OUTPUT at the end of the step writes one row per ID. */
    keep ID PPCENSVS;
run;
... View more