Hello,
my solution is based on reading data twice, first in order to create the maximum number of observation per id, and second to generate the transposed data:
data inp;
infile datalines missover;
input @;
if anyalpha(substr(_infile_,3,4)) then do;
input @3 id:$8. City $ 11-18 date:mmddyy. Diagnosis Procedure;
end;
else do;
input @30 Diagnosis Procedure;
end;
format date mmddyy.;
datalines;
1 mc21898 City, NM 4/28/2007 4.74 368.47
2 . . . 86.35 326.87
3 mc59002 City, UT 8/8/2007 88.61 966.07
4 . . . 49.22 625.9
5 . . . 333.18
6 . . . 922.09
7 . . . 847.55
8 mc21898 City, NM 6/4/2007 41.17 869.38
9 . . . 43.26 193.39
10 . . . 86.35 799.5
11 . . . 4.06 641.92
12 . . . 536.27
13 . . . 971.87
14 mc14473 Cty , AZ 8/16/2007 64.11 60.2
15 . . . 91.56 663.38
16 . . . 91.58 800.32
17 . . . 684.24
18 . . . 529.19
19 . . . 747.11
20 . . . 849.6
;
run;
data out ;
retain rec_id city_from date_service ;
do until (last);
set inp (keep=id) end=last;
by id notsorted;
if first.id and ^missing(id) then count=1;
else count+1;
end;
call symput('new',put(count,7.));
array diag{&new};
array proc{&new};
do until (n_last);
set inp (rename=(id= old_id City =old_City date=old_date)) end=n_last;
by old_id notsorted;
if first.old_id and ^missing(old_id) and ^missing(lag(Diagnosis)) then output;
if first.old_id and ^missing(old_id) then do;
n_count=1;
rec_id=old_id;
city_from=old_City;
date_service=old_date;
end;
else n_count+1;
Diag{n_count}=Diagnosis;
Proc{n_count}=Procedure;
if n_last then output;
end;
format date_service mmddyy10.;
drop old_id old_City old_date id count n_count Diagnosis Procedure;
run;
Marius