I'm not 100% clear on your criteria, as some of them don't make sense to me, but this shows how to check for the difference in time and for a new diagnosis.
You should be able to create your variable as desired from this information.
/* Sample input: one row per patient visit, pipe-delimited.
   patientID     - patient key
   time_period   - visit time
   combined      - underscore-separated diagnosis codes recorded at the visit
   running_total - the supplied running count, kept for comparison */
data have;
    /* Declare every variable up front, in input order, so the INPUT
       statement needs no inline informats. */
    length patientID     $8
           time_period    8
           combined      $40
           running_total  8;
    infile datalines dlm='|';
    input patientID time_period combined running_total;
datalines;
wh7| 1| C400_C401_C408| 1
wh7| 4| C400_C401_C408_C409| 2
wh7| 10| C400_C401_C408_C409| 2
wh7| 15| C400_C401_C408| 2
wh4| 1| C030_C039_C031| 1
wh4| 2| C030_C039| 1
wh4| 5| C030_C049| 2
wh5| 1| F107_F108| 1
wh5| 3| F107_F109| 2
wh5| 5| F107_F109| 2
wh5| 7| F107_F108_F109|2
;
run;
/* For each patient (rows grouped by patientID, in visit order) derive:
     dif_time       - time elapsed since the patient's previous row
                      (missing on the patient's first row)
     nwords         - number of diagnosis codes in COMBINED
     diag_not_found - 1 when the row carries a code not seen on any earlier
                      row of the same patient, otherwise 0
     running_total  - 1 on the first row, incremented on every row that
                      introduces a new code
     diag_list      - every distinct code seen so far for the patient
   The running_total supplied in HAVE is kept as total_check so the two
   can be compared. */
data want;
    /* RENAME= requires its old=new pairs to be wrapped in parentheses. */
    set have (rename=(running_total=total_check));
    by patientID notsorted;

    /* diag_list accumulates codes across rows, so it needs more room than a
       single COMBINED value; enlarge if patients can have many codes. */
    length diag_list $200 diag $40;
    retain diag_list;

    /* DIF keeps a queue of the values it is called with, so it must run on
       every row; the cross-patient result is discarded below. */
    dif_time = dif(time_period);

    nwords = countw(combined, '_');
    diag_not_found = 0;

    if first.patientID then do;
        /* New patient: start a fresh code list and counter. */
        diag_list = combined;
        running_total = 1;
        dif_time = .;
    end;
    else do i = 1 to nwords;
        * see if each diagnosis on this row is already in the list;
        diag = scan(combined, i, '_');
        if findw(diag_list, diag, '_', 't') = 0 then do;
            diag_not_found = 1;
            /* Append rather than replace, so codes from earlier rows are
               not forgotten and counted a second time later on. */
            diag_list = catx('_', diag_list, diag);
        end;
    end;

    /* Sum statement: running_total is implicitly retained across rows. */
    if diag_not_found then running_total + 1;

    drop i diag;
run;
... View more