/* One more version, which is a little bit more I/O efficient. -- Bart */

/*
 * Publish the number of observations in CODES as the global macro
 * variable _NOBS_, so that a later step can size arrays with it.
 *
 * The SET statement is never executed (IF 0), but it is still compiled,
 * and SAS fills the NOBS= variable from the data set descriptor at
 * compile time. No observation of CODES is read by this step.
 *
 * NOTE(review): NOBS= relies on the descriptor; confirm CODES is a plain
 * data set (not a view) so the count is available.
 */
data _null_;
  if 0 then set codes nobs=nobs;
  call symputx('_NOBS_', nobs, "G");   /* "G" = store in the global symbol table */
  stop;                                /* one pass is enough; nothing is read or written */
run;
/*
 * Build WANT2: one observation per observation of SUBJECT, keeping ID and
 * DESCRIPTION and adding CODE, a blank-separated list of codes taken
 * from CODES.
 *
 * A code is assigned to a subject when
 *   - at least one phrase from the code's YES list occurs in DESCRIPTION, and
 *   - no phrase from the code's NO list occurs in DESCRIPTION.
 * YES and NO are blank-delimited lists; a phrase containing blanks must be
 * quoted (SCAN is called with the "q" modifier and the quotes are removed
 * with DEQUOTE). Matching ignores case.
 *
 * Requires the global macro variable _NOBS_ (number of observations in
 * CODES), which sizes the in-memory lookup arrays.
 */
data want2(keep = ID DESCRIPTION CD rename=(CD=CODE));
  /* Load CODES into memory once; the matching loop below then reads the
     arrays instead of re-reading the data set for every subject. */
  array _CODE[&_NOBS_.] $ 10  _temporary_;
  array _YES [&_NOBS_.] $ 100 _temporary_;
  array _NO  [&_NOBS_.] $ 100 _temporary_;

  do until(eof);
    set codes end=eof curobs=curobs;
    _CODE[curobs] = CODE;
    _YES[curobs]  = YES;
    _NO[curobs]   = NO;
  end;

  /* Declared after the SET above and before the first use below; the
     KEEP= option already limits the output, DROP just states the intent. */
  length Y N $ 50;
  drop yes no;

  /* Process every observation of SUBJECT. */
  do until(_EOF_);
    /* CUROBS=_N_ makes _N_ carry the SUBJECT observation number, which is
       what SAS prints in the log if an error occurs in this step. */
    set subject end=_EOF_ curobs=_N_;

    /* NOTE(review): CD holds at most 100 characters; confirm that is
       enough for the longest expected list of codes. */
    length CD $ 100;
    CD = "";

    /* Try every code stored in the arrays against this DESCRIPTION. */
    do point = 1 to &_NOBS_.;
      CODE = _CODE[point];
      YES  = _YES[point];
      NO   = _NO[point];

      /* Look for the first YES phrase present in DESCRIPTION.
         The 't' modifier is essential: Y is a fixed-length $50 variable,
         and without it FIND would also search for Y's trailing pad blanks. */
      MATCHED = 0;
      i = 1;
      Y = dequote(scan(YES, i, " ", "q"));
      do while ((Y ne "") and (not MATCHED));
        if find(DESCRIPTION, Y, 'it') then MATCHED = 1;
        else do;
          i = i + 1;
          Y = dequote(scan(YES, i, " ", "q"));
        end;
      end;

      /* The NO list does not depend on which YES phrase matched, so it is
         checked once per code, and the code is appended at most once. */
      if MATCHED then do;
        NEGATIVE = 0;
        j = 1;
        N = dequote(scan(NO, j, " ", "q"));
        do while ((N ne "") and (not NEGATIVE));
          if find(DESCRIPTION, N, 'it') then NEGATIVE = 1;   /* one negation is enough */
          else do;
            j = j + 1;
            N = dequote(scan(NO, j, " ", "q"));
          end;
        end;

        /* No negation found: add this code to the subject's list. */
        if not NEGATIVE then CD = catx(" ", CD, CODE);
      end;
    end;

    output;
  end;

  stop;
run;
/* NOTE: the source text appears to have been truncated here ("View more"). */