Since there are multiple issues involved, I thought they should be handled separately. 1. work to fill in the missing middle names 2. combining zip codes into 1obs per person 3. combining other information into 1obs (I processed this as a SSN unique key)
Please forgive the abbreviations for variable names and types.
/* CREATE TESTDATA */
data original;
attrib fn length=$20 label='FIRST_NAME';
attrib mn length=$20 label='MIDDLE_NAME';
attrib ln length=$20 label='LAST_NAME';
attrib sex length=$1 label='Sex';
attrib dob length=$10 label='DOB';
attrib ssn length=$20 label='SSN';
attrib hsys length=$20 label='health_sys';
attrib mrn length=$20 label='MRN';
attrib mrnid length=$20 label='hsys_MRN_ID';
attrib zip length=$5 label='MIDBZip';
infile cards missover dsd;
input fn mn ln sex dob ssn hsys mrn mrnid zip $;
cards;
John,Q,Public,M,1/1/2010,987654321,Mercy,2222222,Mercy_2222222,34567
John,Q,Public,M,1/1/2010,987654321,Mercy,2222222,Mercy_2222222,34577
John, ,Public,M,1/1/2010,987654321,Saint,333333,Saint_333333,34568
John, ,Public,M,1/1/2010,987654321,Saint,333333,Saint_333333,34577
;
run;
/* BASE DATASET:START */
proc sort data=original out=sorted;
by ssn descending mn;
run;
/* BASE DATASET:END */
/* ZIP DATASET:START */
proc sort data=sorted out=tmp1(keep=ssn zip) nodupkey;
by ssn zip;
run;
data tmp2;
set tmp1; length abc $1;
by ssn;
cnt+1;
abc=byte(64+cnt);
run;
proc transpose data=tmp2 out=zip(keep=ssn zip_:) prefix=zip_;
var zip;
by ssn;
ID abc;
run;
/* ZIP DATASET:END */
/* PERSONALINFO DATASET:START */
data tmp3(drop=middlename zip cnt);
set sorted;
retain middlename;
by ssn descending mn;
if first.ssn then do
middlename=mn;
cnt=1;
end; else
do;
cnt+1;
end;
if mn='' then mn=middlename;
run;
proc sort data=tmp3 out=personal_all nodup;
by ssn;
run;
proc sort data=personal_all out=personal(keep=fn--ssn) force;
by ssn;
run;
proc sort data=personal nodup;
by ssn;
run;
/* PERSONALINFO DATASET:END */
/* TRANSPOSE BY VARIABLES:START */
proc sort data=personal_all out=trn(keep=ssn--mrnid) force;
by ssn;
run;
proc transpose data=trn out=trn_hsys(keep=ssn hsys_:) prefix=hsys_;
var hsys;
by ssn;
run;
proc transpose data=trn out=trn_mrn(keep=ssn mrn_:) prefix=mrn_;
var mrn;
by ssn;
run;
proc transpose data=trn out=trn_mrnid(keep=ssn mrnid_:) prefix=mrnid_;
var mrnid;
by ssn;
run;
/* TRANSPOSE:END */
/* MERGE ALL */
data transposed;
merge personal trn_: zip;
by ssn;
run;
... View more