Reading in and cleaning the registry dataset (archive file with very messy data!)
Data Local.Full_Registry ; /* re-order,rename, and upcase fields*/ Attrib Registry_ID Format=5. CertKey_B Format=$12. Label="Birth Certificate Key" Year Format=4. BState Format=$2. Sex_R Format=$1. Kid_Full_R Format=$200. Label="" Kid_First_R Format=$50. Label="" Kid_Middle_R Format=$50. Label="" Kid_First_Middle_R Format=$100. Kid_Last_R Format=$50. Label="" Kid_Suffix_R Format=$10. Label="" Mom_Full_R Format=$200. Label="" Mom_First_R Format=$50. Label="" Mom_Middle_R Format=$50. Label="" Mom_First_Middle_R Format=$100. Mom_Last_R Format=$50. Label="" Mom_Maiden_R Format=$50. Label="" DoB_Num_R Format=10. Label="Numeric Kid DoB YYYYMMDD" MDoB_Num_R Format=10. Label="Numeric Mom DoB YYYYMMDD" DoB_R Format=YYMMDD10. Label="Kid DoB YYYYMMDD" ; Set Local.Registry_Backup (Rename=( CertKey = CertKey_B FirstName = Kid_First_R MiddleName = Kid_Middle_R LastName = Kid_Last_R ChildsSuffix = Kid_Suffix_R MotherFirstName = Mom_First_R MotherMiddleName = Mom_Middle_R MotherLastName = Mom_Last_R MotherMaidenName = Mom_Maiden_R /* Sex = Sex_R*/ DateOfBirth = DoB_R MotherDateofBirth = MDoB_R )); Registry_ID = _N_; IDoB_Yr_R = Year; If Gender = 1 Then Sex_R = Put("M",$1.); Else If Gender = 2 Then Sex_R = Put("F",$1.); Else Sex_R = Put("U",$1.);
Kid_Suffix_R = Put(Compress(Kid_Suffix_R),$10.); BState = Substr(CertKey_B,5,2);
If Mom_First_R In ( "24838", "26248", "26881", "27549", "27760", "27920", "28048", "28508", "28671", "28762", "28917", "29053", "29293", "29501", "29560", "29859", "29974", "30157", "30822", "30982", "31723", "31775", "32097", "32165", "32537", "33939", "34027", "34048", "34118", "34344", "34347") And Missing(MDoB_R)=1 Then Do; If Mom_First_R = Mom_Last_R Then Mom_Last_R=""; MDoB = 1 * Mom_First_R; Mom_First_R = ""; End; If Mom_First_R In ( "19720117", "19780911", "19790628", "19791207", "19820310", "19850410", "19861006", "19880113", "19921005", "19960807", "20110309", "20110413", "20110601", "20110607", "20110608", "20110614", "20110622", "20110708", "20110804", "20110822", "20110912", "20110914", "20110917", "20111220", "20111230", "20120104", "20120116", "20120119", "20120202", "20120208", "20120213", "20120309", "20120319", "20120314", "20120319", "20120408", "20120411", "20120412", "20120525", "20120610", "20120613", "20120625", "20120711", "20120724", "20120803", "20120814", "20120822", "20120823", "20120903", "20120913", "20120923", "20121108", "20121205", "20130104", "20130308", "20130327", "20130606", "20130608", "20130611", "20130617", "20130710", "20131016", "20131119", "20140101", "20140108", "20140124", "20140224", "20140307", "20140320", "20140409", "20140421", "20140430", "20140512", "20140514", "20140530", "20140611", "20140613", "20140620", "20140808", "20140811", "20140813", "20140910", "20140923", "20141116", "20141206", "20141208", "20141216", "20150114", "20150130", "20150211", "20150221", "20150310", "20150311", "20150321", "201203119") And Missing(MDoB)=1 Then Do; If Mom_First_R = Mom_Last_R Then Mom_Last_R=""; MDoB_Yr = Substr(Mom_First_R,1,4)*1; MDoB_Mo = Substr(Mom_First_R,5,2)*1; MDoB_Dy = Substr(Mom_First_R,7,2)*1; Call Missing(Mom_First_R); Call Missing(Mom_Last_R); Call Missing(Mom_Maiden_R);; End; If Mom_Last_R In ("NEITHER NEWBORN","1ST BORN","0","1","#N/A","-") Then Call Missing(Mom_Last_R); If Mom_Last_R EQ ".COACHMAN" Then Mom_Last_R = "COACHMAN";
* MotherDate of Birth is as YYYYMMDD;
If Missing(MDoB_R)=0 Then MDoB_Num_R =((100*Year (MDoB_R)) + Month(MDoB_R))*100 + Day (MDoB_R); Drop MDoB_Yr MDoB_Mo MDoB_Dy;
* Infant Date of Birth - for matching; DoB_Num_R =((100*Year (DoB_R)) + Month(DoB_R))*100 + Day (DoB_R);
If Substr(Kid_Last_R,1,9) EQ "HUPPERICH" Then Do; Kid_Last_R = "HUPPERICH"; Kid_Suffix_R = "JR"; End;
If Mom_Last_R eq "(PRICE)CAPEN" Then Mom_Last_R = "PRICE-CAPEN"; If Mom_Last_R eq "(YAHIAOUI) FOTHERINGHAM" Then Mom_Last_R = "YAHIAOUI-FOTHERINGHAM"; If Substr(Mom_Maiden_R,1,1) In ("#","0","1","2","3","4","5","6","7","8","9") Then Mom_Maiden_R = "";
If Trimn(Kid_Middle_R) EQ "BOYASHLEY" Then Kid_Middle_R = "" ;
If Index (Kid_First_R,"#3" ) >0 Then Kid_First_R= Substr(Kid_First_R,1,Index (Kid_First_R,"#3")-1);
If Index (Kid_First_R,"(NONE)") >0 Or Index (Kid_First_R,"BG" ) >0 Or Index (Kid_First_R,"BB" ) >0 Or Index (Kid_First_R,"BABY" ) >0 Or Index (Kid_First_R,"BOY" ) >0 Or Index (Kid_First_R,"GIRL" ) >0 Or Index (Kid_First_R,"INFANT") >0 Or Index (Kid_First_R,"TWIN" ) >0 Or Index (Kid_First_R,"UNAMED") >0 Or Index (Kid_First_R,"1" ) >0 Then Do; Call Missing(Kid_First_R ); Call Missing(Kid_Middle_R ); End;
If Substr(Kid_Middle_R,1,1) eq "0" Or Substr(Kid_Middle_R,1,1) eq "1" Or Substr(Kid_Middle_R,1,1) eq "2" Or Substr(Kid_Middle_R,1,1) eq "?" Or Substr(Kid_Middle_R,1,2) eq "BG" Or Substr(Kid_Middle_R,1,2) eq "BB" Or Substr(Kid_Middle_R,1,3) eq "BOY " Or Substr(Kid_Middle_R,1,4) eq "BABY" Or Substr(Kid_Middle_R,1,4) eq "GIRL" Or Substr(Kid_Middle_R,1,6) eq "INFANT" Then Do; Call Missing(Kid_First_R ); Call Missing(Kid_Middle_R ); End;
If Index(Kid_First_R,"TWIN")>0 Then Call Missing(Kid_First_R);
If Strip(Kid_Last_R) eq "1" Then Call Missing(Kid_Last_R); If Index(Kid_Last_R,"III")>0 Then Do; Kid_Suffix_R = Put("III",$3.); Kid_Last_R = Tranwrd(Kid_Last_R,"","III"); End; Else If Index(Kid_Last_R," JR")>0 Then Do; Kid_Suffix_R = Put("JR",$2.); Kid_Last_R = Tranwrd(Kid_Last_R,"","JR"); End; Else If Index(Kid_Last_R," II")>0 Then Do; Kid_Suffix_R = Put("II",$2.); Kid_Last_R = Tranwrd(Kid_Last_R,"","II");; End; Array Fix_Names (6) Kid_First_R Kid_Middle_R Kid_Last_R Mom_First_R Mom_Middle_R Mom_Maiden_R; Do i = 1 To 6; If Fix_Names (I) In ("UNKNOWN","UNATAINABLE","UNKOWN") Then Fix_Names(I) = ""; If Index(Fix_Names (I),".")>0 Then Fix_Names (I) = ""; End; Drop I; If Mom_Maiden_R EQ "ONLY" Then Call Missing(Mom_Maiden_R); If Trimn(Mom_First_R) In ("#N/A" ,"0","1") Then Call Missing(Mom_First_R ); If Trimn(Mom_Last_R ) In ("***LOOK UP MOM","1") Then Call Missing(Mom_Last_R );
Kid_First_R = Trimn(Compress(Upcase(Kid_First_R ),",?",)); Kid_Middle_R = Trimn(Compress(Upcase(Kid_Middle_R),",?",)); Kid_Last_R = Trimn(Compress(Upcase(Kid_Last_R ),",?",)); Kid_Suffix_R = Trimn(Compress(Upcase(Kid_Suffix_R),",?",));
Mom_First_R = Trimn(Compress(Upcase(Mom_First_R ),",?",)); Mom_Middle_R = Trimn(Compress(Upcase(Mom_Middle_R),",?",)); Mom_Maiden_R = Trimn(Compress(Upcase(Mom_Maiden_R),",?",)); Mom_Last_R = Trimn(Compress(Upcase(Mom_Last_R ) ,",?",));
Kid_First_Middle_R = Compbl(cat(Trimn(Kid_First_R)," ",Trimn(Kid_Middle_R) ) ); Mom_First_Middle_R = Compbl(cat(Trimn(Mom_First_R)," ",Trimn(Mom_Middle_R) ) );
Kid_Full_R = Compbl(Cat(Trimn(Kid_First_R), " ", Trimn(Substr(Kid_Middle_R,1,1))," ", Trimn(Kid_Last_R)," ", Trimn(Kid_Suffix_R))); Mom_Full_R = Compbl(Cat(Trimn(Mom_First_R), " ", Trimn(Substr(Mom_Middle_R,1,1))," ", Trimn(Mom_Last_R)," ", Trimn(Mom_Maiden_R))); ; Run; ;
****************************************************************************;; Proc Sort Data=Local.Full_Registry NodupKey; By CertKey_B ; Run; Proc Sort Data=Local.Full_Births NodupKey; By CertKey_B ; Run;
Data Local.Full_Births_Registry Local.CertKey_B_Birth_ID_List (Keep=CertKey_B Birth_ID Registry_ID); Attrib Source Format=$23. Registry_ID Format=9. Label = "Birth Defect Case ID" Birth_ID Format=9. Label = "Birth File Case ID" CertKey_B Format=$12. Label = "CertKey_B" Sex_B Format=$1. Label = "Child's gender" IDoB_Yr Format=4. BMICategory Format=1. Label = "BMI Category" Total_Order Format=2. Label = "Order for present Birth" Pay Format=1. Label = "Principal Payor" MEthnicE Format=$3. IDoB_Yr Format=4. Label = "Child's Year of Birth" OWGest Format=2. Label = "Obstetric estimate of gestation" Gest_Comb Format=2. Gest_Calc Format=2. Label = "Calculated estimate of gestation" BWG Format=4. Label = "Birthweight in Grams" Plur Format=2. Label = "Plurality" Total_Order Format=2. Label = "Birth Order" Kid_Full_B Format=$200. Kid_Full_R Format=$200. Kid_First_Middle_B Format=$200. Kid_First_Middle_R Format=$100. Kid_Last_B Format=$50. Kid_Last_R Format=$50. Kid_Last_Suffix_B Format=$200. Kid_Suffix_B Format=$10. Kid_Suffix_R Format=$10. Mom_Full_B Format=$200. Mom_Full_R Format=$200. Mom_First_B Format=$50. Mom_First_R Format=$50. Mom_Middle_B Format=$50./* Mom_Middle_R Format=$50.*/ Mom_First_Middle_B Format=$100. Mom_First_Middle_R Format=$100. Mom_Last_B Format=$50. Mom_Last_R Format=$50. Mom_Maiden_B Format=$50. Mom_Maiden_R Format=$50. Mom_MidInit_B Format=$1. Mom_Suffix_B Format=$10. Dad_First_B Format=$50. Dad_Full_B Format=$200. Dad_First_Middle_B Format=$200. Dad_Middle_B Format=$50. Dad_Last_B Format=$50. Dad_Last_Suffix_B Format=$60. Dad_Suffix_B Format=$10. ; Merge Local.Full_Births (In=A ) Local.Full_registry (In=B Keep= Registry_ID CertKey_B Kid_Full_R Mom_Full_R Kid_First_R Kid_First_Middle_R Kid_Middle_R Kid_Last_R Kid_Suffix_R Mom_First_R Mom_First_Middle_R Mom_Middle_R Mom_Last_R Mom_Maiden_R ICD_Code1 -- ICD_Code17 D_Descrip1 -- D_Descrip17 D_Status1 -- D_Status17 ) ; By CertKey_B; *If A; If A And B Then Source = "Registry AND Births "; Else If A And Not B Then Source = "Births without Registry"; Else If Not A And B Then Source = "Registry without Births";
*Mom_Middle_R =Put(Hold_Mom_Middle_R,$50.);
If Missing(Kid_First_B )=1 And Missing(Kid_First_R )=0 Then Kid_First_B = Kid_First_R; If Missing(Kid_Middle_B)=1 And Missing(Kid_Middle_R)=0 Then Kid_Middle_B = Kid_Middle_R; If Missing(Kid_Last_B )=1 And Missing(Kid_Last_R )=0 Then Kid_Last_B = Kid_Last_R; If Missing(Kid_Full_B )=1 And Missing(Kid_Full_R )=0 Then Kid_Full_B = Mom_Full_R;
Kid_First_Middle_B = Catt(Kid_First_B," ",Kid_Middle_B); Kid_Last_Suffix_B = Catt(Kid_Last_B ," ",Kid_Suffix_B);
If Mom_First_B Eq "" And Mom_First_R NE "" Then Mom_First_B = Mom_First_R; If Mom_Middle_B Eq "" And Mom_Middle_R NE "" Then Mom_Middle_B = Mom_Middle_R; If Mom_Maiden_B Eq "" And Mom_Maiden_R NE "" Then Mom_Maiden_B = Mom_Maiden_R; If Mom_Last_B Eq "" And Mom_Last_R NE "" Then Mom_Last_B = Mom_Last_R; If Mom_Full_B Eq "" And Mom_Full_R NE "" Then Mom_Full_B = Mom_Full_R;
Run; ;
... View more