Thank you in advance for your help Kurt! Here you go:

/* begin matching process */

/*
 * Step 1: split def.combined_finalab into def.grp1 (littler = 0) and
 * def.grp2 (every other row, including rows where littler is missing).
 *
 * Each of the four list variables is scanned as a comma-separated list.
 * For every row we record:
 *   obs                   - row number in the input data set (join key later)
 *   listCount1-listCount4 - how many of the first 4 items of each list parse
 *                           as a number with the 4. informat
 *   listcount             - total of the four counts
 * Only the first 4 items of each list are inspected.
 */
data def.grp1 def.grp2;
    set def.combined_finalab;
    array lists[4] var11 Financial_Fraud__Irregularities
                   Errors___Accounting_and_Clerical Other_Significant_Issues;
    obs = _n_;
    array counts[4] listCount1-listCount4;
    do _list = 1 to 4;
        counts[_list] = 0;
        do i = 1 to 4;
            /* GT .z is true only for real numbers (excludes ., ._ and .a-.z) */
            if input(scan(lists[_list], i, ','), 4.) GT .z then
                counts[_list] = sum(counts[_list], 1);
        end;
    end;
    listcount = sum(of listCount1-listCount4);
    if littler = 0 then output def.grp1;
    else output def.grp2;
    drop i _list;
run;

/* Expand to 1 record per item in the list */

/*
 * expand_lists: write one row per numeric list item found in &in to &out.
 *   in=  input data set holding the four list variables
 *   out= output data set; gains VALUE = 100 * (list number) + item,
 *        so the hundreds digit identifies which list the item came from.
 *
 * The same positions (1-4) that were counted in step 1 are scanned again
 * and only items that parse as numbers are written. The earlier version
 * read positions 1..listCountN, which produced a missing VALUE whenever a
 * non-numeric item preceded a numeric one; missing values compare equal in
 * a SAS join and would have been counted as matches.
 * NOTE(review): an item of 100 or more would collide with the next list
 * offset - confirm items are always below 100.
 */
%macro expand_lists(in=, out=);
    data &out;
        set &in;
        array lists[4] var11 Financial_Fraud__Irregularities
                       Errors___Accounting_and_Clerical Other_Significant_Issues;
        do _list = 1 to 4;
            do i = 1 to 4;
                _item = input(scan(lists[_list], i, ','), 4.);
                if _item GT .z then do;
                    value = 100 * _list + _item;
                    output;
                end;
            end;
        end;
        drop i _list _item;
    run;
%mend expand_lists;

%expand_lists(in=def.grp1, out=def.grp1Tall)
%expand_lists(in=def.grp2, out=def.grp2Tall)

/* For each observation in grp1, find the number matching in grp2.
   Only keep the records where the number of matches equals the number in the grp1 list */
proc sql;
    create table def.temp as
    select a.listCount,
           a.obs,
           b.obs as obs2,
           count(*) as matchCount
    from def.grp1Tall as a
    inner join def.grp2Tall as b
        on a.value = b.value
    /* a grp2 row can only contain every grp1 item if its list is at least as long */
    where a.listCount LE b.listCount
    group by a.listCount, a.obs, b.obs
    /* qualified with a. because listCount exists in both inputs */
    having a.listCount = count(*);
quit;

/* Finally, join the two */
/* One row per (grp1 obs, grp2 obs2) pair from def.temp: all grp1 columns
   plus the grp2 columns under a "2" suffix. Errors___Accounting_and_Clerical
   is already 32 characters long, so its grp2 copy is named E2. */
proc sql;
    create table def.final as
    select a.*,
           b.obs2,
           c.cik_code as cik_code2,
           c.var11 as var112,
           c.Financial_Fraud__Irregularities as Financial_Fraud__Irregularities2,
           c.Errors___Accounting_and_Clerical as E2,
           c.Other_Significant_Issues as Other_Significant_Issues2,
           c.lnassets as lnassets2,
           c.roa as roa2,
           c.size as size2,
           c.mtb as mtb2,
           c.lev as lev2,
           c.fcf as fcf2,
           c.loss as loss2,
           c.foreign as foreign2,
           c.segment as segment2,
           c.abnbhar as abnbhar2,
           c.big4 as big42,
           c.city_expertise3 as city_expertise32,
           c.national_expertise3 as national_expertise32,
           c.icweakness as icweakness2,
           c.clientimp as clientimp2,
           c.contten as contten2,
           c.notbusy as notbusy2,
           c.littler as littler2,
           c.multiplerestatements as multiplerestatements2,
           c.sic as sic2,
           c.lnccse as lnccse2,
           c.effectx as effectx2,
           c.fyear as fyear2
    from def.grp1 as a
    inner join def.temp as b
        on a.obs = b.obs
    inner join def.grp2 as c
        on c.obs = b.obs2;
quit;

/* Drop pairs with missing ROA or log assets on either side, then measure
   how far apart the pair is in log assets. The distance is absolute:
   a signed difference sorted ascending would keep the candidate with the
   most negative difference rather than the closest one. */
data def.final2;
    set def.final;
    /* nmiss also counts special missing values (._ and .a-.z) */
    if nmiss(roa, roa2, lnassets, lnassets2) > 0 then delete;
    distance = abs(lnassets2 - lnassets);
run;

proc sort data=def.final2;
    by obs distance;
run;

/* Keep the closest grp2 candidate (smallest distance) for each grp1 obs */
data def.final3;
    set def.final2;
    by obs distance;
    if first.obs;
run;

/* NOTE(review): def.final3 already holds one row per obs, so the ROA
   distance below cannot change which candidate is kept. If ROA is meant
   to take part in the selection it must be computed before def.final3. */
data def.final4;
    set def.final3;
    distance2 = abs(roa2 - roa);
run;

proc sort data=def.final4;
    by obs distance2;
run;

data def.final5;
    set def.final4;
    by obs distance2;
    if first.obs;
run;

proc means data=def.final5;
run;

/* Route each matched pair to both sides. The two tests are independent,
   so a row can be written to def.bigr, def.littler, both, or neither. */
data def.bigr def.littler;
    set def.final5;
    if littler = 0 then output def.bigr;
    if littler2 = 1 then output def.littler;
run;

/* grp1 side of each pair: keep the unsuffixed analysis variables */
data def.bigr1;
    set def.bigr;
    keep cik_code fyear sic size mtb lev fcf roa loss foreign segment abnbhar
         big4 city_expertise3 national_expertise3 icweakness clientimp contten
         notbusy lnccse effectx var11 Financial_Fraud__Irregularities
         Errors___Accounting_and_Clerical Other_Significant_Issues littler
         multiplerestatements lnassets;
run;

/* grp2 side of each pair: keep the "2"-suffixed copies */
data def.littler1;
    set def.littler;
    keep cik_code2 fyear2 sic2 size2 mtb2 lev2 fcf2 roa2 loss2 foreign2
         segment2 abnbhar2 big42 city_expertise32 national_expertise32
         icweakness2 clientimp2 contten2 notbusy2 lnccse2 effectx2 var112
         Financial_Fraud__Irregularities2 E2 Other_Significant_Issues2
         littler2 multiplerestatements2 lnassets2;
run;

/* Rename the grp2 columns back to the unsuffixed names so they line up
   with def.bigr1 when stacked. The four list variables and lnassets were
   previously left under their suffixed names, which put them in separate
   columns from the def.bigr1 values in def.combinedfinal. */
data def.littler2;
    set def.littler1;
    rename cik_code2                        = cik_code
           fyear2                           = fyear
           sic2                             = sic
           size2                            = size
           mtb2                             = mtb
           lev2                             = lev
           fcf2                             = fcf
           roa2                             = roa
           loss2                            = loss
           foreign2                         = foreign
           segment2                         = segment
           abnbhar2                         = abnbhar
           big42                            = big4
           city_expertise32                 = city_expertise3
           national_expertise32             = national_expertise3
           icweakness2                      = icweakness
           clientimp2                       = clientimp
           contten2                         = contten
           notbusy2                         = notbusy
           lnccse2                          = lnccse
           effectx2                         = effectx
           littler2                         = littler
           multiplerestatements2            = multiplerestatements
           var112                           = var11
           Financial_Fraud__Irregularities2 = Financial_Fraud__Irregularities
           E2                               = Errors___Accounting_and_Clerical
           Other_Significant_Issues2        = Other_Significant_Issues
           lnassets2                        = lnassets;
run;

proc sort data=def.bigr1 out=def.bigr2;
    by cik_code fyear;
run;

proc sort data=def.littler2 out=def.littler3;
    by cik_code fyear;
run;

/* Stack the two sides: all def.bigr2 rows followed by all def.littler3
   rows (concatenated, not interleaved, because there is no BY statement) */
data def.combinedfinal;
    set def.bigr2 def.littler3;
run;
/* Copy of the stacked matched sample, used for the descriptive
   statistics and the probit model below */
data def.combinedfinal1;
    set def.combinedfinal;
run;

proc means data=def.combinedfinal1;
run;

/* NOTE(review): the title says "without SIC" but the model below still
   includes sic - confirm which one is intended. */
title "Matching Results without SIC";

/*
 * Probit model of LittleR = 1 on the matched sample.
 * fyear and SIC use the default CLASS coding; the remaining class
 * variables use reference coding with the first level as the reference.
 * Predicted probabilities are written to Def.combinedfinal1_prob3 in the
 * variable Probability.
 *
 * Fix: the CLASS list previously named Natio6nal_Expertise3, a variable
 * that does not exist in the data; it is now National_Expertise3, matching
 * the MODEL statement.
 */
proc logistic data=def.combinedfinal1;
    class fyear
          SIC
          Loss                 (param=ref ref=first)
          Foreign              (param=ref ref=first)
          Big4                 (param=ref ref=first)
          City_Expertise3      (param=ref ref=first)
          National_Expertise3  (param=ref ref=first)
          ICWeakness           (param=ref ref=first)
          Notbusy              (param=ref ref=first)
          MultipleRestatements (param=ref ref=first);
    model LittleR(event="1") =
          contten clientimp multiplerestatements lnccse
          city_expertise3 national_expertise3 big4 size mtb segment
          foreign lev fcf roa abnbhar icweakness notbusy loss fyear sic
          / link=probit RSQ;
    output out=Def.combinedfinal1_prob3 p=Probability;
run;
... View more