BookmarkSubscribeRSS Feed
jjadalla
Obsidian | Level 7

Hello!

For some reason, my matching code is not giving me what I want.  I attached my code and the SAS dataset to start with.  Here's what I want.  I want to match with replacement where there are unique observations of littler=0.  If 1 observation of littler=0 matches with an observation of littler=1, it is OK for a different observation of littler=0 to match with the same observation of littler=1.  However, each littler=0 in the match needs to be unique.  I am not getting that.  Please let me know what I'm doing wrong.

 

Thank you so much for your help!

 

God bless, best regards, and take care,

Jadallah

2 REPLIES 2
Kurt_Bremser
Super User

Post datasets as data steps with datalines; post all SAS codes using the "little running man".

 

There is no need for zip archives or MS Office files.

jjadalla
Obsidian | Level 7

Thank you in advance for your help Kurt!  Here you go:

/* begin matching process  */

data def.grp1 def.grp2;
set def.combined_finalab;
array x[4] var11 Financial_Fraud__Irregularities
Errors___Accounting_and_Clerical Other_Significant_Issues;
obs=_n_;
listCount1=0;
listCount2=0;
listCount3=0;
listCount4=0;
do i = 1 to 4;
  if input(scan(var11,i,','),4.) GT .z then
  listCount1=sum(listCount1,1);
  if input(scan(Financial_Fraud__Irregularities,i,','),4.) GT .z then
  listCount2=sum(listCount2,1);
  if input(scan(Errors___Accounting_and_Clerical,i,','),4.) GT .z then
  listCount3=sum(listCount3,1);
  if input(scan(Other_Significant_Issues,i,','),4.) GT .z then
  listCount4=sum(listCount4,1);
end;
listcount=sum(listcount1,listcount2,listcount3,listcount4);
if littler=0 then output def.grp1;
else output def.grp2;
drop i;
run; 

/* Expand to 1 record per item in the list */

data def.grp1Tall;
set def.grp1;
do i = 1 to listCount1;
  value=100+input(scan(var11,i,','),4.);
  output;
end;
do i = 1 to listCount2;
  value=200+input(scan(Financial_Fraud__Irregularities,i,','),4.);
  output;
end;
do i = 1 to listCount3;
  value=300+input(scan(Errors___Accounting_and_Clerical,i,','),4.);
  output;
end;
do i = 1 to listCount4;
  value=400+input(scan(Other_Significant_Issues,i,','),4.);
  output;
end;
drop i;
run;
 
data def.grp2Tall;
set def.grp2;
do i = 1 to listCount1;
  value=100+input(scan(var11,i,','),4.);
  output;
end;
do i = 1 to listCount2;
  value=200+input(scan(Financial_Fraud__Irregularities,i,','),4.);
  output;
end;
do i = 1 to listCount3;
  value=300+input(scan(Errors___Accounting_and_Clerical,i,','),4.);
  output;
end;
do i = 1 to listCount4;
  value=400+input(scan(Other_Significant_Issues,i,','),4.);
  output;
end;
drop i;
run;

/* For each observation in grp1, find the number matching
   in grp2.  Only keep the records where the number of matches equals
   the number in the grp1 list
*/

proc sql;
create table def.temp as
select a.listCount, a.obs, b.obs as obs2, count(*) as matchCount
from def.grp1Tall as a, def.grp2Tall as b
where a.value = b.value
and a.listCount LE b.listCount
group by 1,2,3
having listCount = matchCount;
quit;

/* Finally, join the two */
proc sql;
create table def.final as
select a.*, b.obs2, c.cik_code as cik_code2, c.var11 as var112, c. Financial_Fraud__Irregularities as Financial_Fraud__Irregularities2, c. Errors___Accounting_and_Clerical as E2, c. Other_Significant_Issues as Other_Significant_Issues2, c.lnassets as lnassets2, c.roa as roa2,
c.size as size2, c.mtb as mtb2, c.lev as lev2, c.fcf as fcf2, c.loss as loss2, c.foreign as foreign2, c.segment as segment2, c.abnbhar as abnbhar2, c.big4 as big42, c.city_expertise3 as city_expertise32, c.national_expertise3 as national_expertise32, c.icweakness as icweakness2, c.clientimp as clientimp2, c.contten as contten2, c.notbusy as notbusy2, c.littler as littler2, c.multiplerestatements as multiplerestatements2, c.sic as sic2, c.lnccse as lnccse2, c.effectx as effectx2, c.fyear as fyear2
from def.grp1 a, def.temp b, def.grp2 c
where a.obs=b.obs
  and c.obs=b.obs2;
quit;

data def.final2;
set def.final;
if roa=. then delete;
if roa2=. then delete;
if lnassets=. then delete;
if lnassets2=. then delete;
distance=lnassets2-lnassets;
run;

proc sort data=def.final2;
by obs distance;
run;

data def.final3;
set def.final2;
by obs distance;
if first.obs;
run;

data def.final4;
set def.final3;
distance2=roa2-roa;
run;

proc sort data=def.final4;
by obs distance2;
run;

data def.final5;
set def.final4;
by obs distance2;
if first.obs;
run;

proc means data=def.final5;
run;

data def.bigr def.littler;
set def.final5;
if littler=0 then output def.bigr;
if littler2=1 then output def.littler;
run; 

data def.bigr1;
set def.bigr;
keep cik_code fyear sic size mtb lev fcf roa loss foreign segment abnbhar big4 city_expertise3 national_expertise3 icweakness clientimp contten notbusy lnccse effectx var11 Financial_Fraud__Irregularities Errors___Accounting_and_Clerical Other_Significant_Issues littler multiplerestatements lnassets;
run;

data def.littler1;
set def.littler;
keep cik_code2 fyear2 sic2 size2 mtb2 lev2 fcf2 roa2 loss2 foreign2 segment2 abnbhar2 big42 city_expertise32 national_expertise32 icweakness2 clientimp2 contten2 notbusy2 lnccse2 effectx2 var112 Financial_Fraud__Irregularities2 E2 Other_Significant_Issues2 littler2 multiplerestatements2 lnassets2;
run;

data def.littler2;
set def.littler1;
rename cik_code2=cik_code;
rename fyear2=fyear;
rename sic2=sic;
rename size2=size;
rename mtb2=mtb;
rename lev2=lev;
rename fcf2=fcf;
rename roa2=roa;
rename loss2=loss;
rename foreign2=foreign;
rename segment2=segment;
rename abnbhar2=abnbhar;
rename big42=big4;
rename city_expertise32=city_expertise3;
rename national_expertise32=national_expertise3;
rename icweakness2=icweakness;
rename clientimp2=clientimp;
rename contten2=contten;
rename notbusy2=notbusy;
rename lnccse2=lnccse;
rename effectx2=effectx;
rename littler2=littler;
rename multiplerestatements2=multiplerestatements;
run;

proc sort data=def.bigr1 out=def.bigr2; by cik_code fyear;
run;

proc sort data=def.littler2 out=def.littler3; by cik_code fyear;
run;

data def.combinedfinal;
set def.bigr2 def.littler3;
run;

data def.combinedfinal1;
set def.combinedfinal;
run;

proc means data=def.combinedfinal1;
run;

title "Matching Results without SIC";
run;

proc logistic data= def.combinedfinal1;
  class fyear SIC;
  class Loss / param=ref ref=first;
  class Foreign / param=ref ref=first;
  class Big4 / param=ref ref=first;
  class City_Expertise3 / param=ref ref=first; 
  class Natio6nal_Expertise3 / param=ref ref=first;
  class ICWeakness / param=ref ref=first;
  class Notbusy / param=ref ref=first;
  class MultipleRestatements / param=ref ref=first;
  model LittleR(event="1") = contten clientimp multiplerestatements
  lnccse city_expertise3 national_expertise3 big4 size mtb segment
  foreign lev fcf roa abnbhar icweakness notbusy loss fyear sic
 / link=probit RSQ;
output out= Def.combinedfinal1_prob3 p=Probability;
run;

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!

Submit your idea!

SAS Enterprise Guide vs. SAS Studio

What’s the difference between SAS Enterprise Guide and SAS Studio? How are they similar? Just ask SAS’ Danny Modlin.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 2 replies
  • 848 views
  • 0 likes
  • 2 in conversation