Hello,
dataset b: The end date for Key=27 is before the start date. I changed that.
I wrote a little program that also works when you have thousands of account_id's (instead of just 1).
I'm not completely sure that I fully grasp your request, but the below code (or something close) should do the job:
Take care:
This code is quite greedy (especially when you would / could have multiple shopping dates for an account in table a --> Cartesian product due to repeats of by-values in table a AND table b) but it has the advantage of simplicity. Faster solutions are possible (using hash tables for example) but they would be more difficult to read and understand.
data a;
Account_ID='00001'; Shopping_Date='03/20/2016'; output;
run;
data b;
Key=26; Account_ID='00001'; Start_Date='02/18/2016'; End_Date='03/19/2016'; Score=709; output;
Key=27; Account_ID='00001'; Start_Date='03/19/2016'; End_Date='04/11/2016'; Score=715; output;
Key=28; Account_ID='00001'; Start_Date='04/12/2016'; End_Date='05/11/2016'; Score=718; output;
Key=29; Account_ID='00001'; Start_Date='05/12/2016'; End_Date='12/31/9999'; Score=702; output;
run;
data a; set a;
Shopping_Date1=input(Shopping_Date,mmddyy10.);
format Shopping_Date1 date9.;
run;
data b; set b;
Start_Date1=input(Start_Date,mmddyy10.);
End_Date1 =input(End_Date, mmddyy10.);
format Start_Date1 End_Date1 date9.;
run;
PROC SQL noprint;
create table c(where=(Shopping_Date1 between Start_Date1 AND End_Date1)) as
select b.Key , b.Account_ID , b.Start_Date1 , b.End_Date1 , b.Score , a.Shopping_Date1
, INTCK('DAY',Start_Date1,Shopping_Date1) as DaysDiff
from work.a a
, work.b b
where a.Account_ID = b.Account_ID
order by b.Account_ID , DaysDiff;
QUIT;
data c; set c; by Account_ID DaysDiff; if first.Account_ID; run;
/* end of program */
Cheers,
Koen
... View more