BookmarkSubscribeRSS Feed
lillymaginta
Lapis Lazuli | Level 10

i am  trying to find id with 12 months of continous enrollment (no gap) before and after date_h. Each id can have multiple enrollment period (several start_e and end_e dates). In this example only id 1 should be included (have 12 months before and 12 months after).

id 2 will be excluded because there was a gap in coverage between 8/8/2006 and 1/1/2007

id 3 will be excluded because there was 12 months of continous enrollment before but not after date_h

 
id start_e end_e     date_h
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012
2 REPLIES 2
PGStats
Opal | Level 21

Use a self join:

 

data have;
infile datalines truncover;
input id (start_e end_e date_h) (:mmddyy10.);
format start_e end_e date_h yymmdd10.;
datalines;
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012
;

proc sql;
create table want as
select a.id, a.date_h, b.start_e, b.end_e
from have as a inner join have as b
    on  a.id=b.id and
        b.start_e <= intnx("MONTH", a.date_h, -12, "SAME") and
        b.end_e   >= intnx("MONTH", a.date_h,  12, "SAME")
where a.date_h is not missing;
select * from want;
quit;
PG
TomKari
Onyx | Level 15

Another option. Note, both this and PG's need a LOT of testing...these are always very tricky!

 

data have;
informat start_e end_e date_h mmddyy10.;
format start_e end_e date_h date.;
input id start_e end_e date_h;
cards;
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013 .
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012 .
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012 .
run;

/* StartYear is a year prior to your first date, YearCount encompasses all of the years in your list */
%let StartYear = 2000;
%let YearCount = 20; /* Max 85 years */

data _null_; /* Set some macro variables */
StartDate = mdy(1, 1, &StartYear.); /* Jan 1 of start year */
EndDate = intnx('year', StartDate, (&YearCount - 1), 'end'); /* Dec 31  of end year */
NumberOfDays = intck('day', StartDate, EndDate) + 1; /* Number of days between them, inclusive */
call symput('StartDate', put(StartDate, 8.));
call symput('EndDate', put(EndDate, 8.));
call symput('NumberOfDays', put(NumberOfDays, 8.));
run;

data HDates; /* Pull out the date_h information into a separate dataset */
set have;
keep id date_h;
if ^missing(date_h);
run;

proc sort data=have(drop=date_h); /* Sort ranges by ID, for BY processing */
by id;
run;

data GoodDates; /* Set an array of 1's for the dates in the ranges */
length DateList $&NumberOfDays.;
drop _: start_e end_e;
set have;
by id;
if first.id /* zero out the array at the beginning of an id */
then DateList = repeat('0', &NumberOfDays. - 1);

_Start = start_e - &StartDate. + 1; /* Adjust the dates so that start date is 1, to use with the character variable */
_NumDays = end_e - start_e + 1;
substr(DateList, _Start, _Numdays) = repeat('1', _Numdays - 1); /* for all of the days in the range, set the corresponding bytes to 1 */

if last.id /* Finished with this id */
then output;
run;

proc sql; /* Merge the date ranges with the date_h values */
create table CombinedDates
as select GoodDates.*, HDates.date_h
from GoodDates inner join HDates on GoodDates.id = HDates.id;
quit;

data want;
drop _: DateList;
set CombinedDates;
_Start = intnx('year', date_h, -1, 'same') - &StartDate. + 1; /* get the start location and the length of 1 year either side of date_h */
_End = intnx('year', date_h, 1, 'same') - &StartDate. + 1;
_NumDays = _End - _Start + 1;
if substr(DateList, _Start, _Numdays) = repeat('1', _Numdays - 1) /* Check if the character variable is all 1s for the desired locations */
then GoodRecord = 1;
else GoodRecord = 0;
run;

sas-innovate-2024.png

Available on demand!

Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.

 

Register now!

What is ANOVA?

ANOVA, or Analysis Of Variance, is used to compare the averages or means of two or more populations to better understand how they differ. Watch this tutorial for more.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 2 replies
  • 2321 views
  • 1 like
  • 3 in conversation