Programming the statistical procedures from SAS

12 months of continuous enrollment before and after a reference date (multiple rows per id)

Reply
Frequent Contributor
Posts: 110

12 months of continuous enrollment before and after a reference date (multiple rows per id)

i am  trying to find id with 12 months of continous enrollment (no gap) before and after date_h. Each id can have multiple enrollment period (several start_e and end_e dates). In this example only id 1 should be included (have 12 months before and 12 months after).

id 2 will be excluded because there was a gap in coverage between 8/8/2006 and 1/1/2007

id 3 will be excluded because there was 12 months of continous enrollment before but not after date_h

 
id start_e end_e     date_h
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012
Respected Advisor
Posts: 4,606

Re: 12 months of continuous enrollment before and after a reference date (multiple rows per id)

Use a self join:

 

data have;
infile datalines truncover;
input id (start_e end_e date_h) (:mmddyy10.);
format start_e end_e date_h yymmdd10.;
datalines;
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012
;

proc sql;
create table want as
select a.id, a.date_h, b.start_e, b.end_e
from have as a inner join have as b
    on  a.id=b.id and
        b.start_e <= intnx("MONTH", a.date_h, -12, "SAME") and
        b.end_e   >= intnx("MONTH", a.date_h,  12, "SAME")
where a.date_h is not missing;
select * from want;
quit;
PG
Trusted Advisor
Posts: 1,036

Re: 12 months of continuous enrollment before and after a reference date (multiple rows per id)

Another option. Note, both this and PG's need a LOT of testing...these are always very tricky!

 

data have;
informat start_e end_e date_h mmddyy10.;
format start_e end_e date_h date.;
input id start_e end_e date_h;
cards;
1 1/1/2005 1/1/2006  2/8/2008
1 2/3/2006 4/5/2013 .
2 5/7/2005 8/8/2006  4/5/2007
2 1/1/2007 2/2/2012 .
3 5/9/2005 5/9/2007  1/1/2007
3 6/4/2008 7/7/2012 .
run;

/* StartYear is a year prior to your first date, YearCount encompasses all of the years in your list */
%let StartYear = 2000;
%let YearCount = 20; /* Max 85 years */

data _null_; /* Set some macro variables */
StartDate = mdy(1, 1, &StartYear.); /* Jan 1 of start year */
EndDate = intnx('year', StartDate, (&YearCount - 1), 'end'); /* Dec 31  of end year */
NumberOfDays = intck('day', StartDate, EndDate) + 1; /* Number of days between them, inclusive */
call symput('StartDate', put(StartDate, 8.));
call symput('EndDate', put(EndDate, 8.));
call symput('NumberOfDays', put(NumberOfDays, 8.));
run;

data HDates; /* Pull out the date_h information into a separate dataset */
set have;
keep id date_h;
if ^missing(date_h);
run;

proc sort data=have(drop=date_h); /* Sort ranges by ID, for BY processing */
by id;
run;

data GoodDates; /* Set an array of 1's for the dates in the ranges */
length DateList $&NumberOfDays.;
drop _: start_e end_e;
set have;
by id;
if first.id /* zero out the array at the beginning of an id */
then DateList = repeat('0', &NumberOfDays. - 1);

_Start = start_e - &StartDate. + 1; /* Adjust the dates so that start date is 1, to use with the character variable */
_NumDays = end_e - start_e + 1;
substr(DateList, _Start, _Numdays) = repeat('1', _Numdays - 1); /* for all of the days in the range, set the corresponding bytes to 1 */

if last.id /* Finished with this id */
then output;
run;

proc sql; /* Merge the date ranges with the date_h values */
create table CombinedDates
as select GoodDates.*, HDates.date_h
from GoodDates inner join HDates on GoodDates.id = HDates.id;
quit;

data want;
drop _: DateList;
set CombinedDates;
_Start = intnx('year', date_h, -1, 'same') - &StartDate. + 1; /* get the start location and the length of 1 year either side of date_h */
_End = intnx('year', date_h, 1, 'same') - &StartDate. + 1;
_NumDays = _End - _Start + 1;
if substr(DateList, _Start, _Numdays) = repeat('1', _Numdays - 1) /* Check if the character variable is all 1s for the desired locations */
then GoodRecord = 1;
else GoodRecord = 0;
run;

Ask a Question
Discussion stats
  • 2 replies
  • 242 views
  • 1 like
  • 3 in conversation