Hi All,
I have members that are filling drugs that belong to one of two lists (A/B). I am looking to calculate the overlap between lists by member.
I am currently looking to flag any claims where the days overlap hits at least a 50% threshold (e.g. 18 days overlap for 30 days supply =60%). I would also like to see the increase in claims observed if the threshold was decreased to 30% or 10%. I also want to show the average days overlap/claim
I've included a sample claims set below.
ID | DRUG | START_DT | DAYS_SUPP | END_DT |
1 | A | 10/14/19 | 30 | 11/14/19 |
1 | B | 11/6/19 | 30 | 12/6/19 |
1 | A | 12/9/19 | 30 | 1/9/20 |
2 | A | 10/1/19 | 30 | 11/1/19 |
2 | B | 10/25/19 | 30 | 11/25/19 |
2 | B | 12/1/19 | 30 | 12/31/19 |
3 | A | 10/6/19 | 30 | 11/6/19 |
3 | B | 10/1/19 | 30 | 11/1/19 |
4 | A | 11/1/19 | 30 | 11/30/19 |
4 | B | 11/3/19 | 30 | 12/3/19 |
4 | B | 12/15/19 | 30 | 1/15/20 |
5 | B | 10/5/19 | 30 | 11/5/19 |
5 | A | 10/1/19 | 30 | 11/1/19 |
Desired Output:
ID Total_Overlap_Days
1 8
2 6
3 26
4 28
5 26
Thanks for all your help!
OK.just need one more data step:
data x;
input id drug $ (start_dt end_dt) (:mmddyy8.) ;
cards ;
1 A 10/14/19 11/14/19
1 B 11/06/19 12/06/19
1 A 12/09/19 01/09/20
2 A 10/01/19 11/01/19
2 B 10/25/19 11/25/19
2 B 12/01/19 12/31/19
3 A 10/06/19 11/06/19
3 B 10/01/19 11/01/19
;
data temp;
set x;
do date=start_dt to end_dt;
output;
end;
keep id drug date;
format date mmddyy8.;
run;
proc sql;
create table temp2 as
select distinct *
from temp
order by id ,date, drug;
quit;
data temp3;
length list $ 20;
do until(last.date);
set temp2;
by id date;
list=catx(' ',list,drug);
end;
run;
data want;
set temp3;
by id list notsorted;
retain first;
if first.list then first=date;
if last.list then do;last=date;output; end;
drop date;
format first last mmddyy8.;
run;
data final_want;
set want(where=(list='A B'));
Total_Overlap_Days =last-first;
run;
So what does your desired result look like here?
You may also want to verify that your End_dt is appropriate for the start. If this is your actual data you may have the end_dt one day too late to match the days_supp depending on when you expect the first dose to be used.
data example; date='14Oct2019'd; date2= date+30; format date date2 date9.; run;
The logically simplest way is to "paintbrush" the dates into an array or a hash table; then you need only simple by-group processing. For example, using a hash table as the medium of choice:
data have ;
input id drug $ (start_dt end_dt) (:mmddyy8.) ;
cards ;
1 A 10/14/19 11/14/19
1 B 11/06/19 12/06/19
1 A 12/09/19 01/09/20
2 A 10/01/19 11/01/19
2 B 10/25/19 11/25/19
2 B 12/01/19 12/31/19
3 A 10/06/19 11/06/19
3 B 10/01/19 11/01/19
4 A 11/01/19 11/30/19
4 B 11/03/19 12/03/19
4 B 12/15/19 01/15/20
5 B 10/05/19 11/05/19
5 A 10/01/19 11/01/19
;
data _null_ ;
dcl hash h (ordered:"a") ;
h.definekey ("id", "date") ;
h.definedata ("id", "date", "olap", "olap_ct") ;
h.definedone () ;
do until (z) ;
set have end = z ;
do date = start_dt to end_dt ;
if h.find() ne 0 then olap_ct = 1 ;
else olap_ct + 1 ;
olap = olap_ct > 1 ;
h.replace() ;
end ;
end ;
h.output (dataset:"hash") ;
run ;
data want (keep = id start_dt end_dt olap_ct) ;
do until (last.olap) ;
set hash ;
by id olap notsorted ;
if first.olap then start_dt = date ;
end ;
if olap ;
end_dt = date ;
format start_dt end_dt yymmdd10. ;
run ;
One caveat of the above is that it can create quite a sizable hash table, particularly if you process claims, which usually are big files. To avoid overtaxing memory in such a case, we can make use of the existing sorted order by ID, so that the largest chunk of data loaded into the hash is dictated by the largest BY group.
At the same time, rather than to concatenate the hashes from all BY groups into file HASH for subsequent BY processing, we can replace it with artificial BY processing on the fly using control-break logic while enumerating the hash table with an iterator. A dummy item can be added to each partial hash to simplify the control-break code (as done below). This way, everything is done in a single step, and memory usage is kept in check. E.g.:
data want (keep = id start_dt end_dt olap_ct) ;
if _n_ = 1 then do ;
dcl hash h (ordered:"a") ;
h.definekey ("id", "date") ;
h.definedata ("id", "date", "olap", "olap_ct") ;
h.definedone () ;
dcl hiter hi ("h") ;
end ;
do until (last.id) ;
set have ;
by id ;
do date = start_dt to end_dt ;
if h.find() ne 0 then olap_ct = 1 ;
else olap_ct + 1 ;
olap = olap_ct > 1 ;
h.replace() ;
end ;
end ;
hi.last() ;
date + 1 ;
olap = 0 ;
h.add() ;
format start_dt end_dt yymmdd10. ;
do _n_ = hi.first() by 0 while (_n_ = 0) ;
if olap and ^ _olap then start_dt = date ;
if ^ olap and _olap then do ;
end_dt = _date ;
olap_ct = _olap_ct ;
output ;
end ;
_olap = olap ;
_olap_ct = olap_ct ;
_date = date ;
_n_ = hi.next() ;
end ;
h.clear() ;
run ;
Note that in the output file WANT start_dt and end_dt represent the endpoints of the date intervals where the drugs overlap. OLAP_CT gives the number of drugs overlapping within the interval. So, if you had more than 2 drugs per member, and within some interval 3 or more drugs overlapped, you'd see the corresponding number. Of course, the OLAP_CT=1 case (no overlap) is filtered out.
Kind regards
Paul D.
If understood what you mean.
data x;
input id drug $ (start_dt end_dt) (:mmddyy8.) ;
cards ;
1 A 10/14/19 11/14/19
1 B 11/06/19 12/06/19
1 A 12/09/19 01/09/20
2 A 10/01/19 11/01/19
2 B 10/25/19 11/25/19
2 B 12/01/19 12/31/19
3 A 10/06/19 11/06/19
3 B 10/01/19 11/01/19
;
data temp;
set x;
do date=start_dt to end_dt;
output;
end;
keep id drug date;
format date mmddyy8.;
run;
proc sql;
create table temp2 as
select distinct *
from temp
order by id ,date, drug;
quit;
data temp3;
length list $ 20;
do until(last.date);
set temp2;
by id date;
list=catx(' ',list,drug);
end;
run;
data want;
set temp3;
by id list notsorted;
retain first;
if first.list then first=date;
if last.list then do;last=date;output; end;
drop date;
format first last mmddyy8.;
run;
Thanks for the response. I actually edited my original question to include the desired output that I'm looking for.
With the desired output provided, it's better. But your counts don't add up. The way I count manually - and both programs below count, the output expected from your input should be:
ID total_overlap_days ----------------------- 1 9 2 8 3 27 4 28 5 28
Apropos, I find it unnecessary and development-hindering to sample input of the kind you've provided, particularly in terms of the dates presented in the diabolical MM/YY/DD format. Something like this:
data have ; input id drug $ start_dt end_dt ; cards ; 1 A 1 10 1 B 7 12 1 A 14 17 2 A 1 20 2 B 10 16 2 B 19 23 ;
is way better because with it I can count the overlaps per ID manually in a minute, plus play with the input data and evaluate the expected output much nimbler.
At any rate, if the total overlap count is all you want, you can simply do:
data have ;
input id drug $ (start_dt end_dt) (:mmddyy8.) ;
cards ;
1 A 10/14/19 11/14/19
1 B 11/06/19 12/06/19
1 A 12/09/19 01/09/20
2 A 10/01/19 11/01/19
2 B 10/25/19 11/25/19
2 B 12/01/19 12/31/19
3 A 10/06/19 11/06/19
3 B 10/01/19 11/01/19
4 A 11/01/19 11/30/19
4 B 11/03/19 12/03/19
4 B 12/15/19 01/15/20
5 B 10/05/19 11/05/19
5 A 10/01/19 11/01/19
;
data v / view = v ;
set have ;
do date = start_dt to end_dt ;
output ;
end ;
run ;
proc sql ;
create table want_sql as
select id, count (q) as Total_Overlap_Count
from (select id, date, count (distinct drug) as q from v group 1, 2 having q > 1)
group id
;
quit ;
However, it may be more efficient to do it this way because the aggregation is done on the fly:
data want_hash (keep = id total_overlap_days) ;
if _n_ = 1 then do ;
dcl hash h (ordered:"a") ;
h.definekey ("date") ;
h.definedata ("drug") ;
h.definedone () ;
end ;
do until (last.id) ;
set have ;
by id ;
_drug = drug ;
do date = start_dt to end_dt ;
if h.find() ne 0 then h.add() ;
else if drug ne _drug then Total_Overlap_Days = sum (Total_Overlap_Days, 1) ;
end ;
end ;
h.clear() ;
run ;
Kind regards
Paul D.
OK.just need one more data step:
data x;
input id drug $ (start_dt end_dt) (:mmddyy8.) ;
cards ;
1 A 10/14/19 11/14/19
1 B 11/06/19 12/06/19
1 A 12/09/19 01/09/20
2 A 10/01/19 11/01/19
2 B 10/25/19 11/25/19
2 B 12/01/19 12/31/19
3 A 10/06/19 11/06/19
3 B 10/01/19 11/01/19
;
data temp;
set x;
do date=start_dt to end_dt;
output;
end;
keep id drug date;
format date mmddyy8.;
run;
proc sql;
create table temp2 as
select distinct *
from temp
order by id ,date, drug;
quit;
data temp3;
length list $ 20;
do until(last.date);
set temp2;
by id date;
list=catx(' ',list,drug);
end;
run;
data want;
set temp3;
by id list notsorted;
retain first;
if first.list then first=date;
if last.list then do;last=date;output; end;
drop date;
format first last mmddyy8.;
run;
data final_want;
set want(where=(list='A B'));
Total_Overlap_Days =last-first;
run;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.