Hello SAS experts out there,
I've tried the following program, see the link below, to identify concomitant use of medicines, I run the program for two medicines, A and B, and looking at the outcome, combinations of A-A, B-B, B-B-A,... etc exist in the final output. Anyone who can really explain to me what does this mean- A-A, B-B, B-B-C.
https://communities.sas.com/t5/SAS-Programming/Concomitant-drug-medication-use/td-p/339848
Thanks a lot,
Sorry, I selected it as attachment, but didn't check. This time I use a code box:
/******************************************************************************/
/* RensInterval 		                        erlu   29.1.2013      */
/*                                                                            */
/* Makroen anvendes til rensning af SAS-datasæt, hvor de enkelte records      */
/* udtrykker tidsintervaller angivet ved en startdato og en slutdato.         */
/*                                                                            */
/* Makroen læser et angivet datasæt og danner et outputdatasæt, hvor          */
/* flere poster med tidsintervaller i umiddelbar fortsættelse eller overlap-  */
/* pende er komprimeret til eet interval.                                     */
/*                                                                            */
/* Makroen checker, om datasættet indeholder flere poster, hvor alle          */
/* variabelværdier er fælles bortset fra tidsintervallet, og hvor             */
/* tidsintervallerne er fortløbende eller overlappende, således at startdato  */
/* på en følgende post er <= dagen efter slutdato på den foregående post.     */
/* De resulterende intervalvariable får samme format som formatet på den      */
/* variabel, der er angivet son datefirst.                                    */
/*                                                                            */
/* Kaldsargumenter:                                                           */
/*                                                                            */
/* Positionelle:                                                              */
/*      Inputdatasæt	                                                      */
/*                                                                            */
/*      Outputdatasæt                                                         */
/*                                                                            */
/* Keyword:                                                                   */
/*      Datefirst=datefirst  Anvendes kun hvis varnavn ikke er datefirst      */
/*                                                                            */
/*      Datelast=datelast    Anvendes kun hvis varnavn ikke er datelast       */
/*                                                                            */
/******************************************************************************/
%macro RensInterval(thisinds,thisoutds,datefirst=DateFirst,datelast=DateLast);
	%local dropstmt datofmt SelectList WhereList datofmtstr;
	%* Check eksistens af inputdatasæt;
	%if %sysfunc(exist(&thisinds)) = 0 %then %do; 
		%put ERROR: Inputdataset &thisinds findes ikke;
		%goto wexit;
	%end;
	%let GeneratedKey = qxzworkid;
	%let dropstmt = drop &GeneratedKey;
	%let ContentDs = qxw1;
	proc datasets nolist;
		delete qxw1 qxw2 qxw3;
	quit;
	%* Dan datasæt med variabelspecifikationer;
	proc contents data=&thisinds noprint short out=&ContentDs;
	run;
	%* initier datoformat;
	%let datofmt = %str();
	%* dan makrovariable ud fra proc contents;
	data _null_; set &ContentDs end=eof;
		length SelectList WhereList $4096 fmt $60;
		retain SelectList WhereList;
		retain DatoVarChk 0;
		if lowcase(name) NE lowcase("&datefirst") and lowcase(name) NE lowcase("&datelast") then do;
			SelectList = catx(',', SelectList, name);
			WhereList = catx(' AND ', wherelist, 'a.'||trim(name)||'=b.'||trim(name));
		end;
		%* fang format på datefirst;
		if lowcase(name) = lowcase("&datefirst") then do;
			if lowcase(format) = 'date' and formatl = 0 then fmt = 'DATE.';
			else if lowcase(format) = 'datetime' and formatl = 0 then fmt = 'DATETIME.';
			else do;
				fmt = trim(format)||strip(put(formatl,8.0))||'.';
				if formatd > 0 then fmt = trim(fmt)||strip(put(formatd,8.0)); 
			end;
			call symputx('datofmt',fmt);
		end;
		%* fang de 2 datointervalvariable i datasættet;
		if lowcase(name) = lowcase("&datefirst") then DatoVarChk = DatoVarChk + 1;
		else if lowcase(name) = lowcase("&datelast") then DatoVarChk = DatoVarChk + 1;
		if eof then do;
			call symput('SelectList',trim(SelectList));
			call symput('WhereList',trim(WhereList));
			call symputx('DatoVarChk',trim(DatoVarChk));
		end;
	run;
	%* stop ved manglende datovariable;
	%if &DatoVarChk ne 2 %then %do;
		%put ERROR: Manglende intervalvariable - &datefirst eller &datelast ikke fundet i datasæt: &thisinds;
		%goto wexit;
	%end;
	%* dan datoformatstreng til at sætte format op outputdatoer;
	%if &datofmt = %str(0.) %then %let datofmtstr = %str();
	%else %do;
		%let datofmtstr = format qxnewfirst qxnewlast &datofmt;
		%put NOTE: Format til output-intervalvariable: &datofmt;
	%end;
	%put NOTE: SelectList = &SelectList;
	%put NOTE: WhereList = &WhereList;
	%* dan id-nummereret distinct liste over variable (undtagen datovariable);
	proc sql; 
		create table qxw2 as 
			select 
				monotonic() as &GeneratedKey,
				&SelectList 
			from (select distinct &SelectList from &thisinds as b) as a
			order by &GeneratedKey;
	quit;
	%* flet id-numrene ind på hele datasættet og sorter;
	proc sql;
		create table qxw3 as 
			select 
				a.*,
				b.&GeneratedKey
			from &thisinds as a, qxw2 as b
			where &WhereList
			order by &GeneratedKey, &datefirst, &datelast;
	quit;
	%* Dan resulterende intervaller;
	data &thisoutds(rename=(qxnewfirst=&datefirst qxnewlast=&datelast)); 
		set qxw3; by &GeneratedKey;
		retain qxnewfirst qxnewlast;
		&datofmtstr;
		drop &datefirst &datelast;
		&dropstmt;
		if first.&GeneratedKey then do;
			qxnewfirst = &datefirst;
			qxnewlast = &datelast;
		end;
		else do;
			if &datefirst > qxnewlast + 1 then do; 
				output;
				qxnewfirst = &datefirst;
				qxnewlast = &datelast;
			end;
			else if &datelast > qxnewlast then qxnewlast = &datelast;
		end;
		if last.&GeneratedKey then output;
	run;
	%wexit:
	proc datasets nolist;
		delete qxw1 qxw2 qxw3;
	quit;
%mend;
It seems that PGStat's program is not designed to handle more than one prescription of a given drug at the same time for a given ID.
Try running the program with the following input, where two prescriptions of drug B are overlapping:
data have;
input ID	DRUG $	START_DT :mmddyy.	DAYS_SUPP	END_DT :mmddyy.;
datalines;
1	A	2/17/10	30	3/19/10
1	B	4/5/10	30  5/5/10
1	B	5/1/10	30	6/1/10
1	C	5/1/10	60	7/1/10
;
run;
It is not easy to modify the code to handle this situation. But I have a quick fix, if you consider overlapping prescriptions of the same drug as a different problem. The intervals can be collapsed, so a drug's duration is an interval with at least one precription of the same drug.
I think this will give the results you expect:
data have0;
input ID	DRUG $	START_DT :mmddyy.	DAYS_SUPP	END_DT :mmddyy.;
datalines;
1	A	2/17/10	30	3/19/10
1	B	4/5/10	30  5/5/10
1	B	5/1/10	30	6/1/10
1	C	5/1/10	60	7/1/10
;
run;
%RensInterval(have0,have,datefirst=START_DT,datelast=END_DT);
data days;
set have;
<etc - rest of code>
The macro is attached as a sas file. All comments are in danish, but the use of arguments should be obvious.
Note that it checks for identical records except dates, so in this case, prescriptions of the same drug with different durations will not be seen as the same drug. If that's a problem, drop the duration variable from input before calling the macro and recreate it afterwards as (endDate-startDate)+1.
Dear EriK, I couldn't find the macro?
Sorry, I selected it as attachment, but didn't check. This time I use a code box:
/******************************************************************************/
/* RensInterval 		                        erlu   29.1.2013      */
/*                                                                            */
/* Makroen anvendes til rensning af SAS-datasæt, hvor de enkelte records      */
/* udtrykker tidsintervaller angivet ved en startdato og en slutdato.         */
/*                                                                            */
/* Makroen læser et angivet datasæt og danner et outputdatasæt, hvor          */
/* flere poster med tidsintervaller i umiddelbar fortsættelse eller overlap-  */
/* pende er komprimeret til eet interval.                                     */
/*                                                                            */
/* Makroen checker, om datasættet indeholder flere poster, hvor alle          */
/* variabelværdier er fælles bortset fra tidsintervallet, og hvor             */
/* tidsintervallerne er fortløbende eller overlappende, således at startdato  */
/* på en følgende post er <= dagen efter slutdato på den foregående post.     */
/* De resulterende intervalvariable får samme format som formatet på den      */
/* variabel, der er angivet son datefirst.                                    */
/*                                                                            */
/* Kaldsargumenter:                                                           */
/*                                                                            */
/* Positionelle:                                                              */
/*      Inputdatasæt	                                                      */
/*                                                                            */
/*      Outputdatasæt                                                         */
/*                                                                            */
/* Keyword:                                                                   */
/*      Datefirst=datefirst  Anvendes kun hvis varnavn ikke er datefirst      */
/*                                                                            */
/*      Datelast=datelast    Anvendes kun hvis varnavn ikke er datelast       */
/*                                                                            */
/******************************************************************************/
%macro RensInterval(thisinds,thisoutds,datefirst=DateFirst,datelast=DateLast);
	%local dropstmt datofmt SelectList WhereList datofmtstr;
	%* Check eksistens af inputdatasæt;
	%if %sysfunc(exist(&thisinds)) = 0 %then %do; 
		%put ERROR: Inputdataset &thisinds findes ikke;
		%goto wexit;
	%end;
	%let GeneratedKey = qxzworkid;
	%let dropstmt = drop &GeneratedKey;
	%let ContentDs = qxw1;
	proc datasets nolist;
		delete qxw1 qxw2 qxw3;
	quit;
	%* Dan datasæt med variabelspecifikationer;
	proc contents data=&thisinds noprint short out=&ContentDs;
	run;
	%* initier datoformat;
	%let datofmt = %str();
	%* dan makrovariable ud fra proc contents;
	data _null_; set &ContentDs end=eof;
		length SelectList WhereList $4096 fmt $60;
		retain SelectList WhereList;
		retain DatoVarChk 0;
		if lowcase(name) NE lowcase("&datefirst") and lowcase(name) NE lowcase("&datelast") then do;
			SelectList = catx(',', SelectList, name);
			WhereList = catx(' AND ', wherelist, 'a.'||trim(name)||'=b.'||trim(name));
		end;
		%* fang format på datefirst;
		if lowcase(name) = lowcase("&datefirst") then do;
			if lowcase(format) = 'date' and formatl = 0 then fmt = 'DATE.';
			else if lowcase(format) = 'datetime' and formatl = 0 then fmt = 'DATETIME.';
			else do;
				fmt = trim(format)||strip(put(formatl,8.0))||'.';
				if formatd > 0 then fmt = trim(fmt)||strip(put(formatd,8.0)); 
			end;
			call symputx('datofmt',fmt);
		end;
		%* fang de 2 datointervalvariable i datasættet;
		if lowcase(name) = lowcase("&datefirst") then DatoVarChk = DatoVarChk + 1;
		else if lowcase(name) = lowcase("&datelast") then DatoVarChk = DatoVarChk + 1;
		if eof then do;
			call symput('SelectList',trim(SelectList));
			call symput('WhereList',trim(WhereList));
			call symputx('DatoVarChk',trim(DatoVarChk));
		end;
	run;
	%* stop ved manglende datovariable;
	%if &DatoVarChk ne 2 %then %do;
		%put ERROR: Manglende intervalvariable - &datefirst eller &datelast ikke fundet i datasæt: &thisinds;
		%goto wexit;
	%end;
	%* dan datoformatstreng til at sætte format op outputdatoer;
	%if &datofmt = %str(0.) %then %let datofmtstr = %str();
	%else %do;
		%let datofmtstr = format qxnewfirst qxnewlast &datofmt;
		%put NOTE: Format til output-intervalvariable: &datofmt;
	%end;
	%put NOTE: SelectList = &SelectList;
	%put NOTE: WhereList = &WhereList;
	%* dan id-nummereret distinct liste over variable (undtagen datovariable);
	proc sql; 
		create table qxw2 as 
			select 
				monotonic() as &GeneratedKey,
				&SelectList 
			from (select distinct &SelectList from &thisinds as b) as a
			order by &GeneratedKey;
	quit;
	%* flet id-numrene ind på hele datasættet og sorter;
	proc sql;
		create table qxw3 as 
			select 
				a.*,
				b.&GeneratedKey
			from &thisinds as a, qxw2 as b
			where &WhereList
			order by &GeneratedKey, &datefirst, &datelast;
	quit;
	%* Dan resulterende intervaller;
	data &thisoutds(rename=(qxnewfirst=&datefirst qxnewlast=&datelast)); 
		set qxw3; by &GeneratedKey;
		retain qxnewfirst qxnewlast;
		&datofmtstr;
		drop &datefirst &datelast;
		&dropstmt;
		if first.&GeneratedKey then do;
			qxnewfirst = &datefirst;
			qxnewlast = &datelast;
		end;
		else do;
			if &datefirst > qxnewlast + 1 then do; 
				output;
				qxnewfirst = &datefirst;
				qxnewlast = &datelast;
			end;
			else if &datelast > qxnewlast then qxnewlast = &datelast;
		end;
		if last.&GeneratedKey then output;
	run;
	%wexit:
	proc datasets nolist;
		delete qxw1 qxw2 qxw3;
	quit;
%mend;
I don't have time to fully develop this.
However, when I have faced a similar problem in the past, I have taken advantage of SQL's Cartesian product:
data have;
input ID DRUG $ START_DT :date9. DAYS_SUPP END_DT :date9.;
format start_dt end_dt date9.;
datalines;
1  A  17FEB2010 30 19MAR2010
1  B  05APR2010 30 05MAY2010
1  B  01MAY2010 30 01JUN2010
1  C  01MAY2010 60 01JUL2010
;
run;
* create a surrogate key ;
data have;
   sk+1;
   set have;
run;
proc sql;
   create table crossjoin as
   select
      a.id        as a_id
     ,b.id        as b_id
     ,a.drug      as a_drug
     ,b.drug      as b_drug
     ,a.start_dt  as a_start_dt
     ,b.start_dt  as b_start_dt
     ,a.end_dt    as a_end_dt
     ,b.end_dt    as b_end_dt
     ,a.days_supp as a_days_supp
     ,b.days_supp as b_days_supp
   from
      have a
   cross join
      have b
   where
      a.sk ^= b.sk
   ;
quit;
data want;
   set crossjoin;
   where b_start_dt between a_start_dt and a_end_dt;
run;
Carefully review the pattern in the cross join output, and pick out your date overlaps.
I think I'm close, but there may be duplicates where you need to augment the where clause. Such as the same data, but with the drug order reversed.
Once you have only the records you want, it should be easy to transform them into your desired output.
Note the where clause could be combined with the cross join; I've coded it this way for easier debugging of the date crossing patterns.
If your data is huge this could have poor performance: 1000 source records cross joined = 1,000,000 records, but most of them would get filtered out by the where clause.
Hope this helps...
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.
