/*You do not need to do this. I have placed this here to simulate the D_TEST library you have in your environment*/
%let dir = %sysfunc(dcreate(test, %sysfunc(pathname(work))));
/*You do not need to do this. I have placed this here to simulate the D_TEST library you have in your environment*/
libname d_test base "&dir";
/*Clean work library*/
proc datasets lib=work kill mt=data nolist nodetails nowarn;
quit;
/*Create data in D_TEST library.*/
data d_test.A;
infile datalines;
input Subjid $ Start_date ddmmyy10.;
format start_date mmddyy10.;
datalines;
Subj1 01/01/2001
Subj2 02/02/2002
Subj3 03/03/2003
Subj4 04/04/2004
;
run;
/*Create data in D_TEST library.*/
data d_test.B;
infile datalines;
input Subjid $ end_date ddmmyy10.;
format end_date mmddyy10.;
datalines;
Subj1 01/01/2021
Subj2 02/02/2022
Subj3 03/03/2023
Subj4 04/04/2024
;
run;
/*Create data in D_TEST library.*/
Data d_test.C;
infile datalines;
input Subjid $ lab_date ddmmyy10.;
format lab_date mmddyy10.;
datalines;
Subj1 01/01/1999
Subj2 02/02/2001
Subj3 03/03/2001
Subj4 04/04/2025
;
run;
/*Create data in D_TEST library.*/
data d_test.D;
infile datalines ;
input Subjid $ Test_date ddmmyy10.;
format test_date edit_date mmddyy10.;
edit_date = test_date;
datalines;
Subj1 01-01-2011
Subj2 01-02-2011
Subj3 03-03-2025
Subj4 04-04-2014
;
run;
/*Create Config data for use in macro. EXCLUDE A and B Since these refer to the reference datasets*/
proc contents data = d_test._all_ out=config(rename=(memname = data name = varname) where=(format = 'MMDDYY' and data not in ("A" "B"))) nodetails memtype=data;
run;
/*Sort config by data for by processing*/
proc sort data=config;
by data;
run;
options mprint symbolgen;
%macro check_for_violation;
%local i obs operator j;
proc sql;
create table all_data as
select memname from dictionary.tables
/*EXCULDE A and B from since Since these refer to the reference datasets*/
where libname='D_TEST' and memname not in ("A" "B");
%let obs = &sqlObs;
%do i=1 %to &sqlObs;
%local ds&i;
%end;
select memname into: ds1- from all_data;
quit;
data _null_;
set config;
by data;
if first.data then do;
size=0;
varcnt=0;
end;
size+1;
varcnt+1;
call symputx(catx('_', data, varcnt), varname,'l');
if last.data then call symputx(catx('_', data, 'size'), varcnt,'l');
run;
%let operator=;
proc sql;
%do i=1 %to &obs;
create table &&ds&i.._violate as
select l.*
from D_TEST.&&ds&i as l left join
D_TEST.a as r
on l.subjid=r.subjid left join D_TEST.b as m
on r.subjid=m.subjid
where
%do j=1 %to %unquote(%nrstr(&)&&ds&i.._size);
&operator
(. < l.%unquote(%nrstr(&)&&ds&i.._&j) < r.start_date) or (l.%unquote(%nrstr(&)&&ds&i.._&j) > m.end_date > .)
%let operator=or;
%end;
;
%let operator=;
%end;
quit;
%mend check_for_violation;
%check_for_violation;
Can you try this?
data A;
infile datalines;
input Subjid $ Start_date ddmmyy10.;
format start_date e8601da.;
datalines;
Subj1 01/01/2001
Subj2 02/02/2002
Subj3 03/03/2003
Subj4 04/04/2004
;
run;
data B;
infile datalines;
input Subjid $ end_date ddmmyy10.;
format end_date e8601da.;
datalines;
Subj1 01/01/2021
Subj2 02/02/2022
Subj3 03/03/2023
Subj4 04/04/2024
;
run;
Data C;
infile datalines;
input Subjid $ lab_date ddmmyy10.;
format lab_date e8601da.;
datalines;
Subj1 01/01/1999
Subj2 02/02/2001
Subj3 03/03/2001
Subj4 04/04/2025
;
run;
data D;
infile datalines;
input Subjid $ Test_date ddmmyy10.;
format test_date e8601da.;
datalines;
Subj1 01/01/2011
Subj2 02/02/2011
Subj3 03/03/2025
Subj4 04/04/2014
;
run;
proc sql;
create table a_b as
select a.*
,b.end_date
from a natural join b;
quit;
options mprint;
%macro checkForViolation(datasets=C#D,dates=lab_date#test_date);
%local i;
%do i=1 %to %sysfunc(countw(&datasets, #));
proc sql;
create table %scan(&datasets, &i, #)_violate as
select l.*
from %scan(&datasets, &i, #) as l left join a_b as r
on l.subjid=r.subjid
where l.%scan(&dates, &i, #) < r.start_date or l.%scan(&dates, &i, #) > r.end_date;
quit;
%end;
%mend checkForViolation;
%checkForViolation;
Is this what you mean
You wrote: " I have about 52 datasets with dates to be checked , most has different date variable"
Do you have a list of those files? or maybe they are located in a single directory/libname?
Do you have any rule for recognising that date variables? e.g., the end with "...DT" or "...date" ? Or maybe you have a list of those variables?
Bart
All datasets are in the same directory/libname, each dataset have different date variable that I need to check against start date and end date.
I am trying to find out all the subjects /records where the date entered is before start date OR after end date. Start date and end date comes from 2 different datasets and those 2 dates are checked with all dates entered in other 52 datasets in the same directory/libname.
Hi @SAS-PD , Did you try the solution I gave you above? Also, if all these datasets are in the same library, does that mean you have 54 datasets? 2 for the start and end dates and 52 to check against. Lastly as mentioned by @yabwon do the date variables have common prefix or suffixes we can use to write logic to easily select them dynamically?
Hi Mazi, thank you for your response! above code did work and fetched records that did not fall between start and end date. 🙂 There are total of 52 datasets in the library including 2 with start and end date. There is no common prefix or suffix for a date variable, I need to check all the dates that are present in each dataset. For data cleaning I am trying to check if there are any records that has date not between start and end date in the same library.
proc datasets lib=work kill mt=data nolist nodetails nowarn;
quit;
data A;
infile datalines;
input Subjid $ Start_date ddmmyy10.;
format start_date e8601da.;
datalines;
Subj1 01/01/2001
Subj2 02/02/2002
Subj3 03/03/2003
Subj4 04/04/2004
;
run;
data B;
infile datalines;
input Subjid $ end_date ddmmyy10.;
format end_date e8601da.;
datalines;
Subj1 01/01/2021
Subj2 02/02/2022
Subj3 03/03/2023
Subj4 04/04/2024
;
run;
Data C;
infile datalines;
input Subjid $ lab_date ddmmyy10.;
format lab_date e8601da.;
datalines;
Subj1 01/01/1999
Subj2 02/02/2001
Subj3 03/03/2001
Subj4 04/04/2025
;
run;
data D;
infile datalines ;
input Subjid $ Test_date ddmmyy10.;
format test_date edit_date e8601da.;
edit_date = test_date;
datalines;
Subj1 01-01-2011
Subj2 01-02-2011
Subj3 03-03-2025
Subj4 04-04-2014
;
run;
data config;
length data varname $32;
infile datalines dlm=',' missover;
input data $ varname $;
/*Add all datasets and each variable to check against in the config dataset*/
datalines;
C,lab_date
D,test_date
D,edit_date
;
run;
options mprint symbolgen;
%macro check_for_violation;
%local i obs operator j;
proc sql;
create table all_data as
select memname from dictionary.tables
where libname='WORK' and memname not in ("A" "B" "CONFIG" "ALL_DATA");
%let obs = &sqlObs;
%do i=1 %to &sqlObs;
%local ds&i;
%end;
select memname into: ds1- from all_data;
quit;
data _null_;
set config;
by data notsorted;
if first.data then do;
size=0;
varcnt=0;
end;
size+1;
varcnt+1;
call symputx(catx('_', data, varcnt), varname,'l');
if last.data then call symputx(catx('_', data, 'size'), varcnt,'l');
run;
%let operator=;
proc sql;
%do i=1 %to &obs;
create table &&ds&i.._violate as
select l.*
from &&ds&i as l left join
a as r
on l.subjid=r.subjid left join b as m
on r.subjid=m.subjid
where
%do j=1 %to %unquote(%nrstr(&)&&ds&i.._size);
&operator
(. < l.%unquote(%nrstr(&)&&ds&i.._&j) < r.start_date) or (l.%unquote(%nrstr(&)&&ds&i.._&j) > m.end_date > .)
%let operator=or;
%end;
;
%let operator=;
%end;
quit;
%mend check_for_violation;
%check_for_violation;
Hi @SAS-PD , I updated the code to use a config dataset to make it easier to specify datasets and their variables. All you would need to do is add each of your datasets to the config and the variables related to that dataset that you want checked. Since your variables do not follow any convention as you said earlier, this is the best I could come up with. I hope it helps.
Is there a way to use dataset name and date variable from
proc contents data = d_test._all_ out=content; run;
data content1;
set content;
where format = 'MMDDYY' and name ^= 'EDIT_DATE';
keep libname memname name label n;
n+1;
run;
Hi Mazi, may I please have the updated version?
/*You do not need to do this. I have placed this here to simulate the D_TEST library you have in your environment*/
%let dir = %sysfunc(dcreate(test, %sysfunc(pathname(work))));
/*You do not need to do this. I have placed this here to simulate the D_TEST library you have in your environment*/
libname d_test base "&dir";
/*Clean work library*/
proc datasets lib=work kill mt=data nolist nodetails nowarn;
quit;
/*Create data in D_TEST library.*/
data d_test.A;
infile datalines;
input Subjid $ Start_date ddmmyy10.;
format start_date mmddyy10.;
datalines;
Subj1 01/01/2001
Subj2 02/02/2002
Subj3 03/03/2003
Subj4 04/04/2004
;
run;
/*Create data in D_TEST library.*/
data d_test.B;
infile datalines;
input Subjid $ end_date ddmmyy10.;
format end_date mmddyy10.;
datalines;
Subj1 01/01/2021
Subj2 02/02/2022
Subj3 03/03/2023
Subj4 04/04/2024
;
run;
/*Create data in D_TEST library.*/
Data d_test.C;
infile datalines;
input Subjid $ lab_date ddmmyy10.;
format lab_date mmddyy10.;
datalines;
Subj1 01/01/1999
Subj2 02/02/2001
Subj3 03/03/2001
Subj4 04/04/2025
;
run;
/*Create data in D_TEST library.*/
data d_test.D;
infile datalines ;
input Subjid $ Test_date ddmmyy10.;
format test_date edit_date mmddyy10.;
edit_date = test_date;
datalines;
Subj1 01-01-2011
Subj2 01-02-2011
Subj3 03-03-2025
Subj4 04-04-2014
;
run;
/*Create Config data for use in macro. EXCLUDE A and B Since these refer to the reference datasets*/
proc contents data = d_test._all_ out=config(rename=(memname = data name = varname) where=(format = 'MMDDYY' and data not in ("A" "B"))) nodetails memtype=data;
run;
/*Sort config by data for by processing*/
proc sort data=config;
by data;
run;
options mprint symbolgen;
%macro check_for_violation;
%local i obs operator j;
proc sql;
create table all_data as
select memname from dictionary.tables
/*EXCULDE A and B from since Since these refer to the reference datasets*/
where libname='D_TEST' and memname not in ("A" "B");
%let obs = &sqlObs;
%do i=1 %to &sqlObs;
%local ds&i;
%end;
select memname into: ds1- from all_data;
quit;
data _null_;
set config;
by data;
if first.data then do;
size=0;
varcnt=0;
end;
size+1;
varcnt+1;
call symputx(catx('_', data, varcnt), varname,'l');
if last.data then call symputx(catx('_', data, 'size'), varcnt,'l');
run;
%let operator=;
proc sql;
%do i=1 %to &obs;
create table &&ds&i.._violate as
select l.*
from D_TEST.&&ds&i as l left join
D_TEST.a as r
on l.subjid=r.subjid left join D_TEST.b as m
on r.subjid=m.subjid
where
%do j=1 %to %unquote(%nrstr(&)&&ds&i.._size);
&operator
(. < l.%unquote(%nrstr(&)&&ds&i.._&j) < r.start_date) or (l.%unquote(%nrstr(&)&&ds&i.._&j) > m.end_date > .)
%let operator=or;
%end;
;
%let operator=;
%end;
quit;
%mend check_for_violation;
%check_for_violation;
Can you try this?
Thank you very much, Mazi
It worked like a charm 🙂 Appreciate your time and efforts!
Are you sure you want it to be:
"subjects with date(s) before dataset A start date and after dataset B end date."
I think it should be:
"subjects with date(s) before dataset A start date OR after dataset B end date."
?
Bart
Is subjid unique in all datasets?
What are the names for the date variables in all the other datasets?
Is subjid character or numeric?
Not that the last question would be unnecessary if you had posted your example data in the proper manner - working DATA steps with DATALINES.
Hi Kurt, thank you for your response!
Is subjid unique in all datasets? -->yes
What are the names for the date variables in all the other datasets? -->does not have any common prefix or suffix
Is subjid character or numeric? --> character
Sorry about not posting working DATA steps with DATALINES...this is my first post and will keep this in mind moving forward.
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.