Objective;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
output required;
Subject_id | Test_Code | Result |
1001 | IE01 | Fail |
1001 | EX01 | Pass |
My code;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;
Thanks a lot for your wonderful support, but small request, could you please write in datastep.
Thanks a lot.
Hi @tsureshinvites Here is simple Datastep approach that you will easily understand. Please review the comments
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
/*Get the group */
data temp;
set raw_data;
t=substr(test_code,1,2);
run;
/*Sort for by group processing*/
proc sort data=temp;
by Subject_id t;
run;
/*Flag the observations based on the condition for each by group*/
data temp1;
set temp;
by Subject_id t;
if first.t then do;
f1=.;
f2=.;
end;
if Result='Fail' then f1+1;
else if Result='Pass' then f2+1;
if last.t;
keep subject_id t f:;
run;
/*Merge back the flagged result from the previous with the original and filter*/
data want;
merge temp temp1;
by Subject_id t;
if Result='Fail' and f1=1 or Result='Pass' and f2=1;
drop f: t;
run;
If you are learning, the above is the easiest to follow
Here is a way to do that using data / proc step
But in my opinion, proc sql remains the simplest way to do that.
proc freq data=raw_data noprint;
table Subject_id * Result / out=stat_IE (where=(count=1) drop=percent);
where substr(Test_code,1,2) = "IE" and Result = "Fail";
run;
/* IE */
data stat_IE2;
if _n_ = 1 then do;
declare hash h (dataset:'stat_IE');
h.definekey('Subject_id','Result');
h.definedone();
end;
set raw_data;
where substr(Test_code,1,2) = "IE";
if h.find() = 0 then output;
run;
/* EX */
proc freq data=raw_data noprint;
table Subject_id * Result / out=stat_EX (where=(count=1) drop=percent);
where substr(Test_code,1,2) = "EX" and Result = "Pass";
run;
data stat_EX2;
if _n_ = 1 then do;
declare hash h (dataset:'stat_EX');
h.definekey('Subject_id','Result');
h.definedone();
end;
set raw_data;
where substr(Test_code,1,2) = "EX";
if h.find() = 0 then output;
run;
/* Final dataset : dataset WANT*/
data want;
if _n_ = 1 then do;
declare hash h1 (dataset:'stat_EX2');
h1.definekey('Subject_id');
h1.definedone();
declare hash h2 (dataset:'stat_IE2');
h2.definekey('Subject_id');
h2.definedone();
end;
set stat_EX2 stat_IE2;
if h1.find() = 0 and h2.find() = 0 then output;
run;
Assuming the data has been sorted .
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
data want;
one=0;two=0;
do until(last.Subject_id);
set raw_data;
by Subject_id;
if Test_code =: 'IE' and Result='Fail' then one+1;
if Test_code =: 'EX' and Result='Pass' then two+1;
end;
do until(last.Subject_id);
set raw_data;
by Subject_id;
if one=1 and two=1 then do;
if Test_code =: 'IE' and Result='Fail' or Test_code =: 'EX' and Result='Pass' then output;
end;
end;
drop one two;
run;
Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.