Objective;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
output required;
Subject_id | Test_Code | Result |
1001 | IE01 | Fail |
1001 | EX01 | Pass |
My code;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;
Thanks a lot for your wonderful support, but small request, could you please write in datastep.
Thanks a lot.
Hi @tsureshinvites Here is simple Datastep approach that you will easily understand. Please review the comments
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
/*Get the group */
data temp;
set raw_data;
t=substr(test_code,1,2);
run;
/*Sort for by group processing*/
proc sort data=temp;
by Subject_id t;
run;
/*Flag the observations based on the condition for each by group*/
data temp1;
set temp;
by Subject_id t;
if first.t then do;
f1=.;
f2=.;
end;
if Result='Fail' then f1+1;
else if Result='Pass' then f2+1;
if last.t;
keep subject_id t f:;
run;
/*Merge back the flagged result from the previous with the original and filter*/
data want;
merge temp temp1;
by Subject_id t;
if Result='Fail' and f1=1 or Result='Pass' and f2=1;
drop f: t;
run;
If you are learning, the above is the easiest to follow
Here is a way to do that using data / proc step
But in my opinion, proc sql remains the simplest way to do that.
proc freq data=raw_data noprint;
table Subject_id * Result / out=stat_IE (where=(count=1) drop=percent);
where substr(Test_code,1,2) = "IE" and Result = "Fail";
run;
/* IE */
data stat_IE2;
if _n_ = 1 then do;
declare hash h (dataset:'stat_IE');
h.definekey('Subject_id','Result');
h.definedone();
end;
set raw_data;
where substr(Test_code,1,2) = "IE";
if h.find() = 0 then output;
run;
/* EX */
proc freq data=raw_data noprint;
table Subject_id * Result / out=stat_EX (where=(count=1) drop=percent);
where substr(Test_code,1,2) = "EX" and Result = "Pass";
run;
data stat_EX2;
if _n_ = 1 then do;
declare hash h (dataset:'stat_EX');
h.definekey('Subject_id','Result');
h.definedone();
end;
set raw_data;
where substr(Test_code,1,2) = "EX";
if h.find() = 0 then output;
run;
/* Final dataset : dataset WANT*/
data want;
if _n_ = 1 then do;
declare hash h1 (dataset:'stat_EX2');
h1.definekey('Subject_id');
h1.definedone();
declare hash h2 (dataset:'stat_IE2');
h2.definekey('Subject_id');
h2.definedone();
end;
set stat_EX2 stat_IE2;
if h1.find() = 0 and h2.find() = 0 then output;
run;
Assuming the data has been sorted .
data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;
data want;
one=0;two=0;
do until(last.Subject_id);
set raw_data;
by Subject_id;
if Test_code =: 'IE' and Result='Fail' then one+1;
if Test_code =: 'EX' and Result='Pass' then two+1;
end;
do until(last.Subject_id);
set raw_data;
by Subject_id;
if one=1 and two=1 then do;
if Test_code =: 'IE' and Result='Fail' or Test_code =: 'EX' and Result='Pass' then output;
end;
end;
drop one two;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.