Solved: Re: Extract multiple criteria per subject

tsureshinvites · Posted 01-03-2020 07:35 AM

Objective;

I need to pick the subject(s) who has failed only one test(IE) and passed only one test (ex).

then output, both condition should match for a subject.

data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;

output required;

Subject_id	Test_Code	Result
1001	IE01	Fail
1001	EX01	Pass

My code;

novinosrin · Posted 01-03-2020 07:46 AM

data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;

proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;

View solution in original post

novinosrin · Posted 01-03-2020 07:46 AM

data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;

proc sql;
create table want as
select *
from raw_data
group by subject_id,substr(test_code,1,2)
having sum(Result='Fail')=1 and Result='Fail' or sum(Result='Pass')=1 and Result='Pass';
quit;

tsureshinvites · Posted 01-03-2020 08:08 AM

Thanks a lot for your wonderful support, but small request, could you please write in datastep.

Thanks a lot.

novinosrin · Posted 01-03-2020 08:36 AM

Hi @tsureshinvites Here is simple Datastep approach that you will easily understand. Please review the comments



data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;

/*Get the group */

data temp;
set raw_data;
t=substr(test_code,1,2);
run;

/*Sort for by group processing*/
proc sort data=temp;
by Subject_id t;
run;
/*Flag the observations based on the condition for each by group*/
data temp1;
set temp;
by Subject_id t;
if first.t then do;
f1=.;
f2=.;
end;
if Result='Fail' then f1+1;
else if Result='Pass' then f2+1;
if last.t;
keep subject_id t f:;
run;

/*Merge back the flagged result from the previous with the original and filter*/
data want;
merge temp temp1;
by Subject_id t;
if Result='Fail' and f1=1 or Result='Pass' and f2=1;
drop f: t;
run;

If you are learning, the above is the easiest to follow

ed_sas_member · Posted 01-03-2020 08:33 AM

Hi @tsureshinvites

Here is a way to do that using data / proc step

But in my opinion, proc sql remains the simplest way to do that.

proc freq data=raw_data noprint;
	table Subject_id * Result / out=stat_IE (where=(count=1) drop=percent);
	where substr(Test_code,1,2) = "IE" and Result = "Fail";
run;

/* IE */
data stat_IE2;
	if _n_ = 1 then do;
		declare hash h (dataset:'stat_IE');
		h.definekey('Subject_id','Result'); 
		h.definedone();
	end;
	set raw_data;
	where substr(Test_code,1,2) = "IE";
	if h.find() = 0 then output;
run;

/* EX */

proc freq data=raw_data noprint;
	table Subject_id * Result / out=stat_EX (where=(count=1) drop=percent);
	where substr(Test_code,1,2) = "EX" and Result = "Pass";
run;

data stat_EX2;
	if _n_ = 1 then do;
		declare hash h (dataset:'stat_EX');
		h.definekey('Subject_id','Result'); 
		h.definedone();
	end;
	set raw_data;
	where substr(Test_code,1,2) = "EX";
	if h.find() = 0 then output;
run;

/* Final dataset : dataset WANT*/
data want;
	if _n_ = 1 then do;
		declare hash h1 (dataset:'stat_EX2');
		h1.definekey('Subject_id'); 
		h1.definedone();
		declare hash h2 (dataset:'stat_IE2');
		h2.definekey('Subject_id'); 
		h2.definedone();
	end;

	set stat_EX2 stat_IE2;
	if h1.find() = 0 and h2.find() = 0 then output;
run;

Ksharp · Posted 01-03-2020 08:45 AM

Assuming the data has been sorted .

data raw_data;
input Subject_id $ Test_Code $ Result $;
datalines;
1001 IE01 Fail
1001 IE02 Pass
1001 IE03 Pass
1001 EX01 Pass
1001 EX01 Fail
1001 EX01 Fail
1003 IE01 Fail
1003 IE02 Fail
1003 EX01 Pass
1003 EX02 Pass
;

data want;
one=0;two=0;
 do until(last.Subject_id);
  set raw_data;
  by Subject_id;
  if Test_code =: 'IE' and Result='Fail' then one+1;
  if Test_code =: 'EX' and Result='Pass' then two+1;
 end;
 do until(last.Subject_id);
  set raw_data;
  by Subject_id;
  if one=1 and two=1 then do;
   if Test_code =: 'IE' and Result='Fail' or  Test_code =: 'EX' and Result='Pass' then output;
  end;
 end;
drop one two;
run;

Extract multiple criteria per subject

Re: Extract multiple criteria per subject

Re: Extract multiple criteria per subject

Re: Extract multiple criteria per subject

Re: Extract multiple criteria per subject

Re: Extract multiple criteria per subject

Re: Extract multiple criteria per subject

SAS Innovate 2025: Call for Content