Hello all,
I am applying proc univariate to find 60th percentile for each variable, i want to find those observations that have at least 3 values less than 60th percentile, how can i modify the dataset sub? appreciate for any help.
proc univariate data=mydata ;
VAR v1 v2 v3 v4 ;
output out=output1 pctlpts = 60
pctlpre = v1 v2 v3 v4
pctlname = _60 ;
run;
data sub;
if _n_ eq 1 then set output1;
set mydata;
if
v1=<v1_60 or v2=<v2_60 or v3=<v3_60 or v4=<v4_60 ;
run;
If V1 through V4 never have missing values then this should work:
data sub;
if _n_ eq 1 then set output1;
set mydata;
if (v1=<v1_60)+(v2=<v2_60)+(v3=<v3_60)+(v4=<v4_60)>=3;
run;
The expression (v1<=v1_60) generates a zero (false) or 1 (true). You want at least 3 trues.
Now if v1..v4 can have missing values, then:
data sub;
if _n_ eq 1 then set output1;
set mydata;
if (min(v1,v1_60)=v1)+(min(v2,v2_60)=v3)+(min(v3,v3_60)=v3)+(min(v4,v4_60)=v4) >=3;
run;
If V1 through V4 never have missing values then this should work:
data sub;
if _n_ eq 1 then set output1;
set mydata;
if (v1=<v1_60)+(v2=<v2_60)+(v3=<v3_60)+(v4=<v4_60)>=3;
run;
The expression (v1<=v1_60) generates a zero (false) or 1 (true). You want at least 3 trues.
Now if v1..v4 can have missing values, then:
data sub;
if _n_ eq 1 then set output1;
set mydata;
if (min(v1,v1_60)=v1)+(min(v2,v2_60)=v3)+(min(v3,v3_60)=v3)+(min(v4,v4_60)=v4) >=3;
run;
Bit of a drag but below will give you exactly three values lower the 60th percentile (SASHELP.CARS used):
data cars (keep= make invoice enginesize cylinders horsepower);
set sashelp.cars;
run;
proc univariate data=cars;
VAR invoice enginesize cylinders horsepower;
output out=output1 pctlpts = 60
pctlpre = invoice enginesize cylinders horsepower
pctlname = _60;
run;
data sub_invoice (keep=make invoice invoice_60)
sub_enginesize (keep=make enginesize enginesize_60)
sub_cylinders (keep=make cylinders cylinders_60)
sub_horsepower (keep=make horsepower horsepower_60);
if _n_ eq 1 then
set output1;
set cars;
if invoice < invoice_60 then
output sub_invoice;
if enginesize < enginesize_60 then
output sub_enginesize;
if cylinders < cylinders_60 then
output sub_cylinders;
if horsepower < horsepower_60 then
output sub_horsepower;
run;
%macro need(in=,out=,byvar=,idvar=);
proc sql outobs=3 nowarn;
create table &out. as
select distinct *
from &in.
order by &byvar. desc,&idvar.;
drop table &in.;
quit;
proc sort data=&out.;
by &idvar.;
run;
%mend;
%need(in=sub_invoice,out=invoice_out,byvar=invoice,idvar=make);
%need(in=sub_enginesize,out=enginesize_out,byvar=enginesize,idvar=make);
%need(in=sub_cylinders,out=cylinders_out,byvar=cylinders,idvar=make);
%need(in=sub_horsepower,out=horsepower_out,byvar=horsepower,idvar=make);
data final;
format make $25.;
set invoice_out enginesize_out cylinders_out horsepower_out;
run;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
ANOVA, or Analysis Of Variance, is used to compare the averages or means of two or more populations to better understand how they differ. Watch this tutorial for more.
Find more tutorials on the SAS Users YouTube channel.