BookmarkSubscribeRSS Feed
dustychair
Pyrite | Level 9

Hi all,

I am running the attached code for grade 3 multiple-form data. After 3 forms I get a "disk space full" message and then I kill the job. Every time I run this code only for 3 forms to avoid such an error. However, I have 15 forms and 9 more grades. I think my code is producing many files each time. Is there a way to improve this code to save space?


libname IN "P:\PROJECTS\SS\2023\ST\INPUTS\1_IRT_FILES\";
%macro imp (sheet);
proc import out=&sheet.
datafile="P:\PROJECTS\SS\2023\ST\INPUTS\CUTS.xlsx"
dbms=xlsx replace;
sheet="&sheet.";

		run;
		%mend imp;
		%imp(SUM);
		%imp(CLM);
%macro inpt(d,g,f);
data const_sum_&d._&g._&f.;
set SUM;
if test="&d.&g.";
run;
%mend (inpt);
%inpt(MAT,03,1);

%macro map (d,g,t,f);
data maps_&d.&g._&t._&f.;
set in.ss23_irt_&d.&g.;
if find(UPCASE(Form_ID),"&t.");
run;
%mend (map);
%map (MAT,03, FHEB,1);
%map(MAT,03, FCEB,1);
%map(MAT,03, FHSB,1);
%map(MAT,03, FNEB,1);
%map(MAT,03, FOEB1,1);
%map(MAT,03, FOEB2,1);
%map(MAT,03, FOSB,1);
%map(MAT,03, FPEB,1);
%map(MAT,03,FPSB,1);
%map(MAT,03, FREB,1);
%map(MAT,03, FTEB1,1);
%map(MAT,03, FTEB2,1);
%map(MAT,03, FTSB,1);

%macro mat (d,g,t,f);

data input_&d._&g._&t._&f.;
set maps_&d.&g._&t._&f.;
run;

data irt_&d._&g._&t._&f.; 
set input_&d._&g._&t._&f.;
do theta=-15 to 15 by .0001; 
output; 
end;
run;


data resp_curve_&d._&g._&t._&f.;
	set irt_&d._&g._&t._&f.;

	weight=weight1;
	const=1.7;
if max_points=1 then do;
num1=exp((const*irt_A)*(theta-irt_B));
probability1=num1/(num1+1);
exp_scr_tt=probability1*1;
exp_scr_tt_w=probability1*1;
si2=1*probability1;
	end;

else if max_points=2 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr_tt=	 exp_scr0+ exp_scr1+ exp_scr2;
exp_scr_tt_w=exp_scr0+ exp_scr1+ exp_scr2;
si2=1*probability1+4*probability2;
end;


else if max_points=3  then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3;
exp_scr_tt_w=(exp_scr0+ exp_scr1+exp_scr2+exp_scr3)*weight;
si2=1*probability1+4*probability2+9*probability3;
end;

else if max_points=4  then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr_tt=	 (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4);
exp_scr_tt_w=	 (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4)*weight;
si2=1*probability1+4*probability2+9*probability3+16*probability4;
end;


else if max_points=5 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num5=(const*irt_A)*(theta-irt_B+irt_step6);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr5=probability5*5;

exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
exp_scr_tt_w=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5;
end;

else if max_points=6 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num5=(const*irt_A)*(theta-irt_B+irt_step6);
num6=(const*irt_A)*(theta-irt_B+irt_step7);

num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ 
exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5)+exp(num0+num1+num2+num3+num4+num5+num6);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
probability6=(exp(num0+num1+num2+num3+num4+num5+num6))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr5=probability5*5;
exp_scr6=probability6*6;
exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
exp_scr_tt_w=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5+36*probability6;

end;
Li=((1.7*irt_A)**2)*(si2-(exp_scr_tt)**2);

	run;
proc sql;
	create table TCC_&d._&g._&t._&f. as 
	select distinct  theta, sum(exp_scr_tt_w) as TCC format 10.6, 
		sum(Li) as TIF format 10.6,  
		(calculated TIF)**-0.5 as Theta_CSEM format 10.6 
		from resp_curve_&d._&g._&t._&f.
	group by theta
	order by theta
	;
quit;


data newA_&d._&g._&t._&f.;
	length RawScore 8.;
	set tcc_&d._&g._&t._&f.; 
		RawScore=floor(tcc);
run;

data x_&d._&g._&t._&f.;
	set newA_&d._&g._&t._&f.;
	by rawscore;
	if first.rawscore;

run;

data y_&d._&g._&t._&f.;
	set newA_&d._&g._&t._&f. end=eof;
	by rawscore;
	if last.rawscore and eof;

	rawscore=rawscore+1;

run;

data csem_&d._&g._&t._&f.(keep=rawscore theta_csem);
	set x_&d._&g._&t._&f. y_&d._&g._&t._&f.;
run;

proc sql noprint;
	create table UpperBounda_&d._&g._&t._&f. as
   		select RawScore,  min(tcc) as UpperTCC, max(Theta_CSEM) as uppercsem from newA_&d._&g._&t._&f.
   		group by RawScore
	;

  	create table LowerBounda_&d._&g._&t._&f. as
    	select RawScore+1 as RawScore, max(tcc) as LowerTCC, max(Theta_CSEM) as LowerCSEM from newA_&d._&g._&t._&f.
   		group by RawScore
	;

  	create table Bounda_&d._&g._&t._&f. as  
   		select coalesce(a.RawScore, b.RawScore) as RawScore, UpperTCC, 
			LowerTCC, uppercsem, lowercsem, c.Theta as UpperTheta, d.Theta as LowerTheta
   		from UpperBounda_&d._&g._&t._&f. as a full join LowerBounda_&d._&g._&t._&f. as b
   		on a.RawScore=b.RawScore
                        left join newa_&d._&g._&t._&f. as c			
  		on a.UpperTCC=c.tcc
                        left join newa_&d._&g._&t._&f. as d					
   		on b.LowerTCC=d.tcc
                        left join newa_&d._&g._&t._&f. as e			
   		on a.UpperTCC=e.tcc
                        left join newa_&d._&g._&t._&f. as f					
   		on b.LowerTCC=f.tcc
	;

quit;

data newB_&d._&g._&t._&f.;
	merge bounda_&d._&g._&t._&f. csem_&d._&g._&t._&f. ;
	by rawscore;

 	if RawScore ne UpperTCC and RawScore ne LowerTCC then do;
		if not missing(UpperTheta) and not missing(LowerTheta) then do;
			m=(UpperTheta-LowerTheta)/(UpperTCC-LowerTCC);
		   	Theta=m*(RawScore-LowerTCC)+LowerTheta;
		end;
  		else do;
  			Theta=max(UpperTheta,LowerTheta);
  		end;

 	end;

	if RawScore=UpperTCC then Theta=UpperTCC;
	if RawScore=LowerTCC then Theta=LowerTCC;
/*	set x;*/

data Const_sum_&d._&g._&f. ;
set Const_sum_&d._&g._&f.;
do rawscore=0 to 60;
output;
end;

data newB2_&d._&g._&t._&f.;
set newB_&d._&g._&t._&f.  Const_sum_&d._&g._&f.;
by ;
run;

proc sql;
create table final_&d._&g._&t._&f. as 
select distinct a.*,b.SA2016,b.SB2016,b.L2,b.L3,b.L4,b.L5
from  newB_&d._&g._&t._&f. a left join  Const_sum_&d._&g._&f.  b
on a.rawscore  =b.rawscore;
quit;

data final_&d._&g._&t._&f.;
set final_&d._&g._&t._&f.;
scale_score=round((theta*SA2016)+SB2016,1.);
scale_score_ur=((theta*SA2016)+SB2016);
	csem_ss=round(theta_csem*SA2016,0.1);
	csem_ts=round(theta_csem,0.01);
	theta_n=round(theta,0.01);
	theta_csem_n=round(theta_csem,0.01);
	if theta < L2 then pl="Level 1";
	else if L2 <= theta < L3 then pl="Level 2";
	else if L3 <= theta < L4 then pl="Level 3";
	else if L4 <= theta < L5 then pl="Level 4";
	else if L5 <= theta   then pl="Level 5";
run;
data xy_&d._&g._&t._&f.;
set final_&d._&g._&t._&f. ;

			if scale_score ge 650 and scale_score le 850;
		run;

		data _null_;
			set xy_&d._&g._&t._&f.  end=eof;
				if _N_=1 and csem_ss lt 20 then call symput("loss_csem",trim(csem_ss));
				else if _N_=1 and csem_ss gt 20 then call symput("loss_csem","20");

				if eof and csem_ss lt 20 then call symput("hoss_csem",trim(csem_ss));
				else if eof and csem_ss gt 20 then call symput("hoss_csem","20");
				run;

	data final_&d._&g._v2_&t._&f.;
	set  final_&d._&g._&t._&f.;
					if scale_score lt 650 then do;
				scale_score=650;
				csem_ss=&loss_csem.;
			end;
			else if scale_score gt 850 then do;
				scale_score=850;
				csem_ss=&hoss_csem.;
			end;

			if csem_ss gt 20 then csem_ss=20;

			
keep rawscore j SA2016 SB2016 theta pl scale_score csem_ss csem_ts theta_csem theta_csem_n theta_n;
run;

	
proc export
	data=final_&d._&g._v2_&t._&f.
	file="P:\Cluster\SS\ST\2023\MAT\OUTPUT\G&g.\final_&d._&g._v2_&t._&f..xlsx"
	dbms=xlsx replace;
	run;
	 
	 
%mend (mat);
%mat (MAT,03, FHEB,1);
%mat(MAT,03, FCEB,1);
%mat(MAT,03, FHSB,1);
%mat(MAT,03, FNEB,1);
%mat(MAT,03, FOEB1,1);
%mat(MAT,03, FOEB2,1);
%mat(MAT,03, FOSB,1);
%mat(MAT,03, FPEB,1);
%mat(MAT,03,FPSB,1);
%mat(MAT,03, FREB,1);
%mat(MAT,03, FTEB1,1);
%mat(MAT,03, FTEB2,1);
%mat(MAT,03, FTSB,1);

 

 

Thanks! 

3 REPLIES 3
Patrick
Opal | Level 21

On top of using Proc Delete to remove no more required intermediary tables what really increases your data volumes 300001 fold is below data step:

data irt_&d._&g._&t._&f.;
  set input_&d._&g._&t._&f.;
  do theta=-15 to 15 by .0001;
    output;
  end;
run;

After this you've got the next data step that does some calculations and populates some new variables. ...And then you sum these new variables by theta using Proc SQL.

  proc sql;
    create table TCC_&d._&g._&t._&f. as 
      select distinct  theta, sum(exp_scr_tt_w) as TCC format 10.6, 
        sum(Li) as TIF format 10.6,  
        (calculated TIF)**-0.5 as Theta_CSEM format 10.6 
      from resp_curve_&d._&g._&t._&f.
        group by theta
          order by theta
    ;
  quit;

 

You can keep your data volumes much much lower and increase performance significantly if you combine the looping over theta creating the sums into a single data step. 

Taking a minimum change approach to your current code below what I suggest.

 

1. Remove this code

Patrick_0-1701847605793.png

2. Replace the next data step that creates table resp_curve_&d._&g._&t._&f. with the following code (untested)

Spoiler
  data TCC_&d._&g._&t._&f.;
    set maps_&d.&g._&t._&f.;
    weight=weight1;
    const=1.7;

    do theta=-15 to 15 by .0001;
      do _point=1 to _nobs;
        set set irt_&d._&g._&t._&f. nobs=_nobs point=_point;

        if max_points=1 then
          do;
            num1=exp((const*irt_A)*(theta-irt_B));
            probability1=num1/(num1+1);
            exp_scr_tt=probability1*1;
            exp_scr_tt_w=probability1*1;
            si2=1*probability1;
          end;
        else if max_points=2 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr_tt=  exp_scr0+ exp_scr1+ exp_scr2;
            exp_scr_tt_w=exp_scr0+ exp_scr1+ exp_scr2;
            si2=1*probability1+4*probability2;
          end;
        else if max_points=3  then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3;
            exp_scr_tt_w=(exp_scr0+ exp_scr1+exp_scr2+exp_scr3)*weight;
            si2=1*probability1+4*probability2+9*probability3;
          end;
        else if max_points=4  then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr_tt=  (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4);
            exp_scr_tt_w=  (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4)*weight;
            si2=1*probability1+4*probability2+9*probability3+16*probability4;
          end;
        else if max_points=5 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num5=(const*irt_A)*(theta-irt_B+irt_step6);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr5=probability5*5;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
            exp_scr_tt_w=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
            si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5;
          end;
        else if max_points=6 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num5=(const*irt_A)*(theta-irt_B+irt_step6);
            num6=(const*irt_A)*(theta-irt_B+irt_step7);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ 
              exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5)+exp(num0+num1+num2+num3+num4+num5+num6);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
            probability6=(exp(num0+num1+num2+num3+num4+num5+num6))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr5=probability5*5;
            exp_scr6=probability6*6;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
            exp_scr_tt_w=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
            si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5+36*probability6;
          end;

        Li=((1.7*irt_A)**2)*(si2-(exp_scr_tt)**2);

        TCC + exp_scr_tt_w;
        TIF + Li;

      end;

      Theta_CSEM=TIF**-0.5;
      output;

      call missing(Li, TCC, TIF, si2, of num:, of probability:, of exp_scr:);
      keep theta TCC TIF Theta_CSEM;
      format TCC TIF Theta_CSEM format 10.6;
    end;
    stop;
  run;

3. Remove the SQL as summing is now already covered with above data step. And because the data step loop over theta also the sort order will be the same.

Patrick_1-1701847837464.png

 

There would be many more opportunities to streamline your code but above change will make by far the biggest impact in regards of data volumes and performance.

 

 

 

 

Patrick
Opal | Level 21

@dustychair To add to my previous post looking at the next few data steps makes me a bit question how sound your logic is (sorry to be blunt).

 

This step reads the result from the summing data step (or in your original code from the SQL that does the summing) and you do nothing else than create a new variable RawScore that's based on the calculated var TCC. Your source table will have one row per distinct value of theta.

  data newA_&d._&g._&t._&f.;
    length RawScore 8.;
    set tcc_&d._&g._&t._&f.;
    RawScore=floor(tcc);
  run;

 

In the next two steps you then set the newly created table by rawscore and select the max and min values - and for the max value only the very last row.

If this really works meaning the data is pre-sorted by rawscore and given how TCC gets calculated then what this means is that you wouldn't need to loop over Theta in such an extend. The only thing you would need to do for the same outcome is to loop over theta in this way: do theta=-15 by .0001 and you stop as soon as floor(TCC) changes, and then a 2nd loop theta=15 by -.0001 and again stop as soon as floor(tcc) changes. ...and then you'd likely only loop a few times over your source data instead of 300001 times.

  data x_&d._&g._&t._&f.;
    set newA_&d._&g._&t._&f.;
    by rawscore;

    if first.rawscore;
  run;

  data y_&d._&g._&t._&f.;
    set newA_&d._&g._&t._&f. end=eof;
    by rawscore;

    if last.rawscore and eof;
    rawscore=rawscore+1;
  run;

 You ideally first develop clean code that works for a single data source and only then add all the macro logic.

 

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!

Submit your idea!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 3 replies
  • 606 views
  • 5 likes
  • 3 in conversation