Disk space full

dustychair · Posted 12-05-2023 11:31 PM

Hi all,

I am running the attached code for grade 3 multiple-form data. After 3 forms I get a "disk space full" message and then I kill the job. Every time I run this code only for 3 forms to avoid such an error. However, I have 15 forms and 9 more grades. I think my code is producing many files each time. Is there a way to improve this code to save space?


libname IN "P:\PROJECTS\SS\2023\ST\INPUTS\1_IRT_FILES\";
%macro imp (sheet);
proc import out=&sheet.
datafile="P:\PROJECTS\SS\2023\ST\INPUTS\CUTS.xlsx"
dbms=xlsx replace;
sheet="&sheet.";

		run;
		%mend imp;
		%imp(SUM);
		%imp(CLM);
%macro inpt(d,g,f);
data const_sum_&d._&g._&f.;
set SUM;
if test="&d.&g.";
run;
%mend (inpt);
%inpt(MAT,03,1);

%macro map (d,g,t,f);
data maps_&d.&g._&t._&f.;
set in.ss23_irt_&d.&g.;
if find(UPCASE(Form_ID),"&t.");
run;
%mend (map);
%map (MAT,03, FHEB,1);
%map(MAT,03, FCEB,1);
%map(MAT,03, FHSB,1);
%map(MAT,03, FNEB,1);
%map(MAT,03, FOEB1,1);
%map(MAT,03, FOEB2,1);
%map(MAT,03, FOSB,1);
%map(MAT,03, FPEB,1);
%map(MAT,03,FPSB,1);
%map(MAT,03, FREB,1);
%map(MAT,03, FTEB1,1);
%map(MAT,03, FTEB2,1);
%map(MAT,03, FTSB,1);

%macro mat (d,g,t,f);

data input_&d._&g._&t._&f.;
set maps_&d.&g._&t._&f.;
run;

data irt_&d._&g._&t._&f.; 
set input_&d._&g._&t._&f.;
do theta=-15 to 15 by .0001; 
output; 
end;
run;


data resp_curve_&d._&g._&t._&f.;
	set irt_&d._&g._&t._&f.;

	weight=weight1;
	const=1.7;
if max_points=1 then do;
num1=exp((const*irt_A)*(theta-irt_B));
probability1=num1/(num1+1);
exp_scr_tt=probability1*1;
exp_scr_tt_w=probability1*1;
si2=1*probability1;
	end;

else if max_points=2 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr_tt=	 exp_scr0+ exp_scr1+ exp_scr2;
exp_scr_tt_w=exp_scr0+ exp_scr1+ exp_scr2;
si2=1*probability1+4*probability2;
end;


else if max_points=3  then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3;
exp_scr_tt_w=(exp_scr0+ exp_scr1+exp_scr2+exp_scr3)*weight;
si2=1*probability1+4*probability2+9*probability3;
end;

else if max_points=4  then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr_tt=	 (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4);
exp_scr_tt_w=	 (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4)*weight;
si2=1*probability1+4*probability2+9*probability3+16*probability4;
end;


else if max_points=5 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num5=(const*irt_A)*(theta-irt_B+irt_step6);
num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr5=probability5*5;

exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
exp_scr_tt_w=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5;
end;

else if max_points=6 then do;
num0=(const*irt_A)*(theta-irt_B+irt_step1);
num1=(const*irt_A)*(theta-irt_B+irt_step2);
num2=(const*irt_A)*(theta-irt_B+irt_step3);
num3=(const*irt_A)*(theta-irt_B+irt_step4);
num4=(const*irt_A)*(theta-irt_B+irt_step5);
num5=(const*irt_A)*(theta-irt_B+irt_step6);
num6=(const*irt_A)*(theta-irt_B+irt_step7);

num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ 
exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5)+exp(num0+num1+num2+num3+num4+num5+num6);

probability0=(exp(num0))/num_all;
probability1=(exp(num0+num1))/num_all;
probability2=(exp(num0+num1+num2))/num_all;
probability3=(exp(num0+num1+num2+num3))/num_all;
probability4=(exp(num0+num1+num2+num3+num4))/num_all;
probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
probability6=(exp(num0+num1+num2+num3+num4+num5+num6))/num_all;
exp_scr0=probability0*0;
exp_scr1=probability1*1;
exp_scr2=probability2*2;
exp_scr3=probability3*3;
exp_scr4=probability4*4;
exp_scr5=probability5*5;
exp_scr6=probability6*6;
exp_scr_tt=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
exp_scr_tt_w=	 exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5+36*probability6;

end;
Li=((1.7*irt_A)**2)*(si2-(exp_scr_tt)**2);

	run;
proc sql;
	create table TCC_&d._&g._&t._&f. as 
	select distinct  theta, sum(exp_scr_tt_w) as TCC format 10.6, 
		sum(Li) as TIF format 10.6,  
		(calculated TIF)**-0.5 as Theta_CSEM format 10.6 
		from resp_curve_&d._&g._&t._&f.
	group by theta
	order by theta
	;
quit;


data newA_&d._&g._&t._&f.;
	length RawScore 8.;
	set tcc_&d._&g._&t._&f.; 
		RawScore=floor(tcc);
run;

data x_&d._&g._&t._&f.;
	set newA_&d._&g._&t._&f.;
	by rawscore;
	if first.rawscore;

run;

data y_&d._&g._&t._&f.;
	set newA_&d._&g._&t._&f. end=eof;
	by rawscore;
	if last.rawscore and eof;

	rawscore=rawscore+1;

run;

data csem_&d._&g._&t._&f.(keep=rawscore theta_csem);
	set x_&d._&g._&t._&f. y_&d._&g._&t._&f.;
run;

proc sql noprint;
	create table UpperBounda_&d._&g._&t._&f. as
   		select RawScore,  min(tcc) as UpperTCC, max(Theta_CSEM) as uppercsem from newA_&d._&g._&t._&f.
   		group by RawScore
	;

  	create table LowerBounda_&d._&g._&t._&f. as
    	select RawScore+1 as RawScore, max(tcc) as LowerTCC, max(Theta_CSEM) as LowerCSEM from newA_&d._&g._&t._&f.
   		group by RawScore
	;

  	create table Bounda_&d._&g._&t._&f. as  
   		select coalesce(a.RawScore, b.RawScore) as RawScore, UpperTCC, 
			LowerTCC, uppercsem, lowercsem, c.Theta as UpperTheta, d.Theta as LowerTheta
   		from UpperBounda_&d._&g._&t._&f. as a full join LowerBounda_&d._&g._&t._&f. as b
   		on a.RawScore=b.RawScore
                        left join newa_&d._&g._&t._&f. as c			
  		on a.UpperTCC=c.tcc
                        left join newa_&d._&g._&t._&f. as d					
   		on b.LowerTCC=d.tcc
                        left join newa_&d._&g._&t._&f. as e			
   		on a.UpperTCC=e.tcc
                        left join newa_&d._&g._&t._&f. as f					
   		on b.LowerTCC=f.tcc
	;

quit;

data newB_&d._&g._&t._&f.;
	merge bounda_&d._&g._&t._&f. csem_&d._&g._&t._&f. ;
	by rawscore;

 	if RawScore ne UpperTCC and RawScore ne LowerTCC then do;
		if not missing(UpperTheta) and not missing(LowerTheta) then do;
			m=(UpperTheta-LowerTheta)/(UpperTCC-LowerTCC);
		   	Theta=m*(RawScore-LowerTCC)+LowerTheta;
		end;
  		else do;
  			Theta=max(UpperTheta,LowerTheta);
  		end;

 	end;

	if RawScore=UpperTCC then Theta=UpperTCC;
	if RawScore=LowerTCC then Theta=LowerTCC;
/*	set x;*/

data Const_sum_&d._&g._&f. ;
set Const_sum_&d._&g._&f.;
do rawscore=0 to 60;
output;
end;

data newB2_&d._&g._&t._&f.;
set newB_&d._&g._&t._&f.  Const_sum_&d._&g._&f.;
by ;
run;

proc sql;
create table final_&d._&g._&t._&f. as 
select distinct a.*,b.SA2016,b.SB2016,b.L2,b.L3,b.L4,b.L5
from  newB_&d._&g._&t._&f. a left join  Const_sum_&d._&g._&f.  b
on a.rawscore  =b.rawscore;
quit;

data final_&d._&g._&t._&f.;
set final_&d._&g._&t._&f.;
scale_score=round((theta*SA2016)+SB2016,1.);
scale_score_ur=((theta*SA2016)+SB2016);
	csem_ss=round(theta_csem*SA2016,0.1);
	csem_ts=round(theta_csem,0.01);
	theta_n=round(theta,0.01);
	theta_csem_n=round(theta_csem,0.01);
	if theta < L2 then pl="Level 1";
	else if L2 <= theta < L3 then pl="Level 2";
	else if L3 <= theta < L4 then pl="Level 3";
	else if L4 <= theta < L5 then pl="Level 4";
	else if L5 <= theta   then pl="Level 5";
run;
data xy_&d._&g._&t._&f.;
set final_&d._&g._&t._&f. ;

			if scale_score ge 650 and scale_score le 850;
		run;

		data _null_;
			set xy_&d._&g._&t._&f.  end=eof;
				if _N_=1 and csem_ss lt 20 then call symput("loss_csem",trim(csem_ss));
				else if _N_=1 and csem_ss gt 20 then call symput("loss_csem","20");

				if eof and csem_ss lt 20 then call symput("hoss_csem",trim(csem_ss));
				else if eof and csem_ss gt 20 then call symput("hoss_csem","20");
				run;

	data final_&d._&g._v2_&t._&f.;
	set  final_&d._&g._&t._&f.;
					if scale_score lt 650 then do;
				scale_score=650;
				csem_ss=&loss_csem.;
			end;
			else if scale_score gt 850 then do;
				scale_score=850;
				csem_ss=&hoss_csem.;
			end;

			if csem_ss gt 20 then csem_ss=20;

			
keep rawscore j SA2016 SB2016 theta pl scale_score csem_ss csem_ts theta_csem theta_csem_n theta_n;
run;

	
proc export
	data=final_&d._&g._v2_&t._&f.
	file="P:\Cluster\SS\ST\2023\MAT\OUTPUT\G&g.\final_&d._&g._v2_&t._&f..xlsx"
	dbms=xlsx replace;
	run;
	 
	 
%mend (mat);
%mat (MAT,03, FHEB,1);
%mat(MAT,03, FCEB,1);
%mat(MAT,03, FHSB,1);
%mat(MAT,03, FNEB,1);
%mat(MAT,03, FOEB1,1);
%mat(MAT,03, FOEB2,1);
%mat(MAT,03, FOSB,1);
%mat(MAT,03, FPEB,1);
%mat(MAT,03,FPSB,1);
%mat(MAT,03, FREB,1);
%mat(MAT,03, FTEB1,1);
%mat(MAT,03, FTEB2,1);
%mat(MAT,03, FTSB,1);

Thanks!

Kurt_Bremser · Posted 12-06-2023 01:40 AM

Use PROC DELETE to remove intermediate datasets you do not need any longer.

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

Patrick · Posted 12-06-2023 02:31 AM

On top of using Proc Delete to remove no more required intermediary tables what really increases your data volumes 300001 fold is below data step:

data irt_&d._&g._&t._&f.;
  set input_&d._&g._&t._&f.;
  do theta=-15 to 15 by .0001;
    output;
  end;
run;

After this you've got the next data step that does some calculations and populates some new variables. ...And then you sum these new variables by theta using Proc SQL.

  proc sql;
    create table TCC_&d._&g._&t._&f. as 
      select distinct  theta, sum(exp_scr_tt_w) as TCC format 10.6, 
        sum(Li) as TIF format 10.6,  
        (calculated TIF)**-0.5 as Theta_CSEM format 10.6 
      from resp_curve_&d._&g._&t._&f.
        group by theta
          order by theta
    ;
  quit;

You can keep your data volumes much much lower and increase performance significantly if you combine the looping over theta creating the sums into a single data step.

Taking a minimum change approach to your current code below what I suggest.

1. Remove this code

2. Replace the next data step that creates table resp_curve_&d._&g._&t._&f. with the following code (untested)

Spoiler

  data TCC_&d._&g._&t._&f.;
    set maps_&d.&g._&t._&f.;
    weight=weight1;
    const=1.7;

    do theta=-15 to 15 by .0001;
      do _point=1 to _nobs;
        set set irt_&d._&g._&t._&f. nobs=_nobs point=_point;

        if max_points=1 then
          do;
            num1=exp((const*irt_A)*(theta-irt_B));
            probability1=num1/(num1+1);
            exp_scr_tt=probability1*1;
            exp_scr_tt_w=probability1*1;
            si2=1*probability1;
          end;
        else if max_points=2 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr_tt=  exp_scr0+ exp_scr1+ exp_scr2;
            exp_scr_tt_w=exp_scr0+ exp_scr1+ exp_scr2;
            si2=1*probability1+4*probability2;
          end;
        else if max_points=3  then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3;
            exp_scr_tt_w=(exp_scr0+ exp_scr1+exp_scr2+exp_scr3)*weight;
            si2=1*probability1+4*probability2+9*probability3;
          end;
        else if max_points=4  then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr_tt=  (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4);
            exp_scr_tt_w=  (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4)*weight;
            si2=1*probability1+4*probability2+9*probability3+16*probability4;
          end;
        else if max_points=5 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num5=(const*irt_A)*(theta-irt_B+irt_step6);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr5=probability5*5;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
            exp_scr_tt_w=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5;
            si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5;
          end;
        else if max_points=6 then
          do;
            num0=(const*irt_A)*(theta-irt_B+irt_step1);
            num1=(const*irt_A)*(theta-irt_B+irt_step2);
            num2=(const*irt_A)*(theta-irt_B+irt_step3);
            num3=(const*irt_A)*(theta-irt_B+irt_step4);
            num4=(const*irt_A)*(theta-irt_B+irt_step5);
            num5=(const*irt_A)*(theta-irt_B+irt_step6);
            num6=(const*irt_A)*(theta-irt_B+irt_step7);
            num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ 
              exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5)+exp(num0+num1+num2+num3+num4+num5+num6);
            probability0=(exp(num0))/num_all;
            probability1=(exp(num0+num1))/num_all;
            probability2=(exp(num0+num1+num2))/num_all;
            probability3=(exp(num0+num1+num2+num3))/num_all;
            probability4=(exp(num0+num1+num2+num3+num4))/num_all;
            probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all;
            probability6=(exp(num0+num1+num2+num3+num4+num5+num6))/num_all;
            exp_scr0=probability0*0;
            exp_scr1=probability1*1;
            exp_scr2=probability2*2;
            exp_scr3=probability3*3;
            exp_scr4=probability4*4;
            exp_scr5=probability5*5;
            exp_scr6=probability6*6;
            exp_scr_tt=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
            exp_scr_tt_w=  exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6;
            si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5+36*probability6;
          end;

        Li=((1.7*irt_A)**2)*(si2-(exp_scr_tt)**2);

        TCC + exp_scr_tt_w;
        TIF + Li;

      end;

      Theta_CSEM=TIF**-0.5;
      output;

      call missing(Li, TCC, TIF, si2, of num:, of probability:, of exp_scr:);
      keep theta TCC TIF Theta_CSEM;
      format TCC TIF Theta_CSEM format 10.6;
    end;
    stop;
  run;

data TCC_&d._&g._&t._&f.; set maps_&d.&g._&t._&f.; weight=weight1; const=1.7; do theta=-15 to 15 by .0001; do _point=1 to _nobs; set set irt_&d._&g._&t._&f. nobs=_nobs point=_point; if max_points=1 then do; num1=exp((const*irt_A)*(theta-irt_B)); probability1=num1/(num1+1); exp_scr_tt=probability1*1; exp_scr_tt_w=probability1*1; si2=1*probability1; end; else if max_points=2 then do; num0=(const*irt_A)*(theta-irt_B+irt_step1); num1=(const*irt_A)*(theta-irt_B+irt_step2); num2=(const*irt_A)*(theta-irt_B+irt_step3); num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2); probability0=(exp(num0))/num_all; probability1=(exp(num0+num1))/num_all; probability2=(exp(num0+num1+num2))/num_all; exp_scr0=probability0*0; exp_scr1=probability1*1; exp_scr2=probability2*2; exp_scr_tt= exp_scr0+ exp_scr1+ exp_scr2; exp_scr_tt_w=exp_scr0+ exp_scr1+ exp_scr2; si2=1*probability1+4*probability2; end; else if max_points=3 then do; num0=(const*irt_A)*(theta-irt_B+irt_step1); num1=(const*irt_A)*(theta-irt_B+irt_step2); num2=(const*irt_A)*(theta-irt_B+irt_step3); num3=(const*irt_A)*(theta-irt_B+irt_step4); num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3); probability0=(exp(num0))/num_all; probability1=(exp(num0+num1))/num_all; probability2=(exp(num0+num1+num2))/num_all; probability3=(exp(num0+num1+num2+num3))/num_all; exp_scr0=probability0*0; exp_scr1=probability1*1; exp_scr2=probability2*2; exp_scr3=probability3*3; exp_scr_tt= exp_scr0+ exp_scr1+exp_scr2+exp_scr3; exp_scr_tt_w=(exp_scr0+ exp_scr1+exp_scr2+exp_scr3)*weight; si2=1*probability1+4*probability2+9*probability3; end; else if max_points=4 then do; num0=(const*irt_A)*(theta-irt_B+irt_step1); num1=(const*irt_A)*(theta-irt_B+irt_step2); num2=(const*irt_A)*(theta-irt_B+irt_step3); num3=(const*irt_A)*(theta-irt_B+irt_step4); num4=(const*irt_A)*(theta-irt_B+irt_step5); num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4); probability0=(exp(num0))/num_all; probability1=(exp(num0+num1))/num_all; probability2=(exp(num0+num1+num2))/num_all; probability3=(exp(num0+num1+num2+num3))/num_all; probability4=(exp(num0+num1+num2+num3+num4))/num_all; exp_scr0=probability0*0; exp_scr1=probability1*1; exp_scr2=probability2*2; exp_scr3=probability3*3; exp_scr4=probability4*4; exp_scr_tt= (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4); exp_scr_tt_w= (exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4)*weight; si2=1*probability1+4*probability2+9*probability3+16*probability4; end; else if max_points=5 then do; num0=(const*irt_A)*(theta-irt_B+irt_step1); num1=(const*irt_A)*(theta-irt_B+irt_step2); num2=(const*irt_A)*(theta-irt_B+irt_step3); num3=(const*irt_A)*(theta-irt_B+irt_step4); num4=(const*irt_A)*(theta-irt_B+irt_step5); num5=(const*irt_A)*(theta-irt_B+irt_step6); num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5); probability0=(exp(num0))/num_all; probability1=(exp(num0+num1))/num_all; probability2=(exp(num0+num1+num2))/num_all; probability3=(exp(num0+num1+num2+num3))/num_all; probability4=(exp(num0+num1+num2+num3+num4))/num_all; probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all; exp_scr0=probability0*0; exp_scr1=probability1*1; exp_scr2=probability2*2; exp_scr3=probability3*3; exp_scr4=probability4*4; exp_scr5=probability5*5; exp_scr_tt= exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5; exp_scr_tt_w= exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5; si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5; end; else if max_points=6 then do; num0=(const*irt_A)*(theta-irt_B+irt_step1); num1=(const*irt_A)*(theta-irt_B+irt_step2); num2=(const*irt_A)*(theta-irt_B+irt_step3); num3=(const*irt_A)*(theta-irt_B+irt_step4); num4=(const*irt_A)*(theta-irt_B+irt_step5); num5=(const*irt_A)*(theta-irt_B+irt_step6); num6=(const*irt_A)*(theta-irt_B+irt_step7); num_all=exp(num0)+ exp(num0+num1)+ exp(num0+num1+num2)+ exp(num0+num1+num2+num3)+exp(num0+num1+num2+num3+num4)+exp(num0+num1+num2+num3+num4+num5)+exp(num0+num1+num2+num3+num4+num5+num6); probability0=(exp(num0))/num_all; probability1=(exp(num0+num1))/num_all; probability2=(exp(num0+num1+num2))/num_all; probability3=(exp(num0+num1+num2+num3))/num_all; probability4=(exp(num0+num1+num2+num3+num4))/num_all; probability5=(exp(num0+num1+num2+num3+num4+num5))/num_all; probability6=(exp(num0+num1+num2+num3+num4+num5+num6))/num_all; exp_scr0=probability0*0; exp_scr1=probability1*1; exp_scr2=probability2*2; exp_scr3=probability3*3; exp_scr4=probability4*4; exp_scr5=probability5*5; exp_scr6=probability6*6; exp_scr_tt= exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6; exp_scr_tt_w= exp_scr0+ exp_scr1+exp_scr2+exp_scr3+exp_scr4+exp_scr5+exp_scr6; si2=1*probability1+4*probability2+9*probability3+16*probability4+25*probability5+36*probability6; end; Li=((1.7*irt_A)**2)*(si2-(exp_scr_tt)**2); TCC + exp_scr_tt_w; TIF + Li; end; Theta_CSEM=TIF**-0.5; output; call missing(Li, TCC, TIF, si2, of num:, of probability:, of exp_scr:); keep theta TCC TIF Theta_CSEM; format TCC TIF Theta_CSEM format 10.6; end; stop; run;

3. Remove the SQL as summing is now already covered with above data step. And because the data step loop over theta also the sort order will be the same.

There would be many more opportunities to streamline your code but above change will make by far the biggest impact in regards of data volumes and performance.

Patrick · Posted 12-06-2023 03:28 AM

@dustychair To add to my previous post looking at the next few data steps makes me a bit question how sound your logic is (sorry to be blunt).

This step reads the result from the summing data step (or in your original code from the SQL that does the summing) and you do nothing else than create a new variable RawScore that's based on the calculated var TCC. Your source table will have one row per distinct value of theta.

  data newA_&d._&g._&t._&f.;
    length RawScore 8.;
    set tcc_&d._&g._&t._&f.;
    RawScore=floor(tcc);
  run;

In the next two steps you then set the newly created table by rawscore and select the max and min values - and for the max value only the very last row.

If this really works meaning the data is pre-sorted by rawscore and given how TCC gets calculated then what this means is that you wouldn't need to loop over Theta in such an extend. The only thing you would need to do for the same outcome is to loop over theta in this way: do theta=-15 by .0001 and you stop as soon as floor(TCC) changes, and then a 2nd loop theta=15 by -.0001 and again stop as soon as floor(tcc) changes. ...and then you'd likely only loop a few times over your source data instead of 300001 times.

  data x_&d._&g._&t._&f.;
    set newA_&d._&g._&t._&f.;
    by rawscore;

    if first.rawscore;
  run;

  data y_&d._&g._&t._&f.;
    set newA_&d._&g._&t._&f. end=eof;
    by rawscore;

    if last.rawscore and eof;
    rawscore=rawscore+1;
  run;

You ideally first develop clean code that works for a single data source and only then add all the macro logic.

Disk space full

Re: Disk space full

Re: Disk space full

Re: Disk space full

Disk space full

Re: Disk space full

Re: Disk space full

Re: Disk space full

SAS Innovate 2025: Call for Content

Click image to register for webinar

Classroom Training Available!