Solved: Conditional summing Based on years

SasPerson85 · Posted 01-17-2020 05:07 PM

I have a table that looks like this:

ID	EXPENDED_YR_1	EXPENDED_AMOUNT_1	EXPENDED_YR_2	EXPENDED_AMOUNT_2	EXPENDED_YR_3	EXPENDED_AMOUNT_3	EXPENDED_YR_4	EXPENDED_AMOUNT_4	EXPENDED_YR_5	EXPENDED_AMOUNT_5
197168814080	2019	1223.71	2019	2832.47
197170819072	2018	6000
197174228992	2018	45532.25	2019	8493.22	2019	656.55	2019	24455.78	2019	8499.91
197174243328
197169281024	2019	1024.01	2019	1890.14	2019	1046.24

There is an unique ID number with 5 expended_year columns and 5 expended_amount columns. The numbers on the end of each column name correspond with each other. So Expended_YR_1 corresponds to Expended_amount_1, Expended_YR_2 corresponds to Expended_amount_2, etc..

The desired output should look like this:

ID	ExpendituresYTD_2018	ExpendituresYTD_2019
197168814080	0	4056.18
197170819072	6000	0
197174228992	45532.25	42105.46
197174243328	0	0
197169281024	0	3960.39

So summing by year for each record.

Thank you for your help in advance.

hashman · Posted 01-17-2020 06:25 PM

@SasPerson85:

Something in this vein would do:

data have ;                                                                                                                             
  input id expended_yr_1 expended_amount_1                                                                                              
           expended_yr_2 expended_amount_2                                                                                              
           expended_yr_3 expended_amount_3                                                                                              
           expended_yr_4 expended_amount_4                                                                                              
           expended_yr_5 expended_amount_5                                                                                              
  ;                                                                                                                                     
  cards ;                                                                                                                               
197168814080 2019  1223.71 2019 2832.47    .     .      .      .      .     .                                                           
197170819072 2018  6000       .     .      .     .      .      .      .     .                                                           
197174228992 2018 45532.25 2019 8493.22 2019  656.55 2019 24455.78 2019 8499.91                                                         
197174243328    .      .      .     .      .     .      .      .      .     .                                                           
197169281024 2019  1024.01 2019 1890.14 2019 1046.24    .      .      .     .                                                           
;                                                                                                                                       
run ;                                                                                                                                   
                                                                                                                                        
data want (keep = id expenditure:) ;                                                                                                    
  set have ;                                                                                                                            
  array yr expended_yr: ;                                                                                                               
  array am expended_am: ;                                                                                                               
  expendituresytd_2018 = 0 ;                                                                                                            
  expendituresytd_2019 = 0 ;                                                                                                            
  do over yr ;                                                                                                                          
    if      yr = 2018 then expendituresytd_2018 + am ;                                                                                  
    else if yr = 2019 then expendituresytd_2019 + am ;                                                                                  
  end ;                                                                                                                                 
run ;

Kind regards

Paul D.

View solution in original post

ed_sas_member · Posted 01-17-2020 05:53 PM

HI @SasPerson85

You can try this:

data have;
	infile datalines dlm="09"x;
	input ID	EXPENDED_YR_1	EXPENDED_AMOUNT_1	EXPENDED_YR_2	EXPENDED_AMOUNT_2	EXPENDED_YR_3	EXPENDED_AMOUNT_3	EXPENDED_YR_4	EXPENDED_AMOUNT_4	EXPENDED_YR_5	EXPENDED_AMOUNT_5;
	datalines;
197168814080	2019	1223.71	2019	2832.47	 	 	 	 	 	 
197170819072	2018	6000	 	 	 	 	 	 	 	 
197174228992	2020	45532.25	2019	8493.22	2019	656.55	2019	24455.78	2019	8499.91
197174243328	 	 	 	 	 	 	 	 	 	 
197169281024	2019	1024.01	2019	1890.14	2019	1046.24	 	 	 	 
	;
run;

options missing="0";

data have_exp;
	set have;
	array EXPENDED_YR_(5);
	array EXPENDED_AMOUNT_(5);
	do i=1 to 5;
		EXPENDED_YR = EXPENDED_YR_(i);
		EXPENDED_AMOUNT = EXPENDED_AMOUNT_(i);
		output;
	end;
	keep ID EXPENDED_YR EXPENDED_AMOUNT;
run;

proc means data=have_exp sum noprint missing;
	var EXPENDED_AMOUNT;
	class ID EXPENDED_YR;
	ways 2;
	output out=have_sum (drop=_type_ _freq_) sum=sum;
run;

data have_sum2;
	set have_sum;
	by ID;
	if EXPENDED_YR = . then  EXPENDED_YR = 0;
	if SUM = . then  SUM = 0;
run;

proc transpose data=have_sum2 out=want(drop=_name_ ExpendituresYTD_0) prefix=ExpendituresYTD_;
	var sum;
	by ID;
	ID EXPENDED_YR;
run;

options missing=".";

PaigeMiller · Posted 01-17-2020 06:13 PM

There are much better ways to arrange the data, that would make processing much simpler.

Example:

ID        Year           Amount
111      2018          265.32
111      2019         1244.09
112      2018           27.22

--
Paige Miller

hashman · Posted 01-17-2020 06:25 PM

@SasPerson85:

Something in this vein would do:

data have ;                                                                                                                             
  input id expended_yr_1 expended_amount_1                                                                                              
           expended_yr_2 expended_amount_2                                                                                              
           expended_yr_3 expended_amount_3                                                                                              
           expended_yr_4 expended_amount_4                                                                                              
           expended_yr_5 expended_amount_5                                                                                              
  ;                                                                                                                                     
  cards ;                                                                                                                               
197168814080 2019  1223.71 2019 2832.47    .     .      .      .      .     .                                                           
197170819072 2018  6000       .     .      .     .      .      .      .     .                                                           
197174228992 2018 45532.25 2019 8493.22 2019  656.55 2019 24455.78 2019 8499.91                                                         
197174243328    .      .      .     .      .     .      .      .      .     .                                                           
197169281024 2019  1024.01 2019 1890.14 2019 1046.24    .      .      .     .                                                           
;                                                                                                                                       
run ;                                                                                                                                   
                                                                                                                                        
data want (keep = id expenditure:) ;                                                                                                    
  set have ;                                                                                                                            
  array yr expended_yr: ;                                                                                                               
  array am expended_am: ;                                                                                                               
  expendituresytd_2018 = 0 ;                                                                                                            
  expendituresytd_2019 = 0 ;                                                                                                            
  do over yr ;                                                                                                                          
    if      yr = 2018 then expendituresytd_2018 + am ;                                                                                  
    else if yr = 2019 then expendituresytd_2019 + am ;                                                                                  
  end ;                                                                                                                                 
run ;

Kind regards

Paul D.

novinosrin · Posted 01-17-2020 06:30 PM



data have;
infile cards truncover;
input ID :$20.	EXPENDED_YR_1	EXPENDED_AMOUNT_1	EXPENDED_YR_2	EXPENDED_AMOUNT_2	EXPENDED_YR_3	EXPENDED_AMOUNT_3	EXPENDED_YR_4	EXPENDED_AMOUNT_4	EXPENDED_YR_5	EXPENDED_AMOUNT_5;
cards;
197168814080	2019	1223.71	2019	2832.47	 	 	 	 	 	 
197170819072	2018	6000	 	 	 	 	 	 	 	 
197174228992	2018	45532.25	2019	8493.22	2019	656.55	2019	24455.78	2019	8499.91
197174243328	 	 	 	 	 	 	 	 	 	 
197169281024	2019	1024.01	2019	1890.14	2019	1046.24	 	
; 


/*Transpose wide to long*/
data temp;
 set have;
 array t(5,2) EXPENDED_YR_1--EXPENDED_AMOUNT_5;
 do i=1 to 5 until(not year);
  year=t(i,1);
  Amt=t(i,2);
   output;
 end;
 keep id year amt;
run;
options missing='0';
proc summary data=temp  nway missing completetypes;
   class ID year;
	var amt;
	output out=temp2(where=(year ne .) drop=_type_ _freq_) sum=;
run;
proc transpose data=temp2 out=want(drop=_:) prefix=ExpendituresYTD_;
by id ;
var amt;
id year;
run;

proc print noobs;run;

ID	ExpendituresYTD_2018	ExpendituresYTD_2019
197168814080	0	4056.18
197169281024	0	3960.39
197170819072	6000.00	0
197174228992	45532.25	42105.46
197174243328	0	0

mkeintz · Posted 01-17-2020 11:43 PM

You already have matching arrays of year variables (range from 2018 to 2020) and expenditure variables. Since you want total for a subset of those years (2018,2019), this is an excellent situation to use an array (EXPYTD below) with lower bound 2018 and upper bound 2019:

data have;
	infile datalines truncover;
	input ID	EXPENDED_YR_1	EXPENDED_AMOUNT_1	EXPENDED_YR_2	EXPENDED_AMOUNT_2	EXPENDED_YR_3	EXPENDED_AMOUNT_3	EXPENDED_YR_4	EXPENDED_AMOUNT_4	EXPENDED_YR_5	EXPENDED_AMOUNT_5;
	datalines;
197168814080 2019 1223.71 2019 2832.47
197170819072 2018 6000
197174228992 2020 45532.25 2019 8493.22 2019 656.55 2019 24455.78 2019 8499.91
197174243328
197169281024 2019 1024.01 2019 1890.14 2019 1046.24
run;

data want (drop=i);
  set have;
  array expytd {2018:2019} expenditures_ytd2018 expenditures_ytd2019 ;
  do i=2018 to 2019; expytd{i}=0; end;

  array yr    expended_yr: ;
  array amt   expended_amount: ;
  do over yr ;
    if yr in (2018,2019) then expytd{yr}=sum(expytd{yr},amt);
  end;
run;

--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------

Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Catch up on SAS Innovate 2026

Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Re: Conditional summing Based on years

Catch up on SAS Innovate 2026

SAS Training: Just a Click Away