BookmarkSubscribeRSS Feed
☑ This topic is solved. Need further help from the community? Please sign in and ask a new question.
yellowyellowred
Obsidian | Level 7

I have a data set that looks like this:

Person_IDPlan_IDAge_groupTypeTimepointPaymentResult
90110110to20A05000
901

119

10to20A14001
901

129

10to20A32500
905

203

21to30B01000
905

287

21to30B11500
905

301

21to30B21001
910

401

21to30B09000
910

450

21to30B18001
910

500

21to30B21001
910

600

21to30B31501
920

700

10to20A03001
920

800

10to20A11001

 

What I want is to output a table like this:

Age_groupTypeTime_periodNumber of peopleProportion of people with result=1 before paymentProportion of people with result = 1 after paymentTotal marginal payment 
10to20A0 to 1250%100%800 (500 + 300)
21to30B0 to 120%50%1000 (100 + 900)
21to30B1 to 2250%100%950 (800 + 150)
21to30B2 to 31100%100%100 (the payment for plan_ID = 500)

 

So the logic is this:  
Each Person_ID can have multiple plan_ID's, which show as a unique row. For each Person_ID, their Age_group and Type will remain the same for different Plan_ID's. For each subsequent Plan_ID, the timepoint increments by 1. The first timepoint for a Person_ID will always start at 0.
  
I am only looking at 1-step jumps, so from 0 to 1, 1 to 2, 2 to 3 etc, which is why the jump from 1 to 3 is excluded.  

The "number of people" column in the result table is the count of how many unique Person_ID's there are with the same Type and Time_period. The Time_period variable e.g. "0 to 1" represents data for people who have a "0" timepoint and a "1" timepoint.  
  
The "Proportion of people with result=1 before payment" column calculates the proportion of unique Person_ID's with "Result" = 1 at the beginning time point, e.g. for Time_period = "0 to 1", we look at the Timepoint = 0. Similarly for the following column.    
  
The total marginal payment sums up the total payment received by the age_group with that particular Type and Time_period which would have caused any change in the proportion. E.g. The first calculation of 800 (500 + 300) is for the Time_period = "0 to 1" meaning we only sum the payments at timepoint = 0. Similarly, for Time_period = "1 to 2", we only look at payments at Timepoint = 1.  
  
Can someone please show how I'd do this? I'm really bad at SAS. Hopefully I was clear in this exercise.  

 Note that the first four variables in the starting table can have a possibly infinite amount of values.  


Here is code used to generate that starting table:  

data person;
   input Person_ID $ Plan_ID $ Age_group $ Type $ Timepoint Payment Result;
   datalines;
901 101 10to20 A 0 500 0
901 119 10to20 A 1 400 1
901 129 10to20 A 3 250 0
905 203 21to30 B 0 100 0
905 287 21to30 B 1 150 0
905 301 21to30 B 2 100 1
910 401 21to30 B 0 900 0
910 450 21to30 B 1 800 1
910 500 21to30 B 2 100 1
920 700 10to20 A 0 300 1
920 800 10to20 A 1 100 1
;

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

OK. Maybe I understand what you mean now. Try this one :

 

data person;
   input Person_ID $ Plan_ID $ Age_group $ Type $ Timepoint Payment Result;
   datalines;
901 101 10to20 A 0 500 0
901 119 10to20 A 1 400 1
901 129 10to20 A 3 250 0
905 203 21to30 B 0 100 0
905 287 21to30 B 1 150 0
905 301 21to30 B 2 100 1
910 401 21to30 B 0 900 0
910 450 21to30 B 1 800 1
910 500 21to30 B 2 100 1
910 600 21to30 B 3 150 1
920 700 10to20 A 0 300 1
920 800 10to20 A 1 100 1
;

proc sql;
create table level as
select distinct Person_ID,Age_group , Type , Timepoint
 from person
  order by 1,2,3,4;
quit;
data level1;
 set level;
 lag_Timepoint=lag(Timepoint);
 if Person_ID=lag(Person_ID) and Age_group=lag(Age_group) and Type=lag(Type) and dif(Timepoint)=1 then do; 
   first=lag_Timepoint; last=Timepoint;output;
 end;
 drop lag_Timepoint Timepoint;
run;

proc sql;
create table level2 as
select distinct Age_group , Type , first,last
 from level1;
quit;

%macro report(Age_group=,Type=,first=,last=);
data key;
 set level1;
 if  Age_group="&Age_group." and Type="&Type."  and first=&first. and last=&last.;
run;
data key1;
 set key;
Timepoint=first;output;
Timepoint=last;output;
drop first last;
run;

data have;
if _n_=1 then do;
  if 0 then set key;
  declare hash h(dataset:'key1');
  h.definekey('Person_ID','Age_group','Type','Timepoint');
  h.definedone();
end;
 set person;
 if h.check()=0;
run;
proc sql;
create table temp as
select "&Age_group." as Age_group length=80,"&Type." as Type length=80,
 catx(' ',&first.,'to',&last.) as Time_period length=80,
 (select count(distinct Person_ID) from have) as n_people,
 (select count(distinct Person_ID) from have where Timepoint=&first. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&first. ) as prop_before_pay format=percent8.2,
 (select count(distinct Person_ID) from have where Timepoint=&last. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&last. ) as prop_after_pay format=percent8.2,
 (select sum(payment) from have where Timepoint=&first.) as Total_payment
 from have(obs=1);
quit;
proc append base=want data=temp force;run;
run;
%mend;




proc delete data=want;run;
data _null_;
 set level2;
 call execute(catt('%report(Age_group=',Age_group,',Type=',Type,',first=',first,',last=',last,')'));
run;

View solution in original post

3 REPLIES 3
Ksharp
Super User
data person;
   input Person_ID $ Plan_ID $ Age_group $ Type $ Timepoint Payment Result;
   datalines;
901 101 10to20 A 0 500 0
901 119 10to20 A 1 400 1
901 129 10to20 A 3 250 0
905 203 21to30 B 0 100 0
905 287 21to30 B 1 150 0
905 301 21to30 B 2 100 1
910 401 21to30 B 0 900 0
910 450 21to30 B 1 800 1
910 500 21to30 B 2 100 1
910	600 21to30 B 3 150 1
920 700 10to20 A 0 300 1
920 800 10to20 A 1 100 1
;

proc sql;
create table level as
select distinct Age_group , Type , Timepoint
 from person
  order by 1,2,3;
quit;
data level1;
 set level;
 lag_Timepoint=lag(Timepoint);
 if Age_group=lag(Age_group) and Type=lag(Type) and dif(Timepoint)=1 then do; 
   first=lag_Timepoint; last=Timepoint;output;
 end;
 drop lag_Timepoint Timepoint;
run;


%macro report(Age_group=,Type=,first=,last=);
data have;
 set person;
 if Age_group="&Age_group." and Type="&Type."  and Timepoint in (&first. &last.);
run;
proc sql;
create table temp as
select "&Age_group." as Age_group length=80,"&Type." as Type length=80,
 catx(' ',&first.,'to',&last.) as Time_period length=80,
 (select count(distinct Person_ID) from have) as n_people,
 (select count(distinct Person_ID) from have where Timepoint=&first. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&first. ) as prop_before_pay format=percent8.2,
 (select count(distinct Person_ID) from have where Timepoint=&last. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&last. ) as prop_after_pay format=percent8.2,
 (select sum(payment) from have where Timepoint=&first.) as Total_payment
 from have(obs=1);
quit;
proc append base=want data=temp force;run;
run;
%mend;




proc delete data=want;run;
data _null_;
 set level1;
 call execute(catt('%report(Age_group=',Age_group,',Type=',Type,',first=',first,',last=',last,')'));
run;
yellowyellowred
Obsidian | Level 7

Hi @Ksharp , thanks so much for your help.  
It almost worked for me, when I run your code exactly, the final entry (4th row) gives me:  

Age_groupTypeTime_periodn_people...total_payment
..................
21to30B2 to 32... 200

 

but from the starting table, there is only one Person_ID (Person_ID = 910) with a Timepoint = 2 and Timepoint = 3, so for Time_period = "2 to 3", there is only n_people = 1, and also the total_payment is 100 (9th row in the starting table). I tried adjusting and debugging but I couldn't figure out what caused this double up

Ksharp
Super User

OK. Maybe I understand what you mean now. Try this one :

 

data person;
   input Person_ID $ Plan_ID $ Age_group $ Type $ Timepoint Payment Result;
   datalines;
901 101 10to20 A 0 500 0
901 119 10to20 A 1 400 1
901 129 10to20 A 3 250 0
905 203 21to30 B 0 100 0
905 287 21to30 B 1 150 0
905 301 21to30 B 2 100 1
910 401 21to30 B 0 900 0
910 450 21to30 B 1 800 1
910 500 21to30 B 2 100 1
910 600 21to30 B 3 150 1
920 700 10to20 A 0 300 1
920 800 10to20 A 1 100 1
;

proc sql;
create table level as
select distinct Person_ID,Age_group , Type , Timepoint
 from person
  order by 1,2,3,4;
quit;
data level1;
 set level;
 lag_Timepoint=lag(Timepoint);
 if Person_ID=lag(Person_ID) and Age_group=lag(Age_group) and Type=lag(Type) and dif(Timepoint)=1 then do; 
   first=lag_Timepoint; last=Timepoint;output;
 end;
 drop lag_Timepoint Timepoint;
run;

proc sql;
create table level2 as
select distinct Age_group , Type , first,last
 from level1;
quit;

%macro report(Age_group=,Type=,first=,last=);
data key;
 set level1;
 if  Age_group="&Age_group." and Type="&Type."  and first=&first. and last=&last.;
run;
data key1;
 set key;
Timepoint=first;output;
Timepoint=last;output;
drop first last;
run;

data have;
if _n_=1 then do;
  if 0 then set key;
  declare hash h(dataset:'key1');
  h.definekey('Person_ID','Age_group','Type','Timepoint');
  h.definedone();
end;
 set person;
 if h.check()=0;
run;
proc sql;
create table temp as
select "&Age_group." as Age_group length=80,"&Type." as Type length=80,
 catx(' ',&first.,'to',&last.) as Time_period length=80,
 (select count(distinct Person_ID) from have) as n_people,
 (select count(distinct Person_ID) from have where Timepoint=&first. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&first. ) as prop_before_pay format=percent8.2,
 (select count(distinct Person_ID) from have where Timepoint=&last. and result=1)/
 (select count(distinct Person_ID) from have where Timepoint=&last. ) as prop_after_pay format=percent8.2,
 (select sum(payment) from have where Timepoint=&first.) as Total_payment
 from have(obs=1);
quit;
proc append base=want data=temp force;run;
run;
%mend;




proc delete data=want;run;
data _null_;
 set level2;
 call execute(catt('%report(Age_group=',Age_group,',Type=',Type,',first=',first,',last=',last,')'));
run;

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 3 replies
  • 587 views
  • 1 like
  • 2 in conversation