I would take out and segregate all 18 years and over data in one dataset, and another for 16 years and over.
When I have that I can join them on matching date and gender and get their difference.
Once I have the difference I can add it to the "18 to 24 years" value.
My code can probably be optimised to make it shorter.
/*Take all '18 years and over' rows*/
proc sql;
create table data_18 as(
select * from have
where Age_group = '18 years and over'
);
quit;
/*Take all '16 years and over' rows*/
proc sql;
create table data_16 as(
select * from have
where Age_group = '16 years and over'
);
quit;
/*Compute difference*/
proc sql;
create table diff_of_16_and_18 as(
select t1.Date,
t1.Gender,
'16 to 24 years' as Age_Group,
sum(t1.Population_in_thousands,-t2.Population_in_thousands) as Population_in_thousands
from data_18 t1 left join data_16 t2
on t1.Date=t2.Date
and t1.Gender=t2.Gender
);
quit;
/*Combine the two tables: (1) original table, without the "16- and 18- years"*/
/* (2) difference table, ready for joining to "18-24 years"*/
data want;
set have (where=(Age_group not in ('16 years and over' '18 years and over')))
diff_of_16_and_18
;
if Age_group in ('18 to 24 years') then new_age_group='16 to 24 years'; /*Rename "18 to 24 years" to correct grouping so they can be summed together in next step*/
else New_age_group = Age_group;
run;
/*Sum by new age group*/
proc sql;
create table sum_by_new_age_group as(
select Date,
Gender,
New_age_group,
sum(Population_in_Thousands) as Population_in_Thousands
from want
group by Date, Gender, New_age_group
);
quit;
... View more