I have a dataset that looks like this:
Year | Industy | Emp |
2001 | 011 | 1 |
2002 | 011 | 2 |
2001 | 010 | 1 |
2001 | 012 | 3 |
2002 | 012 | 1 |
2001 | 020 | 4 |
2002 | 020 | 2 |
I have to create new rows based on these groups:
01=011,010,012
02=020
A=01,02
I need to sum over the groups by year so that the final data set loos as this:
Year | Industy | Emp |
2001 | 011 | 1 |
2002 | 011 | 2 |
2001 | 010 | 1 |
2001 | 012 | 3 |
2002 | 012 | 1 |
2001 | 020 | 4 |
2002 | 020 | 2 |
2001 | 01 | 5 |
2002 | 01 | 3 |
2001 | 02 | 4 |
2002 | 02 | 2 |
2001 | A | 9 |
2002 | A | 5 |
Like this?
data HAVE;
input YEAR INDUSTRY $ EMP;
cards;
2001 011 1
2002 011 2
2001 010 1
2001 012 3
2002 012 1
2001 020 4
2002 020 2
run;
proc format;
value $indgrp (multilabel) '010'-'012' ='01'
'020' ='02'
'010'-'019','020'-'029' ='A' ;
run;
proc summary data=HAVE nway ;
class INDUSTRY /mlf;
class YEAR ;
var EMP;
format INDUSTRY $indgrp.;
output out=SUMS(keep=YEAR INDUSTRY EMP) sum=;
run;
data WANT;
set HAVE SUMS(in=S);
format INDUSTRY ;
if S then INDUSTRY=put(INDUSTRY, $indgrp.);
run;
YEAR INDUSTRY EMP
2001 011 1
2002 011 2
2001 010 1
2001 012 3
2002 012 1
2001 020 4
2002 020 2
2001 01 5
2002 01 3
2001 02 4
2002 02 2
2001 A 9
2002 A 5
Like this?
data HAVE;
input YEAR INDUSTRY $ EMP;
cards;
2001 011 1
2002 011 2
2001 010 1
2001 012 3
2002 012 1
2001 020 4
2002 020 2
run;
proc format;
value $indgrp (multilabel) '010'-'012' ='01'
'020' ='02'
'010'-'019','020'-'029' ='A' ;
run;
proc summary data=HAVE nway ;
class INDUSTRY /mlf;
class YEAR ;
var EMP;
format INDUSTRY $indgrp.;
output out=SUMS(keep=YEAR INDUSTRY EMP) sum=;
run;
data WANT;
set HAVE SUMS(in=S);
format INDUSTRY ;
if S then INDUSTRY=put(INDUSTRY, $indgrp.);
run;
YEAR INDUSTRY EMP
2001 011 1
2002 011 2
2001 010 1
2001 012 3
2002 012 1
2001 020 4
2002 020 2
2001 01 5
2002 01 3
2001 02 4
2002 02 2
2001 A 9
2002 A 5
Or this:
data HAVE;
input YEAR INDUSTRY $ EMP;
cards;
2001 011 1
2002 011 2
2001 010 1
2001 012 3
2002 012 1
2001 020 4
2002 020 2
;
data groups;
input group $ industry $;
datalines;
01 011
01 010
01 012
02 020
A 011
A 010
A 012
A 020
;
proc sql;
create table want as
select year, industry, emp from have
union all
select year, group as industry, sum(emp) as emp
from have inner join groups on have.industry=groups.industry
group by group, year;
quit;
Don't miss out on SAS Innovate - Register now for the FREE Livestream!
Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.