Is there a way to use a conditional proc transpose and an array to convert the table have to want? When I split the datasets into different datasets to merge them together after I'm running into environment constraints. The general logic is to split a field into two fields based on whether it has the word college or not.
/*Step: 1 Transpose a wide data set to create dataset have*/
/*Step: 2 Split set have into sepeate datasets*/
/*Step: 3 Create set want from separate datsets*/
proc sql;
create table have (
gender varchar(50),
academic_period varchar(50),
enrolled num(1)
);
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2020', 0);
quit;
proc sql;
create table want (
gender varchar(50),
academic_period varchar(50),
enrolled_university num(1),
enrolled_college num(1)
);
quit;
proc sql;
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2008', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2009', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2010', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2011', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2012', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2013', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2014', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2015', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2016', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2017', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2018', 0, 1);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2019', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2020', 0, 0);
quit;
Does this do what you're looking for?
proc sql;
create table have (gender varchar(50), academic_period varchar(50), enrolled
num(1) );
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020',
0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2020', 0);
quit;
data clean;
set have;
if find(academic_period, "COLLEGE") then
do;
year_index=3;
col="University";
end;
else
do;
year_index=2;
col="College";
end;
semester=scan(academic_period, 1, "_");
year=input(scan(academic_period, year_index, "_"), 8.);
run;
proc sort data=clean;
by gender year semester;
run;
proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;
You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.
I don't see how this is either transposing or splitting? Perhaps you just didn't included enough different values in your input ENROLLED variable? Let's insert a few 1's and see if the results and you can tell us if this is what you meant.
data have;
input gender :$50. academic_period :$50. enrolled;
cards;
M FALL_2008 1
M FALL_2009 0
M FALL_2010 0
M FALL_2011 1
M FALL_2012 0
M FALL_2013 0
M FALL_2014 0
M FALL_2015 0
M FALL_2016 0
M FALL_2017 0
M FALL_2018 0
M FALL_2019 0
M FALL_2020 0
M FALL_COLLEGE_2008 0
M FALL_COLLEGE_2009 1
M FALL_COLLEGE_2010 0
M FALL_COLLEGE_2011 0
M FALL_COLLEGE_2012 0
M FALL_COLLEGE_2013 0
M FALL_COLLEGE_2014 0
M FALL_COLLEGE_2015 0
M FALL_COLLEGE_2016 0
M FALL_COLLEGE_2017 0
M FALL_COLLEGE_2018 1
M FALL_COLLEGE_2019 0
M FALL_COLLEGE_2020 0
;
data want;
set have;
enrolled_university=0;
enrolled_college=0;
if index(academic_period,'COLLEGE') then enrolled_college=enrolled;
else enrolled_university=enrolled;
run;
proc print data=want;
run;
Results:
enrolled_ enrolled_ Obs gender academic_period enrolled university college 1 M FALL_2008 1 1 0 2 M FALL_2009 0 0 0 3 M FALL_2010 0 0 0 4 M FALL_2011 1 1 0 5 M FALL_2012 0 0 0 6 M FALL_2013 0 0 0 7 M FALL_2014 0 0 0 8 M FALL_2015 0 0 0 9 M FALL_2016 0 0 0 10 M FALL_2017 0 0 0 11 M FALL_2018 0 0 0 12 M FALL_2019 0 0 0 13 M FALL_2020 0 0 0 14 M FALL_COLLEGE_2008 0 0 0 15 M FALL_COLLEGE_2009 1 0 1 16 M FALL_COLLEGE_2010 0 0 0 17 M FALL_COLLEGE_2011 0 0 0 18 M FALL_COLLEGE_2012 0 0 0 19 M FALL_COLLEGE_2013 0 0 0 20 M FALL_COLLEGE_2014 0 0 0 21 M FALL_COLLEGE_2015 0 0 0 22 M FALL_COLLEGE_2016 0 0 0 23 M FALL_COLLEGE_2017 0 0 0 24 M FALL_COLLEGE_2018 1 0 1 25 M FALL_COLLEGE_2019 0 0 0 26 M FALL_COLLEGE_2020 0 0 0
Academic_period is the unique key. (Id + academic period would be the unique key in a larger dataset.)
Does this do what you're looking for?
proc sql;
create table have (gender varchar(50), academic_period varchar(50), enrolled
num(1) );
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020',
0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2020', 0);
quit;
data clean;
set have;
if find(academic_period, "COLLEGE") then
do;
year_index=3;
col="University";
end;
else
do;
year_index=2;
col="College";
end;
semester=scan(academic_period, 1, "_");
year=input(scan(academic_period, year_index, "_"), 8.);
run;
proc sort data=clean;
by gender year semester;
run;
proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;
You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.
Reeza,
Thanks for posting, this is what I was looking for. I'm still figuring proc transpose out so I got stumped with what I had to do before the proc transpose.
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.