Is there a way to use a conditional proc transpose and an array to convert the table have to want? When I split the datasets into different datasets to merge them together after I'm running into environment constraints. The general logic is to split a field into two fields based on whether it has the word college or not.
/*Step: 1 Transpose a wide data set to create dataset have*/
/*Step: 2 Split set have into sepeate datasets*/
/*Step: 3 Create set want from separate datsets*/
proc sql;
create table have (
gender varchar(50),
academic_period varchar(50),
enrolled num(1)
);
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2020', 0);
quit;
proc sql;
create table want (
gender varchar(50),
academic_period varchar(50),
enrolled_university num(1),
enrolled_college num(1)
);
quit;
proc sql;
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2008', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2009', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2010', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2011', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2012', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2013', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2014', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2015', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2016', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2017', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2018', 0, 1);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2019', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2020', 0, 0);
quit;
Does this do what you're looking for?
proc sql;
create table have (gender varchar(50), academic_period varchar(50), enrolled
num(1) );
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020',
0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2020', 0);
quit;
data clean;
set have;
if find(academic_period, "COLLEGE") then
do;
year_index=3;
col="University";
end;
else
do;
year_index=2;
col="College";
end;
semester=scan(academic_period, 1, "_");
year=input(scan(academic_period, year_index, "_"), 8.);
run;
proc sort data=clean;
by gender year semester;
run;
proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;
You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.
I don't see how this is either transposing or splitting? Perhaps you just didn't included enough different values in your input ENROLLED variable? Let's insert a few 1's and see if the results and you can tell us if this is what you meant.
data have;
input gender :$50. academic_period :$50. enrolled;
cards;
M FALL_2008 1
M FALL_2009 0
M FALL_2010 0
M FALL_2011 1
M FALL_2012 0
M FALL_2013 0
M FALL_2014 0
M FALL_2015 0
M FALL_2016 0
M FALL_2017 0
M FALL_2018 0
M FALL_2019 0
M FALL_2020 0
M FALL_COLLEGE_2008 0
M FALL_COLLEGE_2009 1
M FALL_COLLEGE_2010 0
M FALL_COLLEGE_2011 0
M FALL_COLLEGE_2012 0
M FALL_COLLEGE_2013 0
M FALL_COLLEGE_2014 0
M FALL_COLLEGE_2015 0
M FALL_COLLEGE_2016 0
M FALL_COLLEGE_2017 0
M FALL_COLLEGE_2018 1
M FALL_COLLEGE_2019 0
M FALL_COLLEGE_2020 0
;
data want;
set have;
enrolled_university=0;
enrolled_college=0;
if index(academic_period,'COLLEGE') then enrolled_college=enrolled;
else enrolled_university=enrolled;
run;
proc print data=want;
run;
Results:
enrolled_ enrolled_ Obs gender academic_period enrolled university college 1 M FALL_2008 1 1 0 2 M FALL_2009 0 0 0 3 M FALL_2010 0 0 0 4 M FALL_2011 1 1 0 5 M FALL_2012 0 0 0 6 M FALL_2013 0 0 0 7 M FALL_2014 0 0 0 8 M FALL_2015 0 0 0 9 M FALL_2016 0 0 0 10 M FALL_2017 0 0 0 11 M FALL_2018 0 0 0 12 M FALL_2019 0 0 0 13 M FALL_2020 0 0 0 14 M FALL_COLLEGE_2008 0 0 0 15 M FALL_COLLEGE_2009 1 0 1 16 M FALL_COLLEGE_2010 0 0 0 17 M FALL_COLLEGE_2011 0 0 0 18 M FALL_COLLEGE_2012 0 0 0 19 M FALL_COLLEGE_2013 0 0 0 20 M FALL_COLLEGE_2014 0 0 0 21 M FALL_COLLEGE_2015 0 0 0 22 M FALL_COLLEGE_2016 0 0 0 23 M FALL_COLLEGE_2017 0 0 0 24 M FALL_COLLEGE_2018 1 0 1 25 M FALL_COLLEGE_2019 0 0 0 26 M FALL_COLLEGE_2020 0 0 0
Academic_period is the unique key. (Id + academic period would be the unique key in a larger dataset.)
Does this do what you're looking for?
proc sql;
create table have (gender varchar(50), academic_period varchar(50), enrolled
num(1) );
quit;
proc sql;
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019',
0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020',
0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M',
'FALL_COLLEGE_2020', 0);
quit;
data clean;
set have;
if find(academic_period, "COLLEGE") then
do;
year_index=3;
col="University";
end;
else
do;
year_index=2;
col="College";
end;
semester=scan(academic_period, 1, "_");
year=input(scan(academic_period, year_index, "_"), 8.);
run;
proc sort data=clean;
by gender year semester;
run;
proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;
You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.
Reeza,
Thanks for posting, this is what I was looking for. I'm still figuring proc transpose out so I got stumped with what I had to do before the proc transpose.
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.