BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
DavidPhillips2
Rhodochrosite | Level 12

Is there a way to use a conditional proc transpose and an array to convert the table have to want?  When I split the datasets into different datasets to merge them together after I'm running into environment constraints.  The general logic is to split a field into two fields based on whether it has the word college or not.

 

/*Step: 1 Transpose a wide data set to create dataset have*/
/*Step: 2 Split set have into sepeate datasets*/
/*Step: 3 Create set want from separate datsets*/

proc sql;
create table have (
	gender varchar(50),
	academic_period varchar(50),
	enrolled num(1)
);
quit;
proc sql;

insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2020', 0);
quit;


proc sql;
create table want (
	gender varchar(50),
	academic_period varchar(50),
	enrolled_university num(1),
	enrolled_college num(1)
);
quit;

proc sql;

insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2008', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2009', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2010', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2011', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2012', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2013', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2014', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2015', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2016', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2017', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2018', 0, 1);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2019', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2020', 0, 0);
quit;
1 ACCEPTED SOLUTION

Accepted Solutions
Reeza
Super User

Does this do what you're looking for?

 

  • Split Semester and Year into their own fields to uniquely identify each semester/year for merging/alignment
  • Assign values of College and University to each row to allow for them to be the column headers
  • Use PROC TRANSPOSE to flip data

 

proc sql;
	create table have (gender varchar(50), academic_period varchar(50), enrolled 
		num(1) );
quit;

proc sql;
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2008', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2009', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2010', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2011', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2012', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2013', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2014', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2015', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2016', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2017', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2018', 1);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2019', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2020', 0);
quit;

data clean;
	set have;

	if find(academic_period, "COLLEGE") then
		do;
			year_index=3;
			col="University";
		end;
	else
		do;
			year_index=2;
			col="College";
		end;
	semester=scan(academic_period, 1, "_");
	year=input(scan(academic_period, year_index, "_"), 8.);
run;

proc sort data=clean; 
by gender year semester;
run;

proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;

You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.

View solution in original post

5 REPLIES 5
Tom
Super User Tom
Super User

I don't see how this is either transposing or splitting?  Perhaps you just didn't included enough different values in your input ENROLLED variable?  Let's insert a few 1's and see if the results and you can tell us if this is what you meant.

data have;
  input gender :$50. academic_period :$50. enrolled;
cards;
M FALL_2008 1
M FALL_2009 0
M FALL_2010 0
M FALL_2011 1
M FALL_2012 0
M FALL_2013 0
M FALL_2014 0
M FALL_2015 0
M FALL_2016 0
M FALL_2017 0
M FALL_2018 0
M FALL_2019 0
M FALL_2020 0
M FALL_COLLEGE_2008 0
M FALL_COLLEGE_2009 1
M FALL_COLLEGE_2010 0
M FALL_COLLEGE_2011 0
M FALL_COLLEGE_2012 0
M FALL_COLLEGE_2013 0
M FALL_COLLEGE_2014 0
M FALL_COLLEGE_2015 0
M FALL_COLLEGE_2016 0
M FALL_COLLEGE_2017 0
M FALL_COLLEGE_2018 1
M FALL_COLLEGE_2019 0
M FALL_COLLEGE_2020 0
;

data want;
  set have;
  enrolled_university=0;
  enrolled_college=0;
  if index(academic_period,'COLLEGE') then enrolled_college=enrolled;
  else enrolled_university=enrolled;
run;

proc print data=want;
run;

Results:

                                                   enrolled_    enrolled_
Obs    gender    academic_period      enrolled    university     college

  1      M       FALL_2008                1            1            0
  2      M       FALL_2009                0            0            0
  3      M       FALL_2010                0            0            0
  4      M       FALL_2011                1            1            0
  5      M       FALL_2012                0            0            0
  6      M       FALL_2013                0            0            0
  7      M       FALL_2014                0            0            0
  8      M       FALL_2015                0            0            0
  9      M       FALL_2016                0            0            0
 10      M       FALL_2017                0            0            0
 11      M       FALL_2018                0            0            0
 12      M       FALL_2019                0            0            0
 13      M       FALL_2020                0            0            0
 14      M       FALL_COLLEGE_2008        0            0            0
 15      M       FALL_COLLEGE_2009        1            0            1
 16      M       FALL_COLLEGE_2010        0            0            0
 17      M       FALL_COLLEGE_2011        0            0            0
 18      M       FALL_COLLEGE_2012        0            0            0
 19      M       FALL_COLLEGE_2013        0            0            0
 20      M       FALL_COLLEGE_2014        0            0            0
 21      M       FALL_COLLEGE_2015        0            0            0
 22      M       FALL_COLLEGE_2016        0            0            0
 23      M       FALL_COLLEGE_2017        0            0            0
 24      M       FALL_COLLEGE_2018        1            0            1
 25      M       FALL_COLLEGE_2019        0            0            0
 26      M       FALL_COLLEGE_2020        0            0            0
Reeza
Super User
Are you by chance missing an ID field that would allow these to be mapped to the same person over the same semester/year?
DavidPhillips2
Rhodochrosite | Level 12

Academic_period is the unique key.  (Id + academic period would be the unique key in a larger dataset.)

Reeza
Super User

Does this do what you're looking for?

 

  • Split Semester and Year into their own fields to uniquely identify each semester/year for merging/alignment
  • Assign values of College and University to each row to allow for them to be the column headers
  • Use PROC TRANSPOSE to flip data

 

proc sql;
	create table have (gender varchar(50), academic_period varchar(50), enrolled 
		num(1) );
quit;

proc sql;
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2008', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2009', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2010', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2011', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2012', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2013', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2014', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2015', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2016', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2017', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2018', 1);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2019', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2020', 0);
quit;

data clean;
	set have;

	if find(academic_period, "COLLEGE") then
		do;
			year_index=3;
			col="University";
		end;
	else
		do;
			year_index=2;
			col="College";
		end;
	semester=scan(academic_period, 1, "_");
	year=input(scan(academic_period, year_index, "_"), 8.);
run;

proc sort data=clean; 
by gender year semester;
run;

proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;

You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.

DavidPhillips2
Rhodochrosite | Level 12

Reeza,

 

Thanks for posting, this is what I was looking for.  I'm still figuring proc transpose out so I got stumped with what I had to do before the proc transpose.

SAS Innovate 2025: Register Now

Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 5 replies
  • 763 views
  • 2 likes
  • 3 in conversation