BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
DavidPhillips2
Rhodochrosite | Level 12

Is there a way to use a conditional proc transpose and an array to convert the table have to want?  When I split the datasets into different datasets to merge them together after I'm running into environment constraints.  The general logic is to split a field into two fields based on whether it has the word college or not.

 

/*Step: 1 Transpose a wide data set to create dataset have*/
/*Step: 2 Split set have into sepeate datasets*/
/*Step: 3 Create set want from separate datsets*/

proc sql;
create table have (
	gender varchar(50),
	academic_period varchar(50),
	enrolled num(1)
);
quit;
proc sql;

insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2008', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2009', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2010', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2011', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2012', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2013', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2014', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2015', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2016', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2017', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2018', 1);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2019', 0);
insert into have (gender, academic_period, enrolled) values ('M', 'FALL_COLLEGE_2020', 0);
quit;


proc sql;
create table want (
	gender varchar(50),
	academic_period varchar(50),
	enrolled_university num(1),
	enrolled_college num(1)
);
quit;

proc sql;

insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2008', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2009', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2010', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2011', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2012', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2013', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2014', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2015', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2016', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2017', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2018', 0, 1);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2019', 0, 0);
insert into want (gender, academic_period, enrolled_university, enrolled_college) values ('M', 'FALL_2020', 0, 0);
quit;
1 ACCEPTED SOLUTION

Accepted Solutions
Reeza
Super User

Does this do what you're looking for?

 

  • Split Semester and Year into their own fields to uniquely identify each semester/year for merging/alignment
  • Assign values of College and University to each row to allow for them to be the column headers
  • Use PROC TRANSPOSE to flip data

 

proc sql;
	create table have (gender varchar(50), academic_period varchar(50), enrolled 
		num(1) );
quit;

proc sql;
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2008', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2009', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2010', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2011', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2012', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2013', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2014', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2015', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2016', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2017', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2018', 1);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2019', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2020', 0);
quit;

data clean;
	set have;

	if find(academic_period, "COLLEGE") then
		do;
			year_index=3;
			col="University";
		end;
	else
		do;
			year_index=2;
			col="College";
		end;
	semester=scan(academic_period, 1, "_");
	year=input(scan(academic_period, year_index, "_"), 8.);
run;

proc sort data=clean; 
by gender year semester;
run;

proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;

You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.

View solution in original post

5 REPLIES 5
Tom
Super User Tom
Super User

I don't see how this is either transposing or splitting?  Perhaps you just didn't included enough different values in your input ENROLLED variable?  Let's insert a few 1's and see if the results and you can tell us if this is what you meant.

data have;
  input gender :$50. academic_period :$50. enrolled;
cards;
M FALL_2008 1
M FALL_2009 0
M FALL_2010 0
M FALL_2011 1
M FALL_2012 0
M FALL_2013 0
M FALL_2014 0
M FALL_2015 0
M FALL_2016 0
M FALL_2017 0
M FALL_2018 0
M FALL_2019 0
M FALL_2020 0
M FALL_COLLEGE_2008 0
M FALL_COLLEGE_2009 1
M FALL_COLLEGE_2010 0
M FALL_COLLEGE_2011 0
M FALL_COLLEGE_2012 0
M FALL_COLLEGE_2013 0
M FALL_COLLEGE_2014 0
M FALL_COLLEGE_2015 0
M FALL_COLLEGE_2016 0
M FALL_COLLEGE_2017 0
M FALL_COLLEGE_2018 1
M FALL_COLLEGE_2019 0
M FALL_COLLEGE_2020 0
;

data want;
  set have;
  enrolled_university=0;
  enrolled_college=0;
  if index(academic_period,'COLLEGE') then enrolled_college=enrolled;
  else enrolled_university=enrolled;
run;

proc print data=want;
run;

Results:

                                                   enrolled_    enrolled_
Obs    gender    academic_period      enrolled    university     college

  1      M       FALL_2008                1            1            0
  2      M       FALL_2009                0            0            0
  3      M       FALL_2010                0            0            0
  4      M       FALL_2011                1            1            0
  5      M       FALL_2012                0            0            0
  6      M       FALL_2013                0            0            0
  7      M       FALL_2014                0            0            0
  8      M       FALL_2015                0            0            0
  9      M       FALL_2016                0            0            0
 10      M       FALL_2017                0            0            0
 11      M       FALL_2018                0            0            0
 12      M       FALL_2019                0            0            0
 13      M       FALL_2020                0            0            0
 14      M       FALL_COLLEGE_2008        0            0            0
 15      M       FALL_COLLEGE_2009        1            0            1
 16      M       FALL_COLLEGE_2010        0            0            0
 17      M       FALL_COLLEGE_2011        0            0            0
 18      M       FALL_COLLEGE_2012        0            0            0
 19      M       FALL_COLLEGE_2013        0            0            0
 20      M       FALL_COLLEGE_2014        0            0            0
 21      M       FALL_COLLEGE_2015        0            0            0
 22      M       FALL_COLLEGE_2016        0            0            0
 23      M       FALL_COLLEGE_2017        0            0            0
 24      M       FALL_COLLEGE_2018        1            0            1
 25      M       FALL_COLLEGE_2019        0            0            0
 26      M       FALL_COLLEGE_2020        0            0            0
Reeza
Super User
Are you by chance missing an ID field that would allow these to be mapped to the same person over the same semester/year?
DavidPhillips2
Rhodochrosite | Level 12

Academic_period is the unique key.  (Id + academic period would be the unique key in a larger dataset.)

Reeza
Super User

Does this do what you're looking for?

 

  • Split Semester and Year into their own fields to uniquely identify each semester/year for merging/alignment
  • Assign values of College and University to each row to allow for them to be the column headers
  • Use PROC TRANSPOSE to flip data

 

proc sql;
	create table have (gender varchar(50), academic_period varchar(50), enrolled 
		num(1) );
quit;

proc sql;
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2008', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2009', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2010', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2011', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2012', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2013', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2014', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2015', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2016', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2017', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2018', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2019', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 'FALL_2020', 
		0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2008', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2009', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2010', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2011', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2012', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2013', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2014', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2015', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2016', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2017', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2018', 1);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2019', 0);
	insert into have (gender, academic_period, enrolled) values ('M', 
		'FALL_COLLEGE_2020', 0);
quit;

data clean;
	set have;

	if find(academic_period, "COLLEGE") then
		do;
			year_index=3;
			col="University";
		end;
	else
		do;
			year_index=2;
			col="College";
		end;
	semester=scan(academic_period, 1, "_");
	year=input(scan(academic_period, year_index, "_"), 8.);
run;

proc sort data=clean; 
by gender year semester;
run;

proc transpose data=clean out=wide;
by gender year semester;
id col;
var enrolled;
run;

You could keep year/semester as one column but using two seems cleaner to me and I think it may likely come in useful later on for reporting. Plus it's easy to combine/split as needed.

DavidPhillips2
Rhodochrosite | Level 12

Reeza,

 

Thanks for posting, this is what I was looking for.  I'm still figuring proc transpose out so I got stumped with what I had to do before the proc transpose.

sas-innovate-2024.png

Available on demand!

Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 5 replies
  • 417 views
  • 2 likes
  • 3 in conversation