Solved: Re: Vertically Stack Dataset and Only Keeping Rows with Matching IDs

bcp2122 · Posted 04-16-2020 04:37 AM

I am looking to stack 5 years worth of data: each year is a different table and they all have the same columns/variables. In the final dataset, I only want to keep observations for subjects who were there for the 5 years.

EXAMPLE:

Table 1:

ID	First Name	Count	Year
1	Alpha	5	2015
2	Beta	12	2015
3	Gamma	4	2015

Table 2:

ID	First Name	Count	Year
1	Alpha	32	2016
3	Gamma	17	2016
4	Delta	2	2016

Table 3:

ID	First Name	Count	Year
1	Alpha	14	2017
2	Beta	3	2017
3	Gamma	1	2017

FINAL OUTPUT:

ID	First Name	Count	Year
1	Alpha	5	2015
1	Alpha	32	2016
1	Alpha	14	2017
3	Gamma	4	2015
3	Gamma	17	2016
3	Gamma	1	2017

ed_sas_member · Posted 04-16-2020 04:52 AM

Hi @bcp2122

Here is an approach to achieve this:

data table1;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	5	2015
2	Beta	12	2015
3	Gamma	4	2015
;
run;
 
data table2;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	32	2016
3	Gamma	17	2016
4	Delta	2	2016
;
run;

data table3;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	14	2017
2	Beta	3	2017
3	Gamma	1	2017
;
run;

data want;
	set table1 table2 table3;
run;

proc sql;
	select *
	from want
	group by id 
	having count(id) = (select count(distinct year) from want);
quit;

Best,

View solution in original post

ed_sas_member · Posted 04-16-2020 04:52 AM

Hi @bcp2122

Here is an approach to achieve this:

data table1;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	5	2015
2	Beta	12	2015
3	Gamma	4	2015
;
run;
 
data table2;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	32	2016
3	Gamma	17	2016
4	Delta	2	2016
;
run;

data table3;
	infile datalines dlm="09"x;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	14	2017
2	Beta	3	2017
3	Gamma	1	2017
;
run;

data want;
	set table1 table2 table3;
run;

proc sql;
	select *
	from want
	group by id 
	having count(id) = (select count(distinct year) from want);
quit;

Best,

yabwon · Posted 04-16-2020 05:09 AM

HI,

here is another approach (double DoW-loop + hash):

data table1;
	infile datalines dlm=" ";
	input ID First_Name : $	Count	Year;
	datalines;
1 Alpha 5 2015
2 Beta 12 2015
3 Gamma 4 2015
;
run;
 
data table2;
	infile datalines dlm=" ";
	input ID First_Name : $	Count	Year;
	datalines;
1 Alpha 32 2016
3 Gamma 17 2016
4 Delta 2 2016
;
run;

data table3;
	infile datalines dlm=" ";
	input ID First_Name : $	Count	Year;
	datalines;
1 Alpha 14 2017
2 Beta 3 2017
3 Gamma 1 2017
;
run;

%let n = 3; /* the number of years */
data want;
  declare hash FirstName();
  FirstName.defineKey("First_Name");
  FirstName.defineData("_IORC_");
  FirstName.defineDone();

  do until(EOF);
	 set table1 table2 table3 end=EOF;
   if 0 ne FirstName.find() then _IORC_ = 1;
                            else _IORC_ + 1;  
   _N_ = FirstName.replace();
  end;

  EOF = 0;
  do until(EOF);
	 set table1 table2 table3 end=EOF;
   _N_ = FirstName.find(); 
   if _IORC_ => &n. then output;
  end;
  
  stop;
run;
proc print;
run;

All the best

Bart

_______________
Polish SAS Users Group: www.polsug.com and communities.sas.com/polsug

"SAS Packages: the way to share" at SGF2020 Proceedings (the latest version), GitHub Repository, and YouTube Video.
Hands-on-Workshop: "Share your code with SAS Packages"
"My First SAS Package: A How-To" at SGF2021 Proceedings

SAS Ballot Ideas: one: SPF in SAS, two, and three
SAS Documentation

Kurt_Bremser · Posted 04-16-2020 05:14 AM

For the pure fun of it, using transpose and merge:

%macro process;
%do i = 1 %to 3;

proc sort data=table&i.;
by first_name;
run;

proc transpose data=table&i. out=trans&i. (drop=_name_) prefix=_;
var count;
id year;
by first_name;
run;

%end;

data all;
merge
%do i = 1 %to 3;
  trans&i. (in=in&i.)
%end;
;
by first_name;
if
%do i = 1 %to 3;
  %if &i. ne 1 %then and ;
  in&i.
%end;
;
run;

proc transpose data=all out=pre_want (rename=(col1=count));
by first_name;
var _:;
run;

data want;
set pre_want;
year = input(substr(_name_,2),best.);
drop _name_;
run;
%mend;

%process

😉

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

yabwon · Posted 04-16-2020 05:35 AM

Kurt,

following your approach

data have;
  set table1 table2 table3;
run;

proc sort data = have ;
  by ID First_Name Year;
run;

proc transpose data = have out = haveT(drop = _name_) prefix=y_;
  by ID First_Name;
  id year;
  var Count;
run;

data want;
  set haveT;
  array y_[*] y_:;
  if not(. in y_);
  do _N_ = lbound(y_) to hbound(y_);
     year = input(compress(vname(y_[_N_]),,"kd"), best32.);
     Count = y_[_N_];
     output;
  end;
  keep ID First_Name Year Count;
run;
proc print;
run;

🙂 🙂

All the best

Bart

_______________
Polish SAS Users Group: www.polsug.com and communities.sas.com/polsug

"SAS Packages: the way to share" at SGF2020 Proceedings (the latest version), GitHub Repository, and YouTube Video.
Hands-on-Workshop: "Share your code with SAS Packages"
"My First SAS Package: A How-To" at SGF2021 Proceedings

SAS Ballot Ideas: one: SPF in SAS, two, and three
SAS Documentation

Ksharp · Posted 04-16-2020 06:50 AM

data table1;
	infile datalines expandtabs truncover;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	5	2015
2	Beta	12	2015
3	Gamma	4	2015
;
run;
 
data table2;
	infile datalines expandtabs truncover;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	32	2016
3	Gamma	17	2016
4	Delta	2	2016
;
run;

data table3;
	infile datalines expandtabs truncover;
	input ID First_Name $	Count	Year;
	datalines;
1	Alpha	14	2017
2	Beta	3	2017
3	Gamma	1	2017
;
run;
proc sql;
create table want as
select * from
(
select * from table1
union all corr
select * from table2
union all corr
select * from table3
)
where id in
(
select id from table1
intersect
select id from table2
intersect
select id from table3
)
order by 1,4;
quit;

Tom · Posted 04-16-2020 02:11 PM

You can use a double dow loop. One to count and the other to re-read and either output or not.

data want;
  do _N_=1 by 1 until(last.id);
     set table1-table5;
     by id year;
  end;
  do until(last.id);
    set table1-table5;
    by id year;
    if _n_=5 then output;
  end;
run;

Catch up on SAS Innovate 2026

SAS Training: Just a Click Away