I am looking to stack 5 years worth of data: each year is a different table and they all have the same columns/variables. In the final dataset, I only want to keep observations for subjects who were there for the 5 years.
EXAMPLE:
Table 1:
ID | First Name | Count | Year |
1 | Alpha | 5 | 2015 |
2 | Beta | 12 | 2015 |
3 | Gamma | 4 | 2015 |
Table 2:
ID | First Name | Count | Year |
1 | Alpha | 32 | 2016 |
3 | Gamma | 17 | 2016 |
4 | Delta | 2 | 2016 |
Table 3:
ID | First Name | Count | Year |
1 | Alpha | 14 | 2017 |
2 | Beta | 3 | 2017 |
3 | Gamma | 1 | 2017 |
FINAL OUTPUT:
ID | First Name | Count | Year |
1 | Alpha | 5 | 2015 |
1 | Alpha | 32 | 2016 |
1 | Alpha | 14 | 2017 |
3 | Gamma | 4 | 2015 |
3 | Gamma | 17 | 2016 |
3 | Gamma | 1 | 2017 |
Hi @bcp2122
Here is an approach to achieve this:
data table1;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 5 2015
2 Beta 12 2015
3 Gamma 4 2015
;
run;
data table2;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 32 2016
3 Gamma 17 2016
4 Delta 2 2016
;
run;
data table3;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 14 2017
2 Beta 3 2017
3 Gamma 1 2017
;
run;
data want;
set table1 table2 table3;
run;
proc sql;
select *
from want
group by id
having count(id) = (select count(distinct year) from want);
quit;
Best,
Hi @bcp2122
Here is an approach to achieve this:
data table1;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 5 2015
2 Beta 12 2015
3 Gamma 4 2015
;
run;
data table2;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 32 2016
3 Gamma 17 2016
4 Delta 2 2016
;
run;
data table3;
infile datalines dlm="09"x;
input ID First_Name $ Count Year;
datalines;
1 Alpha 14 2017
2 Beta 3 2017
3 Gamma 1 2017
;
run;
data want;
set table1 table2 table3;
run;
proc sql;
select *
from want
group by id
having count(id) = (select count(distinct year) from want);
quit;
Best,
HI,
here is another approach (double DoW-loop + hash):
data table1;
infile datalines dlm=" ";
input ID First_Name : $ Count Year;
datalines;
1 Alpha 5 2015
2 Beta 12 2015
3 Gamma 4 2015
;
run;
data table2;
infile datalines dlm=" ";
input ID First_Name : $ Count Year;
datalines;
1 Alpha 32 2016
3 Gamma 17 2016
4 Delta 2 2016
;
run;
data table3;
infile datalines dlm=" ";
input ID First_Name : $ Count Year;
datalines;
1 Alpha 14 2017
2 Beta 3 2017
3 Gamma 1 2017
;
run;
%let n = 3; /* the number of years */
data want;
declare hash FirstName();
FirstName.defineKey("First_Name");
FirstName.defineData("_IORC_");
FirstName.defineDone();
do until(EOF);
set table1 table2 table3 end=EOF;
if 0 ne FirstName.find() then _IORC_ = 1;
else _IORC_ + 1;
_N_ = FirstName.replace();
end;
EOF = 0;
do until(EOF);
set table1 table2 table3 end=EOF;
_N_ = FirstName.find();
if _IORC_ => &n. then output;
end;
stop;
run;
proc print;
run;
All the best
Bart
For the pure fun of it, using transpose and merge:
%macro process;
%do i = 1 %to 3;
proc sort data=table&i.;
by first_name;
run;
proc transpose data=table&i. out=trans&i. (drop=_name_) prefix=_;
var count;
id year;
by first_name;
run;
%end;
data all;
merge
%do i = 1 %to 3;
trans&i. (in=in&i.)
%end;
;
by first_name;
if
%do i = 1 %to 3;
%if &i. ne 1 %then and ;
in&i.
%end;
;
run;
proc transpose data=all out=pre_want (rename=(col1=count));
by first_name;
var _:;
run;
data want;
set pre_want;
year = input(substr(_name_,2),best.);
drop _name_;
run;
%mend;
%process
😉
Kurt,
following your approach
data have;
set table1 table2 table3;
run;
proc sort data = have ;
by ID First_Name Year;
run;
proc transpose data = have out = haveT(drop = _name_) prefix=y_;
by ID First_Name;
id year;
var Count;
run;
data want;
set haveT;
array y_[*] y_:;
if not(. in y_);
do _N_ = lbound(y_) to hbound(y_);
year = input(compress(vname(y_[_N_]),,"kd"), best32.);
Count = y_[_N_];
output;
end;
keep ID First_Name Year Count;
run;
proc print;
run;
🙂 🙂
All the best
Bart
data table1;
infile datalines expandtabs truncover;
input ID First_Name $ Count Year;
datalines;
1 Alpha 5 2015
2 Beta 12 2015
3 Gamma 4 2015
;
run;
data table2;
infile datalines expandtabs truncover;
input ID First_Name $ Count Year;
datalines;
1 Alpha 32 2016
3 Gamma 17 2016
4 Delta 2 2016
;
run;
data table3;
infile datalines expandtabs truncover;
input ID First_Name $ Count Year;
datalines;
1 Alpha 14 2017
2 Beta 3 2017
3 Gamma 1 2017
;
run;
proc sql;
create table want as
select * from
(
select * from table1
union all corr
select * from table2
union all corr
select * from table3
)
where id in
(
select id from table1
intersect
select id from table2
intersect
select id from table3
)
order by 1,4;
quit;
You can use a double dow loop. One to count and the other to re-read and either output or not.
data want;
do _N_=1 by 1 until(last.id);
set table1-table5;
by id year;
end;
do until(last.id);
set table1-table5;
by id year;
if _n_=5 then output;
end;
run;
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.