data work.delete;
input firm $ year profit;
datalines; A 2001 90 A 2002 80 A 2003 70 A 2004 90 A 2005 60 B 2001 70 B 2002 80 B 2003 90 C 2001 70 C 2002 80 C 2003 90 C 2004 60 C 2005 80 D 2001 10 D 2002 20
I wish to delete those firms that does not have observation for 5 years. My desired output is A 2001 90 A 2002 80 A 2003 70 A 2004 90 A 2005 60 C 2001 70 C 2002 80 C 2003 90 C 2004 60 C 2005 80
data work.delete;
input firm $ year profit;
datalines;
A 2001 90
A 2002 80
A 2003 70
A 2004 90
A 2005 60
B 2001 70
B 2002 80
B 2003 90
C 2001 70
C 2002 80
C 2003 90
C 2004 60
C 2005 80
D 2001 10
D 2002 20
;
proc sql;
create table want as
select *
from delete
group by firm
having count(distinct year)>=5;
quit;
data work.delete;
input firm $ year profit;
datalines;
A 2001 90
A 2002 80
A 2003 70
A 2004 90
A 2005 60
B 2001 70
B 2002 80
B 2003 90
C 2001 70
C 2002 80
C 2003 90
C 2004 60
C 2005 80
D 2001 10
D 2002 20
;
proc sql;
create table want as
select *
from delete
group by firm
having count(distinct year)>=5;
quit;
data work.delete;
input firm $ year profit;
datalines;
A 2001 90
A 2002 80
A 2003 70
A 2004 90
A 2005 60
B 2001 70
B 2002 80
B 2003 90
C 2001 70
C 2002 80
C 2003 90
C 2004 60
C 2005 80
D 2001 10
D 2002 20
;
data _null_;
if _n_=1 then do;
dcl hash H (dataset:'delete',ordered: "A", multidata:'y') ;
h.definekey ("firm") ;
h.definedata (all:'y') ;
h.definedone () ;
end;
_n_=0;
do until(last.firm);
set delete end=lr;
by firm year;
if first.year then _n_+1;
end;
if _n_<5 then h.remove();
if lr then h.output(dataset:'want');
run;