I want to use only collumns with at least five observations and a gap not greater than two years between observations, so I want to delete the first collumn and only keep the second and thirth.
data _NULL_;
set T01_input end=end_of_file;
by year;
array gaps {3} _TEMPORARY_;
array gaps_max {3} _TEMPORARY_;
array counts {3} _TEMPORARY_;
array cols {3} col1-col3;
do i=1 to 3;
if cols{i}=. then gaps{i}+1;
else gaps{i}=0;
if gaps{i}>gaps_max{i} then gaps_max{i}=gaps{i};
if cols{i}=1 then counts{i}+1;
end;
if end_of_file
then do i=1 to 3;
variable=vname(cols{i});
max=gaps_max{i};
count=counts{i};
put "Maximum number of consecutive gaps for" variable ": " max "and number of value: " count;
if max gt 2 and count ge 5 then call symput("variables_to_drop",symget('variables_to_drop')||" "||vname(cols{i}));
end;
run;
%put Variables to drop: &variables_to_drop;
proc sql;
create table T02_output as
select *
from T01_input(drop=&variables_to_Drop);
quit;
This will appear in the log:
Maximum number of consecutive gaps forcol1 : 3 and number of value: 5
Maximum number of consecutive gaps forcol2 : 2 and number of value: 5
Maximum number of consecutive gaps forcol3 : 0 and number of value: 8
Variables to drop: col1