Hi,
Could someone help me with the SAS code to find associations(ie. cross tabulations)
between exposed variables? Exposed(exp=1), unexposed=0. Actually, I have a dataset with 30 idchems (over 100,000 obs in total) but I am using only
4 of them as a test to see if the method will work. I used proc transpose to read the idchems as
idchem1=990005........... idchem4=211700 and finally found the cross tabulations using proc freq
shown in the SAS output. Proc transpose worked well with the test dataset and I was able to find
the associations(cross tabulations) as shown in output(attached). Proc transpose for the larger dataset
was a problem; SAS couldn't transpose and display all the dataset for the 30 idchems;
it transposed and displayed a dataset having only 4 idchems. Could someone help me with a SAS code
for another method without the proc transpose or even if with a proc transpose a shorter datastep ie.
one that incorporates the dataset with 30 idchems at a time.
In summary, a SAS code/method that will help me eventually to find the cross associations for the exposed(1)
and unexposed(0) other than the method I used, which of course will not be very efficient for the larger
data with 30 idchems. SAS code used is found below and results is attached.
Thanks in advance.
ak.
data idnew1;
input id$ job idchem;
datalines;
os1 1 990005
os1 1 990021
os1 1 211700
os1 2 211700
os1 2 990021
os1 2 210701
os1 2 990005
os2 1 210701
os2 1 990005
os2 2 990021
os2 3 210701
os2 3 990005
os3 3 210701
os3 1 211700
os4 1 210701
os4 1 990005
os4 1 211700
;
run;
/*TRANSPOSING VARIABLES*/
proc sort data=idnew1; by id job;
proc transpose data=idnew1 out=idnew1b prefix=idchem;
by id job;
/*id job;*/
var idchem;
run;
/*Cla exposure*/
data clat;
set idnew1b;
if idchem1='990005' or idchem2='990005' or idchem3='990005' or idchem4='990005' then cla_exp=1;
else cla_exp=0;
id_job=catx('_', id, job);
put _all_;
drop _name_;
run;
proc print data=clat;
Title "Cla exposure";
run;
/*Bio exposure*/
data biot;
set idnew1b;
if idchem1='990021' or idchem2='990021' or idchem3='990021' or idchem4='990021' then bio_exp=1;
else bio_exp=0;
id_job=catx('_', id, job);
put _all_;
drop _name_;
run;
Title "Bio exposure";
proc print data=biot;
run;
/*Amo exposure*/
data amot;
set idnew1b;
if idchem1='210701' or idchem2='210701' or idchem3='210701' or idchem4='210701' then amo_exp=1;
else amo_exp=0;
id_job=catx('_', id, job);
put _all_;
drop _name_;
run;
proc print data=amot;
Title "Amo exposure";
run;
/*Chl exposure*/
data chlt;
set idnew1b;
if idchem1='211700' or idchem2='211700' or idchem3='211700'or idchem4='211700' then chl_exp=1;
else chl_exp=0;
id_job=catx('_', id, job);
put _all_;
drop _name_;
run;
proc print data=chlt;
Title "chl exposure";
run;
/* Merging clat,biot and amot files*/
data mlt; merge clat biot amot chlt;
run;
proc print data=mlt;
Title "Merged exposure files for cla ,bio, amo and chl pollutants";
run;
/*CROSS ASSOCIATIONS:clat,biot,amot,chlt*/
proc freq data=mlt;
tables cla_exp*bio_exp;
tables cla_exp*amo_exp;
tables cla_exp*chl_exp;
tables bio_exp*cla_exp;
tables bio_exp*amo_exp;
tables bio_exp*chl_exp;
run;
Hi @ak2011
You can try this. Hope this help!
The only thing to adapt is the decode of idchem variable at the beginning of the program in order to create the corresponding character variable with the flag name.
Best,
data idnew1;
input id$ job idchem;
id_job=catx('_', id, job);
length idchem_char $7;
if idchem = 990005 then idchem_char = 'cla_exp';
else if idchem = 990021 then idchem_char = 'bio_exp';
else if idchem = 210701 then idchem_char = 'amo_exp';
else if idchem = 211700 then idchem_char = 'chl_exp'; /* to adapt */
datalines;
os1 1 990005
os1 1 990021
os1 1 211700
os1 2 211700
os1 2 990021
os1 2 210701
os1 2 990005
os2 1 210701
os2 1 990005
os2 2 990021
os2 3 210701
os2 3 990005
os3 3 210701
os3 1 211700
os4 1 210701
os4 1 990005
os4 1 211700
;
run;
/* 1. Generate macrovariables */
proc sql noprint;
/* Retrieve the list of distinct idchems (num) in a macrovariable : &list_idchem */
select distinct idchem into:list_idchem separated by " " from idnew1;
/* Retrieve the list of distinct idchems (format) in a macrovariable : &list_idchem_char */
select distinct idchem_char into:list_idchem_char separated by " " from idnew1;
/* Retrieve the number of distinct idchems in a macrovariable : &count_idchem */
select count(distinct idchem_char) into:count_idchem trimmed from idnew1;
quit;
data _null_;
length temp $2000;
temp=cat('"',tranwrd("&list_idchem_char"," ",'" "'),'"');
call symput('list_idchem_char_q',temp);
run;
%put &list_idchem;
%put &list_idchem_char;
%put &list_idchem_char_q;
%put &count_idchem;
/* 2. Transpose data and concatenate idchem */
proc sort data=idnew1;
by id_job;
run;
data idnew_tr;
set idnew1;
array flag(4) $ _temporary_;
by id_job;
retain flag;
if first.id_job then do;
count=0;
call missing(of flag(*));
end;
count+1;
do i=1 to dim(flag);
flag(count)=idchem_char;
end;
idchem_list = catx(" ", of flag(*));
if last.id_job then output;
drop count i idchem idchem_char id job;
run;
/* 3. Create flag variables */
data idnew_flag;
set idnew_tr;
array list(&count_idchem) $ (&list_idchem_char_q);
array flag(*) &list_idchem_char;
do i=1 to dim(list);
if indexw(idchem_list, list(i)) > 0 then flag(i) = 1;
else flag(i)=0;
end;
drop i idchem_list list1-list&count_idchem;
run;
proc print data=idnew_flag;
run;
/* 3. Generate as many 2x2 tables as combinations */
%macro combi (dataset);
proc sql;
select count(*) into :nbvar from dictionary.columns where libname='WORK' and memname='IDNEW_FLAG' ;
select name into :mv1- :mv%cmpres(&nbvar) from dictionary.columns where libname='WORK' and memname='IDNEW_FLAG' ;
quit;
%do i=2 %to %eval(&nbvar) ;
%do j=%eval(&i+1) %to %eval(&nbvar);
proc freq data=idnew_flag;
tables &&mv&i*&&mv&j ;
run;
%end;
%end;
%mend;
%combi ();
Hi @ak2011
You can try this. Hope this help!
The only thing to adapt is the decode of idchem variable at the beginning of the program in order to create the corresponding character variable with the flag name.
Best,
data idnew1;
input id$ job idchem;
id_job=catx('_', id, job);
length idchem_char $7;
if idchem = 990005 then idchem_char = 'cla_exp';
else if idchem = 990021 then idchem_char = 'bio_exp';
else if idchem = 210701 then idchem_char = 'amo_exp';
else if idchem = 211700 then idchem_char = 'chl_exp'; /* to adapt */
datalines;
os1 1 990005
os1 1 990021
os1 1 211700
os1 2 211700
os1 2 990021
os1 2 210701
os1 2 990005
os2 1 210701
os2 1 990005
os2 2 990021
os2 3 210701
os2 3 990005
os3 3 210701
os3 1 211700
os4 1 210701
os4 1 990005
os4 1 211700
;
run;
/* 1. Generate macrovariables */
proc sql noprint;
/* Retrieve the list of distinct idchems (num) in a macrovariable : &list_idchem */
select distinct idchem into:list_idchem separated by " " from idnew1;
/* Retrieve the list of distinct idchems (format) in a macrovariable : &list_idchem_char */
select distinct idchem_char into:list_idchem_char separated by " " from idnew1;
/* Retrieve the number of distinct idchems in a macrovariable : &count_idchem */
select count(distinct idchem_char) into:count_idchem trimmed from idnew1;
quit;
data _null_;
length temp $2000;
temp=cat('"',tranwrd("&list_idchem_char"," ",'" "'),'"');
call symput('list_idchem_char_q',temp);
run;
%put &list_idchem;
%put &list_idchem_char;
%put &list_idchem_char_q;
%put &count_idchem;
/* 2. Transpose data and concatenate idchem */
proc sort data=idnew1;
by id_job;
run;
data idnew_tr;
set idnew1;
array flag(4) $ _temporary_;
by id_job;
retain flag;
if first.id_job then do;
count=0;
call missing(of flag(*));
end;
count+1;
do i=1 to dim(flag);
flag(count)=idchem_char;
end;
idchem_list = catx(" ", of flag(*));
if last.id_job then output;
drop count i idchem idchem_char id job;
run;
/* 3. Create flag variables */
data idnew_flag;
set idnew_tr;
array list(&count_idchem) $ (&list_idchem_char_q);
array flag(*) &list_idchem_char;
do i=1 to dim(list);
if indexw(idchem_list, list(i)) > 0 then flag(i) = 1;
else flag(i)=0;
end;
drop i idchem_list list1-list&count_idchem;
run;
proc print data=idnew_flag;
run;
/* 3. Generate as many 2x2 tables as combinations */
%macro combi (dataset);
proc sql;
select count(*) into :nbvar from dictionary.columns where libname='WORK' and memname='IDNEW_FLAG' ;
select name into :mv1- :mv%cmpres(&nbvar) from dictionary.columns where libname='WORK' and memname='IDNEW_FLAG' ;
quit;
%do i=2 %to %eval(&nbvar) ;
%do j=%eval(&i+1) %to %eval(&nbvar);
proc freq data=idnew_flag;
tables &&mv&i*&&mv&j ;
run;
%end;
%end;
%mend;
%combi ();
I think what you want is one observations per ID/JOB, with N dummy variables (where N is the number of distinct IDCHEM values) over the entire original datal.
You can do the following:
data idnew1;
input id$ job idchem;
datalines;
os1 1 990005
os1 1 990021
os1 1 211700
os1 2 211700
os1 2 990021
os1 2 210701
os1 2 990005
os2 1 210701
os2 1 990005
os2 2 990021
os2 3 210701
os2 3 990005
os3 3 210701
os3 1 211700
os4 1 210701
os4 1 990005
os4 1 211700
run;
proc sort data=idnew1 out=have;
by id job;
run;
/* Generate a dataset FREQS of all realised IDCHEM values, in sorted order */
proc freq data=idnew1 noprint;
tables idchem / out=freqs;
run;
/* Make a template dataset (0 obs) of sorted varnames (dummy_210701 dummy211700 ...)
corresponding to the IDCHEM values */
proc transpose data=freqs (keep=idchem) out=dummy_vars (drop=_name_ ) prefix=dummy_;
id idchem;
run;
/* Make a single transposed list of sorted IDCHEM values, for value checking */
proc transpose data=freqs (keep=idchem) out=ref_values (drop=_name_) prefix=_value_;
var idchem;
run;
data want (drop=_:);
if _n_=1 then set ref_values; /* Get array of reference CHEM values */
do until (last.job);
set have;
by id job;
if 0 then set dummy_vars; /* Just to establish the DUMMY var names */
array dummy{*} dummy_: ;
if first.job then do _i=1 to dim(dummy); dummy{_i}=0; end;
dummy{whichn(idchem,of _value_:)}=1;
end;
run;
Hi @ak2011
It seems that you have kept only the following variables on line 75 -> data chem1(keep=id job idchem)
You should add also data idchem_char
Best,
Hi @ak2011
It seems to be a little mistake in the code.
Please put
flag(count)=idchem_char;
instead of
do i=1 to dim(flag);
flag(count)=idchem_char;
end;
Best,
HI @ak2011
No problem
You can drop the I variable which is no more used in the drop statement as follows:
drop idchem_list list1-list&count_idchem; /*line 97*/
and also put &count_idchem line 82 instead of 4. This macro variable is created during the first step (proc sql)
array flag(&count_idchem) $ _temporary_; /*line 82*/
Hi @ak2011
It seems that there are two mistakes in the code:
temp=cat('"',tranwrd(strip("&list_idchem_char")," ",'" "'),'"');
/*do i=1 to dim(flag);*/
flag(count)=idchem_char;
/*end;*/
idchem_list = catx(" ", of flag(*));
if last.id_job then output;
drop count idchem idchem_char id job;
Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!
What’s the difference between SAS Enterprise Guide and SAS Studio? How are they similar? Just ask SAS’ Danny Modlin.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.