Hello,
I need to create an edge list from a table that includes the affiliation of each doctor to up to 5 hospitals. Data looks like below:
Obs | doctor | hosp1 | hosp2 | hosp3 | hosp4 | hosp5 |
---|---|---|---|---|---|---|
1 | 1003000522 | 100072 | . | |||
2 | 1003002106 | 100072 | . | |||
3 | 1003004870 | . | . | |||
4 | 1003005810 | 100128 | . | |||
5 | 1003006552 | 100121 | 100157 | . | ||
6 | 1003008533 | 100018 | 100012 | 100244 | . |
This is really a two-step task: (1) write out all the pairs, and (2) get a frequency of all pairs. The second step is trivial - it's what PROC FREQ is made for. The first step is straightforward:
data have;
infile cards truncover;
input Obs doctor hosp1 hosp2 hosp3 hosp4 hosp5;
cards;
1 1003000522 100072 .
2 1003002106 100072 .
3 1003004870 . .
4 1003005810 100128 .
5 1003006552 100121 100157 .
6 1003008533 100018 100012 100244 .
;
data temp (keep=obs doctor hosp1 hosp2);
set have (rename=(hosp1=dummy1 hosp2=dummy2));
array h {*} dummy1-dummy2 hosp3-hosp5;
nh=n(of h{*});
if nh>0;
do h1=1 to nh;
do h2=h1 to nh;
hosp1=min(h{h1},h{h2});
hosp2=max(h{h1},h{h2});
output;
end;
end;
run;
proc freq data=temp noprint ;
tables hosp1 * hosp2 /out=want (keep=hosp1 hosp2 count);
run;
data have;
infile cards truncover;
input Obs doctor hosp1 hosp2 hosp3 hosp4 hosp5;
cards;
1 1003000522 100072 .
2 1003002106 100072 .
3 1003004870 . .
4 1003005810 100128 .
5 1003006552 100121 100157 .
6 1003008533 100018 100012 100244 .
;
proc transpose data=have out=temp(drop=_name_ where=(col1 ne .));
by doctor;
var hosp:;
run;
data want ;
if _n_=1 then do;
if 0 then set temp(rename=(col1=hosp1)) temp(rename=(col1=hosp2));
dcl hash H (dataset:'temp(rename=(col1=hosp2))',multidata:'y') ;
h.definekey ("doctor") ;
h.definedata ("hosp2") ;
h.definedone () ;
end;
do Shared_Docs=1 by 1 until(last.hosp1);
set temp(rename=(col1=hosp1));
by hosp1 notsorted;
do until(not _t) ;
do rc = h.find() by 0 while (rc = 0) ;
h.has_next(result:_t) ;
if hosp1=hosp2 then h.removedup() ;
if last.hosp1 then output;
rc=h.find_next();
end ;
end ;
end;
drop _t rc doctor;
run;
/*Or perhaps a better bet to Sort the transposed dataset before the Hash*/
data have;
infile cards truncover;
input Obs doctor hosp1 hosp2 hosp3 hosp4 hosp5;
cards;
1 1003000522 100072 .
2 1003002106 100072 .
3 1003004870 . .
4 1003005810 100128 .
5 1003006552 100121 100157 .
6 1003008533 100018 100012 100244 .
;
proc transpose data=have out=temp(drop=_name_ where=(col1 ne .));
by doctor;
var hosp:;
run;
proc sort data=temp ;
by col1;
run;
data want ;
if _n_=1 then do;
if 0 then set temp(rename=(col1=hosp1)) temp(rename=(col1=hosp2));
dcl hash H (dataset:'temp(rename=(col1=hosp2))',multidata:'y') ;
h.definekey ("doctor") ;
h.definedata ("hosp2") ;
h.definedone () ;
end;
do Shared_Docs=1 by 1 until(last.hosp1);
set temp(rename=(col1=hosp1));
by hosp1 ;
do until(not _t) ;
do rc = h.find() by 0 while (rc = 0) ;
h.has_next(result:_t) ;
if hosp1=hosp2 then h.removedup() ;
if last.hosp1 then output;
rc=h.find_next();
end ;
end ;
end;
drop _t rc doctor;
run;
This is really a two-step task: (1) write out all the pairs, and (2) get a frequency of all pairs. The second step is trivial - it's what PROC FREQ is made for. The first step is straightforward:
data have;
infile cards truncover;
input Obs doctor hosp1 hosp2 hosp3 hosp4 hosp5;
cards;
1 1003000522 100072 .
2 1003002106 100072 .
3 1003004870 . .
4 1003005810 100128 .
5 1003006552 100121 100157 .
6 1003008533 100018 100012 100244 .
;
data temp (keep=obs doctor hosp1 hosp2);
set have (rename=(hosp1=dummy1 hosp2=dummy2));
array h {*} dummy1-dummy2 hosp3-hosp5;
nh=n(of h{*});
if nh>0;
do h1=1 to nh;
do h2=h1 to nh;
hosp1=min(h{h1},h{h2});
hosp2=max(h{h1},h{h2});
output;
end;
end;
run;
proc freq data=temp noprint ;
tables hosp1 * hosp2 /out=want (keep=hosp1 hosp2 count);
run;
Thank you very much for your help. Your solution worked! I wish I could code like you!
Here another option using a hash object.
data have;
infile cards truncover;
input Obs doctor hosp1 hosp2 hosp3 hosp4 hosp5;
cards;
1 1003000522 100072 .
2 1003002106 100072 .
3 1003004870 . .
4 1003005810 100128 .
5 1003006552 100121 100157 .
6 1003008533 100018 100012 100244 .
;
data _null_;
if _n_=1 then
do;
dcl hash h1(ordered:'y', suminc: 'count');
h1.defineKey('hospital1','hospital2');
h1.defineData('hospital1','hospital2','n_docs');
h1.defineDone();
retain count 1;
end;
set have end=last;
array hosp {*} hosp1-hosp5;
do _i=1 to dim(hosp) while(not missing(hosp[_i]));
hospital1=hosp[_i];
do _k=_i to dim(hosp) while(not missing(hosp[_k]));
hospital2=hosp[_k];
_rc=h1.sum(sum: n_docs);
n_docs+1;
_rc=h1.replace();
end;
end;
if last then h1.output(dataset:'want');
run;
proc print data=want;
run;
Available on demand!
Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.
Need to connect to databases in SAS Viya? SAS’ David Ghan shows you two methods – via SAS/ACCESS LIBNAME and SAS Data Connector SASLIBS – in this video.
Find more tutorials on the SAS Users YouTube channel.