Dear all,
I using tring to create a sankey graph like that of @JeffMeyers. I tried to understand each step but when I got to creating the coordinates for the rectangles I got a little bit confused . I have more nodes than Jeff had. For this post I have just entered in my data in the macro without changing anything in the macro. Maybe some can help. This is the data and the
data have;
infile datalines;
input patid Drug_type $44. visit drug_type_num 2.;
datalines;
1 Cefalexin+Baclofen+Betahistine 1 12
2 Calcipotriol+Baclofen+Betahistine 1 16
3 Folic acid+Baclofen 1 26
4 Piriton+Baclofen 1 3
5 Fentanyl/Co-beneldopa+Baclofen+Betahistine 1 36
6 Folic acid+Baclofen 1 26
7 Fentanyl/Co-beneldopa+Baclofen 1 35
8 Folic acid+Baclofen 1 26
9 Cefalexin+Baclofen 1 11
10 Fentanyl/Co-beneldopa+Baclofen 1 35
11 Allopurinol+Baclofen+Betahistine 1 33
12 Folic acid+Baclofen+Betahistine 1 27
13 Cefalexin+Baclofen 1 11
14 Allopurinol+Baclofen+Betahistine 1 33
15 Cefalexin+Baclofen 1 11
16 Folic acid+Baclofen 1 26
17 Piriton+Baclofen 1 3
18 Folic acid+Baclofen+Betahistine 1 27
19 Allopurinol+Baclofen+Betahistine 1 33
20 Piriton+Baclofen 1 3
21 Cefalexin+Baclofen 1 11
22 Piriton+Baclofen 1 3
23 Cefalexin+Baclofen 1 11
24 Cefalexin+Baclofen+Betahistine 1 12
25 Cefalexin+Baclofen 1 11
26 Cefalexin+Baclofen 1 11
27 Fentanyl/Co-beneldopa+Baclofen 1 35
28 Diclofenac+Baclofen 1 22
29 Diclofenac+Baclofen 1 22
30 Fentanyl/Co-beneldopa+Baclofen 1 35
31 Piriton+Baclofen 1 3
32 Piriton+Baclofen 1 3
33 Calcipotriol+Baclofen 1 15
34 Cefalexin+Baclofen+OtherConsolidation 1 14
35 Cefalexin+Baclofen 1 11
36 Piriton+Baclofen 1 3
37 Cefalexin+Baclofen 1 11
38 Cefalexin+Baclofen 1 11
39 Piriton+Baclofen+Acrivastine 1 4
40 Piriton+Baclofen 1 3
41 Fentanyl/Co-beneldopa+Baclofen 1 35
42 Allopurinol+Baclofen+Betahistine 1 33
43 Cefalexin+Baclofen 1 11
44 Cefalexin+Baclofen 1 11
45 Cefalexin/Felodipine+Baclofen 1 18
46 Allopurinol+Baclofen+Betahistine 1 33
47 Piriton+Baclofen 1 3
48 Calcipotriol+Baclofen 1 15
49 Fentanyl/Co-beneldopa+Baclofen 1 35
50 Cefalexin+Baclofen 1 11
51 Cefalexin+Baclofen 1 11
52 Diclofenac+Baclofen+Betahistine 1 23
53 Calcipotriol+Baclofen 1 15
54 Cefalexin+Baclofen 1 11
55 Allopurinol+Baclofen 1 32
56 Allopurinol+Baclofen+Betahistine 1 33
57 Dosulepin+Baclofen 1 20
58 Allopurinol+Baclofen+Betahistine 1 33
59 Piriton+Baclofen 1 3
60 Cefalexin+Baclofen 1 11
61 Piriton+Baclofen 1 3
62 Cefalexin+Baclofen 1 11
63 Piriton+Baclofen 1 3
64 Piriton+Baclofen 1 3
65 Calcipotriol+Baclofen+Betahistine 1 16
66 Piriton+Baclofen 1 3
67 Piriton+Baclofen+Indapamide 1 5
68 1
69 Calcipotriol+Baclofen 1 15
70 Fentanyl/Co-beneldopa+Baclofen 1 35
71 Calcipotriol+Baclofen+Betahistine 1 16
72 Diclofenac+Baclofen 1 22
73 Baclofen+Betahistine 1 38
74 Cefalexin+Baclofen 1 11
75 Piriton-Based+Baclofen 1 7
76 Cefalexin+Baclofen 1 11
77 Cefalexin+Baclofen 1 11
78 Calcipotriol+Baclofen 1 15
79 Calcipotriol+Baclofen+Acrivastine 1 17
80 Cefalexin+Baclofen 1 11
81 Allopurinol+Baclofen+Acrivastine 1 34
82 Baclofen 1 37
83 Piriton+Baclofen 1 3
84 Piriton+Baclofen 1 3
85 Piriton+Baclofen 1 3
86 Fentanyl/Co-beneldopa+Baclofen 1 35
87 Piriton+Baclofen 1 3
88 Allopurinol+Baclofen 1 32
89 Calcipotriol+Baclofen 1 15
90 Cefalexin+Baclofen 1 11
91 Calcipotriol+Baclofen+Betahistine 1 16
92 Piriton+Baclofen 1 3
93 Cefalexin+Baclofen 1 11
94 Piriton+Baclofen 1 3
95 Calcipotriol+Baclofen+Betahistine 1 16
96 Allopurinol+Baclofen 1 32
97 Allopurinol+Baclofen 1 32
98 Allopurinol+Baclofen+Betahistine 1 33
1 Acrivastine 2 25
2 Acrivastine 2 25
3 Acrivastine 2 25
4 Acrivastine 2 25
5 Acrivastine 2 25
6 Acrivastine 2 25
7 Acrivastine 2 25
8 Acrivastine 2 25
9 Acrivastine 2 25
10 Acrivastine 2 25
11 Acrivastine 2 25
12 Acrivastine 2 25
13 Acrivastine 2 25
14 Acrivastine 2 25
15 Acrivastine 2 25
16 Acrivastine 2 25
17 Acrivastine 2 25
18 Acrivastine 2 25
19 Acrivastine 2 25
20 Acrivastine 2 25
21 Acrivastine 2 25
22 Acrivastine 2 25
23 Acrivastine 2 25
24 Acrivastine 2 25
25 Acrivastine 2 25
26 Acrivastine 2 25
27 Acrivastine 2 25
28 Acrivastine 2 25
29 Acrivastine 2 25
30 Acrivastine 2 25
31 Acrivastine 2 25
30 Acrivastine 2 25
33 Acrivastine 2 25
34 Acrivastine 2 25
35 Acrivastine 2 25
36 Acrivastine 2 25
37 Acrivastine 2 25
38 Acrivastine 2 25
39 Acrivastine 2 25
40 Acrivastine 2 25
41 Acrivastine 2 25
42 Acrivastine 2 25
43 Acrivastine 2 25
44 Acrivastine 2 25
45 Acrivastine 2 25
46 Acrivastine 2 25
47 Acrivastine 2 25
48 Acrivastine 2 25
49 Acrivastine 2 25
50 Acrivastine 2 25
51 Acrivastine 2 25
52 Acrivastine 2 25
53 Acrivastine 2 25
54 Acrivastine 2 25
55 Acrivastine 2 25
56 Acrivastine 2 25
57 Acrivastine 2 25
58 Acrivastine 2 25
59 Acrivastine 2 25
60 Acrivastine 2 25
61 Acrivastine 2 25
62 Acrivastine 2 25
63 Acrivastine 2 25
64 Acrivastine 2 25
65 Acrivastine 2 25
66 Acrivastine 2 25
67 Acrivastine 2 25
68 Acrivastine 2 25
69 Acrivastine 2 25
70 Acrivastine 2 25
71 Acrivastine 2 25
72 Acrivastine 2 25
73 Acrivastine 2 25
74 Acrivastine 2 25
75 Acrivastine 2 25
76 Acrivastine 2 25
77 Acrivastine 2 25
78 Acrivastine 2 25
79 Acrivastine 2 25
80 Acrivastine 2 25
81 Acrivastine 2 25
82 Acrivastine 2 25
83 Acrivastine 2 25
84 Acrivastine 2 25
85 Acrivastine 2 25
86 Acrivastine 2 25
87 Acrivastine 2 25
88 Acrivastine 2 25
89 Acrivastine 2 25
90 Acrivastine 2 25
91 Acrivastine 2 25
92 Acrivastine 2 25
93 Acrivastine 2 25
94 Acrivastine 2 25
95 Acrivastine 2 25
96 Acrivastine 2 25
97 Acrivastine 2 25
98 Acrivastine 2 25
1 3
2 3
3 BAC+Baclofen 3 1
4 BAC+Baclofen 3 1
5 BAC+Baclofen 3 1
6 3
7 3
8 3
9 3
10 3
11 Piriton+Baclofen 3 3
12 3
13 3
14 Piriton+Baclofen 3 3
15 3
16 3
17 Dosulepin+Baclofen 3 20
18 3
19 3
20 3
21 3
22 3
23 Piriton+Baclofen 3 3
24 BAC+Baclofen 3 1
25 BAC+Baclofen 3 1
26 3
27 3
28 3
29 3
30 3
31 3
32 Dosulepin+Baclofen 3 20
33 Glimepiride 3 29
34 Allopurinol+Indapamide 3 31
35 Piriton+Baclofen+Temsirolimus 3 6
36 Baclofen+Lactulose 3 39
37 Piriton+Baclofen 3 3
38 Piriton 3 2
39 3
40 Allopurinol 3 30
41 Heparinoid 3 9
42 Piriton+Baclofen 3 3
43 3
44 3
45 3
46 Piriton+Baclofen 3 3
47 Allopurinol+Baclofen 3 32
48 Piriton+Baclofen 3 3
49 Allopurinol+Baclofen 3 32
50 3
51 3
52 Allopurinol+Baclofen 3 32
53 3
54 3
55 Piriton-Based+Baclofen+Irbesartan 3 8
56 Allopurinol 3 30
57 3
58 3
59 3
60 Baclofen+Lactulose 3 39
61 3
62 Indapamide 3 28
63 3
64 3
65 3
66 3
67 Ketoconazole+Baclofen+Heparinoid 3 41
68 Baclofen+Indapamide 3 40
69 Latanoprost+Baclofen 3 24
70 3
71 3
72 3
73 Glimepiride 3 29
74 Diclofenac+Baclofen 3 22
75 3
76 3
77 3
78 3
79 3
80 Allopurinol+Baclofen 3 32
81 Baclofen+Lactulose 3 39
82 Baclofen 3 37
83 Lamotrigine+Baclofen 3 19
84 Cefalexin+Baclofen+Heparinoid 3 13
85 3
86 Piriton+Baclofen 3 3
87 Cefalexin+Baclofen 3 11
88 3
89 Allopurinol+Baclofen 3 32
90 3
91 3
92 3
93 Piriton+Baclofen 3 3
94 Allopurinol+Baclofen 3 32
95 Latanoprost 3 10
96 3
97 Letrozole+Baclofen 3 21
98 3
;
run;
/*
Author: Jeff Myers
Source: https://communities.sas.com/t5/Graphics-Programming/Sankey-Diagram-Decision-Tree-etc/m-p/719812#
*/
option mprint;
%macro sankey(data=, id=, nodes=,group=, barwidth=5, bargap=5, points=20, curve_rectangle_gap=0 , antialias=200000,
width=16in,height=8in, plotname=_sankey, font_size=12pt);
/**Rename Variables and create a temporary dataset**/
data _temp;
merge &data (keep=&id rename=(&id=id))
&data (keep=&nodes rename=(&nodes=nodes))
&data (keep=&group rename=(&group=group));
run;
/**Grab Labels for Group and Nodes variables**/
data _null_;
set _temp (obs=1);
%local group_label nodes_label;
call symput('nodes_label',strip(vlabel(nodes)));
call symput('group_label',strip(vlabel(group)));
run;
/**Grab cross tab frequency of groups and nodes for rectangles**/
proc freq data=_temp noprint;
table nodes*group / outpct out=_frq (keep=nodes group count pct_row);
run;
/**Assign a group level value for use in arrays later**/
data _levels;
set _frq;
by nodes group;
if first.nodes then group_lvl=0;
group_lvl+1;
run;
/**Grab number and values of unique Node values**/
proc sql noprint;
%local n_nodes i null;
select count(distinct nodes) into :n_nodes separated by ''
from _frq;
select distinct nodes format=12. into :node1-
from _frq;
%do i = 1 %to &n_nodes;
%local n_group&i ;
%end;
/**Count how many groups are in each Node**/
select nodes,count(distinct group) into :null,:n_group1-
from _frq group by nodes;
quit;
/*Create coordinates for rectangles**/
/*BARWIDTH controls the width of rectangles, BARGAP assigns a percentage for white space*/
data _rectangles;
set _frq;
by nodes group;
array _node_n {&n_nodes}
(%do i = 1 %to &n_nodes;
%if &i>1 %then %do; , %end;
&&n_group&i
%end;);
if first.nodes then do;
last_y=0;nodes_count+1;group_count=0;
end;
if first.group then do;
last_x=100*(nodes_count-1)/(&n_nodes-1);
group_count+1;
end;
retain last_x last_y;
rectangle_id=catx('-',nodes_count,group_count);
x=last_x;y=last_y;output;
x=last_x+&barwidth;y=last_y;output;
x=last_x+&barwidth;y=last_y+((100-&bargap)/100)*pct_row;output;
x=last_x;y=last_y+((100-&bargap)/100)*pct_row;output;
last_y=y+&bargap/_node_n(nodes_count);
drop _node:;
run;
/*Find the unique paths going out of each node between groups*/
proc sort data=_temp;
by id;
data _paths;
set _temp;
by id;
array node_ {&n_nodes};
retain node_;
if first.id then call missing(of node_(*));
%do i=1 %to &n_nodes;
%if &i>1 %then %do; else %end;
if nodes=&&node&i then node_lvl=&i;
%end;
node_(node_lvl)=group;
if last.id then do;
do i=1 to dim(node_)-1;
if ^missing(node_(i)) then do;
start=node_(i);
starting_node=i;
do j = i+1 to dim(node_);
if ^missing(node_(j)) then do;
end=node_(j);
ending_node=j;
output;
j=dim(node_);
end;
end;
end;
end;
end;
keep id start starting_node end ending_node node_:;
run;
/**Get counts for each path**/
proc sort data=_paths;
by starting_node ending_node start end;
run;
proc freq data=_paths noprint;
by starting_node ending_node;
table start*end / list missing out=_frq2;
run;
/**Grab counts and values needed to create the Connecting Curves**/
proc sql noprint;
create table _paths2 as
select
/**Numbers for starting groups**/
a.starting_node,a.start,c.count as start_group_n,
/*Grab location and height of rectangles*/
c2.y_min as start_group_min,c2.y_max as start_group_max,c2.y_max-c2.y_min as start_group_diff,c2.x_max+&curve_rectangle_gap as start_group_x,
c3.group_lvl as start_index, /*Used for arrays*/
/**Numbers for ending groups**/
a.ending_node,a.end,e.count as end_group_n,
/*Grab location and height of rectangles*/
e2.y_min as end_group_min,e2.y_max as end_group_max,e2.y_max-e2.y_min as end_group_diff,e2.x_min-&curve_rectangle_gap as end_group_x,
e3.group_lvl as end_index,/*Used for arrays*/
a.count as n_move /*Number of patients in the current path*/
from _frq2 a
left join _frq c on a.starting_node=c.nodes and a.start=c.group
left join _frq e on a.ending_node=e.nodes and a.end=e.group
left join (select nodes,group,min(y) as y_min,max(y) as y_max, max(x) as x_max
from _rectangles group by nodes,group) as c2
on a.starting_node=c2.nodes and a.start=c2.group
left join (select nodes,group,min(y) as y_min,max(y) as y_max,min(x) as x_min
from _rectangles group by nodes,group) as e2
on a.ending_node=e2.nodes and a.end=e2.group
left join _levels c3 on a.starting_node=c3.nodes and a.start=c3.group
left join _levels e3 on a.ending_node=e3.nodes and a.end=e3.group
order by starting_node,ending_node,start,end;
/**Grab number of distinct group values for array**/
%local n_grps;
select count(distinct end) into :n_grps separated by ''
from _paths2;
/**Grab x-axis location for node labels**/
create table _node_labels as
select nodes, (max(x)+min(x))/2 as x_label from _rectangles group by nodes;
quit;
data _paths3;
set _paths2;
by starting_node ending_node start end;
/**Hold running totals for group counts**/
array start_n {&n_nodes,&n_grps} (%sysevalf(&n_nodes*&n_grps)*0) ;
array end_n {&n_nodes,&n_grps} (%sysevalf(&n_nodes*&n_grps)*0) ;
/***Build the Bezier Curve Connectors: Use Cubic Bezier Equation:
B(t)=(1-t)^3*P0 + 3(1-t)^2*t*P1 + 3(1-t)*t^2*P2 + t^3*P3, 0 <= t <= 1***/
length path_index $20.;
path_index=catx('-',starting_node,start_index,end_index);
/*Find y-axis values for start/end corners of Bezier curves*/
start_y1=start_group_min+start_group_diff*(start_n(starting_node,start_index)/start_group_n);
start_y2=start_y1+start_group_diff*(n_move/start_group_n);
end_y1=end_group_min+end_group_diff*(end_n(ending_node,end_index)/end_group_n)+end_group_diff*(n_move/end_group_n);
end_y2=end_y1-end_group_diff*(n_move/end_group_n);
/**Bezier Curve 1: From Left group to Right Group path**/
do t = 0 to 1 by 1/25;
x=((1-t)**3)*start_group_x+
3*((1-t)**2)*t*(start_group_x+(end_group_x-start_group_x)/3)+
3*(1-t)*(t**2)*(start_group_x+2*(end_group_x-start_group_x)/3)+
(t**3)*end_group_x;
y=((1-t)**3)*start_y2+3*((1-t)**2)*t*start_y2+3*(1-t)*(t**2)*end_y1+(t**3)*end_y1;
output;
end;
/**Bezier Curve 2: From Right Group back to Left Group**/
do t = 0 to 1 by 1/25;
x=((1-t)**3)*end_group_x+
3*((1-t)**2)*t*(start_group_x+2*(end_group_x-start_group_x)/3)+
3*(1-t)*(t**2)*(start_group_x+(end_group_x-start_group_x)/3)+
(t**3)*start_group_x;
y=((1-t)**3)*end_y2+3*((1-t)**2)*t*end_y2+3*(1-t)*(t**2)*start_y1+(t**3)*start_y1;
output;
end;
/**Increase running total for each groups N**/
start_n(starting_node,start_index)+n_move;
end_n(ending_node,end_index)+n_move;
keep x y path_index start;
run;
/**Combine Data for Plot**/
data _plot;
set _rectangles (keep=x y rectangle_id group rename=(rectangle_id=id))
_paths3 (keep=x y path_index start rename=(path_index=id start=group))
_node_labels;
run;
ods graphics /reset width=&width height=&height ANTIALIASMAX=&antialias imagename="&plotname";
proc sgplot data=_plot noborder;
polygon x=x y=y id=id / fill nooutline group=group transparency=0.3 name='p';
xaxistable nodes / x=x_label location=inside position=top nolabel title="&nodes_label" valueattrs=(size=&font_size) titleattrs=(size=&font_size) ;
xaxis min=0 max=%sysevalf(100+&barwidth) values=(0 to 100 by 10) display=none valueshint;
yaxis min=0 max=100 reverse values=(0 to 100 by 10) display=none;
keylegend 'p'/ title="&group_label" noborder location=outside position=bottom exclude=('') valueattrs=(size=&font_size) titleattrs=(size=&font_size);
run;
%mend;
%sankey(data=have,group=drug_type_num,id=patid,nodes=visit);
code. My client wants to see something like patients who received a particular type of drug in visit 2(in this case Acrivastine). And how many of them received a particular type of drug before and after visit2 (in this case visit 1 and 3). Thanks for any help
... View more