Good tip on the cycles (I have it implemented in a similar way in another part of my code). I have achieved my final table using the code below, but it is: Hardcoded, any extra depth will not work. Looks very inefficient. Any ideas how to improve on this (both SQL and datasteps are ok solution, I just have some experience in the past with SQL hence why I used that)? The below code block is 'insert SAS code', but still I get no formatting. I do get an HTML-error everytime I try to post the code though... proc sql;
/* parents of all cid's */
CREATE TABLE parents1 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM numbered a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM numbered b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
CREATE TABLE parents2 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM parents1 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM parents1 b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
CREATE TABLE parents3 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM parents2 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM parents2 b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
/* children of all cid's */
CREATE TABLE children1 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM numbered a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM numbered b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE children2 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM children1 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM children1 b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE children3 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM children2 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM children2 b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE TESTY AS
SELECT DISTINCT cid, sort_name, sort_index
FROM (
SELECT *
FROM parents3 a
UNION ALL
SELECT *
FROM children3 b
WHERE cid is not missing
)
ORDER BY cid, sort_index;
quit; Which gives: I removed 'h' child to compare 1-on-1 with my excel. Full code for reference to create output: %let sort_format=z1.;
data have;
input cid $ parent_cid $;
cards;
a .
b a
c b
d a
e .
f .
g f
run;
/*h c*/
data want;
* Load the cid->parent_cid mapping into a hash;
if _n_=1 then
do;
declare hash h(dataset:'have');
h.definekey('cid');
h.definedata('parent_cid');
h.definedone();
end;
* Load next CID value;
set have;
if missing(parent_cid) then
copy_parent_cid = cid;
else copy_parent_cid = parent_cid;
length parents $20;
* Find their parents;
do depth=1 by 1 until(h.find(key:parent_cid));
parents=catx('|',parent_cid,parents);
end;
drop parent_cid;
run;
proc sort;
by parents cid;
run;
data want;
set want;
by parents cid;
order+1;
if first.parents then
order=1;
run;
* find how many levels will be needed;
proc sql noprint;
select max(depth) into :md trimmed from want;
quit;
data numbered;
set want;
if _n_=1 then
do;
declare hash h(dataset:'want');
h.definekey('cid');
h.definedata('order');
h.definedone();
end;
array L[&md] $10;
L[depth]=put(order,&sort_format);
do i=depth+1 to &md;
L[i]=put(0,&sort_format);
end;
do i=1 to depth-1;
h.find(key:scan(parents,i,'|'));
L[i]=put(order,&sort_format);
end;
length sort_name $20 sort_index $30;
sort_name=substr(repeat('A0'x,depth-1)||cid,2);
sort_index=catx('.',of L[*]);
/* drop i parents L: order;*/
run;
proc sort;
by sort_index;
run;
proc sql;
/* parents of all cid's */
CREATE TABLE parents1 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM numbered a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM numbered b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
CREATE TABLE parents2 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM parents1 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM parents1 b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
CREATE TABLE parents3 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM parents2 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM parents2 b
LEFT JOIN numbered c ON b.copy_parent_cid=c.cid
;
/* children of all cid's */
CREATE TABLE children1 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM numbered a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM numbered b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE children2 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM children1 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM children1 b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE children3 AS
SELECT a.cid, a.copy_parent_cid, a.sort_name, a.sort_index
FROM children2 a
UNION ALL
SELECT c.cid, c.copy_parent_cid, b.sort_name, b.sort_index
FROM children2 b
LEFT JOIN numbered c ON b.cid=c.copy_parent_cid
;
CREATE TABLE TESTY AS
SELECT DISTINCT cid, sort_name, sort_index
FROM (
SELECT *
FROM parents3 a
UNION ALL
SELECT *
FROM children3 b
WHERE cid is not missing
)
ORDER BY cid, sort_index;
quit;
... View more