Solved: Re: Recursive Query

IvyHeng88 · Posted 06-01-2018 10:56 AM

How to construct query for the following expected outpit:-

Source table:-
Cust_id.     P_cust_id
111.            .
222.           111
333.           222
444.           333
555.           444

Expected result:-
Cust_id.      Expected_cust_id
111.             555
222.             555
333.             555
444.             555
111.             444
222.             444
333.             444
111.             333
222.             333
111.             222

PGStats · Posted 06-01-2018 04:54 PM

If there is a unique p_cust_id for any cust_id then you can go up the chain in a simple way by indexing your dataset:

data have;
input (Cust_id   P_cust_id) ($);
cards;
111            .
222           111
333           222
444           333
555           444
;

proc sql;
create unique index cust_id on have (cust_id);
quit;

data want;
set have;
Expected_cust_id = cust_id;
do while(not missing(p_cust_id));
    output;
    cust_id = p_cust_id;
    set have key=cust_id/unique;
    if _IORC_ ne 0 then leave;
    end;
_error_ = 0;
rename p_cust_id=cust_id;
drop cust_id;
run;

proc print data=want noobs; run;

PG

View solution in original post

ballardw · Posted 06-01-2018 11:26 AM

Is the logic that each P_cust_id to be paired with the other cust_id and cust_id is less than P_cust_id? If not explain the logic.

Reeza · Posted 06-01-2018 11:37 AM

Do you have dates attached at all? If you do it becomes a bit easier.

There's a macro here that will group the ID's , exp=8 means that it looks for a max of 8 depth. If you need further you can increase it, but the slower it gets. Once it groups the ID, if you have a date, you can take the latest ID and then you'll have a master table of IDs that you can then merge with SQL.

%macro SubGraphs(arcs,from=from,to=to,out=Clusters,exp=8);
data _null_;
if 0 then set &arcs(keep=&from rename=(&from=node)); /* get node data type */
length clust 8;
declare hash nodes(hashexp:&exp);
nodes.defineKey('node');
nodes.defineData('node', 'clust');
nodes.defineDone();
declare hiter nodeList('nodes');

do newClust = 1 by 1 while(not endLoop);
	set &arcs end=endLoop;
	call missing(clust); node = &from;
	if 0^=nodes.find() then nodes.add(); 
	fromClust = clust; 
	call missing(clust); node = &to;
	if 0^=nodes.find() then nodes.add(); 
	toClust = clust;
	if n(fromClust, toClust) = 0 then do;
		nodes.replace(key:&from, data:&from, data:newClust);
		nodes.replace(key:&to, data:&to, data:newClust);
	end;
	else if missing(toClust) then 
		nodes.replace(key:&to, data:&to, data:fromClust);
	else if missing(fromClust) then 
		nodes.replace(key:&from, data:&from, data:toClust);
	else if fromClust ne toClust then do;
		rc = nodeList.first();
		do while (rc = 0);
			if clust = fromClust then 
				nodes.replace(key:node, data:node, data:toClust);
			rc = nodeList.next();
		end;
	end;
end;
nodes.output(dataset:"&out");
stop;
run;
%mend SubGraphs;

data have;
infile cards dlm='.' truncover;
input Cust_id     P_cust_id;
cards;
222.           111
333.           222
444.           333
555.           444
;
run;



%SubGraphs(have,from=p_cust_id,to=cust_id,out=Clusters,exp=5);

https://gist.github.com/statgeek/14e3aa2a9f718f551cd98134e9ceed30

PGStats · Posted 06-01-2018 12:43 PM

@Reeza, exp=8 in the SubGraphs macro call is the power of two that determines the size of the hash table, 2^8 = 256 here. There is no limit to the search depth.

PG

Reeza · Posted 06-01-2018 12:45 PM

@PGStats Thanks! @IvyHeng88 This is originally PGStats code as noted in the link.

novinosrin · Posted 06-01-2018 12:04 PM

data have;
input (Cust_id   P_cust_id) ($);
cards;
111            .
222           111
333           222
444           333
555           444
;

proc sql;
create table temp as
select a.Cust_id,b.P_cust_id
from have a, have b;
quit;

data _null_;
if _n_=1 then do;
 dcl hash H (ordered: "d",multidata:'y') ;
   h.definekey  ("Cust_id") ;
   h.definedata ('Cust_id',"p_cust_id") ;
   h.definedone () ;
end;
_grp+1;
do _n=1 by 1 until(last.cust_id);
set temp end=last;
by cust_id;
where not missing(P_cust_id);
if _n<=_grp-1 then h.add();
end;
if last then h.output(dataset:'want');
run;

TomKari · Posted 06-01-2018 02:23 PM

Here's a similar example using PROC BOM:

Tom

data ParComp0;
infile datalines dlm='09'x;
length Parent $ 4 Component $ 4;
input Parent Component QtyPer;
cards;
LA01	B100	1
LA01	S100	1
LA01	A100	1
B100	1100	1
B100	1200	1
B100	1300	1
B100	1400	4
A100	1500	1
A100	1600	1
A100	1700	1
1100	2100	26
1500	1400	2
1700	2200	12
1700	2300	1
run;
data PMaster0;
infile datalines dlm='09'x;
length Part $ 4 Desc $ 32 Unit $ 12;
input Part Desc Unit;
cards;
1100	Finished shaft	Each
1200	6-Diameter steel plate	Each
1300	Hub	Each
1400	1/4-20 Screw	Each
1500	Steel holder	Each
1600	One-way socket	Each
1700	Wiring assembly	Each
2100	3/8 Steel tubing	Inches
2200	16-Gauge lamp cord	Feet
2300	Standard plug terminal	Each
A100	Socket assembly	Each
B100	Base assembly	Each
LA01	Lamp LA	Each
S100	Black shade	Each
run;

/* Create the indented BOM and summarized parts list */
proc bom data=ParComp0 pmdata=PMaster0 
         out=IndBOM0 summaryout=SumBOM0; 
   structure / part=Part 
               parent=Parent
               component=Component
               quantity=QtyPer
               id=(Desc Unit);
run;

novinosrin · Posted 06-01-2018 02:28 PM

@TomKari Blimey, that shows learning sas can be a never ending process. Nice and Thanks

Reeza · Posted 06-01-2018 03:58 PM

@novinosrin PROC BOM requires SAS/OR which isn't installed in a lot of SAS installations.

novinosrin · Posted 06-01-2018 04:01 PM

@Reeza right. Yes, I noticed on a quick google search. Hmm ocean of procs and modules

PGStats · Posted 06-01-2018 04:54 PM

If there is a unique p_cust_id for any cust_id then you can go up the chain in a simple way by indexing your dataset:

data have;
input (Cust_id   P_cust_id) ($);
cards;
111            .
222           111
333           222
444           333
555           444
;

proc sql;
create unique index cust_id on have (cust_id);
quit;

data want;
set have;
Expected_cust_id = cust_id;
do while(not missing(p_cust_id));
    output;
    cust_id = p_cust_id;
    set have key=cust_id/unique;
    if _IORC_ ne 0 then leave;
    end;
_error_ = 0;
rename p_cust_id=cust_id;
drop cust_id;
run;

proc print data=want noobs; run;

PG

novinosrin · Posted 06-01-2018 05:52 PM

data have;
input (Cust_id   P_cust_id) ($);
cards;
111            .
222           111
333           222
444           333
555           444
;
data _null_;
if _n_=1 then do;
 dcl hash H (ordered: "d",multidata:'y') ;
   h.definekey  ("Cust_id") ;
   h.definedata ('Cust_id',"p_cust_id") ;
   h.definedone () ;
end;
set have(keep=cust_id) end=last;
_grp+1;
do n=1 to nobs ;
_k=n-1;
set have(keep=p_cust_id) point=n nobs=nobs;
if _k and  _k<=_grp-1 then h.add();
end;
if last then h.output(dataset:'want');
run;

Ksharp · Posted 06-02-2018 07:49 AM

data have;
input (Cust_id   P_cust_id) ($);
n+1;
cards;
111            .
222           111
333           222
444           333
555           444
;
data want;
 if 0 then set have;
 declare hash h() ;
   h.definekey  ("p_cust_id") ;
   h.definedata ("Cust_id") ;
   h.definedone ();
 declare hash hh(dataset:'have',ordered:'y');
 declare hiter hi('hh');
   hh.definekey('n');
   hh.definedata("Cust_id","p_cust_id");
   hh.definedone();

do n=hh.num_items to 1 by -1;
h.clear();
rc=hi.first();
do while(rc=0);
  h.add();
  rc=hi.next();
end;

hh.remove();
rc=hi.first();
do while(rc=0);
  _Cust_id=Cust_id;
  rx=h.check(key:Cust_id);
  do while(h.find(key:Cust_id)=0);
  end;
  if rx ne 0 then call missing(_p_cust_id);
   else _p_cust_id=Cust_id;output; 
  rc=hi.next();
end;
end;
keep _: ;
run;

proc print noobs;run;

PGStats · Posted 06-02-2018 03:23 PM

@Ksharp, your program generates these notes:

NOTE: Character values have been converted to numeric
      values at the places given by: (Line):(Column).
      39:20
NOTE: There were 5 observations read from the data set WORK.HAVE.
NOTE: DATA STEP stopped due to looping.
NOTE: The data set WORK.WANT has 10 observations and 2 variables.

Under SAS 9.4 TS Level 1M5

PG

Ksharp · Posted 06-03-2018 08:00 AM

@PGStats

That is just NOTE not WARNING.

I believe should try this to get rid of this kind of NOTE.

data have;
input Cust_id   P_cust_id;
n+1;
cards;
111            .
222           111
333           222
444           333
555           444
;

Registration is open