- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
I have data serialized by a variable N, and I have a dataset that is virtually an incidence matrix. How might I find the eleven (11) groupings of records in HAVE that are connected to common undirected graphs defined by the incidence_matrix dataset?
Caveats:
- I only have SAS STAT. Using any other SAS package is not an option, but please include your idea anyway.
- Do not assume big data. Expect less than 10k rows for HAVE in real life, and a matrix about as sparse.
data have ;
do N=1 to 15;
output;
end;
stop;
data incidence_matrix(index=(N));
input N N2;
datalines;
1 2
2 10
5 6
5 7
;
run;
want: a column that groups the records. (Answering @Reeza) User should be able to perform sorts and use by statements with the new column. An example dataset:
N | X |
1 | 20 |
2 | 20 |
3 | 3 |
4 | 4 |
5 | 30 |
6 | 30 |
7 | 30 |
8 | 8 |
9 | 9 |
10 | 20 |
:
:
15 | 15 |
Accepted Solutions
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
data have ;
do N=1 to 15;
output;
end;
stop;
run;
data incidence_matrix(index=(N));
input N N2;
datalines;
1 2
2 10
5 6
5 7
;
run;
data have;
set have incidence_matrix;
rename n=from n2=to;
run;
/***************************************/
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
data have ;
do N=1 to 15;
output;
end;
stop;
run;
data incidence_matrix(index=(N));
input N N2;
datalines;
1 2
2 10
5 6
5 7
;
run;
data have;
set have incidence_matrix;
rename n=from n2=to;
run;
/***************************************/
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for your time @Ksharp. Hash tables are probably the best way to do this given all of my limitations, but, more than this, the coders I work here with are not ready for hash tables, to be honest. I want to avoid it.
But I did. My original problem was a time period overlap problem, and I had experience working in other platforms with graph theory and network propagation so I found my self asking this question. Work you did with @yabwon in September 2019 helped me in the end. thanks again.
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Challenge to the next poster on this topic. Offer example data referring to this site's coverage of the same topic.
Solved: Get a list of all relationships - SAS Support Communities
Solved: Is family construction possible? - SAS Support Communities
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content