The sample code at https://www.devenezia.com/downloads/sas/samples/#groupbyeither shows how find the groups within data having dual keys and linked by key1 OR key2. Written for a 2004 SAS-L question "How to group people by their first name OR last name".
Here is the same processing rewritten for Proc DS2.
The core concept is using a HASH to map names to groups, and an multidata anti-map HASH of group to names. The anti-map must be used when combining separated groups that become linked. In particular, combining groups requires a traversal one of the groups in the anti-map using has_next/find_next.
* Given:
* There is only one records number for each name, no matter how many records contain that name;
data have;
input
NAME1: $10. RECORDS1 NAME2: $10. RECORDS2;
format records: 6.;
datalines;
JONATHAN 500 JOHNNY 905
JOHNO 750 JOHNNY 905
JONNO 415 JOHNO 750
JOHHN 675 JOHNO 750
JOHNNY 905 JOHN 1017
JOHN 1017 JOHNNY 905
TOM 5243 TOMMY 4
BRAD 873 BRADLEY 219
BRADLEY 219 BRAD 873
JON 875 AJONO 775
JOHHNO 904 JON 875
ZIP 250 ZIPP 175
JOHNNY 905 AJONO 775
;
proc datasets nolist lib=work;
delete want;
run;
proc ds2;
data _null_;
declare package hash name_group_map(); * a mapping from name to group. a name can belong to only one group;
declare package hash group_name_map(); * an anti mapping of group to names. a group can have many names;
declare char(25) name name1 name2;
declare double records records1 records2 group group1 group2;
method init();
name_group_map.ordered('ASCENDING');
name_group_map.keys([name]);
name_group_map.data([name records group]);
name_group_map.defineDone();
group_name_map.multidata('yes');
group_name_map.keys([group]);
group_name_map.data([group name]);
group_name_map.defineDone();
group = 0;
end;
method run();
declare double found1 found2 hold_group;
declare double rc;
set have;
* each row represents a link. both ends are a name with a count.;
* a name can appear in other rows, but its count will not be different;
found1 = name_group_map.find([name1], [name1 records1 group1]) = 0;
found2 = name_group_map.find([name2], [name2 records2 group2]) = 0;
select;
when ( ~found1 and ~found2)
do;
group + 1;
*put 'NOTE: both new' group=;
name_group_map.add([name1], [name1 records1 group]);
name_group_map.add([name2], [name2 records2 group]);
group_name_map.add([group], [group name1]);
group_name_map.add([group], [group name2]);
end;
when ( found1 and ~found2)
do;
*put 'NOTE: add name2 to name1' group1=;
name_group_map.add([name2], [name2 records2 group1]);
group_name_map.add([group1], [group1 name2]);
end;
when ( ~found1 and found2)
do;
*put 'NOTE: add name1 to name2' group2=;
name_group_map.add([name1], [name1 records1 group2]);
group_name_map.add([group2], [group2 name1]);
end;
when ( found1 and found2)
do;
if group1 = group2 then return;
* traverse the multidata of group2 key and migrate each data to the group1 key;
hold_group = group;
group = group2;
rc = group_name_map.find();
do while (rc = 0);
name_group_map.find();
name_group_map.replace([name], [name records group1]);
group_name_map.add([group1], [group1 name]);
if group_name_map.has_next() ne 0 then leave;
rc = group_name_map.find_next();
end;
group_name_map.removeall(); * remove the anti-map key;
group = hold_group;
end;
otherwise;
end;
end;
method term();
* DS2 does not overwrite existing tables;
* DS2 does allow a table option (OVERWRITE=YES), or /OVERWRITE=YES;
* However, DS2 Package HASH method OUTPUT does NOT provisio for such,
* and will log an error if used, example:
* ERROR: Malformed hash data source name <table-name>(overwrite=yes).
*
* Thus, the coder MUST pre-delete the target output table in an earlier step;
name_group_map.output('work.want');
end;
enddata;
run;
quit;
options nosource nonotes;
proc sort data=want;
by group descending records name;
run;
ods html file='want.html' style=plateau;
proc print data=have;
proc print noobs data=want;
run;
ods _all_ close;
options source notes;
Sample output
... View more