Instead of using a RETAINed character variable, which is limited in length (with longer codes and source "names" you could exceed the 32K limit for character variables), I recommend using a hash object. It makes finding an existing entry much easier (and faster, as a binary tree search will outperform a sequential scan), and it is limited in size only by available memory.
data have;
  /* Sample input: one name per data line; every row gets the same
     country code so that all names fall into a single BY group. */
  length nam_1 $50 iso_a2 $2;
  input nam_1 $;
  iso_a2 = "AL";
  datalines;
Harry
Hummer
Harriet
Hog
Ham
Hello
Hogwarth
Hitachi
Holler
;
proc sort data=have;
  /* Order by country first, then by name, so that a later step can
     process the rows with BY iso_a2 and sees the names of each country
     in a reproducible sequence. */
  by iso_a2 nam_1;
run;
data want;
  /* Assign every name a two-character short_code that is unique within
     its iso_a2 group.  Candidates are tried in this order:
       1. the first two characters of the upper-cased name
       2. the first character plus each later character of the name
       3. the first character plus byte(65) .. byte(90)
          (the letters A-Z in an ASCII session)
     If every candidate is already taken, a message is written to the log
     and short_code is left blank for that row. */
  set have;
  by iso_a2;
  length short_code $2;

  /* Create the lookup of codes already handed out, once per step. */
  if _n_ = 1 then do;
    declare hash code ();
    code.definekey("short_code");
    code.definedata("short_code");
    code.definedone();
  end;

  /* Uniqueness is per iso_a2 group, so empty the lookup at each new group. */
  if first.iso_a2 then code.clear();

  nam_u = upcase(nam_1);
  short_code = substr(nam_u,1,2);

  /* find() returns non-zero when the key is not in the hash yet. */
  if code.find() ne 0 then code.add();
  else do;
    i1 = 2;   /* position in nam_u of the last character tried as 2nd letter */
    i2 = 64;  /* byte() argument of the last fallback character tried        */

    /* Keep building candidates until one is free or the fallback range
       65-90 has been used up. */
    do until (code.find() ne 0 or i2 gt 90);
      if i1 lt length(nam_u) then do;
        /* still characters of the name left to try */
        i1 + 1;
        short_code = substr(nam_u,1,1) !! substr(nam_u,i1,1);
      end;
      else do;
        /* name exhausted: fall back to the next byte() character */
        i2 + 1;
        short_code = substr(nam_u,1,1) !! byte(i2);
      end;
    end;

    if i2 gt 90 then do;
      putlog "run out of characters";
      /* The last candidate was built with byte(91), which lies outside
         the 65-90 range and was never registered in the hash; blank it
         so no invalid code reaches the output. */
      call missing(short_code);
    end;
    else code.add();
  end;

  drop nam_u i1 i2;
run;
... View more