i'm certain there are at least 5 better ways of doing this but this may do what you're after: options mlogic symbolgen;
data ref_table;
infile datalines dlm=",";
format var $50.;
input var;
datalines;
Var_Ref,
one,
First one,
Test,
number,
;
run;
data big_table;
infile datalines dlm=",";
format var $50.;
input var;
datalines;
Var,
First one,
Test number one,
;
run;
proc sql;
create table distinct_varlist as
select distinct(strip(var)) as ref_var
from ref_table;
quit;
proc sql;
create table Big_table_distinct as
select distinct var
from big_table;
quit;
proc sql noprint; select count(ref_var) into: distinct_varcount from distinct_varlist;
%macro iterative_check;
%do i = 1 %to &distinct_varcount.;
data _null_;
p = &i.;
set distinct_varlist point=p;
call symputx('varlength',length(ref_var),'L');
call symputx('ref_var_m',ref_var,'L');
stop;
run;
data setall_&i.(drop=CV:);
set Big_table_distinct;
BigTable_Var = strip(Var);
RefTable_Var = strip("&ref_var_m.");
if BigTable_Var = RefTable_Var then CV1 = "Equal";
if indexw(BigTable_Var,RefTable_Var) then CV2 = strip(substr(BigTable_Var,indexw(BigTable_Var,RefTable_Var),index(BigTable_Var,' ')));
if indexw(RefTable_Var,BigTable_Var) then CV3 = strip(substr(RefTable_Var,indexw(RefTable_Var,BigTable_Var),index(RefTable_Var,' ')+1));
format Common_Values $50.;
Common_Values = coalescec(CV1,CV2,CV3);
run;
%end;
%mend iterative_check;
%iterative_check;
data combine;
set setall:;
run;
proc sort nodupkey data=combine; by BigTable_Var RefTable_Var Common_Values;
data output;
set combine;
where Common_Values ne '';
run;
... View more