Solved: Re: how to find same values from two variables ?

_bhishek · Posted 07-15-2015 04:36 AM

I have 23 variable and i want all those values which are coming in all the variable ( it doesn't matter that all should be in the same row ).

For Example:

var1 var2 var3 var4 var5

23 87 94 59 54

36 23 19 23 57

35 38 93 51 23

68 49 23 78 99

So we can see that all the variable have 23 as a value so i want 23 as answer .

ballardw · Posted 07-15-2015 01:25 PM

One way. The 5 is the variable count. I'll leave that as a nearly trivial exercise for the interested reader to get the value of the number of variables involved into a variable as needed.

data have;
input var1 var2 var3 var4 var5;
datalines;
23    87    94      59   54
36    23    19      23   57
87    63     87    51    99
35    38     87    87     23
68    49     23    78     87
;
run;

data trans (keep= name value);
   set have;
   array v var1 var2 var3 var4 var5;
   length name $ 32;
   do j = 1 to dim(v);
      value = v;
      name = vname(v);
      output;
   end;
run;

proc sql;
   select value
   from (
         select value, count(*) as vcount
         from (select distinct value,name from trans)
         group by value
        )
   where vcount=5;
   ;
quit;

View solution in original post

ballardw · Posted 07-15-2015 12:04 PM

Please show an example of what you expect the output to look like. What if you have 2 or more values with duplicate values? Do you want to know how many matches?

_bhishek · Posted 07-15-2015 12:16 PM

I want ALL the values which are present in ALL the variables. I do want how many matches are there.

For Example:

var1 var2 var3 var4 var5

23 87 94 59 54

36 23 19 23 57

87 63 87 51 99

35 38 87 87 23

68 49 23 78 87

like in this example 23 and 87 are present in all the variable so i want output as 23 87.

I think it's clear now if not you can ask me again and thanks for the reply ballardw.

ballardw · Posted 07-15-2015 01:25 PM

One way. The 5 is the variable count. I'll leave that as a nearly trivial exercise for the interested reader to get the value of the number of variables involved into a variable as needed.

data have;
input var1 var2 var3 var4 var5;
datalines;
23    87    94      59   54
36    23    19      23   57
87    63     87    51    99
35    38     87    87     23
68    49     23    78     87
;
run;

data trans (keep= name value);
   set have;
   array v var1 var2 var3 var4 var5;
   length name $ 32;
   do j = 1 to dim(v);
      value = v;
      name = vname(v);
      output;
   end;
run;

proc sql;
   select value
   from (
         select value, count(*) as vcount
         from (select distinct value,name from trans)
         group by value
        )
   where vcount=5;
   ;
quit;

_bhishek · Posted 07-16-2015 03:16 AM

Thanks ballardw your answer was correct but is there any other way to do it without transposing it because in actual problem i have 25 variables and 1 lakh observations so when i will transpose it. then i will have 25 Lakh observation so will you suggest me to do the same way for my problem.

ballardw · Posted 07-16-2015 11:20 AM

Increase the variables from VAR1 to VAR5 to be the names of all 25 variables.

I don't know what a lakh may be, but the code should run. What is the concern about the number of observations?

ebills · Posted 07-15-2015 12:09 PM

Are you wanting to know what number all 23 variables have in common? So, do you want your output to just show what number appears in EVERY column?

var1 var2 var3 ...

23 23 23 etc...

_bhishek · Posted 07-15-2015 01:05 PM

Yes ebills , I want my output to show all the numbers which are in every column.

evp000 · Posted 07-15-2015 03:30 PM

data x1;

length var1 var2 var3 var4 var5 8;

input var1 var2 var3 var4 var5;

datalines;

23 87 94 59 54

36 23 19 23 57

87 63 87 51 99

35 38 87 87 23

68 49 23 78 87

run;

%let nvars=5;

%macro check;

%do i = 01 %to 99;

%let found = N;

%global found&i;

data found&i;

set x1 end = at_end;

retain flag1-flag5 0; /* set to 0 for each record */

array vars {*} var1-var&nvars;

array flags {*} flag1-flag&nvars;

checkvar = &i;

do i = 1 to &nvars; /* for the first value in the first record, 23, check this against the var2, 3, 4 and 5 in the same record */

if checkvar = vars{i} then flags{i} = 1; /* do not sum, just flag if any */

end;

if at_end and sum(of flag1-flag5) = &nvars then /* if found in all, then each flag will be 1 and the total will be 5 */

do;

put checkvar ' EXISTS IN ALL COLUMNS. ';

in_all = checkvar;

output found&i;

call symputx("found&i", 'Y'); /* macro variable set to Y if found in all vars */

end;

else call symputx("found&i",'N');

run;

%put found&i=&&found&i;

%end;

%mend;

%check;

%macro put_together;

data all_found; /* create empty dataset */

stop;

run;

%do j = 01 %to 99;

%*put found&j=&&found&j;

%if &&found&j = Y %then /* append if there are any */

%do;

data all_found;

set all_found found&j;

run;

%end;

%mend;

%put_together;

data _null_;

set all_found;

if _n_ = 1 then put 'These values exist in all variables: ';

put in_all;

run;

stat_sas · Posted 07-15-2015 11:32 PM

data have;

input var1 var2 var3 var4 var5;

id=_n_;

datalines;

23 87 94 59 54

36 23 19 23 57

87 63 87 51 99

35 38 87 87 23

68 49 23 78 87

;

proc transpose data=have out=want;

by id;

run;

proc sql;

select col1 as value from want

group by col1

having count(distinct _name_)=(select max(id) from want);

quit;

_bhishek · Posted 07-16-2015 03:14 AM

Thanks stat@sas your answer was correct but is there any other way to do it without transposing it because in actual problem i have 25 variables and 1 lakh observations so when i will transpose it. then i will have 25 Lakh observation so will you suggest me to do the same way for my problem.

Ksharp · Posted 07-16-2015 08:46 AM

If you have 25 variable, you need change code as:

ha.definedata('k','v1','v2','v3','v4','v5');

----> ha.definedata('k','v1','v2','v3','v4','v5','v6',...........,'v25');

array x{*} var1-var5;

array y{*} v1-v5;

--->

array x{*} var1-var25;

array y{*} v1-v25;

Ksharp · Posted 07-16-2015 08:41 AM

Use Hash Table, If I understood what you mean.

Code: Program

data have;
input var1 var2 var3 var4 var5;
cards;
23 87 94 59 54
36 23 19 23 57
35 38 93 51 23
68 49 23 78 99  
;
run;
data want;
if _n_ eq 1 then do;
 declare hash ha(hashexp:20);
 declare hiter hi('ha');
 ha.definekey('k');
 ha.definedata('k','v1','v2','v3','v4','v5');
 ha.definedone();
end;
 set have end=last;
 array x{*} var1-var5;
 array y{*} v1-v5;
do i=1 to dim(x);
 k=x{i};call missing(of y{*});
 rc=ha.find();
 y{i}=1;
 ha.replace();
end;
if last then do;
 do while(hi.next()=0);
  if nmiss(of y{*})=0 then output;
 end;
end;
keep k;
run;

Xia Keshan

Bryan · Posted 07-16-2015 12:32 PM

Here is another way.

data have;

input var1 var2 var3 var4 var5;

id=_n_;

datalines;

23 87 94 59 54

36 23 19 23 57

87 63 87 51 99

35 38 87 87 23

68 49 23 78 87

;

run;

proc contents data = have

out = haveVars(keep = varnum name)

noprint;

run;

Proc Sql noprint;

Select count(name)

into :NmbrVrs separated by ''

from haveVars

where name contains "var";

Quit;

%put &NmbrVrs var&NmbrVrs;

%macro fndDplcts();

Data lst (drop=var1 - var&NmbrVrs);

Set have;

%do p = 1 %to &NmbrVrs;

Nmbr=var&&p;

varNm="var&&p";

VarNmId=compress(varNm||"_Id"||_N_);

Cntr=1;

output;

%end;

Run;

%mend;

%fndDplcts;

PROC SORT DATA=LST OUT=LstSrt;

BY Nmbr;

RUN;

PROC TRANSPOSE DATA=LstSrt OUT=LstTrnsps

NAME=Source LABEL=Label;

BY Nmbr;

ID VarNmId;

VAR Cntr;

RUN; QUIT;

Data LstTrnsps (drop= Source);

Set LstTrnsps;

array a(*) _numeric_;

do i=1 to dim(a);

if a(i) = . then a(i) = 0;

end;

drop i;

run;

proc contents data = LstTrnsps

out = LstTrnspsVars(keep = varnum name)

noprint;

run;

Proc Sql noprint;

Select name

into :VrsLst separated by ' '

from LstTrnspsVars

where name contains "var";

Quit;

%put &VrsLst;

Data LstCnt;

retain Nmbr Ttl &VrsLst;

Set LstTrnsps;

array vrArry &VrsLst;

Ttl=0;

do over vrArry;

Ttl=Ttl+vrArry;

end;

run;

Proc sort data=LstCnt;

by descending Ttl;

run;

Code: Program

Catch up on SAS Innovate 2026