Re: How to create dummy variables for common codition

hkim3677 · Posted 10-15-2019 02:15 PM

Hi masters,

I am wondering how to create dummy variables for matched conditions.

The data looks like..

data test;
input id group condition1 condition2;
datalines;
a 1 123 9
b 1 123 5
c 1 234 7
d 0 456 8
e 0 123 8

f 0 234 9
;
run;

I want to create two dummies indicating the common conditions between two groups. Dummy1 takes 1 if condition1 is a common condition with any obs in another group. Similarly, dummy1 takes 1 if condition2 is a common condition with any obs in another group. After creating these dummies, I want to have the following dataset:

data test;
input id group condition1 condition2 dummy1 dummy2;
datalines;
a 1 123 9 1 1
b 1 123 5 1 0
c 1 234 7 1 0
d 0 456 8 0 0
e 0 123 8 1 0

f 0 234 9 1 1
;
run;

Thank you for your help!

PeterClemmensen · Posted 10-15-2019 03:26 PM

If your data is representative of your actual problem, you can do like this

data test;
input id $ group condition1 condition2;
datalines;
a 1 123 9
b 1 123 5
c 1 234 7
d 0 456 8
e 0 123 8
f 0 234 9
;
run;

data want(drop=i);
   set test;
   array _1{0:99, 0:999} _temporary_;
   array _2{0:99, 0:999} _temporary_;

   do until (lr1);
      set test end=lr1;
      _1[group, condition1]=1;
      _2[group, condition2]=1;
   end;

   do until (lr2);
      set test end=lr2;
      dummy1=0; dummy2=0;
      do i=0 to 99;
         dummy1=max(dummy1, (_1[i, condition1]=1 & group ne i));
         dummy2=max(dummy2, (_2[i, condition2]=1 & group ne i));
      end;
      output;
   end;
run;

Result:

id group condition1 condition2 dummy1 dummy2 
a  1     123        9          1      1 
b  1     123        5          1      0 
c  1     234        7          1      0 
d  0     456        8          0      0 
e  0     123        8          1      0 
f  0     234        9          1      1

The DATA to DATA Step Macro
Blog: SASnrd

FreelanceReinh · Posted 10-15-2019 03:34 PM

Hi @hkim3677,

Or like this:

proc sql;
create table want as
select *, count(distinct group)>1 as dummy2
from (select *, count(distinct group)>1 as dummy1
      from have
      group by condition1)
group by condition2
order by id;
quit;

(using the standard names HAVE and WANT for input and output datasets, resp.)

PS: Note that in your test data id must be defined as a character variable (e.g. input id $ ...).

Patrick · Posted 10-15-2019 04:01 PM

Something like below should work.

data test;
  input id $ group condition1 condition2;
  datalines;
a 1 123 9
b 1 123 5
c 1 234 7
d 0 456 8
e 0 123 8
f 0 234 9
;

data want;
  if _n_=1 then
    do;
      if 0 then set test(keep=group rename=(group=_group));
      dcl hash h1 (dataset:'test(keep=group condition1 rename=(group=_group)))', multidata:'y');
      h1.defineKey('condition1');
      h1.defineData('_group');
      h1.defineDone();
      dcl hash h2 (dataset:'test(keep=group condition2 rename=(group=_group)))', multidata:'y');
      h2.defineKey('condition2');
      h2.defineData('_group');
      h2.defineDone();
    end;

  set test;

  Dummy1 =0;
  h1.reset_dup();
  do while(h1.do_over() eq 0);
    if group ne _group then
      do;
        Dummy1 =1;
        leave;
      end;
  end;

  Dummy2 =0;
  h2.reset_dup();
  do while(h2.do_over() eq 0);
    if group ne _group then
      do;
        Dummy2 =1;
        leave;
      end;
  end;

run;

proc print;
run;

Ksharp · Posted 10-16-2019 08:25 AM

If you don't have a big table, otherwise I would try Hash Table.

data test;
input id $ group condition1 condition2;
datalines;
a 1 123 9
b 1 123 5
c 1 234 7
d 0 456 8
e 0 123 8
f 0 234 9
;
run;
proc sql;
create table want as
select *,case when
(select count(distinct group) from test where condition1=a.condition1)=
(select count(distinct group) from test ) then 1
else 0 end as dummy1,
case when
(select count(distinct group) from test where condition2=a.condition2)=
(select count(distinct group) from test ) then 1
else 0 end as dummy2
 from test as a;
quit;

How to create dummy variables for common codition