DATA Step, Macro, Functions and more

How to calculate number of subjects by different variablesin same data step

Reply
Super Contributor
Posts: 272

How to calculate number of subjects by different variablesin same data step

Dear,

 

I neeed to calculate number of subjects by different sort order  to calculate percentages.

 

I am running following codes to calculate the NS and then merge with main data set by the sort order. It will be a 4 steps and I have to calcualte many. So I want tto know if I can do in one step. Please suggest help. Thanks.

 

proc sql;
create table NS1 as
select count(distinct id) as NS1,trt1
from data1
group by trt1;
quit;


proc sql;
create table NS2 as
select count(distinct id) as NS2,term,trt1,period
from adae3
group by period,trt1,term;
quit;

 

data1;

id        term       trt1                  period

1          ge         100 mg              1

2           ge        200 mg              1

3           ge         100 mg             1

4           ge          200 mg             1

5           ae          100 mg             1

6           ae           300 mg            1

 

output needed:

data1;

id        term       trt1                  period               ns1                   ns2

1          ge         100 mg              1                      3                       2

2           ge        200 mg              1                       2                       2

3           ge         100 mg             1                       3                        2

4           ge          200 mg             1                      2                        2

5           ae          100 mg             1                      3                         1

6           ae           300 mg            1                       1                        1

 

Trusted Advisor
Posts: 1,228

Re: How to calculate number of subjects by different variablesin same data step

Posted in reply to knveraraju91

Hi,

 

One way is do this using a single proc sql instead of multiple ones. Please try below:

 

proc sql;
create table want as
select *,count(distinct id) as NS2
from (select id,trt1,term,period,count(distinct id) as NS1
from data1
group by trt1)
group by period,trt1,term
order by id;
quit;

Respected Advisor
Posts: 4,173

Re: How to calculate number of subjects by different variablesin same data step

Posted in reply to knveraraju91

Below code sample uses a hash objects for the counts to allow for implementation in a single data step.

Make sure you define in the Length statement for _vars and _keys a length which is sufficient to hold the max. strings for variable name combinations and values (100 chars used in below sample).

data have;
  infile datalines truncover;
  input (id term trt1 period) ($);
  datalines;
1 ge 100 mg 1
2 ge 200 mg 1
3 ge 100 mg 1
4 ge 200 mg 1
5 ae 100 mg 1
6 ae 300 mg 1
;
run;

data want(drop=_:);

  if _n_=1 then
    do;
      if 0 then set have;
      length _vars $100 _key $100 _count 8;
      dcl hash h1(multidata:'r',suminc:'_count');
      h1.defineKey('_vars','_key');
      h1.defineDone();

      _count=1;
      do while(last ne 1);
        set have end=last;
        /* count by trt1 */
        _vars=catx('|','trt1');
        _key=catx('|',trt1);
        _rc=h1.ref();

        /*count by period,trt1,term */
        _vars=catx('|','period','trt1','term');
        _key=catx('|',period,trt1,term);
        _rc=h1.ref();
      end;
      call missing(of _all_);
    end;

    set have;

    /* get count by trt1 */
    _vars=catx('|','trt1');
    _key=catx('|',trt1);
    _rc=h1.sum(sum: ns1);

    /* get count by  period,trt1,term */
    _vars=catx('|','period','trt1','term');
    _key=catx('|',period,trt1,term);
    _rc=h1.sum(sum: ns2);

  run;

N.B: Unlike in the SQL you've posted, above code counts ALL occurences and not only per DISTINCT ID. Given that ID's should be unique (and that they are in your sample data) things should still work as expected. 

Valued Guide
Posts: 505

Re: How to calculate number of subjects by different variablesin same data step

For simplicity I dropped period, not had to add.
Can easily add other cross tabs.

proc summary data=sd1.have n;
class trt1 term;
types trt1 trt1*term;
output out=want_pre(rename=_freq_=count drop=id _type_);
id id;
run;quit;

proc sql;
  create
      table wrk.want as
  select
      l.id
     ,l.trt1
     ,l.term
     ,c.count as ns1
     ,r.count as ns2
  from
     sd1.have as l
       left join want_pre as c on l.trt1=c.trt1 and c.term=""
       left join want_pre as r on l.trt1=r.trt1 and l.term=r.term and l.term ne ""
  order
     by id
;quit;

Ask a Question
Discussion stats
  • 3 replies
  • 215 views
  • 1 like
  • 4 in conversation