<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Summary functions conditioning on another column, proc sql in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825566#M326080</link>
    <description>&lt;PRE&gt;&lt;CODE class=""&gt;(Payment*(Answer1 ne .)) &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Is not going to do what you want because SAS returns 0 for false for the Answer1 ne . So Payment*0=0. Making it a valid value for the Median function to consider and getting included in a "count". &lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What is your "count" supposed to be counting? Since you have two variables that might be missing at random it is not clear what it might be counting.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The medians by not missing:&lt;/P&gt;
&lt;PRE&gt;proc sql; 
   create table sortofwant as
   select a.type,a.dis,a.MedianPayment_Ans1,b.MedianPayment_Ans2
   from (
         select type,dis, median(payment) as MedianPayment_Ans1
         from (select * from have where not missing(answer_1))
         group by type,dis
        ) as a
        left join
        (
         select type,dis, median(payment) as MedianPayment_Ans2
         from (select * from have where not missing(answer_2))
         group by type,dis
        ) as b
        on a.type=b.type
           and a.dis=b.dis
   ;
quit;&lt;/PRE&gt;
&lt;P&gt;Caution: small example =&amp;gt;small code and if you have a combination of answer_1 that are all missing for a type*dis combination that are not missing for answer_2 then you need something other than a left join to get both groups of summaries.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note that this is likely going to be a bit of a headache if you try to extend it to many Answer periods.&lt;/P&gt;</description>
    <pubDate>Tue, 26 Jul 2022 21:59:27 GMT</pubDate>
    <dc:creator>ballardw</dc:creator>
    <dc:date>2022-07-26T21:59:27Z</dc:date>
    <item>
      <title>Summary functions conditioning on another column, proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825511#M326074</link>
      <description>&lt;P&gt;I have a dataset&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data have;
input ID Type $ DIS $ Timepoint Answer_1 Answer_2 Payment;
datalines;
1001 0to5 A 0 0 1 100
1001 0to5 A 1 1 0 210
1001 0to5 A 3 1 . 340
1002 0to5 A 0 1 0 290
1002 0to5 A 1 . 1 180
1003 0to5 B 0 1 1 200
1003 0to5 B 2 . 0 150
1004 6to9 A 0 . . 300
1004 6to9 A 1 1 1 100
1004 6to9 A 2 1 . 340
1005 6to9 A 0 1 1 900
1005 6to9 A 1 0 1 100
1005 6to9 A 3 . 0 940
1006 6to9 A 0 1 . 400
1006 6to9 A 1 1 1 450
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;BR /&gt;I would like the output table:&lt;/P&gt;&lt;TABLE border="1"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;Type&lt;/TD&gt;&lt;TD&gt;DIS&lt;/TD&gt;&lt;TD&gt;MedianPayment_Ans1&lt;/TD&gt;&lt;TD&gt;MedianPayment_Ans2&lt;/TD&gt;&lt;TD&gt;Count&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;0to5&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;250&lt;/TD&gt;&lt;TD&gt;195&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;0to5&lt;/TD&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;200&lt;/TD&gt;&lt;TD&gt;175&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6to9&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;370&lt;/TD&gt;&lt;TD&gt;450&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&lt;BR /&gt;The output is: grouped by Type and DIS, with three new columns.&lt;/P&gt;&lt;P&gt;Two of the columns are the median payments for each respective answer field, only considering the values which have the respective answer not being missing.&lt;/P&gt;&lt;P&gt;The third column has the count of the number of unique ID's which contributed to the median.&amp;nbsp;&amp;nbsp;&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&lt;BR /&gt;Constraints are:&amp;nbsp;&amp;nbsp;&lt;BR /&gt;- There are many ID's, Type's and DIS's&lt;BR /&gt;- Data set is ordered on ID and Timepoint&lt;/P&gt;&lt;P&gt;- For any distinct ID, the first Timepoint is equal to 0, and Timepoint is a nonnegative integer&lt;/P&gt;&lt;P&gt;- All answer fields are either 0, 1 or missing (.) .&lt;/P&gt;&lt;P&gt;- Payment is a positive integer, possibly missing too.&lt;/P&gt;&lt;P&gt;- There are at least 50 answer fields.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Could someone help me with this please? Preferably using proc sql. I thought of using some indicator function but it didn't work:&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc sql;
create table want as
select Type
,DIS
,median(Payment*(Answer1 ne .)) as medianPayment_Ans1
,median(Payment*(Answer2 ne .)) as medianPayment_Ans2
from have
group by Type, DIS;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 26 Jul 2022 19:27:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825511#M326074</guid>
      <dc:creator>penguins0101</dc:creator>
      <dc:date>2022-07-26T19:27:57Z</dc:date>
    </item>
    <item>
      <title>Re: Summary functions conditioning on another column, proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825565#M326079</link>
      <description>&lt;BLOCKQUOTE&gt;
&lt;P&gt;&lt;SPAN&gt;The third column has the count of the number of unique ID's which contributed to the median.&amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Do missing values contribute here in any way?&lt;/P&gt;</description>
      <pubDate>Tue, 26 Jul 2022 21:52:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825565#M326079</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2022-07-26T21:52:39Z</dc:date>
    </item>
    <item>
      <title>Re: Summary functions conditioning on another column, proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825566#M326080</link>
      <description>&lt;PRE&gt;&lt;CODE class=""&gt;(Payment*(Answer1 ne .)) &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Is not going to do what you want because SAS returns 0 for false for the Answer1 ne . So Payment*0=0. Making it a valid value for the Median function to consider and getting included in a "count". &lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What is your "count" supposed to be counting? Since you have two variables that might be missing at random it is not clear what it might be counting.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The medians by not missing:&lt;/P&gt;
&lt;PRE&gt;proc sql; 
   create table sortofwant as
   select a.type,a.dis,a.MedianPayment_Ans1,b.MedianPayment_Ans2
   from (
         select type,dis, median(payment) as MedianPayment_Ans1
         from (select * from have where not missing(answer_1))
         group by type,dis
        ) as a
        left join
        (
         select type,dis, median(payment) as MedianPayment_Ans2
         from (select * from have where not missing(answer_2))
         group by type,dis
        ) as b
        on a.type=b.type
           and a.dis=b.dis
   ;
quit;&lt;/PRE&gt;
&lt;P&gt;Caution: small example =&amp;gt;small code and if you have a combination of answer_1 that are all missing for a type*dis combination that are not missing for answer_2 then you need something other than a left join to get both groups of summaries.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note that this is likely going to be a bit of a headache if you try to extend it to many Answer periods.&lt;/P&gt;</description>
      <pubDate>Tue, 26 Jul 2022 21:59:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825566#M326080</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2022-07-26T21:59:27Z</dc:date>
    </item>
    <item>
      <title>Re: Summary functions conditioning on another column, proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825567#M326081</link>
      <description>&lt;P&gt;This should scale for you now:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID Type $ DIS $ Timepoint Answer_1 Answer_2 Payment;
datalines;
1001 0to5 A 0 0 1 100
1001 0to5 A 1 1 0 210
1001 0to5 A 3 1 . 340
1002 0to5 A 0 1 0 290
1002 0to5 A 1 . 1 180
1003 0to5 B 0 1 1 200
1003 0to5 B 2 . 0 150
1004 6to9 A 0 . . 300
1004 6to9 A 1 1 1 100
1004 6to9 A 2 1 . 340
1005 6to9 A 0 1 1 900
1005 6to9 A 1 0 1 100
1005 6to9 A 3 . 0 940
1006 6to9 A 0 1 . 400
1006 6to9 A 1 1 1 450
;

proc sort data=have;
by TYPE DIS ID;
run;


data have_expanded;
set have;
by type DIS ID;

first_id = first.id;

array pmt_ans(2);
array answer(2) answer_1-answer_2;

do i=1 to dim(answer);
if nmiss(answer(i), payment) = 0 then pmt_ans(i) = payment;
end;

run;

proc means data=have_expanded nway noprint;
by type dis;
var pmt_ans: ;
output out=want sum(first_id) = Num_IDs median =   / autoname;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 26 Jul 2022 22:04:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summary-functions-conditioning-on-another-column-proc-sql/m-p/825567#M326081</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2022-07-26T22:04:11Z</dc:date>
    </item>
  </channel>
</rss>

