<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Summarizing data in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806083#M317555</link>
    <description>&lt;P&gt;Please make sure that your example data set runs. This is what my log looks like when running your code:&lt;/P&gt;
&lt;PRE&gt;553  data have;
554  infile datalines delimiter=',';
555  input patient_id $ claim_num $ procedure $ primary diagnosis $;
556  datalines;

NOTE: Invalid data for primary in line 557 30-80.
RULE:      ----+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+--
558        123456, 1111, COVID antibody test, cough
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=123456 claim_num=1111 procedure=COVID RN primary=. diagnosis=123456 _ERROR_=1 _N_=1
NOTE: Invalid data for primary in line 559 35-80.
560        777777, 4567, COVID RNA test, Sore throat
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=123456 claim_num=5555 procedure=COVID an primary=. diagnosis=777777 _ERROR_=1 _N_=2
NOTE: Invalid data for primary in line 561 27-80.
NOTE: LOST CARD.
562        ;
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=777777 claim_num=4567 procedure=Chest X- primary=. diagnosis=  _ERROR_=1 _N_=3
NOTE: SAS went to a new line when INPUT statement reached past the end of a line.
NOTE: The data set WORK.HAVE has 2 observations and 5 variables.
NOTE: DATA statement used (Total process time):
      real time           0.01 seconds
      cpu time            0.00 seconds


562  ;
&lt;/PRE&gt;
&lt;P&gt;Summarize and transpose:&lt;/P&gt;
&lt;PRE&gt;data have;
infile datalines delimiter=',';
input patient_id $ claim_num $ procedure :$25. primarydiagnosis :$15.;
datalines;
123456, 1111, COVID RNA test, cough
123456, 1111, COVID antibody test, cough
123456, 5555, COVID antibody test, cough
777777, 4567, COVID RNA test, Sore throat
777777, 4567, Chest X-ray, cough 
;

proc summary data=have nway;
   class procedure   primarydiagnosis;
   output out=counts;
run;

proc transpose data=counts out=want (drop=_name_)
   prefix=dx;
   by procedure;
   var _freq_;
   id primarydiagnosis;
run; &lt;/PRE&gt;
&lt;P&gt;Replace DX with something else, but remember that SAS variable names are limited to 32 characters, and including fixed text in a name like "primary diagnosis" uses more than half that limit. Default rules also do not allow spaces in the variable names.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If the only thing you are going to do with that data set is to print it then Proc Report or Tabulate are likely better choices to make a report.&lt;/P&gt;</description>
    <pubDate>Tue, 05 Apr 2022 14:51:29 GMT</pubDate>
    <dc:creator>ballardw</dc:creator>
    <dc:date>2022-04-05T14:51:29Z</dc:date>
    <item>
      <title>Summarizing data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806078#M317552</link>
      <description>&lt;P&gt;I have claims data that I would like to summarize the procedures within a visit (claim num) and the diagnoses for the procedure combinations within a visit.&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Example of data I have:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;Patient ID&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Claim num&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Procedure&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Primary diagnosis&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;123456&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1111&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;COVID RNA test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;123456&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1111&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;123456&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5555&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;777777&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;4567&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;COVID RNA test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Sore throat&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;777777&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;4567&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Chest X-ray&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;888888&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1212&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Data I want, as a dataset:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;Procedure combination by 
claim #&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Primary diagnosis cough&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Primary diagnosis sore throat, cough&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;COVID RNA test, COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;COVID RNA test, chest X-ray&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data have;&lt;BR /&gt;infile datalines delimiter=',';&lt;/P&gt;&lt;P&gt;input patient_id $ claim_num $ procedure :$25. primarydiagnosis :$15.;&lt;BR /&gt;datalines;&lt;BR /&gt;123456, 1111, COVID RNA test, cough&lt;BR /&gt;123456, 1111, COVID antibody test, cough&lt;BR /&gt;123456, 5555, COVID antibody test, cough&lt;BR /&gt;777777, 4567, COVID RNA test, Sore throat&lt;BR /&gt;777777, 4567, Chest X-ray, cough&amp;nbsp;&lt;/P&gt;&lt;P&gt;888888, 1212,&amp;nbsp;COVID antibody test, cough&lt;/P&gt;&lt;P&gt;;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 05 Apr 2022 17:08:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806078#M317552</guid>
      <dc:creator>jk2018</dc:creator>
      <dc:date>2022-04-05T17:08:53Z</dc:date>
    </item>
    <item>
      <title>Re: Summarizing data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806083#M317555</link>
      <description>&lt;P&gt;Please make sure that your example data set runs. This is what my log looks like when running your code:&lt;/P&gt;
&lt;PRE&gt;553  data have;
554  infile datalines delimiter=',';
555  input patient_id $ claim_num $ procedure $ primary diagnosis $;
556  datalines;

NOTE: Invalid data for primary in line 557 30-80.
RULE:      ----+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+--
558        123456, 1111, COVID antibody test, cough
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=123456 claim_num=1111 procedure=COVID RN primary=. diagnosis=123456 _ERROR_=1 _N_=1
NOTE: Invalid data for primary in line 559 35-80.
560        777777, 4567, COVID RNA test, Sore throat
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=123456 claim_num=5555 procedure=COVID an primary=. diagnosis=777777 _ERROR_=1 _N_=2
NOTE: Invalid data for primary in line 561 27-80.
NOTE: LOST CARD.
562        ;
NOTE: Invalid data errors for file CARDS occurred outside the printed range.
NOTE: Increase available buffer lines with the INFILE n= option.
patient_id=777777 claim_num=4567 procedure=Chest X- primary=. diagnosis=  _ERROR_=1 _N_=3
NOTE: SAS went to a new line when INPUT statement reached past the end of a line.
NOTE: The data set WORK.HAVE has 2 observations and 5 variables.
NOTE: DATA statement used (Total process time):
      real time           0.01 seconds
      cpu time            0.00 seconds


562  ;
&lt;/PRE&gt;
&lt;P&gt;Summarize and transpose:&lt;/P&gt;
&lt;PRE&gt;data have;
infile datalines delimiter=',';
input patient_id $ claim_num $ procedure :$25. primarydiagnosis :$15.;
datalines;
123456, 1111, COVID RNA test, cough
123456, 1111, COVID antibody test, cough
123456, 5555, COVID antibody test, cough
777777, 4567, COVID RNA test, Sore throat
777777, 4567, Chest X-ray, cough 
;

proc summary data=have nway;
   class procedure   primarydiagnosis;
   output out=counts;
run;

proc transpose data=counts out=want (drop=_name_)
   prefix=dx;
   by procedure;
   var _freq_;
   id primarydiagnosis;
run; &lt;/PRE&gt;
&lt;P&gt;Replace DX with something else, but remember that SAS variable names are limited to 32 characters, and including fixed text in a name like "primary diagnosis" uses more than half that limit. Default rules also do not allow spaces in the variable names.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If the only thing you are going to do with that data set is to print it then Proc Report or Tabulate are likely better choices to make a report.&lt;/P&gt;</description>
      <pubDate>Tue, 05 Apr 2022 14:51:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806083#M317555</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2022-04-05T14:51:29Z</dc:date>
    </item>
    <item>
      <title>Re: Summarizing data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806108#M317566</link>
      <description>&lt;P&gt;I'm trying to combine data across observations to see the total count of all the tests combined and diagnoses combined that occurred at a visit.&amp;nbsp; A visit can have multiple observations.&amp;nbsp; All the observations for a visit will have the same claim number.&amp;nbsp; When I used the code you provided there was just one procedure per row and one diagnosis per column.&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Data I want:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;Procedure &lt;STRONG&gt;combination&lt;/STRONG&gt; &lt;STRONG&gt;by claim #&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Primary diagnosis cough&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&lt;STRONG&gt;Primary diagnosis sore throat, cough&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;&lt;STRONG&gt;COVID RNA test, COVID antibody test&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;COVID antibody test&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;&lt;STRONG&gt;COVID RNA test, chest X-ray&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 05 Apr 2022 16:32:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806108#M317566</guid>
      <dc:creator>jk2018</dc:creator>
      <dc:date>2022-04-05T16:32:18Z</dc:date>
    </item>
    <item>
      <title>Re: Summarizing data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806109#M317567</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/66157"&gt;@jk2018&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;I'm trying to combine data across observations to see the total count of all the tests combined and diagnoses combined that occurred at a visit.&amp;nbsp; A visit can have multiple observations.&amp;nbsp; All the observations for a visit will have the same claim number.&amp;nbsp; When I used the code you provided there was just one procedure per row and one diagnosis per column.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Data I want:&lt;/P&gt;
&lt;TABLE&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;P&gt;Procedure &lt;STRONG&gt;combination&lt;/STRONG&gt; &lt;STRONG&gt;by claim #&lt;/STRONG&gt;&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;Primary diagnosis cough&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;&lt;STRONG&gt;Primary diagnosis sore throat, cough&lt;/STRONG&gt;&lt;/P&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;P&gt;&lt;STRONG&gt;COVID RNA test, COVID antibody test&lt;/STRONG&gt;&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;1&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;P&gt;COVID antibody test&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;1&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;P&gt;&lt;STRONG&gt;COVID RNA test, chest X-ray&lt;/STRONG&gt;&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;P&gt;1&lt;/P&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;So, where is the VISIT information in that? I don't see it anywhere.&lt;/P&gt;
&lt;P&gt;Summarize with the VISIT, but since I don't see visit in here anywhere, it's pretty questionable as to what you want.&lt;/P&gt;
&lt;P&gt;If that "combination by claim #" is to be the visit then this is NOT a good candidate for a data set as you are moving values, Claim, into the same column as procedure and the result would be horrid to use for any real purpose.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;"to see" really sounds like a report.&lt;/P&gt;
&lt;PRE&gt;proc tabulate data=have;
   class claim_num procedure primarydiagnosis;
   table claim_num='Claim Number=',
         procedure,
         primarydiagnosis
         ;
run;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 05 Apr 2022 16:44:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806109#M317567</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2022-04-05T16:44:39Z</dc:date>
    </item>
    <item>
      <title>Re: Summarizing data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806250#M317623</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines delimiter=',';
input patient_id $ claim_num $ procedure :$25. primarydiagnosis :$15.;
datalines;
123456, 1111, COVID RNA test, cough
123456, 1111, COVID antibody test, cough
123456, 5555, COVID antibody test, cough
777777, 4567, COVID RNA test, Sore throat
777777, 4567, Chest X-ray, cough 
888888, 1212, COVID antibody test, cough
;

data temp;
do until(last.claim_num);
 set have;
 by patient_id  claim_num;
 length a b $ 200 ;
 if not findw(a,strip(procedure),', ') then a=catx(',',a,procedure);
 if not findw(b,strip(primarydiagnosis),', ') then b=catx(',',b,primarydiagnosis);
end;
keep a b;
run;

proc freq data=temp;
table a*b /nocol norow nopercent ;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 06 Apr 2022 11:49:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Summarizing-data/m-p/806250#M317623</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2022-04-06T11:49:17Z</dc:date>
    </item>
  </channel>
</rss>

