<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Query on is there any way to reduce the size of output for bigger dataset? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648299#M194164</link>
    <description>&lt;P&gt;A Proc REPORT variable DEFINED with feature / GROUP will cause summary statistics to be computed. The default statistic is SUM, perfect for 'counting' a 0/1 flag variable.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have_apriori_summary(keep=hotel_id guest_id is_repeated_guest);
  call streaminit (123);
  do hotel_id = 1 to 10;
    do index = 1 to 100;
      guest_id + 1;
      is_repeated_guest = rand ('uniform') &amp;lt; 0.40;
      output;
    end;
  end;
run;

proc report data=have_apriori_summary;
  title "Guest retention results from apriori per guest summary";
  column hotel_id is_repeated_guest;

  define hotel_id / group;
  define is_repeated_guest / 'Repeated Guests';

  where hotel_id in (1,3,4);
run;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Output&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RichardADeVenezia_0-1589638708180.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39515i17D90C89CF30C9F1/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RichardADeVenezia_0-1589638708180.png" alt="RichardADeVenezia_0-1589638708180.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Consider a slightly different scenario:&lt;/P&gt;
&lt;P&gt;Suppose the data set contains transactional information, such as CHECK_IN_DATE, ROOM_ID, and GUEST_ID and the REPEAT flag is a marker for some customer service action at the desk.&amp;nbsp; Taking a sum of the flag values over the transactions will NOT give the number of DISTINCT guests, the SUM will be the number of repeated guest &lt;STRONG&gt;visits&lt;/STRONG&gt;.&amp;nbsp; You will need to reduce the transaction data to a summary in order to count the number of distinct guests.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have_checkin_log(keep=hotel_id room_id date guest_id is_repeated_guest);
  call streaminit (123);
  array tracker(7500) _temporary_;
  do index = 1 to dim(tracker);
    tracker(index) = rand('uniform') &amp;lt; 0.15;
  end;
  do hotel_id = 1 to 10;
    do room_id = 1 to 50;
      do date = '01jan2020'd to '31jan2020'd;
        guest_id = rand('integer',1,7500);
        is_repeated_guest = tracker(guest_id);
        output;
      end;
    end;
  end;
  format date yymmdd10.;
run;

proc sql;
  create table repeated_guests_summary as 
  select 
    hotel_id,
    guest_id,
    is_repeated_guest,
    count(*) as repeated_visits_count
  from have_checkin_log
  where is_repeated_guest
  group by hotel_id, guest_id
  ;
quit;

proc report data=repeated_guests_summary;
  title "Repeated Guests and Visits";
  column hotel_id is_repeated_guest repeated_visits_count;

  define hotel_id / group;
  define is_repeated_guest / 'Repeated Guests';

  where hotel_id in (1,3,4);
run;
&lt;/PRE&gt;
&lt;P&gt;Output&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RichardADeVenezia_2-1589640782673.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39521iC47B13778BC8618A/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RichardADeVenezia_2-1589640782673.png" alt="RichardADeVenezia_2-1589640782673.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sat, 16 May 2020 14:56:51 GMT</pubDate>
    <dc:creator>RichardDeVen</dc:creator>
    <dc:date>2020-05-16T14:56:51Z</dc:date>
    <item>
      <title>Query on is there any way to reduce the size of output for bigger dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648217#M194125</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;I tried proc report for calculating the total no of repeated guest to a specific hotel in my dataset which has 119390 observations in total. I got a longer output, is there any way to reduce the size of the output rather than scrolling the page too much down to see the result? Could you provide solution for this please?&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Amali6_0-1589587005088.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39498i8C7797525A63A887/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Amali6_0-1589587005088.png" alt="Amali6_0-1589587005088.png" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Amali6_1-1589587027953.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39499i67749AC883BCEAA1/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Amali6_1-1589587027953.png" alt="Amali6_1-1589587027953.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Here&amp;nbsp; i would like to show only the total 2032 using the same proc report, is there any way to do this?&lt;/P&gt;&lt;PRE&gt;proc report data=hotel.Hotel_bookings;
 column hotel is_repeated_guest;
 where hotel='City Hotel' and 
       is_repeated_guest=1;
 rbreak after/summarize style=[font_weight=bold];
 title'No of repeated guest to City Hotel';
 run;&lt;/PRE&gt;</description>
      <pubDate>Fri, 15 May 2020 23:58:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648217#M194125</guid>
      <dc:creator>Amali6</dc:creator>
      <dc:date>2020-05-15T23:58:21Z</dc:date>
    </item>
    <item>
      <title>Re: Query on is there any way to reduce the size of output for bigger dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648221#M194129</link>
      <description>&lt;P&gt;Input data is often helpful.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you want to count (or sum ) within a specific value then you would use a Define statement to tell the Proc that is what you want:&lt;/P&gt;
&lt;PRE&gt;proc report data=hotel.Hotel_bookings;
 column hotel is_repeated_guest;
 where hotel='City Hotel' and 
       is_repeated_guest=1;
 define hotel / group;
&lt;STRIKE&gt; rbreak after/summarize style=[font_weight=bold];&lt;/STRIKE&gt;
 title'No of repeated guest to City Hotel';
 run;&lt;/PRE&gt;
&lt;P&gt;Since your where statement will only have one value of hotel there really is no reason for an "rbreak after". &lt;BR /&gt;If you are running this without the where I would suggest not including the Rbreak and see what the result looks like.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Or quite possibly you might want to summarize your data and use proc print.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 16 May 2020 00:54:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648221#M194129</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2020-05-16T00:54:01Z</dc:date>
    </item>
    <item>
      <title>Re: Query on is there any way to reduce the size of output for bigger dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648243#M194143</link>
      <description>&lt;P&gt;A simple SQL will do it:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
select count(*) as count
from hotel.hotel_bookings
where hotel = 'City Hotel' and is_repeated_guest = 1
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 16 May 2020 06:38:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648243#M194143</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2020-05-16T06:38:21Z</dc:date>
    </item>
    <item>
      <title>Re: Query on is there any way to reduce the size of output for bigger dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648253#M194151</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/328574"&gt;@Amali6&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What about using a PROC FREQ:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc freq data=hotel_bookings;
	where hotel = 'City Hotel' and is_repeated_guest = 1;
	tables is_repeated_guest / nocum nopercent;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 16 May 2020 08:15:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648253#M194151</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-16T08:15:31Z</dc:date>
    </item>
    <item>
      <title>Re: Query on is there any way to reduce the size of output for bigger dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648299#M194164</link>
      <description>&lt;P&gt;A Proc REPORT variable DEFINED with feature / GROUP will cause summary statistics to be computed. The default statistic is SUM, perfect for 'counting' a 0/1 flag variable.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have_apriori_summary(keep=hotel_id guest_id is_repeated_guest);
  call streaminit (123);
  do hotel_id = 1 to 10;
    do index = 1 to 100;
      guest_id + 1;
      is_repeated_guest = rand ('uniform') &amp;lt; 0.40;
      output;
    end;
  end;
run;

proc report data=have_apriori_summary;
  title "Guest retention results from apriori per guest summary";
  column hotel_id is_repeated_guest;

  define hotel_id / group;
  define is_repeated_guest / 'Repeated Guests';

  where hotel_id in (1,3,4);
run;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Output&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RichardADeVenezia_0-1589638708180.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39515i17D90C89CF30C9F1/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RichardADeVenezia_0-1589638708180.png" alt="RichardADeVenezia_0-1589638708180.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Consider a slightly different scenario:&lt;/P&gt;
&lt;P&gt;Suppose the data set contains transactional information, such as CHECK_IN_DATE, ROOM_ID, and GUEST_ID and the REPEAT flag is a marker for some customer service action at the desk.&amp;nbsp; Taking a sum of the flag values over the transactions will NOT give the number of DISTINCT guests, the SUM will be the number of repeated guest &lt;STRONG&gt;visits&lt;/STRONG&gt;.&amp;nbsp; You will need to reduce the transaction data to a summary in order to count the number of distinct guests.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have_checkin_log(keep=hotel_id room_id date guest_id is_repeated_guest);
  call streaminit (123);
  array tracker(7500) _temporary_;
  do index = 1 to dim(tracker);
    tracker(index) = rand('uniform') &amp;lt; 0.15;
  end;
  do hotel_id = 1 to 10;
    do room_id = 1 to 50;
      do date = '01jan2020'd to '31jan2020'd;
        guest_id = rand('integer',1,7500);
        is_repeated_guest = tracker(guest_id);
        output;
      end;
    end;
  end;
  format date yymmdd10.;
run;

proc sql;
  create table repeated_guests_summary as 
  select 
    hotel_id,
    guest_id,
    is_repeated_guest,
    count(*) as repeated_visits_count
  from have_checkin_log
  where is_repeated_guest
  group by hotel_id, guest_id
  ;
quit;

proc report data=repeated_guests_summary;
  title "Repeated Guests and Visits";
  column hotel_id is_repeated_guest repeated_visits_count;

  define hotel_id / group;
  define is_repeated_guest / 'Repeated Guests';

  where hotel_id in (1,3,4);
run;
&lt;/PRE&gt;
&lt;P&gt;Output&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RichardADeVenezia_2-1589640782673.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39521iC47B13778BC8618A/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RichardADeVenezia_2-1589640782673.png" alt="RichardADeVenezia_2-1589640782673.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 16 May 2020 14:56:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Query-on-is-there-any-way-to-reduce-the-size-of-output-for/m-p/648299#M194164</guid>
      <dc:creator>RichardDeVen</dc:creator>
      <dc:date>2020-05-16T14:56:51Z</dc:date>
    </item>
  </channel>
</rss>

