<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Aggregation using hashing: Repeat summarized results without the use of a hash iterator in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633557#M187941</link>
    <description>&lt;P&gt;This is follow-up question of&amp;nbsp;&lt;A href="https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-for-each/m-p/633551#M187854" target="_self"&gt;Aggregation using hashing: Repeat summarized results for each observation in by-group&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Would it be possible to implement a solution using hashing without relying on a hash iterator (or an additional DoW-Loop), but by a further hash table lookup?&amp;nbsp;&lt;/P&gt;&lt;P&gt;I wonder if the smart combination of aggravated summary result in hash table [out] and the detailed data in hash table [detail] can be used for a look-up to achieve this?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is the underlying code where a possible solution could be done or explained why it cannot be done without a hash iterator or a further DoW-Loop.&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* 01 */ data _null_;
/* 01a */    * Hash table [out] for aggregated display (no duplicate keys);
/* 02 */     declare hash out(ordered: "a");
/* 03 */     out.defineKey ("smoking_status", "bp_status", "sex");
/* 04 */     out.defineData("smoking_status", "bp_status", "sex", 
/* 05 */                    "_unq", "_sum", "_mean", "_sum_alv", "_n");
/* 06 */     out.defineDone();
/* 07 */ 
/* 07a */    * Hash table [unq] used for counting unique occurrences of value [systolic] within the key of hash table [h];
/* 08 */     declare hash unq();
/* 09 */     unq.defineKey("smoking_status", "bp_status", "sex", "systolic");
/* 10 */     unq.defineDone();

/* 10a */    * Hash table [detail] for detailed data on patients status (alive/ dead) and systolic blood pressure;
/* 10b */    dcl hash detail (ordered: "a", multidata: "y");
/* 10c */    detail.defineKey("smoking_status", "bp_status", "sex");
/* 10d */    detail.defineData("smoking_status", "bp_status", "sex", "systolic", "status");
/* 10e */    dcl hiter h_detail ("detail");     * Iterator to be able to walk through detail hash table [detail];
/* 10f */    detail.defineDone();

/* 10g */    * Hash table [det_out] for output of detailed data (updated with aggregated values from hash table [h];
/* 10h */    dcl hash det_out(ordered: "a", multidata: "y");
/* 10i */    det_out.defineKey("smoking_status", "bp_status", "sex");
/* 10j */    det_out.defineData("smoking_status", "bp_status", "sex", "systolic", "_unq", "_sum", "_mean", "status", "_sum_alv", "_n");
/* 10k */    det_out.defineDone();
/* 11 */ 
/* 12 */     do until (end);
/* 13 */         set _heart_src end=end;
/* 13a */        detail.add();  * fill hash table [detail] for each observation;
/* 14 */         if (out.find() ne 0) then call missing(_sum, _unq, _n, _sum_alv);
/* 15 */         _sum+systolic;             * _sum=sum(_sum, systolic);
/* 16 */         _n+1;                      * _n=sum(_n, 1);
/* 16a */        _sum_alv=sum(_sum_alv, status =: "A");      * Count only patients alive;
/* 16b */        * Check if value is unique within its "by-group"/ key;
/* 17 */         if (unq.check() ne 0) then do;
/* 18 */             _unq+1;                * _unq=sum(_unq, 1);
/* 19 */             unq.add();
/* 20 */         end;
/* 21 */         _mean=_sum/_n;
/* 21a */        * Update hash table [h] (aggregated values only);
/* 22 */         out.replace();
/* 23 */     end;

/* 23a */    rc=h_detail.first();    * Place "pointer" to first item in hash table [detail];
/* 23b */    do while (rc=0);
/* 23c */        * Write data portion of hash table [h] into PDV with "h.find()" (for each matching key);
/* 23d */        if out.find()=0 then det_out.add();
/* 23e */        rc=h_detail.next();
/* 23f */    end;

/* 23g */    * Output of final hash tables;
/* 24 */     out.output    (dataset: "_hash_agg");      * [h]   aggravated dataset (1 entry for each key);
/* 24a */    det_out.output(dataset: "_hash_agg_all");  * [all] aggravated values for each observation (duplicate keys);

/* 25 */     format _mean 8.1;
/* 26 */     label  _mean="Mean (systolic)"
/* 27 */            _sum="Sum (systolic)"
/* 28 */            _unq="Unique values (systolic)"
/* 29 */            _n="N (# of subjects)"
/* 29a */           _sum_alv="Patients alive";
/* 29b */    stop;
/* 30 */ run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Fri, 20 Mar 2020 12:20:47 GMT</pubDate>
    <dc:creator>left</dc:creator>
    <dc:date>2020-03-20T12:20:47Z</dc:date>
    <item>
      <title>Aggregation using hashing: Repeat summarized results without the use of a hash iterator</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633557#M187941</link>
      <description>&lt;P&gt;This is follow-up question of&amp;nbsp;&lt;A href="https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-for-each/m-p/633551#M187854" target="_self"&gt;Aggregation using hashing: Repeat summarized results for each observation in by-group&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Would it be possible to implement a solution using hashing without relying on a hash iterator (or an additional DoW-Loop), but by a further hash table lookup?&amp;nbsp;&lt;/P&gt;&lt;P&gt;I wonder if the smart combination of aggravated summary result in hash table [out] and the detailed data in hash table [detail] can be used for a look-up to achieve this?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is the underlying code where a possible solution could be done or explained why it cannot be done without a hash iterator or a further DoW-Loop.&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* 01 */ data _null_;
/* 01a */    * Hash table [out] for aggregated display (no duplicate keys);
/* 02 */     declare hash out(ordered: "a");
/* 03 */     out.defineKey ("smoking_status", "bp_status", "sex");
/* 04 */     out.defineData("smoking_status", "bp_status", "sex", 
/* 05 */                    "_unq", "_sum", "_mean", "_sum_alv", "_n");
/* 06 */     out.defineDone();
/* 07 */ 
/* 07a */    * Hash table [unq] used for counting unique occurrences of value [systolic] within the key of hash table [h];
/* 08 */     declare hash unq();
/* 09 */     unq.defineKey("smoking_status", "bp_status", "sex", "systolic");
/* 10 */     unq.defineDone();

/* 10a */    * Hash table [detail] for detailed data on patients status (alive/ dead) and systolic blood pressure;
/* 10b */    dcl hash detail (ordered: "a", multidata: "y");
/* 10c */    detail.defineKey("smoking_status", "bp_status", "sex");
/* 10d */    detail.defineData("smoking_status", "bp_status", "sex", "systolic", "status");
/* 10e */    dcl hiter h_detail ("detail");     * Iterator to be able to walk through detail hash table [detail];
/* 10f */    detail.defineDone();

/* 10g */    * Hash table [det_out] for output of detailed data (updated with aggregated values from hash table [h];
/* 10h */    dcl hash det_out(ordered: "a", multidata: "y");
/* 10i */    det_out.defineKey("smoking_status", "bp_status", "sex");
/* 10j */    det_out.defineData("smoking_status", "bp_status", "sex", "systolic", "_unq", "_sum", "_mean", "status", "_sum_alv", "_n");
/* 10k */    det_out.defineDone();
/* 11 */ 
/* 12 */     do until (end);
/* 13 */         set _heart_src end=end;
/* 13a */        detail.add();  * fill hash table [detail] for each observation;
/* 14 */         if (out.find() ne 0) then call missing(_sum, _unq, _n, _sum_alv);
/* 15 */         _sum+systolic;             * _sum=sum(_sum, systolic);
/* 16 */         _n+1;                      * _n=sum(_n, 1);
/* 16a */        _sum_alv=sum(_sum_alv, status =: "A");      * Count only patients alive;
/* 16b */        * Check if value is unique within its "by-group"/ key;
/* 17 */         if (unq.check() ne 0) then do;
/* 18 */             _unq+1;                * _unq=sum(_unq, 1);
/* 19 */             unq.add();
/* 20 */         end;
/* 21 */         _mean=_sum/_n;
/* 21a */        * Update hash table [h] (aggregated values only);
/* 22 */         out.replace();
/* 23 */     end;

/* 23a */    rc=h_detail.first();    * Place "pointer" to first item in hash table [detail];
/* 23b */    do while (rc=0);
/* 23c */        * Write data portion of hash table [h] into PDV with "h.find()" (for each matching key);
/* 23d */        if out.find()=0 then det_out.add();
/* 23e */        rc=h_detail.next();
/* 23f */    end;

/* 23g */    * Output of final hash tables;
/* 24 */     out.output    (dataset: "_hash_agg");      * [h]   aggravated dataset (1 entry for each key);
/* 24a */    det_out.output(dataset: "_hash_agg_all");  * [all] aggravated values for each observation (duplicate keys);

/* 25 */     format _mean 8.1;
/* 26 */     label  _mean="Mean (systolic)"
/* 27 */            _sum="Sum (systolic)"
/* 28 */            _unq="Unique values (systolic)"
/* 29 */            _n="N (# of subjects)"
/* 29a */           _sum_alv="Patients alive";
/* 29b */    stop;
/* 30 */ run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 20 Mar 2020 12:20:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633557#M187941</guid>
      <dc:creator>left</dc:creator>
      <dc:date>2020-03-20T12:20:47Z</dc:date>
    </item>
    <item>
      <title>Re: Aggregation using hashing: Repeat summarized results without the use of a hash iterator</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633740#M188011</link>
      <description>Can you describe the data set _heart_src ? Is there one row per person ? per person bp reading ? Do you have some sample data ?</description>
      <pubDate>Fri, 20 Mar 2020 21:58:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633740#M188011</guid>
      <dc:creator>RichardDeVen</dc:creator>
      <dc:date>2020-03-20T21:58:35Z</dc:date>
    </item>
    <item>
      <title>Re: Aggregation using hashing: Repeat summarized results without the use of a hash iterator</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633754#M188017</link>
      <description>&lt;P&gt;Thank you for your reply,&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12477"&gt;@RichardDeVen&lt;/a&gt;. The sample data is an excerpt from [sashelp.heart]:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _heart_src;
    set sashelp.heart (keep=smoking_status bp_status sex systolic status);
    label   sex="Sex"
            status="Survival status"
            systolic="Systolic";
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The data set [_heart_src] has one row per subject as shown in this screenshot:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sample data" style="width: 451px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/37140i5AE95DB78142484D/image-size/large?v=v2&amp;amp;px=999" role="button" title="hashing_heart_data.png" alt="Sample data" /&gt;&lt;span class="lia-inline-image-caption" onclick="event.preventDefault();"&gt;Sample data&lt;/span&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My mentioned code gives me the following result:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Resulting output from sample data after data step with hashing + hash iterator" style="width: 864px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/37142i34ED3C9BEDF10E22/image-size/large?v=v2&amp;amp;px=999" role="button" title="hashing_heart_data_detailed_final.png" alt="Resulting output from sample data after data step with hashing + hash iterator" /&gt;&lt;span class="lia-inline-image-caption" onclick="event.preventDefault();"&gt;Resulting output from sample data after data step with hashing + hash iterator&lt;/span&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;So this is what I want, but without using the hash iterator. I look for a solution with a further hash table facilitating a clever table-lookup instead of using the hash iterator or another DoW-Loop.If this is not possible I am glad to hear why not.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 20 Mar 2020 23:07:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633754#M188017</guid>
      <dc:creator>left</dc:creator>
      <dc:date>2020-03-20T23:07:07Z</dc:date>
    </item>
    <item>
      <title>Re: Aggregation using hashing: Repeat summarized results without the use of a hash iterator</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633785#M188035</link>
      <description>&lt;P&gt;Hash tables can only access one entry at at time.&lt;/P&gt;
&lt;P&gt;In order to calculate sum, average. etc, you need to read all these values one by one. So you need to iterate somehow.&lt;/P&gt;
&lt;P&gt;What's bothering you?&lt;/P&gt;
&lt;P&gt;Note that this code can be made much clearer. For example:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql ; 
   create table WANT as 
   select SMOKING_STATUS, BP_STATUS, SEX, SYSTOLIC, STATUS
        , count(unique SYSTOLIC) as _UNQ
        , sum(SYSTOLIC)          as _SUM
        , mean(SYSTOLIC)         as _MEAN
        , sum(STATUS='Alive')    as _SUM_ALV
        , count(*)               as _N
   from HAVE
   group by SMOKING_STATUS, BP_STATUS, SEX
   order by SMOKING_STATUS, BP_STATUS, SEX;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;You can use the options&amp;nbsp; _method&amp;nbsp; and &amp;nbsp; magic&amp;nbsp;&amp;nbsp;&amp;nbsp; to monitor and (hope to) influence the SQL parser&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 21 Mar 2020 02:06:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633785#M188035</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2020-03-21T02:06:52Z</dc:date>
    </item>
    <item>
      <title>Re: Aggregation using hashing: Repeat summarized results without the use of a hash iterator</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633788#M188037</link>
      <description>&lt;P&gt;There is no 'clever lookup'.&amp;nbsp; In order to output the aggregates, associated with the details, you need to loop over the details.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;A better long term solution might be to use an SQL query that is more clearly understood by more people than those that can decipher the hashing code.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example of SQL solving the same problem.&amp;nbsp; This query relies on the SAS Proc SQL feature that automatically re-merges summary statistics back with the original data.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  call streaminit(123);

  do response = 1 to 1e3;
    category1 = ceil(rand('uniform', 10));
    category2 = ceil(rand('uniform', 5));
    category3 = ceil(rand('uniform', 2));

    continuous_measurement = 100 + floor(rand('uniform', 100));
    binary_categorical_response = rand('uniform') &amp;lt; 0.09;

    output;
  end;
run;

proc sql;
  create table want as
  select
    category1, category2, category3, continuous_measurement, binary_categorical_response
    , count(distinct continuous_measurement) as _unq
    , sum(continuous_measurement) as _sum
    , mean(continuous_measurement) as _mean format = 5.1
    , sum(binary_categorical_response) as _count_binary_assert_1
    , count(*) as _n
  from
    have
  group by 
    category1, category2, category3
  order by
    category1, category2, category3, continuous_measurement desc ;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 21 Mar 2020 02:38:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Aggregation-using-hashing-Repeat-summarized-results-without-the/m-p/633788#M188037</guid>
      <dc:creator>RichardDeVen</dc:creator>
      <dc:date>2020-03-21T02:38:45Z</dc:date>
    </item>
  </channel>
</rss>

