<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic cas action groupbyinfo: I don't get why it takes so long in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/cas-action-groupbyinfo-I-don-t-get-why-it-takes-so-long/m-p/921738#M362970</link>
    <description>&lt;P&gt;I have 700k distinct groupby counts.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If I use the inputs={} option, it correctly generates the columns _frequency_ and _position_, but the _cumfreq_ is not what I want to have. I would like to accumulate the freq within a contract (numero_operacion).&amp;nbsp; My first groupbyinfo run relates to this attempt, which at least runs quickly and gives correct freq and position counts.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;When I try to use the groupby={} option in the table{} statement, then it takes an eternity to finish.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It's not big enough to justify this amount of time, so I'm probably missing something.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I don't know how to use and combine vars{}, inputs{} and groupby{} wisely. And I am lost on how to fine-tune by setting algorithm2, groupbylimit, groupbyorder, ...&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can someone help out and provide a complex example? The examples on the SAS site should cover more sophisticated cases.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;By the way, I know how to resolve my problem with alternative approaches.&lt;/P&gt;
&lt;P&gt;But I want to truly understand the groupbyinfo action.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks a lot.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc cas;
   session mysession;
                             simple.groupByInfo /                          /* 1 */
               includeDuplicates=true,
               minFrequency=1,
                              generatedcolumns={'frequency', 'position', 'cumfreq'},
                              groupByLimit=100M,
                              nworkerthreads=8,
               noVars=true,
                              algorithm2=true, 
                              journaltrace=true,
                              inputs={'Numero_operacion', 'JourneyName'},
               casOut={name="testa_dup", replace=true, CASLIB="mkt"},

               table={
/*                                                       vars={'Numero_operacion', 'JourneyName'}, groupBy={'_date'},  */
                                                           groupByMode="redistribute", orderBy='fecha_envio',
/*                          orderBy="fecha_envio", */
                              name="FUNNEL_REN", CASLIB="mkt"
                                            ,computedVars={
                                            name="_date"}, 
                                            computedVarsProgram="_date=put(datepart(fecha_envio), monyy.);"
};
run;

proc cas;
   table.fetch /                                  
      format=True, maxrows=100, 
/*       fetchVars={ */
/*          "_score_", 'Numero_operacion', 'JourneyName'}, */
      table={name="testa_dup", caslib="mkt", where="Numero_operacion in ('01RN40000930' '01RN27000014')"};
run;


proc cas;
   session mysession;
                             simple.groupByInfo /                          /* 1 */
               includeDuplicates=true,
               minFrequency=1,
                              generatedcolumns={'frequency', 'position', 'cumfreq'},
                              groupByLimit=100M,
                              details=true,
                              nworkerthreads=8,
               noVars=true,
                              algorithm2=true, 
                              journaltrace=true,
                              inputs={'Numero_operacion', 'JourneyName'},
               casOut={name="testa_dup", replace=true, CASLIB="mkt"},

               table={groupby={'Numero_operacion', 'JourneyName'}, groupByMode="redistribute", 
/*                          orderBy='fecha_envio', */
/*                          orderBy="fecha_envio", */
                              name="FUNNEL_RE1", CASLIB="mkt"
                                            ,computedVars={
                                            name="_date"}, 
                                            computedVarsProgram="_date=put(datepart(fecha_envio), monyy.);"
};
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 25 Mar 2024 19:38:22 GMT</pubDate>
    <dc:creator>acordes</dc:creator>
    <dc:date>2024-03-25T19:38:22Z</dc:date>
    <item>
      <title>cas action groupbyinfo: I don't get why it takes so long</title>
      <link>https://communities.sas.com/t5/SAS-Programming/cas-action-groupbyinfo-I-don-t-get-why-it-takes-so-long/m-p/921738#M362970</link>
      <description>&lt;P&gt;I have 700k distinct groupby counts.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If I use the inputs={} option, it correctly generates the columns _frequency_ and _position_, but the _cumfreq_ is not what I want to have. I would like to accumulate the freq within a contract (numero_operacion).&amp;nbsp; My first groupbyinfo run relates to this attempt, which at least runs quickly and gives correct freq and position counts.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;When I try to use the groupby={} option in the table{} statement, then it takes an eternity to finish.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It's not big enough to justify this amount of time, so I'm probably missing something.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I don't know how to use and combine vars{}, inputs{} and groupby{} wisely. And I am lost on how to fine-tune by setting algorithm2, groupbylimit, groupbyorder, ...&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can someone help out and provide a complex example? The examples on the SAS site should cover more sophisticated cases.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;By the way, I know how to resolve my problem with alternative approaches.&lt;/P&gt;
&lt;P&gt;But I want to truly understand the groupbyinfo action.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks a lot.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc cas;
   session mysession;
                             simple.groupByInfo /                          /* 1 */
               includeDuplicates=true,
               minFrequency=1,
                              generatedcolumns={'frequency', 'position', 'cumfreq'},
                              groupByLimit=100M,
                              nworkerthreads=8,
               noVars=true,
                              algorithm2=true, 
                              journaltrace=true,
                              inputs={'Numero_operacion', 'JourneyName'},
               casOut={name="testa_dup", replace=true, CASLIB="mkt"},

               table={
/*                                                       vars={'Numero_operacion', 'JourneyName'}, groupBy={'_date'},  */
                                                           groupByMode="redistribute", orderBy='fecha_envio',
/*                          orderBy="fecha_envio", */
                              name="FUNNEL_REN", CASLIB="mkt"
                                            ,computedVars={
                                            name="_date"}, 
                                            computedVarsProgram="_date=put(datepart(fecha_envio), monyy.);"
};
run;

proc cas;
   table.fetch /                                  
      format=True, maxrows=100, 
/*       fetchVars={ */
/*          "_score_", 'Numero_operacion', 'JourneyName'}, */
      table={name="testa_dup", caslib="mkt", where="Numero_operacion in ('01RN40000930' '01RN27000014')"};
run;


proc cas;
   session mysession;
                             simple.groupByInfo /                          /* 1 */
               includeDuplicates=true,
               minFrequency=1,
                              generatedcolumns={'frequency', 'position', 'cumfreq'},
                              groupByLimit=100M,
                              details=true,
                              nworkerthreads=8,
               noVars=true,
                              algorithm2=true, 
                              journaltrace=true,
                              inputs={'Numero_operacion', 'JourneyName'},
               casOut={name="testa_dup", replace=true, CASLIB="mkt"},

               table={groupby={'Numero_operacion', 'JourneyName'}, groupByMode="redistribute", 
/*                          orderBy='fecha_envio', */
/*                          orderBy="fecha_envio", */
                              name="FUNNEL_RE1", CASLIB="mkt"
                                            ,computedVars={
                                            name="_date"}, 
                                            computedVarsProgram="_date=put(datepart(fecha_envio), monyy.);"
};
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 25 Mar 2024 19:38:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/cas-action-groupbyinfo-I-don-t-get-why-it-takes-so-long/m-p/921738#M362970</guid>
      <dc:creator>acordes</dc:creator>
      <dc:date>2024-03-25T19:38:22Z</dc:date>
    </item>
  </channel>
</rss>

