<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: levels average and frequency for 1000 variables by proc sql in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665360#M198938</link>
    <description>&lt;PRE&gt;data have;
input id (var1 var2) ($) y;
datalines;
1 a b 1111
2 c m 2222
3 a m 4444
4 d m 7777
;
proc transpose data=have(obs=0) out=temp;
var _all_;
run; 
proc delete data=want;run;
data _null_;
 set temp(where=(lowcase(_name_) like 'var%'));
call execute(cat('proc sql;create table x as select "',_name_,'" as vname length=40,',
_name_,' as levels,count(',_name_,') as freq,mean(y) as avg_y from have group by ',_name_,
';quit;proc append base=want data=x force;run;'));
run;&lt;/PRE&gt;</description>
    <pubDate>Fri, 26 Jun 2020 13:52:32 GMT</pubDate>
    <dc:creator>Ksharp</dc:creator>
    <dc:date>2020-06-26T13:52:32Z</dc:date>
    <item>
      <title>levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664983#M198745</link>
      <description>&lt;P&gt;I have a dataset :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;ID&amp;nbsp; &amp;nbsp; var1&amp;nbsp; var2&amp;nbsp; ...&amp;nbsp; var1000&amp;nbsp; &amp;nbsp; &amp;nbsp;Y&lt;/P&gt;&lt;P&gt;1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; a&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;b&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;f&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1111&lt;/P&gt;&lt;P&gt;2&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; c&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;m&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; g&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 222&lt;/P&gt;&lt;P&gt;3&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; a&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;m&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; g&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 4444&lt;/P&gt;&lt;P&gt;4&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; d&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;m&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; h&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 7777&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then I want an output like:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;varName&amp;nbsp; &amp;nbsp;Levels&amp;nbsp; &amp;nbsp;Freq&amp;nbsp; &amp;nbsp;Avg_Y&lt;/P&gt;&lt;P&gt;&amp;nbsp; var1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;a&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; xx&lt;/P&gt;&lt;P&gt;&amp;nbsp; var1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;c&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; yy&lt;/P&gt;&lt;P&gt;&amp;nbsp; var1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;d&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ww&lt;/P&gt;&lt;P&gt;&amp;nbsp; var2&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;m&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 3&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; zz&lt;/P&gt;&lt;P&gt;&amp;nbsp; var2&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;b&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; qq&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The dataset is huge with lots of levels, so PROC FREQ fails to run. I'm wondering if PROC SQL can help me run the code.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any help would be greatly appreciated!&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jun 2020 12:22:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664983#M198745</guid>
      <dc:creator>mh2t</dc:creator>
      <dc:date>2020-06-25T12:22:09Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664985#M198747</link>
      <description>&lt;P&gt;PROC FREQ will do this for the levels. (Although I'm skeptical what value there is in creating this table for 1000 variables...)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PROC MEANS/PROC SUMMARY will compute the mean for Y.&amp;nbsp;(Although I'm skeptical what value there is in creating this table for 1000 variables...)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can merge the tables together if you really need the output in the exact format shown.&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jun 2020 12:28:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664985#M198747</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2020-06-25T12:28:58Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664993#M198752</link>
      <description>&lt;P&gt;How do you calculate the value for the average of Y?&lt;/P&gt;
&lt;P&gt;Anyway, this is the method for calculating the frequencies:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input id (var1 var2) ($);
datalines;
1        a       b
2        c       m
3        a       m
4        d       m
;

proc transpose data=have out=long name=varname;
by id;
var var:;
run;

proc freq data=long noprint;
tables varname*col1 / nopercent out=want (drop=percent);
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 25 Jun 2020 12:55:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664993#M198752</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2020-06-25T12:55:43Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664994#M198753</link>
      <description>&lt;P&gt;Thank you&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/11562"&gt;@Kurt_Bremser&lt;/a&gt;&amp;nbsp;.&lt;/P&gt;&lt;P&gt;Avg_Y is calcualted within levels. So for Var1, level "a", the Avg_Y = (1111 + 4444)/2&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jun 2020 13:02:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/664994#M198753</guid>
      <dc:creator>mh2t</dc:creator>
      <dc:date>2020-06-25T13:02:22Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665000#M198757</link>
      <description>&lt;P&gt;Then this would be my suggestion:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input id (var1 var2) ($) y;
datalines;
1 a b 1111
2 c m 2222
3 a m 4444
4 d m 7777
;

proc transpose
  data=have
  out=long (rename=(col1=levels))
  name=varname
;
by id;
var var:;
run;

data long2;
merge
  long
  have (keep=id y)
;
by id;
run;

proc summary data=long2 nway;
class varname levels;
output
  out=want (drop=_type_ rename=(_freq_=freq))
  mean(y)=
;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 25 Jun 2020 13:14:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665000#M198757</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2020-06-25T13:14:39Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665007#M198760</link>
      <description>&lt;P&gt;For Huge data sets, I would avoid Proc Transpose. Using Data Step for fixed data structure can be faster and uses less data passes&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Transpose the data using Data Step */
data want(keep=varname levels y);
	length varname $7 levels $1;
	array vars (*) $1 var1-var3;
	set have;
	do i=1 to dim(vars);
		varname =vname(vars(i));
		levels =vvalue(vars(i));
		output;
	end;
run;
/* Summarize to produce your desired means */
proc summary data=want nway;
	class varname levels;
	var y;
	output out=want_sums(drop=_type_ Rename=(_freq_=freq)) mean=y;
run;
/* Remove unwanted data */
proc delete data=want; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Hope this helps,&lt;/P&gt;
&lt;P&gt;Ahmed&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jun 2020 13:36:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665007#M198760</guid>
      <dc:creator>AhmedAl_Attar</dc:creator>
      <dc:date>2020-06-25T13:36:20Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665008#M198761</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/160501"&gt;@mh2t&lt;/a&gt;&amp;nbsp; If you are &lt;U&gt;ever&lt;/U&gt; interested in some advanced solutions like using Hash for your stated requirement, I encourage you to read Guru Paul D&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/21262"&gt;@hashman&lt;/a&gt;&amp;nbsp; /&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13569"&gt;@DonH&lt;/a&gt;&amp;nbsp; &amp;nbsp;book that has an example of all these kinds.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;DIV class="text parbase section"&gt;
&lt;DIV class=""&gt;
&lt;H3&gt;Data Management Solutions Using SAS&lt;SUP&gt;®&lt;/SUP&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;Hash Table Operations: A Business Intelligence Case Study&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/H3&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;DIV class="text parbase section"&gt;
&lt;DIV class=""&gt;
&lt;P&gt;&lt;SPAN class="xsmall-txt-light"&gt;By&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://support.sas.com/en/books/authors/paul-dorfman.html" target="_blank" rel="noopener"&gt;Paul Dorfman&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;and&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://support.sas.com/en/books/authors/don-henderson.html" target="_blank" rel="noopener"&gt;Don Henderson&lt;/A&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="xsmall-txt-light"&gt;Anyways, I wanted to have some fun-&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input id (var1 var2) ($) y;
datalines;
1 a b 1111
2 c m 2222
3 a m 4444
4 d m 7777
;
data _null_ ;
 if _n_=1 then do;
  dcl hash H (ordered: "A") ;
  h.definekey  ("varname","levels") ;
  h.definedata ("varname","levels","freq","sum","avg_y") ;
  h.definedone () ;
 end;
 set have end=z;
 array t var1-var2;
 do over t;
  varname=vname(t);
  levels=t;
  if h.find()=0 then do;
   freq=sum(freq,1);
   sum=sum(y,sum);
   avg_y=sum/freq;
  end;
  else do;
   freq=1;
   sum=y;
   avg_y=sum;
  end;
  h.replace();
 end;
 if z;
 h.output(dataset:'want(drop=sum)');
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;</description>
      <pubDate>Thu, 25 Jun 2020 13:38:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665008#M198761</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2020-06-25T13:38:07Z</dc:date>
    </item>
    <item>
      <title>Re: levels average and frequency for 1000 variables by proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665360#M198938</link>
      <description>&lt;PRE&gt;data have;
input id (var1 var2) ($) y;
datalines;
1 a b 1111
2 c m 2222
3 a m 4444
4 d m 7777
;
proc transpose data=have(obs=0) out=temp;
var _all_;
run; 
proc delete data=want;run;
data _null_;
 set temp(where=(lowcase(_name_) like 'var%'));
call execute(cat('proc sql;create table x as select "',_name_,'" as vname length=40,',
_name_,' as levels,count(',_name_,') as freq,mean(y) as avg_y from have group by ',_name_,
';quit;proc append base=want data=x force;run;'));
run;&lt;/PRE&gt;</description>
      <pubDate>Fri, 26 Jun 2020 13:52:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/levels-average-and-frequency-for-1000-variables-by-proc-sql/m-p/665360#M198938</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2020-06-26T13:52:32Z</dc:date>
    </item>
  </channel>
</rss>

