<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: calculate statistics for each numeric var in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929740#M365805</link>
    <description>&lt;P&gt;The only&amp;nbsp; quick and easy way I know to get number of distinct values is to use Proc Freq with the NLEVELS option.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I see three steps, get your counts of &lt;STRONG&gt;,nr_POS,nr_Neg,nr_Nulls,nr_0&lt;/STRONG&gt; which is one of your other questions,&lt;/P&gt;
&lt;P&gt;the distinct values&lt;/P&gt;
&lt;PRE&gt;      
Proc freq data=sashelp.class nlevels;
tables _numeric_;
ods output nlevels= distinctcount;
run;&lt;/PRE&gt;
&lt;P&gt;And then proc means for the rest&lt;/P&gt;
&lt;PRE&gt;proc means data=sashelp.class stackods 
  Mean STD UCLm LCLm Sum Min Max Q1 Median 
  Q3 P1 P5 P95 P99 P75 P25 CV Range;
  var _numeric_;
  ods output summary=mysummary;
run;
&lt;/PRE&gt;
&lt;P&gt;merge the three sets together on the variable name.&lt;/P&gt;</description>
    <pubDate>Sun, 26 May 2024 16:26:30 GMT</pubDate>
    <dc:creator>ballardw</dc:creator>
    <dc:date>2024-05-26T16:26:30Z</dc:date>
    <item>
      <title>calculate statistics for each numeric var</title>
      <link>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929700#M365790</link>
      <description>&lt;P&gt;Hello&lt;/P&gt;
&lt;P&gt;I want to calculate the following statistics for each numeric variable:&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;nr,nr_POS,nr_Neg,nr_Nulls,nr_0,nr_distinct_values,AVG,STD,UCL,LCL,Sum,Min,Max,Q1,Median,&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Q3,P1,P5,P95,P99,P75,P25,CV,Range&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;I want to identify automatically the numeric variables.&lt;/P&gt;
&lt;P&gt;In this code I perform wide to Long change of data structure.&lt;/P&gt;
&lt;P&gt;My question- Is there a better more efficient way to calculate the required statistics? May you show code?&lt;/P&gt;
&lt;P&gt;In real life the data set has 2 million rows and 200 numeric variables .&lt;/P&gt;
&lt;P&gt;I am not sure if this method of work (Long to wide) is&amp;nbsp; efficient in this case&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table numeric_Vars as
select libname, memname, name, type, length,
format, informat, label
from dictionary.columns
where libname = 'SASHELP' and memname='CLASS' and upcase(type)='NUM'
;
quit;


proc sql noprint;
select name into : Numeric_Vars_List SEPARATED by ' '
from numeric_Vars
;
quit;
%put &amp;amp;Numeric_Vars_List.; /**Age Height Weight**/


proc sql noprint;
select count(*) as nr_numeric_Vars into :nr_numeric_Vars
from numeric_Vars
;
quit;
%put &amp;amp;nr_numeric_Vars.;


data Long_Structure_Numeric_Vars_Data(DROP=&amp;amp;Numeric_Vars_List. J);
set sashelp.class(KEEP=&amp;amp;Numeric_Vars_List.);
array vv{&amp;amp;nr_numeric_Vars.}   &amp;amp;Numeric_Vars_List.; /* _NUMERIC_   */
do j=1 to dim(vv);
Var_Name  = vname(vv(j));
Var_Value =vv(j);
output;
end;
run;


proc sql;
create table Want1 as
select Var_Name,
       count(*) as nr,
       sum(case when Var_Value&amp;gt;0 then 1 else 0 end ) as nr_POS,
       sum(case when Var_Value&amp;lt;0 AND Var_Value=. then 1 else 0 end ) as nr_Neg,
	   sum(case when Var_Value=. then 1 else 0 end ) as nr_Nulls,
       sum(case when Var_Value=0 then 1 else 0 end ) as nr_0,
	   count(distinct Var_Value) as nr_distinct_values,
	   mean(Var_Value) as AVG format=8.2,
	   STD(Var_Value) as STD format=8.2,
	   calculated AVG+calculated STD *3 as UCL,
	   calculated AVG-calculated STD *3 as LCL
from Long_Structure_Numeric_Vars_Data
group by Var_Name
;
quit;

proc means data=sashelp.class sum min max q1 median q3  P1 p5 p95 P99 P75 P25 cv SUM Range /**N mean std**/ STACKODSOUTPUT maxdec=2;
var _numeric_;
ods output summary=Want2(Rename=(Variable=Var_Name));
run;


data want_summary_Report;
merge Want1 Want2;
by Var_Name;
Run;


&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 26 May 2024 06:28:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929700#M365790</guid>
      <dc:creator>Ronein</dc:creator>
      <dc:date>2024-05-26T06:28:34Z</dc:date>
    </item>
    <item>
      <title>Re: calculate statistics for each numeric var</title>
      <link>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929715#M365795</link>
      <description>&lt;P&gt;SASHELP.CLASS is not a wide data set. It is a long data set. There is no benefit to doing a wide to long conversion. Don't waste your time.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Step 1: Use PROC FREQ and the formats you showed in your other thread to do the counting.&lt;/P&gt;
&lt;P&gt;Step 2: Use the information already given to you by &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt; here: &lt;A href="https://communities.sas.com/t5/SAS-Programming/proc-freq-one-way-table-for-multiple-vars-export-to-data-set/m-p/924730#M363982" target="_blank" rel="noopener"&gt;https://communities.sas.com/t5/SAS-Programming/proc-freq-one-way-table-for-multiple-vars-export-to-data-set/m-p/924730#M363982&lt;/A&gt; to convert the PROC FREQ output to usable data set&lt;/P&gt;
&lt;P&gt;Step 3: Use PROC SUMMARY/PROC UNIVARIATE to compute the rest of the statistics&lt;/P&gt;
&lt;P&gt;Step 4: Merge step 2 and SUMMARY output together&lt;/P&gt;</description>
      <pubDate>Sun, 26 May 2024 11:22:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929715#M365795</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2024-05-26T11:22:00Z</dc:date>
    </item>
    <item>
      <title>Re: calculate statistics for each numeric var</title>
      <link>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929740#M365805</link>
      <description>&lt;P&gt;The only&amp;nbsp; quick and easy way I know to get number of distinct values is to use Proc Freq with the NLEVELS option.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I see three steps, get your counts of &lt;STRONG&gt;,nr_POS,nr_Neg,nr_Nulls,nr_0&lt;/STRONG&gt; which is one of your other questions,&lt;/P&gt;
&lt;P&gt;the distinct values&lt;/P&gt;
&lt;PRE&gt;      
Proc freq data=sashelp.class nlevels;
tables _numeric_;
ods output nlevels= distinctcount;
run;&lt;/PRE&gt;
&lt;P&gt;And then proc means for the rest&lt;/P&gt;
&lt;PRE&gt;proc means data=sashelp.class stackods 
  Mean STD UCLm LCLm Sum Min Max Q1 Median 
  Q3 P1 P5 P95 P99 P75 P25 CV Range;
  var _numeric_;
  ods output summary=mysummary;
run;
&lt;/PRE&gt;
&lt;P&gt;merge the three sets together on the variable name.&lt;/P&gt;</description>
      <pubDate>Sun, 26 May 2024 16:26:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929740#M365805</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2024-05-26T16:26:30Z</dc:date>
    </item>
    <item>
      <title>Re: calculate statistics for each numeric var</title>
      <link>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929742#M365806</link>
      <description>&lt;P&gt;Sounds like you need three passes through the data to generate all of those stats.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc format;
 value sign 
   low-&amp;lt;0 = 'Negative'
   0 = 'Zero'
   0&amp;lt;-high = 'Positive'
   other = 'Missing'
 ;
run;
ods select none;
proc freq nlevels data=have;
  tables _numeric_ / noprint ;
  ods output nlevels=nlevels;
run;
proc means data=have stackods 
  N Nmiss Mean STD UCLm LCLm Sum Min Max Q1 Median 
  Q3 P1 P5 P95 P99 P75 P25 CV Range
;
  var _numeric_;
  ods output summary=means;
run;
proc freq data=have ;
  tables _numeric_ /  list nopercent nocum;
  format _numeric_ sign.;
  ods output OneWayFreqs=outfreq; 
run;
ods select all;
data freqs;
  length Variable _name_ $32;
  keep Variable _name_ Frequency ;
  set outfreq;
  Variable = substr(table,7);
  _name_ =vvaluex('F_'||variable);
run;
proc transpose data=freqs out=Neg0Pos(drop=_:);
  by Variable notsorted;
  id _name_;
  var Frequency;
run;

data want;
  merge Neg0Pos nlevels(rename=(TableVar=Variable)) means ;
  by Variable;
  Negative=sum(0,Negative);
  Positive=sum(0,Positive);
  Zero=sum(0,Zero);
run;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Example results:&lt;/P&gt;
&lt;PRE&gt;                                                              NNon
                                                    NMiss     Miss
Obs Variable Negative     Zero Positive  NLevels   Levels   Levels  N NMiss         Mean

 1    var1          1        1        7        5        1        4  9   1       0.888889
 2    var2          0        4        4        4        1        3  8   2       0.750000
 3    var3          3        3        2        4        1        3  8   2       0.125000
 4    var4          4        3        2        4        1        3  9   1      -0.222222



Obs       StdDev          UCLM          LCLM           Sum           Min           Max

 1      0.927961      1.602183      0.175595      8.000000     -1.000000      2.000000
 2      0.886405      1.491053      0.008947      6.000000             0      2.000000
 3      1.246423      1.167036     -0.917036      1.000000     -1.000000      2.000000
 4      0.833333      0.418334     -0.862779     -2.000000     -1.000000      1.000000



Obs           Q1        Median            Q3            P1            P5           P95

 1      1.000000      1.000000      1.000000     -1.000000     -1.000000      2.000000
 2             0      0.500000      1.500000             0             0      2.000000
 3     -1.000000             0      1.000000     -1.000000     -1.000000      2.000000
 4     -1.000000             0             0     -1.000000     -1.000000      1.000000



Obs          P99             P75             P25              CV           Range

 1      2.000000        1.000000        1.000000      104.395582        3.000000
 2      2.000000        1.500000               0      118.187368        2.000000
 3      2.000000        1.000000       -1.000000      997.138764        3.000000
 4      1.000000               0       -1.000000     -375.000000        2.000000
&lt;/PRE&gt;</description>
      <pubDate>Sun, 26 May 2024 17:53:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/calculate-statistics-for-each-numeric-var/m-p/929742#M365806</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2024-05-26T17:53:19Z</dc:date>
    </item>
  </channel>
</rss>

