<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to speed up the proc sql in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315249#M68722</link>
    <description>&lt;P&gt;I have a dataset like several million obs, and I am doing 10000 simulations.&amp;nbsp;The following is part of the code, it&amp;nbsp;takes really long time to finish this computation. Is there any way to speed up the proc sql steps? Thanks a lot as always!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%macro sim_table;

%do i=1 %to 10000;

proc sql noprint;
create table sim_&amp;amp;i as 
select *,
(select count(var1) from data_&amp;amp;i where id=a.id and year=a.year and month=a.month) as count1,
(select sum  (var2) from data_&amp;amp;i where id=a.id and year=a.year and month=a.month) as total2
from data_&amp;amp;i as a;
quit;

proc append base = simulation data = sim_&amp;amp;i; run;

%end;
%mend;
%sim_table;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 29 Nov 2016 20:28:53 GMT</pubDate>
    <dc:creator>Jonate_H</dc:creator>
    <dc:date>2016-11-29T20:28:53Z</dc:date>
    <item>
      <title>How to speed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315249#M68722</link>
      <description>&lt;P&gt;I have a dataset like several million obs, and I am doing 10000 simulations.&amp;nbsp;The following is part of the code, it&amp;nbsp;takes really long time to finish this computation. Is there any way to speed up the proc sql steps? Thanks a lot as always!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%macro sim_table;

%do i=1 %to 10000;

proc sql noprint;
create table sim_&amp;amp;i as 
select *,
(select count(var1) from data_&amp;amp;i where id=a.id and year=a.year and month=a.month) as count1,
(select sum  (var2) from data_&amp;amp;i where id=a.id and year=a.year and month=a.month) as total2
from data_&amp;amp;i as a;
quit;

proc append base = simulation data = sim_&amp;amp;i; run;

%end;
%mend;
%sim_table;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 29 Nov 2016 20:28:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315249#M68722</guid>
      <dc:creator>Jonate_H</dc:creator>
      <dc:date>2016-11-29T20:28:53Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315272#M68729</link>
      <description>&lt;P&gt;Splitting your data into 10000 datasets is not usually an efficient strategy. But if you want to go that route, this query would be more efficient:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%macro sim_table;

%do i=1 %to 10000;

	proc sql noprint;
	create table sim_&amp;amp;i as 
	select 
		*,
		count(var1) as count1,
		sum(var2) as total2
	from data_&amp;amp;i 
	group by  id, year, month;
	quit;

	proc append base = simulation data = sim_&amp;amp;i; run;

	%end;
%mend;

%sim_table;&lt;/CODE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 17:22:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315272#M68729</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2016-11-29T17:22:47Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315300#M68742</link>
      <description>&lt;P&gt;Thank you PG! I will try this one.&lt;/P&gt;&lt;P&gt;You are right, split data is never a good practice. Since I have to connect to public computer for the computation, and I have very limited storage quota, so I have to split original data into small ones, run simulation, then only combine outputs into single file.&amp;nbsp;Otherwise, the simulation will result in huge dataset that exceed my quota &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 18:03:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315300#M68742</guid>
      <dc:creator>Jonate_H</dc:creator>
      <dc:date>2016-11-29T18:03:17Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315323#M68746</link>
      <description>&lt;P&gt;And why not cut your input/output possibly in half?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Don't "create table",&amp;nbsp;&amp;nbsp; Do "create view".&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Regards,&lt;/P&gt;
&lt;P&gt;Mark&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 19:47:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315323#M68746</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2016-11-29T19:47:14Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315328#M68747</link>
      <description>&lt;P&gt;Another way to speed this up is to create a data set view of, say 100 data sets, each distinguished by a SIM_NUM.&amp;nbsp; The do a proc sql create view on those 100 and append the results&amp;nbsp;all at once.&amp;nbsp; The only substantial disk space required will be your SIMULATION data set.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;To start, I'd suggest running it with&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; %let end=4&lt;/P&gt;
&lt;P&gt;and&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; %let grpsize=2&lt;/P&gt;
&lt;P&gt;and&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; options mprint;&lt;/P&gt;
&lt;P&gt;to see the code it generates.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%let start=1;
%let end=10000;
%let grpsize=100;


%macro sim_by_grps;
  %do grp=%eval(&amp;amp;start-1) %to %eval(&amp;amp;end-&amp;amp;grpsize) %by &amp;amp;grpsize;
    data vt&amp;amp;grp /view=vt&amp;amp;grp;
      set 
        %do I=%eval(&amp;amp;grp+1) %to %eval(&amp;amp;grp+&amp;amp;grpsize);
        data_&amp;amp;I
        %end;
        open=defer indsname=indsn ;
      sim_num=input(scan(indsn,-1,'_'),5.);
    run;
    proc sql noprint;
      create view vtappend&amp;amp;grp as
        select *, n(var1) as count1, sum(var2) as total2 
        from vt&amp;amp;grp 
        group by sim_num,id,year,month;
    quit;
    proc append base=simulation data=vtappend&amp;amp;grp;
    run;
  %end;
%mend sim_by_grps;

%sim_by_grps;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 20:11:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315328#M68747</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2016-11-29T20:11:57Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315347#M68759</link>
      <description>&lt;P&gt;Thank you&amp;nbsp;mkeintz for your inputs! I will spend some time to digest your advice.&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 21:46:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315347#M68759</guid>
      <dc:creator>Jonate_H</dc:creator>
      <dc:date>2016-11-29T21:46:08Z</dc:date>
    </item>
    <item>
      <title>Re: How to peed up the proc sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315355#M68761</link>
      <description>&lt;P&gt;Excellent.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;And if the performance is still insufficient, then it's time to put the SQL "create view" aside (because it doesn't take advantage of the fact that the incoming dataset is sorted by SIM_NUM).&amp;nbsp; You would then either&amp;nbsp;do further DATA step programming, or run a "PROC&amp;nbsp;MEAN&amp;nbsp;...&amp;nbsp;NOPRINT&amp;nbsp;NWAY;"&amp;nbsp;with a "BY SIM_NUM;" and "CLASS&amp;nbsp;ID&amp;nbsp;YEAR&amp;nbsp;MONTH:" among the related statements.&amp;nbsp; The result&amp;nbsp; can be easily merged with the original data to be appended.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2016 22:09:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-speed-up-the-proc-sql/m-p/315355#M68761</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2016-11-29T22:09:39Z</dc:date>
    </item>
  </channel>
</rss>

