<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Optimization in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498521#M132504</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Ok, this makes the assumption that your input is already sorted by i&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data A1;
   do i=1 to 200;

    if i ^in(9,13,14,87,100,101) then output;
end;
run;
data missing ; 
	retain cntr 1 ; /* counter, we will use this to compare to the input data */
	set a1 ;
	if cntr ne i then do ; /* if counter doesn't match i then we have found 1 or more missing numbers */
		do miss=cntr to i-1 ; /* This loop accounts for consecutive missing numbers, e.g. 13 &amp;amp; 14 */
		                      /* Cntr=13 i=15, so this will loop from 13 to 14 */
			output missing ;  /* output the missing numbers */
		end ;
		cntr=i ; /* set cntr to be the new value of i e.g. 15 */
	end ;
	cntr+1 ; /* increment cntr */
run ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 24 Sep 2018 18:38:12 GMT</pubDate>
    <dc:creator>AMSAS</dc:creator>
    <dc:date>2018-09-24T18:38:12Z</dc:date>
    <item>
      <title>Optimization</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498501#M132491</link>
      <description>&lt;P&gt;Dears,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I&amp;nbsp;am looking for optimal code to&amp;nbsp;identify&amp;nbsp;the missing values in&amp;nbsp;a&amp;nbsp;variable that has unique values&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Ex: I have a dataset that is similar to A1, where 70 million numbers are missed in the range beginning from 1 to&amp;nbsp; 2.1578 billion records&amp;nbsp;&lt;/P&gt;&lt;P&gt;Following step simulated with 6 missing nos instead of 70+&amp;nbsp;million for simplifying your understanding&lt;/P&gt;&lt;P&gt;Data A1;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp;do i=1 to 2157895433;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; if i ^in(9,13,14,87,100,101) then output;&lt;BR /&gt;end;&lt;BR /&gt;run;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;I&amp;nbsp;wanted to list down the nos that don't exist between&amp;nbsp;&lt;SPAN&gt;1&amp;nbsp;and 2157895433&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;I&amp;nbsp;have following algorithm in hand but it doesn't seem efficient, would appreciate if you could pass a tip to write optimal code&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;proc sql;&lt;BR /&gt;select count(*) format best32., min(i) format best32., max(i) format best32. into&lt;BR /&gt;:mv_cnt, :mv_min, :mv_max from a1;&lt;BR /&gt;quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;proc sort data=a1(keep=i) out=a1_sorted;&lt;BR /&gt;by i;&lt;BR /&gt;run;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data mis_a1;&lt;BR /&gt;do i=1 to 2157895433;&lt;BR /&gt;output;&lt;BR /&gt;end;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data a2_tgt;&lt;BR /&gt;merge &lt;SPAN&gt;a1_sorted&lt;/SPAN&gt;(in = a) &lt;SPAN&gt;mis_a1&lt;/SPAN&gt;(in=b);&lt;BR /&gt;by i;&lt;BR /&gt;if b and not a;&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Mon, 24 Sep 2018 18:04:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498501#M132491</guid>
      <dc:creator>saivenkat</dc:creator>
      <dc:date>2018-09-24T18:04:02Z</dc:date>
    </item>
    <item>
      <title>Re: Optimization</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498521#M132504</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Ok, this makes the assumption that your input is already sorted by i&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data A1;
   do i=1 to 200;

    if i ^in(9,13,14,87,100,101) then output;
end;
run;
data missing ; 
	retain cntr 1 ; /* counter, we will use this to compare to the input data */
	set a1 ;
	if cntr ne i then do ; /* if counter doesn't match i then we have found 1 or more missing numbers */
		do miss=cntr to i-1 ; /* This loop accounts for consecutive missing numbers, e.g. 13 &amp;amp; 14 */
		                      /* Cntr=13 i=15, so this will loop from 13 to 14 */
			output missing ;  /* output the missing numbers */
		end ;
		cntr=i ; /* set cntr to be the new value of i e.g. 15 */
	end ;
	cntr+1 ; /* increment cntr */
run ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 24 Sep 2018 18:38:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498521#M132504</guid>
      <dc:creator>AMSAS</dc:creator>
      <dc:date>2018-09-24T18:38:12Z</dc:date>
    </item>
    <item>
      <title>Re: Optimization</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498524#M132505</link>
      <description>&lt;P&gt;First of all, I would drop the PROC SQL, since the subsequent proc sort will make it easy to get min, max, and count.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Sasfont"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Sasfont"&gt;sort&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Sasfont"&gt;data&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;=a1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;by&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; I;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Sasfont"&gt;&lt;STRONG&gt;run&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Sasfont"&gt;&lt;STRONG&gt;data&lt;/STRONG&gt;&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Sasfont"&gt;_null_&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;set&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; a1 &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;nobs&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;=n_a1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;call&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; symput(&lt;/FONT&gt;&lt;FONT color="#800080" face="Sasfont"&gt;'mv_cnt'&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;,trim(put(n_a1,&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Sasfont"&gt;12.&lt;/FONT&gt;&lt;/STRONG&gt;&lt;FONT face="Sasfont"&gt;)));&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;call&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; symput(&lt;/FONT&gt;&lt;FONT color="#800080" face="Sasfont"&gt;'mv_min'&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;,trim(put(I,&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Sasfont"&gt;12.&lt;/FONT&gt;&lt;/STRONG&gt;&lt;FONT face="Sasfont"&gt;)));&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;set&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; a1 &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;point&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;=n_a1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;call&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt; symput(&lt;/FONT&gt;&lt;FONT color="#800080" face="Sasfont"&gt;'mv_max'&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;,trim(put(I,&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Sasfont"&gt;12.&lt;/FONT&gt;&lt;/STRONG&gt;&lt;FONT face="Sasfont"&gt;)));&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Sasfont"&gt;stop&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Sasfont"&gt;&lt;STRONG&gt;run&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Sasfont"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;But which part of your sequence is the slow part?&amp;nbsp; If the problem is to take a complete data step and output only records that are not gaps in some other dataset, you can do the proc sort stuff.&amp;nbsp; But what about (1) sorting the gap file only, and (2) using it in a hash object for lookup.&amp;nbsp; This would relieve you of sorting the main file from which you want to delete the 70 million records.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* If a1 is not sorted then this is necessary */
proc sort data=a1;
  by i;
run;

/* Make a dataset of gaps - about 70 million records */
data gaps (keep=i) /* about 70 million expected*/
  set a1 (rename=(i=j));
  lagj=coalesce(lag{j},0);
  if j-lagj^=1 then do i=lagj+1 to j-1;
    output;
  end;
  if end_of_need=1 and j&amp;lt; 2157895433 then do i=j+1 to  2157895433;
    output;
  end;
run;

data want;
  set have;
  if _n_=1 then do;
    declare hash h(dataset:'gaps');
	  h.definekey('i');
	  h.definedata('i');
	  h.definedone();
  end;
  if h.find()^=0;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The benefit here is that you don't have to sort the data set to be purged.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 24 Sep 2018 18:48:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498524#M132505</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2018-09-24T18:48:34Z</dc:date>
    </item>
    <item>
      <title>Re: Optimization</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498530#M132507</link>
      <description>&lt;P&gt;I&amp;nbsp;have 2 different tables and both the tables have couple of billion&amp;nbsp;records.. It is&amp;nbsp;required to fetch the records from table1, if&amp;nbsp;the&amp;nbsp;record doesn't exist in table2.&amp;nbsp;I felt&amp;nbsp;it is best to find the missing&amp;nbsp;values&amp;nbsp;in the range that belongs to table2. Fianlly&amp;nbsp;fetch the records from table1&amp;nbsp;if it matches with missing values or beyond the range.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for your prompt response!&lt;/P&gt;</description>
      <pubDate>Mon, 24 Sep 2018 19:14:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Optimization/m-p/498530#M132507</guid>
      <dc:creator>saivenkat</dc:creator>
      <dc:date>2018-09-24T19:14:32Z</dc:date>
    </item>
  </channel>
</rss>

