<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: why i get too many rows in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542686#M74178</link>
    <description>could you please have a look on my new question about how to match two datasets ?&lt;BR /&gt;i need help</description>
    <pubDate>Wed, 13 Mar 2019 08:51:40 GMT</pubDate>
    <dc:creator>Radwan</dc:creator>
    <dc:date>2019-03-13T08:51:40Z</dc:date>
    <item>
      <title>why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/541889#M74146</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;/P&gt;&lt;P&gt;when i use proc sql; to create tables sometimes i get too many observations for instance&amp;nbsp;&lt;/P&gt;&lt;P&gt;i used this code&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table control_vars as select a.code,a.year,a.Total_assets,a.Recievables,a.total_liabilities,a.inventory,a.Equity,
b.Firm_Age,c.indusCode,d.boardsize,d.number_of_independent_directors,d.Number_of_executives,d._The_total_remuneration_of_the_f,
d.Total_Top_Three_Compensation,d.Duality,d.totalSalary,e.Number_of_Board_Meetings,f.LocationCode,g.roa,g.roe,h.soe,i.TOBINQ1,
i.BTM1,i.Enterpris__Value_Multiplier,j.big4
from balance as a, firm_age as b, industry as c, boards as d, meeting as e, location as f, roa_roe as g, soe as h,
indictors as i, big4 as j
where a.code=b.code=c.code=d.code=e.code=f.code=g.code=i.code=j.code and a.year=d.year=e.year=g.year=h.year2=i.year=j.year2;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;then i took around 10 minutes till finished the statement then i got the table that contains the following&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;79 proc sql;&lt;BR /&gt;80 create table control_vars as select&lt;BR /&gt;80 ! a.code,a.year,a.Total_assets,a.Recievables,a.total_liabilities,a.inventory,a.Equity,&lt;BR /&gt;81 b.Firm_Age,c.indusCode,d.boardsize,d.number_of_independent_directors,d.Number_of_exec&lt;BR /&gt;81 ! utives,d._The_total_remuneration_of_the_f,&lt;BR /&gt;82 d.Total_Top_Three_Compensation,d.Duality,d.totalSalary,e.Number_of_Board_Meetings,f.L&lt;BR /&gt;82 ! ocationCode,g.roa,g.roe,h.soe,i.TOBINQ1,&lt;BR /&gt;83 i.BTM1,i.Enterpris__Value_Multiplier,j.big4&lt;BR /&gt;84 from balance as a, firm_age as b, industry as c, boards as d, meeting as e, location&lt;BR /&gt;84 ! as f, roa_roe as g, soe as h,&lt;BR /&gt;85 indictors as i, big4 as j&lt;BR /&gt;86 where a.code=b.code=c.code=d.code=e.code=f.code=g.code=i.code=j.code and&lt;BR /&gt;86 ! a.year=d.year=e.year=g.year=h.year2=i.year=j.year2;&lt;BR /&gt;NOTE: Table WORK.CONTROL_VARS created, with 54165899 rows and 25 columns.&lt;/P&gt;&lt;P&gt;87 quit;&lt;BR /&gt;NOTE: PROCEDURE SQL used (Total process time):&lt;BR /&gt;real time 11:13.68&lt;BR /&gt;cpu time 3:26.12&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;the issue the program be so slow&amp;nbsp;&lt;/P&gt;&lt;P&gt;please i need a solution&lt;/P&gt;</description>
      <pubDate>Mon, 11 Mar 2019 06:33:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/541889#M74146</guid>
      <dc:creator>Radwan</dc:creator>
      <dc:date>2019-03-11T06:33:14Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/541896#M74147</link>
      <description>&lt;P&gt;When SQL creates a much larger number of observations than expected, it is usually because the datasets have more than one repeat of the join condition, and then SQL creates a "cartesian join". See this example:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have1;
input year code value1;
cards;
2019 1 1
2019 1 2
;
run;

data have2;
input year code value2;
cards;
2019 1 3
2019 1 4
;
run;

proc sql;
create table want as
select
  a.year,
  a.code,
  a.value1,
  b.value2
from have1 a, have2 b
where a.year = b.year and a.code=b.code
;
quit;

proc print data=want noobs;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The result:&lt;/P&gt;
&lt;PRE&gt;year    code    value1    value2

2019      1        1         3  
2019      1        1         4  
2019      1        2         3  
2019      1        2         4  
&lt;/PRE&gt;
&lt;P&gt;shows that you get 2 * 2 observations.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Start by implementing Maxim 3 (Know Your Data) and see how many repeats of year/code combinations you have in the datasets.&lt;/P&gt;
&lt;P&gt;Then decide how you want to treat multiple occurences for each individual dataset. Maybe you have an additional columns that identfies matches?&lt;/P&gt;
&lt;P&gt;Lastly, proc sql is a known resource hog when doing (large-scale) joins. All data from the tables is dumped into a utility file, from which the matches are read with &lt;EM&gt;very&lt;/EM&gt; many random accesses, which will slow down your disks, especially if they're not SSD's but spinning metal.&lt;/P&gt;
&lt;P&gt;Consider sorting your datasets and doing a data step merge, but only after you know your datasets (see above), as a data step merge behaves different than SQL when multiple repeats are present in more than 1 input dataset (no cartesian join).&lt;/P&gt;</description>
      <pubDate>Mon, 11 Mar 2019 07:06:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/541896#M74147</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-03-11T07:06:16Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542598#M74174</link>
      <description>KirkBresmer, I like the way you code, easy to read.</description>
      <pubDate>Tue, 12 Mar 2019 22:42:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542598#M74174</guid>
      <dc:creator>CJac73</dc:creator>
      <dc:date>2019-03-12T22:42:08Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542664#M74175</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/38068"&gt;@CJac73&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;KirkBresmer, I like the way you code, easy to read.&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;That's covered in another Maxim (#12). Consistent code layout with a clear visability of functional blocks is a great help in understanding, debugging and maintaining code. I would not be able to handle my responsibilities (&amp;gt;1000 ETL and report batch jobs) if the programs would not be easy to maintain (or write in the first place, thanks to my collection of custom macros).&lt;/P&gt;</description>
      <pubDate>Wed, 13 Mar 2019 06:15:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542664#M74175</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-03-13T06:15:26Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542683#M74176</link>
      <description>agreed</description>
      <pubDate>Wed, 13 Mar 2019 08:44:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542683#M74176</guid>
      <dc:creator>Radwan</dc:creator>
      <dc:date>2019-03-13T08:44:25Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542685#M74177</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/11562"&gt;@Kurt_Bremser&lt;/a&gt;&amp;nbsp; thanks i have check my dataset and i found a repetition&amp;nbsp; as you said&amp;nbsp;&lt;/P&gt;&lt;P&gt;now it is working well&amp;nbsp;&lt;/P&gt;&lt;P&gt;regards&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 13 Mar 2019 08:48:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542685#M74177</guid>
      <dc:creator>Radwan</dc:creator>
      <dc:date>2019-03-13T08:48:44Z</dc:date>
    </item>
    <item>
      <title>Re: why i get too many rows</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542686#M74178</link>
      <description>could you please have a look on my new question about how to match two datasets ?&lt;BR /&gt;i need help</description>
      <pubDate>Wed, 13 Mar 2019 08:51:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/why-i-get-too-many-rows/m-p/542686#M74178</guid>
      <dc:creator>Radwan</dc:creator>
      <dc:date>2019-03-13T08:51:40Z</dc:date>
    </item>
  </channel>
</rss>

