<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: More efficient way in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124872#M34306</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Well. After through the logistic Model&amp;nbsp; , I found a lot of missing value of pred in both GD0 and GD1 , maybe you do not need it any more , exclude them , it will be very fast.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;proc sql;

create table temp as 
select a.tic as TEM1_tic, a.fyear as tem1_fyear, b.*, abs(a.pred - b.pred) as diffPS

from GD1(where=(pred is not missing)) a , GD0(where=(pred is not missing)) b
group by a.tic, a.fyear
having calculated diffPS = min(calculated diffPS);
quit;

&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Thu, 20 Sep 2012 05:21:11 GMT</pubDate>
    <dc:creator>Ksharp</dc:creator>
    <dc:date>2012-09-20T05:21:11Z</dc:date>
    <item>
      <title>More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124867#M34301</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I ran the following program (propensity score matching):&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sort data=test out=full ;&lt;/P&gt;&lt;P&gt;by descending tem; run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc probit data=FULL noprint;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; class TEM;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; model TEM = FREQ SHARES NUMEST BONUS_PERCENT1 BLACK_OPTION1 ROA1 LG_ASSET MB&lt;/P&gt;&lt;P&gt;IND01-IND10 YeaR03 - YeaR09 / lackfit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; output out=regdata_out xbeta=gammaw p=pred;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data GD1 GD0; set regdata_out;&lt;/P&gt;&lt;P&gt;if TEM = 1 then output GD1;&lt;/P&gt;&lt;P&gt;if TEM = 0 then output GD0;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sql;&lt;/P&gt;&lt;P&gt;create table temp as select a.tic as TEM1_tic, a.fyear as tem1_fyear, b.*, abs(sum(a.pred, -b.pred)) as diffPS,&lt;/P&gt;&lt;P&gt;min(calculated diffPS) as closest_PS&lt;/P&gt;&lt;P&gt;from GD1 a , GD0 b&lt;/P&gt;&lt;P&gt;group by a.tic, a.fyear&lt;/P&gt;&lt;P&gt;having diffPS = closest_PS;&lt;/P&gt;&lt;P&gt;quit;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sql;&lt;/P&gt;&lt;P&gt;create table temp_tem1 as select a.*&lt;/P&gt;&lt;P&gt;from GD1 a, temp b&lt;/P&gt;&lt;P&gt;where&lt;/P&gt;&lt;P&gt;a.tic = b.TEM1_tic and a.fyear = b.tem1_fyear;&lt;/P&gt;&lt;P&gt;quit;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data matchpair; set temp_tem1 temp(drop = tem1_tic); run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;But I could not get the output as it consumes a lot of processing time and a large storage. I am not sure whether there is a more efficient way to obtain the same output as the program above?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;mspak&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 19 Sep 2012 03:05:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124867#M34301</guid>
      <dc:creator>mspak</dc:creator>
      <dc:date>2012-09-19T03:05:42Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124868#M34302</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You are using Cartesian Product of SQL . therefore, you almost can't get the result when you have a not big table.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What is your sample data?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 19 Sep 2012 05:08:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124868#M34302</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-09-19T05:08:06Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124869#M34303</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;In your first join, you're not specifying any join keys. Because of this, every record in GD1 will be matched with every record in GD0 to create your result, which is probably not what you want.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;There are different synaxes for doing this, I recommend the form of:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sql;&lt;/P&gt;&lt;P&gt;create table temp as select ...&lt;/P&gt;&lt;P&gt;from GD1 a inner join GD0 b&lt;/P&gt;&lt;P&gt;on a.join_key = b.join_key&lt;/P&gt;&lt;P&gt;...&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;where join_key represents the fields in GD1 and GD0 that can be used to match the tables.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;On the other hand, if you genuinely do need to do a cartesian join, if you provide more details of your objectives I'm sure the group will be able to help improve the speed.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Tom&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;P.S. If you drop the following string into Google, the first result should take you right to the relevante section of the documentation:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Cartesian Product of LEFTTAB and RIGHTTAB Tables site:support.sas.com&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;T&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Message was edited by: Tom Kari&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 19 Sep 2012 16:19:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124869#M34303</guid>
      <dc:creator>TomKari</dc:creator>
      <dc:date>2012-09-19T16:19:47Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124870#M34304</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Sorry for late reply. I was experiencing the problem to upload my SAS file. I have updated my query.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks for suggestions.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Mspak&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 20 Sep 2012 02:36:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124870#M34304</guid>
      <dc:creator>mspak</dc:creator>
      <dc:date>2012-09-20T02:36:10Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124871#M34305</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear Tom Kari,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks for your suggestion. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am not going to join with a joining key. I would like to match GD1 and GD0 with the closest_PS. I wish to pair GD1 and GD0 with their closest score (ie. closest_PS).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;MSPAK&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 20 Sep 2012 05:20:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124871#M34305</guid>
      <dc:creator>mspak</dc:creator>
      <dc:date>2012-09-20T05:20:18Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124872#M34306</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Well. After through the logistic Model&amp;nbsp; , I found a lot of missing value of pred in both GD0 and GD1 , maybe you do not need it any more , exclude them , it will be very fast.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;proc sql;

create table temp as 
select a.tic as TEM1_tic, a.fyear as tem1_fyear, b.*, abs(a.pred - b.pred) as diffPS

from GD1(where=(pred is not missing)) a , GD0(where=(pred is not missing)) b
group by a.tic, a.fyear
having calculated diffPS = min(calculated diffPS);
quit;

&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 20 Sep 2012 05:21:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124872#M34306</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-09-20T05:21:11Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124873#M34307</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Lots of missing data in pred is a red flag for me.&amp;nbsp; That means that those observations are not going to be used in the propensity score matching process (as made explicit in KSharp's code).&amp;nbsp; You may wish to look at the patterns of missing in the predictors to see if you can refine the initial model.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Doc Muhlbaier&lt;/P&gt;&lt;P&gt;Duke&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Message was edited by: Lawrence Muhlbaier&#xD;
&#xD;
See this note for lots on how to do the matching more efficiently&#xD;
&lt;/SPAN&gt;&lt;A class="jive-link-external-small" href="http://support.sas.com/kb/30/971.html"&gt;http://support.sas.com/kb/30/971.html&lt;/A&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 20 Sep 2012 12:54:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124873#M34307</guid>
      <dc:creator>Doc_Duke</dc:creator>
      <dc:date>2012-09-20T12:54:14Z</dc:date>
    </item>
    <item>
      <title>Re: More efficient way</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124874#M34308</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thanks you for your suggestion.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It does work.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;mspak&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 22 Sep 2012 11:06:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/More-efficient-way/m-p/124874#M34308</guid>
      <dc:creator>mspak</dc:creator>
      <dc:date>2012-09-22T11:06:36Z</dc:date>
    </item>
  </channel>
</rss>

