<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/394379#M95019</link>
    <description>&lt;P&gt;Apologies for the delay -- hardware problems.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%MACRO nonm_2var_stat(dsn_in1=,dsn_in2=,dsn_an=,group_var=,var1in1=, var1in2=, var2in1=, var2in2=,var3in1=, var3in2=);
%macro _; %mend _;
proc sql;        ************ 22 seconds **********;
 create table &amp;amp;dsn_an. as
 select  *,
count(distinct(&amp;amp;group_var.)) as nonm_&amp;amp;group_var._cnt from
(select a.*,
count(a.&amp;amp;group_var.) as nonm_obs_per_&amp;amp;group_var._cnt
 from &amp;amp;dsn_in1. as a
       full join
      &amp;amp;dsn_in2. as b
       on a.&amp;amp;var1in1.=b.&amp;amp;var1in2. and a.&amp;amp;var2in1.=b.&amp;amp;var2in2. 
  where a.&amp;amp;var1in1. ne b.&amp;amp;var1in2. and a.&amp;amp;var2in1. ne b.&amp;amp;var2in2. 
  group by a.&amp;amp;group_var.),
(select count(distinct c.&amp;amp;var1in1.) as ds1_&amp;amp;var1in1._cnt,
count(distinct c.&amp;amp;var2in1.) as ds1_&amp;amp;var2in1._cnt,
count(distinct(c.&amp;amp;group_var.)) as ds1_&amp;amp;group_var._cnt
from  &amp;amp;dsn_in1. as c),
(select count(distinct d.&amp;amp;var1in2.) as ds2_&amp;amp;var1in2._cnt,
    count(distinct d.&amp;amp;var2in2.) as ds2_&amp;amp;var2in2._cnt, 
	count(distinct(d.&amp;amp;group_var.)) as ds2_&amp;amp;group_var._cnt
from  &amp;amp;dsn_in2. as d);
quit;
%MEND nonm_2var_stat;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;After several tries, this code does what I wanted. Merge/join analytics for panel data. Thank you everybody!&lt;/P&gt;</description>
    <pubDate>Sat, 09 Sep 2017 00:22:15 GMT</pubDate>
    <dc:creator>astrae_research</dc:creator>
    <dc:date>2017-09-09T00:22:15Z</dc:date>
    <item>
      <title>Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391031#M93859</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Thsi is a follow up to my previous question about non-matcing observations which had great responses! I'm trying to build on PROC SQL way to find nonmatching observations between 2 datasets and add columns with aggregate statistics from the original datasets or even some other datasets. This will help to understand better the mechanics of PROC SQL.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data dataset1;
      input id $ group $ gender $ age;
      cards;
      111 A Male 11
      111 B Male 11
      222 D Male 12
      333 E Female 13
      666 G Female 14
      999 A Male 15
      999 B Male 15
      999 C Male 15
      ;
run;

data dataset2;
      input id $ group $ gender $ age;
      cards;
      111 A Male 11
      999 C Male 15
      ;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%let dsn_in1=work.dataset1;
%LET dsn_in2=work.dataset2;
%LET dsn_an=work._t_nonm3;
%LET group_var=group;
%LET var1in1=group;
%LET var1in2=group;
%LET var2in1=gender;
%LET var2in2=gender;&lt;/CODE&gt;&lt;/PRE&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;        ************ 22 seconds **********;
 create table &amp;amp;dsn_an. as
 select  *,
count(distinct(&amp;amp;group_var.)) as group_cnt from
(select a.*,
count(a.&amp;amp;group_var.) as obs_per_group_cnt
 from &amp;amp;dsn_in1. as a
       left join
      &amp;amp;dsn_in2. as b
       on a.&amp;amp;var1in1.=b.&amp;amp;var1in2. and a.&amp;amp;var2in1.=b.&amp;amp;var2in2. 
  where a.&amp;amp;var1in1. ne b.&amp;amp;var1in2. and a.&amp;amp;var2in1. ne b.&amp;amp;var2in2. 
  group by a.&amp;amp;group_var.);
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;So the questions is how can I add to this &amp;amp;dsn_an. some group by stats from the original 2 datasets or even just some counts? Like count(distinct(a.&amp;amp;group_var.)) as ds1_group_cnt,&lt;BR /&gt;count(distinct(b.&amp;amp;group_var.)) as ds2_group_cnt? Can I have&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;count(a.&amp;amp;group_var.) as obs_per_group_cnt&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;but for for all groups/observations in dataset2 ( b)?&lt;/P&gt;&lt;P&gt;I have tried different ways but the best i could get is the correct count for dataset 1 and 0 for dataset even though there are clearly distinct groups in dataset 2. I aim to do this in one proc sql to improve io perf.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If there are good resources on how to manage multiple subqueries with aggregate statistics with say 2+ group by's for different variables from different datasets, that would be great!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Apologies for the rushed question.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2017 22:30:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391031#M93859</guid>
      <dc:creator>astrae_research</dc:creator>
      <dc:date>2017-08-25T22:30:54Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391033#M93860</link>
      <description>&lt;P&gt;Please show what you expect as output.&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2017 22:35:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391033#M93860</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2017-08-25T22:35:07Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391034#M93861</link>
      <description>&lt;P&gt;I'm not sure if this will help but ...&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can refer to sub query and refer to it by an alias for use as another element of a join. This is what makes SQL programming so much fun you can nest queries until it is impossible to tell what comes from where and eat lots of computer resources if you do the nesting in the wrong order.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;from (&amp;nbsp;( select id, sum(balance) from ds group by id ) as alias&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; left join&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; (some other query as long as you have the joing requirements met) as other&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; on id&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ) as combinedqueryalias&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;lt;and so forth&amp;gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The little bit I do with nested subqueries usually starts out a test query on a &lt;STRONG&gt;small&lt;/STRONG&gt; subset of the data to create a table. Then the query that uses that table. At which point I can directly replace the table reference with the body of the query&amp;nbsp;minus the create table bit in () with an alias. But I will make no claims as to efficiency of the result.&lt;/P&gt;</description>
      <pubDate>Mon, 28 Aug 2017 15:52:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391034#M93861</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2017-08-28T15:52:49Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391051#M93870</link>
      <description>&lt;P&gt;Sorry. I certainly don't understand the question. It looks as a convoluted way of doing something like this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data dataset1;
      input id $ group $ gender $ age;
      cards;
      111 A Male 11
      111 B Male 11
      222 D Male 12
      333 E Female 13
      666 G Female 14
      999 A Male 15
      999 B Male 15
      999 C Male 15
      ;

data dataset2;
      input id $ group $ gender $ age;
      cards;
      111 A Male 11
      555 H Female 12
      999 C Male 15
      ;

proc sql;
select 
    count(distinct group) as nbGroups,
    count(distinct gender) as nbGenders
from 
    (select * from dataset1
     union corr
     select * from dataset2);
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 26 Aug 2017 04:29:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391051#M93870</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-08-26T04:29:28Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391178#M93919</link>
      <description>&lt;P&gt;Can you put this clause in the proc sql code I posted earlier? Your code seems to work but I don't know how to combine with the original proc sql code that finds nonmatching observations between 2 datasets.&lt;/P&gt;&lt;P&gt;Basically how do I add ( to every line) the statistics your code created to the table &amp;amp;dsn_an. created below in the same proc sql step?&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;        ************ 22 seconds **********;
 create table &amp;amp;dsn_an. as
 select  *,
count(distinct(&amp;amp;group_var.)) as group_cnt from
(select a.*,
count(a.&amp;amp;group_var.) as obs_per_group_cnt
 from &amp;amp;dsn_in1. as a
       left join
      &amp;amp;dsn_in2. as b
       on a.&amp;amp;var1in1.=b.&amp;amp;var1in2. and a.&amp;amp;var2in1.=b.&amp;amp;var2in2. 
  where a.&amp;amp;var1in1. ne b.&amp;amp;var1in2. and a.&amp;amp;var2in1. ne b.&amp;amp;var2in2. 
  group by a.&amp;amp;group_var.);
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;The results I am looking for in one proc sql step are:&lt;/P&gt;&lt;TABLE border="0" cellpadding="0" cellspacing="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;id&lt;/TD&gt;&lt;TD&gt;group&lt;/TD&gt;&lt;TD&gt;gender&lt;/TD&gt;&lt;TD&gt;age&lt;/TD&gt;&lt;TD&gt;obs_per_group_cnt&lt;/TD&gt;&lt;TD&gt;group_cnt&lt;/TD&gt;&lt;TD&gt;nbgroups&lt;/TD&gt;&lt;TD&gt;nbgenders&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;999&lt;/TD&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;Male&lt;/TD&gt;&lt;TD&gt;15&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;111&lt;/TD&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;Male&lt;/TD&gt;&lt;TD&gt;11&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;222&lt;/TD&gt;&lt;TD&gt;D&lt;/TD&gt;&lt;TD&gt;Male&lt;/TD&gt;&lt;TD&gt;12&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;333&lt;/TD&gt;&lt;TD&gt;E&lt;/TD&gt;&lt;TD&gt;Female&lt;/TD&gt;&lt;TD&gt;13&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;666&lt;/TD&gt;&lt;TD&gt;G&lt;/TD&gt;&lt;TD&gt;Female&lt;/TD&gt;&lt;TD&gt;14&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Again, apologies if this is not entirely clear.&lt;/P&gt;</description>
      <pubDate>Mon, 28 Aug 2017 04:19:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391178#M93919</guid>
      <dc:creator>astrae_research</dc:creator>
      <dc:date>2017-08-28T04:19:07Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391181#M93920</link>
      <description>&lt;P&gt;What is the criteria for a match? Do you require equality on all 4 variables?&lt;/P&gt;</description>
      <pubDate>Mon, 28 Aug 2017 04:40:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/391181#M93920</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-08-28T04:40:02Z</dc:date>
    </item>
    <item>
      <title>Re: Nonmatching observations from 2 datasets plus additional aggregate statistics in one PROC SQL?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/394379#M95019</link>
      <description>&lt;P&gt;Apologies for the delay -- hardware problems.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%MACRO nonm_2var_stat(dsn_in1=,dsn_in2=,dsn_an=,group_var=,var1in1=, var1in2=, var2in1=, var2in2=,var3in1=, var3in2=);
%macro _; %mend _;
proc sql;        ************ 22 seconds **********;
 create table &amp;amp;dsn_an. as
 select  *,
count(distinct(&amp;amp;group_var.)) as nonm_&amp;amp;group_var._cnt from
(select a.*,
count(a.&amp;amp;group_var.) as nonm_obs_per_&amp;amp;group_var._cnt
 from &amp;amp;dsn_in1. as a
       full join
      &amp;amp;dsn_in2. as b
       on a.&amp;amp;var1in1.=b.&amp;amp;var1in2. and a.&amp;amp;var2in1.=b.&amp;amp;var2in2. 
  where a.&amp;amp;var1in1. ne b.&amp;amp;var1in2. and a.&amp;amp;var2in1. ne b.&amp;amp;var2in2. 
  group by a.&amp;amp;group_var.),
(select count(distinct c.&amp;amp;var1in1.) as ds1_&amp;amp;var1in1._cnt,
count(distinct c.&amp;amp;var2in1.) as ds1_&amp;amp;var2in1._cnt,
count(distinct(c.&amp;amp;group_var.)) as ds1_&amp;amp;group_var._cnt
from  &amp;amp;dsn_in1. as c),
(select count(distinct d.&amp;amp;var1in2.) as ds2_&amp;amp;var1in2._cnt,
    count(distinct d.&amp;amp;var2in2.) as ds2_&amp;amp;var2in2._cnt, 
	count(distinct(d.&amp;amp;group_var.)) as ds2_&amp;amp;group_var._cnt
from  &amp;amp;dsn_in2. as d);
quit;
%MEND nonm_2var_stat;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;After several tries, this code does what I wanted. Merge/join analytics for panel data. Thank you everybody!&lt;/P&gt;</description>
      <pubDate>Sat, 09 Sep 2017 00:22:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Nonmatching-observations-from-2-datasets-plus-additional/m-p/394379#M95019</guid>
      <dc:creator>astrae_research</dc:creator>
      <dc:date>2017-09-09T00:22:15Z</dc:date>
    </item>
  </channel>
</rss>

