<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Usage of COUNT function in SQL JOINS with/without DISTINCT keyword in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644549#M192530</link>
    <description>&lt;P&gt;Your join will always end up as a cartesian join, because that's how SQL works; when you have M tuples for a key in one dataset and N tuples in the other, the join will give you M*N tuples as result. Unless you prevent that with DISTINCT.&lt;/P&gt;
&lt;P&gt;Depending on the actual form of the join, you should contemplate using a data step merge, or prepare your datasets in a way that you'l have a M to 1 join instead of a M to N.&lt;/P&gt;</description>
    <pubDate>Fri, 01 May 2020 15:41:39 GMT</pubDate>
    <dc:creator>Kurt_Bremser</dc:creator>
    <dc:date>2020-05-01T15:41:39Z</dc:date>
    <item>
      <title>Usage of COUNT function in SQL JOINS with/without DISTINCT keyword</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644544#M192528</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Have embedded my COMMENTS in the codes, to get an understanding of my requirement.&lt;BR /&gt;&lt;BR /&gt;/*8 observation from Shoes dataset*/
data shoes;
 set sashelp.shoes;
  if region='Canada' and Subsidiary='Montreal';
run ;

/*8 observations from org dataset since same data replicated*/
data org;
 set sashelp.shoes;
 if region='Canada' and Subsidiary='Montreal';
run ;


/*TASK: JOINING both the datasets to get the count of PRODUCT variable and storing the result in CNT variable using COUNT FUNCTION. Since there
are only 8 observation the CNT should also be 8,but because of CARTESIAN product CNT variable is showing 64 . Different results
with different usage of DISTINCT function*/


/*Scenario 1 Not using DISTINCT getting 64 observations in the dataset and CNT varaible
 has 64 observations*/
proc sql noprint;
   create table want1 as select org.region ,shoes.product ,shoes.Subsidiary,shoes.stores,
   count(shoes.product) as cnt
	 	from org as org left join 
       shoes as shoes on org.region = shoes.region	    	     
     group by org.region
     order by org.region;
quit;

/*Scenario 2 using DISTINCT getting 8 observations and CNT variable has 64 observations */
proc sql noprint;
   create table want2 as select distinct org.region ,shoes.product ,shoes.Subsidiary,shoes.stores,
   count(shoes.product) as cnt
	 	from org as org left join 
       shoes as shoes on org.region = shoes.region	    	     
     group by org.region
     order by org.region;
quit;


/*Scenario 3: using DISTINCT at COUNT level getting 64 observations and CNT variable has 8 observations */
proc sql noprint;
   create table want3 as select org.region ,shoes.product ,shoes.Subsidiary,shoes.stores,
   count(distinct(shoes.product)) as cnt
	 	from org as org left join 
       shoes as shoes on org.region = shoes.region	    	     
     group by org.region
     order by org.region;
quit;


/*Scenario 4: using DISTINCT at COUNT level getting 8 observations and CNT variable has 64 observations */
proc sql noprint;
   create table want4 as select distinct org.region ,shoes.product ,shoes.Subsidiary,shoes.stores,
   count(distinct(shoes.product)) as cnt
	 	from org as org left join 
       shoes as shoes on org.region = shoes.region	    	     
     group by org.region
     order by org.region;
quit;

My requirement is NOT TO use DISTINCT at COUNT function but to get the COUNT(CNT) value as 8 since the actual count is &lt;BR /&gt;only 8 in the dataset. If we use distinct at COUNT FUNCTION we will get 8 . But i have customer requirements to use both &lt;BR /&gt;DISTINCT and without DISTINCT but my current issues is not to USE DISTINCT at COUNT function&lt;BR /&gt;to get 8 as CNT value though 64 observations are present. Any other alternative/solution for this.&lt;BR /&gt;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 01 May 2020 15:32:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644544#M192528</guid>
      <dc:creator>keen_sas</dc:creator>
      <dc:date>2020-05-01T15:32:04Z</dc:date>
    </item>
    <item>
      <title>Re: Usage of COUNT function in SQL JOINS with/without DISTINCT keyword</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644549#M192530</link>
      <description>&lt;P&gt;Your join will always end up as a cartesian join, because that's how SQL works; when you have M tuples for a key in one dataset and N tuples in the other, the join will give you M*N tuples as result. Unless you prevent that with DISTINCT.&lt;/P&gt;
&lt;P&gt;Depending on the actual form of the join, you should contemplate using a data step merge, or prepare your datasets in a way that you'l have a M to 1 join instead of a M to N.&lt;/P&gt;</description>
      <pubDate>Fri, 01 May 2020 15:41:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644549#M192530</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2020-05-01T15:41:39Z</dc:date>
    </item>
    <item>
      <title>Re: Usage of COUNT function in SQL JOINS with/without DISTINCT keyword</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644550#M192531</link>
      <description>&lt;P&gt;Do you have to use SQL?&lt;/P&gt;
&lt;P&gt;I don't think the fix is the count/distinct, it's in the joins most likely - you need to make it a distinct join which usually means adding a join condition. Can you post an example of your org table?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 01 May 2020 15:42:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644550#M192531</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-05-01T15:42:37Z</dc:date>
    </item>
    <item>
      <title>Re: Usage of COUNT function in SQL JOINS with/without DISTINCT keyword</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644702#M192611</link>
      <description>&lt;P&gt;I think what you want to do is something like this:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
   create table want as select distinct org.region ,shoes.product ,shoes.Subsidiary,shoes.stores,shoes.cnt
  	 	from org as org left join 
      (select *,count(*) as cnt from shoes) shoes
   on org.region = shoes.region	    	     
     group by org.region
     order by org.region;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;In other words: do the counting before the join by creating an inner query for shoes.&lt;/P&gt;</description>
      <pubDate>Sat, 02 May 2020 12:24:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Usage-of-COUNT-function-in-SQL-JOINS-with-without-DISTINCT/m-p/644702#M192611</guid>
      <dc:creator>s_lassen</dc:creator>
      <dc:date>2020-05-02T12:24:53Z</dc:date>
    </item>
  </channel>
</rss>

