<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Data Step merge gives more observations than expected in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415837#M102053</link>
    <description>&lt;P&gt;You didn't get a note because multiple records for a by variable in one data set is quite normal in a merge. The note is only included when you have multiple records for a by variable in both datasets.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;</description>
    <pubDate>Thu, 23 Nov 2017 14:24:44 GMT</pubDate>
    <dc:creator>art297</dc:creator>
    <dc:date>2017-11-23T14:24:44Z</dc:date>
    <item>
      <title>Data Step merge gives more observations than expected</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415535#M101918</link>
      <description>&lt;P&gt;I am running the below code on SAS EG 5.1 .&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;***START preparing data for merging;
data _1_ACRONYM_HUBIDS_FOUND ;
	set ACRONYM_HUBIDS_FOUND ;
	customerno = LEFT(UPCASE(GROUP_NO_FOR_ACRONYM)) ; /*GROUP_NO_FOR_ACRONYM is copied as new variable customerno be the 'merge by' variable*/
run ;

data _1_cust_groups_FINAL ;
set cust_groups_FINAL ;
customerno = LEFT(UPCASE(customerno)) ;
run ;

proc sort data=_1_ACRONYM_HUBIDS_FOUND ; by customerno ; run;
proc sort data=_1_cust_groups_FINAL; by customerno; run ;

proc sql noprint ;
%let keeplist=;
select name into :keeplist separated by ' '
from dictionary.columns
where libname='WORK'
and memname= UPCASE('_1_ACRONYM_HUBIDS_FOUND') ;
;
quit;

OPTIONS MERGENOBY=WARN MSGLEVEL=I;

DATA ACRONYM_HUBIDS_with_custgroups (keep=&amp;amp;keeplist rank customergroup)
	acronyms_not_in_top50 (keep=&amp;amp;keeplist rank customerno customergroup) /*dataset to identify RAY customers not in top 50 customergroups*/
	EXCEPTION_1;

/*format statement to order variables in output datasets*/
format 	&amp;amp;keeplist
		rank customergroup
;

MERGE _1_ACRONYM_HUBIDS_FOUND(IN=In1) _1_cust_groups_FINAL (IN=In2);
BY customerno ;

IF (In1=1 and In2=1) then output ACRONYM_HUBIDS_with_custgroups ;
IF (In1 = 1 and  In2=0 ) then output acronyms_not_in_top50 ;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;My issue is with the results of the merge as reported where there seems to 1 record appearing in both output files of the data step merge when the output criteria (in=) are mutually exclusive.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here's the extract from the log:&lt;/P&gt;&lt;P&gt;NOTE: There were 3894 observations read from the data set WORK._1_ACRONYM_HUBIDS_FOUND.&lt;/P&gt;&lt;P&gt;NOTE: There were 1374 observations read from the data set WORK._1_CUST_GROUPS_FINAL.&lt;/P&gt;&lt;P&gt;NOTE: The data set WORK.ACRONYM_HUBIDS_WITH_CUSTGROUPS has 3261 observations and 11 variables.&lt;/P&gt;&lt;P&gt;NOTE: The data set WORK.ACRONYMS_NOT_IN_TOP50 has 634 observations and 11 variables.&lt;/P&gt;&lt;P&gt;NOTE: The data set WORK.EXCEPTION_1 has 0 observations and 11 variables.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;3261 + 634 = 3895 , but the "left join" left dataset WORK._1_ACRONYM_HUBIDS_FOUND had only 3894 observations.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;PROC CONTENTS of both files used in the merge are provided as&amp;nbsp;PDF attachment to this post.&lt;/P&gt;</description>
      <pubDate>Wed, 22 Nov 2017 15:14:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415535#M101918</guid>
      <dc:creator>constliv</dc:creator>
      <dc:date>2017-11-22T15:14:55Z</dc:date>
    </item>
    <item>
      <title>Re: Data Step merge gives more observations than expected</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415538#M101920</link>
      <description>&lt;P&gt;If you provide test data which illustrates the issue, then it would be easier to help.&amp;nbsp; I can suggest using a proc sql join to simplify the code:&lt;/P&gt;
&lt;PRE&gt;proc sql;
  create table ACRONYM_HUBIDS_with_custgroups as 
  select A.*,
         B.....
  from   _1_ACRONYM_HUBIDS_FOUND A
  inner join _1_CUST_GROUPS_FINAL B
  on     A.CUSTOMERNO=B.CUSTOMER_NO;
  create table ACRONYMS_NOT_IN_TOP50 as 
  select ...
  from   ...
  where  CUSTOMERNO not in (select CUSTOMERNO from _1_CUST_GROUPS_FINAL);
quit;&lt;/PRE&gt;</description>
      <pubDate>Wed, 22 Nov 2017 15:23:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415538#M101920</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2017-11-22T15:23:47Z</dc:date>
    </item>
    <item>
      <title>Re: Data Step merge gives more observations than expected</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415543#M101922</link>
      <description>&lt;P&gt;Without see your data we can only guess. My guess would be that you have a duplicate record for one customer in your _1_cust_groups_FINAL dataset.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;For example, the following code results in the unexpected number you encountered:&lt;/P&gt;
&lt;PRE&gt;DATA _1_ACRONYM_HUBIDS_FOUND;
  input customerno;
  cards;
1
2
3
4
;

DATA _1_cust_groups_FINAL;
  input customerno;
  cards;
1
3
3
4
;

DATA ACRONYM_HUBIDS_with_custgroups
	acronyms_not_in_top50
	EXCEPTION_1;
  MERGE _1_ACRONYM_HUBIDS_FOUND(IN=In1) _1_cust_groups_FINAL (IN=In2);
  BY customerno ;

  IF (In1=1 and In2=1) then output ACRONYM_HUBIDS_with_custgroups ;
  IF (In1 = 1 and  In2=0 ) then output acronyms_not_in_top50 ;
RUN;
&lt;/PRE&gt;
&lt;P&gt;Also, you create a file called EXCEPTION_1, but don't include a statement to output any records to it.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 22 Nov 2017 15:38:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415543#M101922</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-11-22T15:38:28Z</dc:date>
    </item>
    <item>
      <title>Re: Data Step merge gives more observations than expected</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415753#M102019</link>
      <description>&lt;P&gt;Art,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You guessed right! Indeed there are 2 records with same customerno in my dataset _1_cust_groups_FINAL&amp;nbsp;!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;However, I am curious to know why this did not get reported in the log with a message such as 'multiple values for BY variable" (Log provided as attachment).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 23 Nov 2017 07:09:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415753#M102019</guid>
      <dc:creator>constliv</dc:creator>
      <dc:date>2017-11-23T07:09:12Z</dc:date>
    </item>
    <item>
      <title>Re: Data Step merge gives more observations than expected</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415837#M102053</link>
      <description>&lt;P&gt;You didn't get a note because multiple records for a by variable in one data set is quite normal in a merge. The note is only included when you have multiple records for a by variable in both datasets.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;</description>
      <pubDate>Thu, 23 Nov 2017 14:24:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Data-Step-merge-gives-more-observations-than-expected/m-p/415837#M102053</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-11-23T14:24:44Z</dc:date>
    </item>
  </channel>
</rss>

