<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Eliminating duplicate records across three datasets in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379119#M91221</link>
    <description>&lt;P&gt;The indsname option is new to me and super cool, but this doesn't do what I want. I want to delete records common between the tables.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So with&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data fy07_data;
   input ssn;
   datalines;
11
12
13
14
15
16
17
18
21
22
31
32
;

data fy08_data;
   input ssn;
   datalines;
11
12
21
22
23
24
25
26
27
28
33
34
;

data fy09_data;
   input ssn;
   datalines;
13
14
21
22
31
32
33
34
35
36
37
38
;


data combined;
set fy07_data fy08_data fy09_data indsname=source;
source_name = source;
run;

proc sort data=combined; 
by source_name ssn;
run;

proc sort data=combined nodupkey;
by ssn;
run;


proc print data=combined noobs; 
 var ssn source_name;
run;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I get a data set of&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;                                     ssn     source_name

                                      11    WORK.FY07_DATA
                                      12    WORK.FY07_DATA
                                      13    WORK.FY07_DATA
                                      14    WORK.FY07_DATA
                                      15    WORK.FY07_DATA
                                      16    WORK.FY07_DATA
                                      17    WORK.FY07_DATA
                                      18    WORK.FY07_DATA
                                      21    WORK.FY07_DATA
                                      22    WORK.FY07_DATA
                                      23    WORK.FY08_DATA
                                      24    WORK.FY08_DATA
                                      25    WORK.FY08_DATA
                                      26    WORK.FY08_DATA
                                      27    WORK.FY08_DATA
                                      28    WORK.FY08_DATA
                                      31    WORK.FY07_DATA
                                      32    WORK.FY07_DATA
                                      33    WORK.FY08_DATA
                                      34    WORK.FY08_DATA
                                      35    WORK.FY09_DATA
                                      36    WORK.FY09_DATA
                                      37    WORK.FY09_DATA
                                      38    WORK.FY09_DATA

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I want a data set of&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;                                     ssn     source_name

                                      15    WORK.FY07_DATA
                                      16    WORK.FY07_DATA
                                      17    WORK.FY07_DATA
                                      18    WORK.FY07_DATA
                                      23    WORK.FY08_DATA
                                      24    WORK.FY08_DATA
                                      25    WORK.FY08_DATA
                                      26    WORK.FY08_DATA
                                      27    WORK.FY08_DATA
                                      28    WORK.FY08_DATA
                                      35    WORK.FY09_DATA
                                      36    WORK.FY09_DATA
                                      37    WORK.FY09_DATA
                                      38    WORK.FY09_DATA
&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 25 Jul 2017 16:35:36 GMT</pubDate>
    <dc:creator>HB</dc:creator>
    <dc:date>2017-07-25T16:35:36Z</dc:date>
    <item>
      <title>Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379097#M91213</link>
      <description>&lt;P&gt;I have three datasets representing people by year.They should be unique.&amp;nbsp; If a person in in year1, I don't want them in year2 or year3.&amp;nbsp; If in year2, not in 1 or 3, if in 3 not in 1 or 2.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Unfortunately in the real world, people manage to cross neat dataset boundaries and show up wherever.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I created a field in each record to mark the year of the record and did this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=fy07_data;
	by ssn;
run;

proc sort data=fy08_data;
	by ssn;
run;

proc sort data=fy09_data;
	by ssn;
run;

data data_07_09;
	merge 	        fy07_data (in=my07) 
			fy08_data (in=my08)
			fy09_data (in=my09);
	by ssn;
run;
* I was going to do something with the in's and didn't; &lt;BR /&gt;
data data_07_09_final;
	set data_07_09;
	if myyear07flag = 1 and myyear08flag = 1 then delete;
	if myyear07flag = 1 and myyear09flag = 1 then delete;
	if myyear08flag = 1 and myyear09flag = 1 then delete;
	myawardyear = '00';
	if myyear07flag = 1 then myawardyear = '07';
	if myyear08flag = 1 then myawardyear = '08';
	if myyear09flag = 1 then myawardyear = '09';
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;after which I drop the myyearXXflag fields befroe outputting the dataset.&amp;nbsp; This seems somehow inelegant.&amp;nbsp; It just feels like the process could be terser and cleaner.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there a better way to accomplish this or is this as good as anything?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Tue, 25 Jul 2017 15:58:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379097#M91213</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2017-07-25T15:58:50Z</dc:date>
    </item>
    <item>
      <title>Re: Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379099#M91215</link>
      <description>&lt;P&gt;I'd probably stack them together, sorting by year, removing duplicates using PROC SORT NODUPKEY and then transposing if desired. Not sure there's a reason to at this point. You don't have any other variables in the dataset? If they had the same variables they'd be overwritten.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data combined;
set fy07_data fy08_data fy09_data indsname=source;
source_name = source;
run;

proc sort data=combined; 
by id source_name;
run;

proc sort data=combined nodupkey;
by id;
run;

*transpose if desired to 'wide' format, but assuming the data is the same what happens to the other variables?;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 25 Jul 2017 15:57:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379099#M91215</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2017-07-25T15:57:38Z</dc:date>
    </item>
    <item>
      <title>Re: Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379119#M91221</link>
      <description>&lt;P&gt;The indsname option is new to me and super cool, but this doesn't do what I want. I want to delete records common between the tables.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So with&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data fy07_data;
   input ssn;
   datalines;
11
12
13
14
15
16
17
18
21
22
31
32
;

data fy08_data;
   input ssn;
   datalines;
11
12
21
22
23
24
25
26
27
28
33
34
;

data fy09_data;
   input ssn;
   datalines;
13
14
21
22
31
32
33
34
35
36
37
38
;


data combined;
set fy07_data fy08_data fy09_data indsname=source;
source_name = source;
run;

proc sort data=combined; 
by source_name ssn;
run;

proc sort data=combined nodupkey;
by ssn;
run;


proc print data=combined noobs; 
 var ssn source_name;
run;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I get a data set of&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;                                     ssn     source_name

                                      11    WORK.FY07_DATA
                                      12    WORK.FY07_DATA
                                      13    WORK.FY07_DATA
                                      14    WORK.FY07_DATA
                                      15    WORK.FY07_DATA
                                      16    WORK.FY07_DATA
                                      17    WORK.FY07_DATA
                                      18    WORK.FY07_DATA
                                      21    WORK.FY07_DATA
                                      22    WORK.FY07_DATA
                                      23    WORK.FY08_DATA
                                      24    WORK.FY08_DATA
                                      25    WORK.FY08_DATA
                                      26    WORK.FY08_DATA
                                      27    WORK.FY08_DATA
                                      28    WORK.FY08_DATA
                                      31    WORK.FY07_DATA
                                      32    WORK.FY07_DATA
                                      33    WORK.FY08_DATA
                                      34    WORK.FY08_DATA
                                      35    WORK.FY09_DATA
                                      36    WORK.FY09_DATA
                                      37    WORK.FY09_DATA
                                      38    WORK.FY09_DATA

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I want a data set of&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;                                     ssn     source_name

                                      15    WORK.FY07_DATA
                                      16    WORK.FY07_DATA
                                      17    WORK.FY07_DATA
                                      18    WORK.FY07_DATA
                                      23    WORK.FY08_DATA
                                      24    WORK.FY08_DATA
                                      25    WORK.FY08_DATA
                                      26    WORK.FY08_DATA
                                      27    WORK.FY08_DATA
                                      28    WORK.FY08_DATA
                                      35    WORK.FY09_DATA
                                      36    WORK.FY09_DATA
                                      37    WORK.FY09_DATA
                                      38    WORK.FY09_DATA
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 25 Jul 2017 16:35:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379119#M91221</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2017-07-25T16:35:36Z</dc:date>
    </item>
    <item>
      <title>Re: Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379358#M91310</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data fy07_data;
   input ssn;
   datalines;
11
12
13
14
15
16
17
18
21
22
31
32
;

data fy08_data;
   input ssn;
   datalines;
11
12
21
22
23
24
25
26
27
28
33
34
;

data fy09_data;
   input ssn;
   datalines;
13
14
21
22
31
32
33
34
35
36
37
38
;


data combined;
set fy07_data fy08_data fy09_data indsname=source;
source_name = source;
run;
proc sql;
select distinct source_name,ssn
 from combined
  group by ssn 
   having count(distinct source_name)=1;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 26 Jul 2017 12:23:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379358#M91310</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-07-26T12:23:41Z</dc:date>
    </item>
    <item>
      <title>Re: Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379361#M91312</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc sql;
(
select * from fy07_data 
except 
(select * from fy08_data union select * from fy09_data )
)

union

(
select * from  fy08_data
except 
(select * from fy07_data union select * from fy09_data )
)

union

(
select * from  fy09_data
except 
(select * from fy08_data union select * from fy07_data )
)
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 26 Jul 2017 12:31:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/379361#M91312</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-07-26T12:31:39Z</dc:date>
    </item>
    <item>
      <title>Re: Eliminating duplicate records across three datasets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/384309#M91749</link>
      <description>&lt;P&gt;Okay, with that much unioning and excepting going on, I feel better about my&lt;/P&gt;
&lt;PRE class=" language-sas"&gt;&lt;CODE class="  language-sas"&gt;&lt;SPAN class="token keyword"&gt;        if&lt;/SPAN&gt; myyear07flag &lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1&lt;/SPAN&gt; and myyear08flag &lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1&lt;/SPAN&gt; &lt;SPAN class="token keyword"&gt;then&lt;/SPAN&gt; &lt;SPAN class="token function"&gt;delete&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;
	&lt;SPAN class="token keyword"&gt;if&lt;/SPAN&gt; myyear07flag &lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1&lt;/SPAN&gt; and myyear09flag &lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1&lt;/SPAN&gt; &lt;SPAN class="token keyword"&gt;then&lt;/SPAN&gt; &lt;SPAN class="token function"&gt;delete;&lt;/SPAN&gt;&lt;/CODE&gt; &lt;/PRE&gt;
&lt;P&gt;stuff.&amp;nbsp; Lol. &lt;/P&gt;</description>
      <pubDate>Mon, 31 Jul 2017 15:11:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Eliminating-duplicate-records-across-three-datasets/m-p/384309#M91749</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2017-07-31T15:11:25Z</dc:date>
    </item>
  </channel>
</rss>

