<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Removing Duplicate Dates with conditions in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646453#M193385</link>
    <description>&lt;P&gt;Its not clear to me what exactly you want to check.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;if it is to select the last date just change:&lt;/P&gt;
&lt;P&gt;1) sort by &lt;STRONG&gt;date&lt;/STRONG&gt; instead by &lt;STRONG&gt;yy&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;2) select by if &lt;STRONG&gt;first.date&lt;/STRONG&gt; instead &lt;STRONG&gt;first.yy&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;or maybe you are looking for:&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&amp;nbsp; &amp;nbsp;if first.yy or miss &amp;gt; 1;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 10 May 2020 04:57:12 GMT</pubDate>
    <dc:creator>Shmuel</dc:creator>
    <dc:date>2020-05-10T04:57:12Z</dc:date>
    <item>
      <title>Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646377#M193335</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The datafile below records annual data on four firm characteristics (A,B,C and D) that has duplicate values in some years (namely 2014 for firm 1 and 2016 for firm 2). There should however be only one annual observation per firm. I need to remove the duplicate observation that occurs in those years. The criteria is to accept the most recent date if there are at least two variables (A,B, C or D) with non-missing values for that observation. Otherwise, accept the duplicate firm-year observation instead. &amp;nbsp;The example datafile is given below:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;Obs&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Firm&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Date&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;A&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;B&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;C&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;D&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;30/6/2014&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.6&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;3.6&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2014&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;3.1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;3.0&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.6&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;3&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2015&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.7&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.3&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.1&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;4&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2016&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.4&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.3&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.9&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.0&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2014&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.7&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;7.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.0&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;6&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2015&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.8&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;7.0&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.2&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;7&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/3/2016&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.9&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.4&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.9&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;8&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;30/9/2016&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;7.0&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Based on the critera, obs 2 should be chosen for firm 1 since it is the most recent date and there are more than two variables without missing values. &amp;nbsp;For firm 2, obs 7 should be chosen since obs 8 with the most recent date, has values missing for more than 2 variables, while values for all four variables are available for obs 7. &amp;nbsp;The resulting output should look like:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;Firm&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;Date&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;A&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;B&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;C&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;D&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2014&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;3.1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;3.0&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.6&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2015&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.7&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.3&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.1&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;1&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2016&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.4&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;2.3&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;1.9&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.0&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2014&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.7&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;7.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.0&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/12/2015&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.8&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;7.0&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.2&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;2&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;31/3/2016&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;5.9&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.4&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;6.5&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;&lt;P&gt;8.9&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried to adapt a code that I found (see below&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test1;
	set test;
	year=year(date);
run;
data test2;
	set test1;
	by firm year;
	retain pre_date; drop prev_date;
	if first.firm then do;
		prev_year=year;
		output;
		end; else;
	if year-prev_year&amp;gt;1 then do;
	output;
	end; else delete;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;but could not get the result I wanted. Would appreciate your help please. Thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 09 May 2020 13:11:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646377#M193335</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-09T13:11:27Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646380#M193338</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/327352"&gt;@sjm&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Just a clarification:&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;for firm 2, you says that "&lt;EM&gt;obs 7 should be chosen since obs 8 with the most recent date, has values missing for more than 2 variables&lt;/EM&gt;" -&amp;gt; but obs 7 has only 2 missing values.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Is the rule to consider the most recent date if &amp;nbsp;0 or 1 value max are missing?&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Best,&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 09 May 2020 13:22:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646380#M193338</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-09T13:22:41Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646383#M193341</link>
      <description>Thanks for your question. Sorry about that! It should be missing more than one. But the main idea of the rule is to try to use the most recent observation as much as possible unless that observation has too many variables with missing values, in which case the duplicate is chosen.</description>
      <pubDate>Sat, 09 May 2020 13:46:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646383#M193341</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-09T13:46:41Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646390#M193345</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/327352"&gt;@sjm&lt;/a&gt;&amp;nbsp; Assuming I understand your requirement, here is a solution&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
data have;
input Obs	Firm	Date :ddmmyy10.	A	B	C	D;
format date ddmmyy10.;
cards;
1	1	30/6/2014	1.6	.	3.6	.
2	1	31/12/2014	1.2	3.1	3	5.6
3	1	31/12/2015	1.7	2.5	2.3	5.1
4	1	31/12/2016	1.4	2.3	1.9	6
5	2	31/12/2014	6.2	5.7	7.2	8
6	2	31/12/2015	5.8	6.2	7	8.2
7	2	31/3/2016	5.9	6.4	6.5	8.9
8	2	30/9/2016	6.5	7	.	.
;


data want;
 do _n_=1 by 1 until(last.date);
  set have;
  by firm date groupformat;
  format date year.;
  array t A	B	C	D;
  if n(of t(*))&amp;gt;=_n then do;
    _iorc_=date;
	_n= n(of t(*));
  end;
 end;
 do _n_=1 to _n_;
  set have;
  if date=_iorc_ then output;
 end;
 _iorc_=.;
 drop _:;
run;
proc datasets lib=work noprint;
 modify want;
 format date ddmmyy10.;
run;

proc print noobs;run;

 &lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 09 May 2020 18:24:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646390#M193345</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2020-05-09T18:24:59Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646391#M193346</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/327352"&gt;@sjm&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Another question comes up to my mind: what if you have several observations for 1 company and a specific year, and all fo them have 2 or more missing values? Should we take the latest date?&lt;/P&gt;
&lt;P&gt;I have added some use cases in the datalines accordingly to clarify my purpose.&lt;/P&gt;
&lt;P&gt;Here is an SQL code that will give the result. For the mentioned use case, it takes the max value (cf. comment)&lt;/P&gt;
&lt;P&gt;Best,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	infile datalines dlm="," dsd missover;
	input Obs Firm Date:DDMMYY10. A B C D;
	format date DDMMYY10.;
	datalines;
1,1,30/6/2014,1.6,.,3.6,.
2,1,31/12/2014,1.2,3.1,3.0,5.6
3,1,31/12/2015,.,4,2.3,5.1
4,1,31/12/2016,1.4,2.3,1.9,6.0
5,2,31/12/2014,6.2,5.7,7.2,8.0
6,2,30/12/2015,.,.,7.0,.
7,2,31/12/2015,.,.,7.0,8.2
8,2,31/3/2016,5.9,6.4,6.5,8.9
9,2,28/4/2016,5.9,6.4,6.5,8.9
10,2,30/9/2016,6.5,7.0,7,.
;
run;

proc sql;
	
	create table want as

	/****************************/
	/*obs with 1 record per year*/
	select *
	from have
	group by Firm, year(date)
	having count(year(date))=1
	/****************************/

	union all corr

	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and all records with &amp;gt; 1 missing values*/	
	select a.*
	from have as a right join
		(select Firm, max(date) as Date2 format=DDMMYY10. /* Min Max ??*/
		from have
		group by Firm, year(date)
		having count(year(date))&amp;gt;1 and min(nmiss(A,B,C,D)) &amp;gt; 1) as b
		on a.Firm=b.Firm and a.Date=b.Date2
	/*********************************************************************/

	union all corr

	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and at least one record with 0 or 1 missing value*/
	select a.*
	from have as a right join
		(select Firm, max(date) as Date2 format=DDMMYY10.
		from (select *
			  from have
			  group by Firm, year(date)
	     	  having count(year(date))&amp;gt;1 and min(nmiss(A,B,C,D)) &amp;lt;= 1)
	     where nmiss(A,B,C,D)&amp;lt;= 1
	     group by Firm, year(date)) as b
	     on a.Firm=b.Firm and a.Date=b.Date2
	/*********************************************************************/
	
	 order by obs;
	 
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 09 May 2020 15:08:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646391#M193346</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-09T15:08:20Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646399#M193348</link>
      <description>&lt;P&gt;Check next code:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile cards truncover dlm='09'x;
input Obs Firm Date :ddmmyy10. A B C D;
yy = year(date);
format date ddmmyy10.;
cards;
1	1	30/6/2014	1.6	.	3.6	.
2	1	31/12/2014	1.2	3.1	3	5.6
3	1	31/12/2015	1.7	2.5	2.3	5.1
4	1	31/12/2016	1.4	2.3	1.9	6
5	2	31/12/2014	6.2	5.7	7.2	8
6	2	31/12/2015	5.8	6.2	7	8.2
7	2	31/3/2016	5.9	6.4	6.5	8.9
8	2	30/9/2016	6.5	7	.	.
;
run;

data temp;
 set have;
     miss = nmiss(a,b,c,d);
run;
proc sort data=temp; by descending yy miss; run;

data want;
 set temp;
  by descending yy;
     if first.yy or miss=0;
     drop yy;
run;
proc sort data=want; by date; run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 09 May 2020 15:27:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646399#M193348</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-09T15:27:19Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646451#M193384</link>
      <description>&lt;P&gt;Thank you for your suggestion.&amp;nbsp; Could I check whether the second part of the code that selects "obs with 1 record per year and at least one record with 0 or 1 missing value" also captures the situation where there are three same-year observations for the firm that all have no missing values.&amp;nbsp; The rule in such a case would be to pick the observation with max(date).&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 04:32:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646451#M193384</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-10T04:32:36Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646453#M193385</link>
      <description>&lt;P&gt;Its not clear to me what exactly you want to check.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;if it is to select the last date just change:&lt;/P&gt;
&lt;P&gt;1) sort by &lt;STRONG&gt;date&lt;/STRONG&gt; instead by &lt;STRONG&gt;yy&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;2) select by if &lt;STRONG&gt;first.date&lt;/STRONG&gt; instead &lt;STRONG&gt;first.yy&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;or maybe you are looking for:&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&amp;nbsp; &amp;nbsp;if first.yy or miss &amp;gt; 1;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 04:57:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646453#M193385</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T04:57:12Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646458#M193390</link>
      <description>&lt;P&gt;Thank you for your suggestion, Shmuel.&amp;nbsp; Regarding your question: what I wanted was to select one annual observation from two or more annual observations on the same firm. The criteria was based on, most importantly, the observation with the most recent date. However, if the observation did not have much information i.e. in terms of having variables with missing values, then I would use one of the other two dates within the same year which had more information i.e. more variables with non-missing values.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 05:45:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646458#M193390</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-10T05:45:15Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646460#M193391</link>
      <description>&lt;P&gt;The posted code fits the results you asked.&lt;/P&gt;
&lt;P&gt;In case there is some year not fitting what you want,&lt;/P&gt;
&lt;P&gt;please point to that line or add new lines to the test example.&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 05:58:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646460#M193391</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T05:58:54Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646471#M193399</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/327352"&gt;@sjm&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you for the quick reply.&lt;/P&gt;
&lt;P&gt;I have added such a case in the below data (Firm2, 2017-&amp;gt; records 11, 12, 13):&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	infile datalines dlm="," dsd missover;
	input Obs Firm Date:DDMMYY10. A B C D;
	format date DDMMYY10.;
	datalines;
1,1,30/6/2014,1.6,.,3.6,.
2,1,31/12/2014,1.2,3.1,3.0,5.6
3,1,31/12/2015,.,4,2.3,5.1
4,1,31/12/2016,1.4,2.3,1.9,6.0
5,2,31/12/2014,6.2,5.7,7.2,8.0
6,2,30/12/2015,.,.,7.0,.
7,2,31/12/2015,.,.,7.0,8.2
8,2,31/3/2016,5.9,6.4,6.5,8.9
9,2,28/4/2016,5.9,6.4,6.5,8.9
10,2,30/9/2016,6.5,7.0,7,.
11,2,28/1/2017,5,6.4,6.5,8.9
12,2,28/2/2017,9,6.4,6.5,8.9
13,2,28/4/2017,8,6.4,6.5,8.9
;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The programs correctly picks up record=13:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Capture d’écran 2020-05-10 à 09.59.44.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39247iE085A97F2FD2F4A2/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Capture d’écran 2020-05-10 à 09.59.44.png" alt="Capture d’écran 2020-05-10 à 09.59.44.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;Best, &lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 08:00:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646471#M193399</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-10T08:00:08Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646485#M193405</link>
      <description>&lt;P&gt;I suppose you want to select lines per year per farm, so I modified the program:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	infile datalines dlm="," dsd missover;
	input Obs Firm Date:DDMMYY10. A B C D;
	format date DDMMYY10.;
	yy = year(date);
	datalines;
1,1,30/6/2014,1.6,.,3.6,.
2,1,31/12/2014,1.2,3.1,3.0,5.6
3,1,31/12/2015,.,4,2.3,5.1
4,1,31/12/2016,1.4,2.3,1.9,6.0
5,2,31/12/2014,6.2,5.7,7.2,8.0
6,2,30/12/2015,.,.,7.0,.
7,2,31/12/2015,.,.,7.0,8.2
8,2,31/3/2016,5.9,6.4,6.5,8.9
9,2,28/4/2016,5.9,6.4,6.5,8.9
10,2,30/9/2016,6.5,7.0,7,.
11,2,28/1/2017,5,6.4,6.5,8.9
12,2,28/2/2017,9,6.4,6.5,8.9
13,2,28/4/2017,8,6.4,6.5,8.9
;
run;


data temp;
 set have;
     miss = nmiss(a,b,c,d);
run;
proc sort data=temp; by firm descending yy miss; run;

data want;
 set temp;
  by firm descending yy;
     if first.yy or miss=0;
     drop yy;
run;
proc sort data=want; by firm date; run;
     &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I don't understand why you selected for &lt;STRONG&gt;firm=2 year=2016&lt;/STRONG&gt; - line 10 and not lines 8 and 9.&lt;/P&gt;
&lt;P&gt;line 10 has one missing value while lines 8 and 9 are full.&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 10:17:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646485#M193405</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T10:17:45Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646486#M193406</link>
      <description>In case of any issue please post the result lined and the expected lines marking the difference and explain why you selected the specific line(s) instead.</description>
      <pubDate>Sun, 10 May 2020 10:21:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646486#M193406</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T10:21:19Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646501#M193417</link>
      <description>Line 10 was selected because it has the most recent date and it meets the criteria that there is at least more than 1 variable with a nonmissing value. More recent dates are preferred.</description>
      <pubDate>Sun, 10 May 2020 13:22:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646501#M193417</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-10T13:22:27Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646502#M193418</link>
      <description>&lt;P&gt;One way is to first figure out the minimum number of missing values for that firm for that year.&amp;nbsp; Then you can pick the most recent date with that number of missing values.&lt;/P&gt;
&lt;P&gt;Let's start by turning your listing into data.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  input firm date :ddmmyy. a b c d;
  format date yymmdd10.;
cards;
1 30/06/2014 1.6   . 3.6 .
1 31/12/2014 1.2 3.1   3 5.6
1 31/12/2015 1.7 2.5 2.3 5.1
1 31/12/2016 1.4 2.3 1.9 6
2 31/12/2014 6.2 5.7 7.2 8
2 31/12/2015 5.8 6.2   7 8.2
2 31/03/2016 5.9 6.4 6.5 8.9
2 30/09/2016 6.5   7   . .
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Now we can use an SQL step to make the new variables and calculate the min missing and also do the sorting.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table step1 as 
  select *
       , year(date) as yr
       , nmiss(a,b,c,d) as nmiss
       , min(calculated nmiss) as min_miss
  from have
  group by firm,yr
  order by firm,yr,date desc 
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Now we just need to find the first place where nmiss matches min_miss and output that record only.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set step1;
  by firm yr;
  if first.yr then found=0;
  if not found and min_miss=nmiss then do;
    found=1;
    output;
  end;
  retain found;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 10 May 2020 13:46:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646502#M193418</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2020-05-10T13:46:20Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646504#M193420</link>
      <description>&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/159"&gt;@Tom&lt;/a&gt;, I like your approach but your code &lt;BR /&gt;selects obs 9 (apr 2016) instead obs 10 (sep 2016)</description>
      <pubDate>Sun, 10 May 2020 14:11:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646504#M193420</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T14:11:19Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646506#M193422</link>
      <description>&lt;P&gt;Next code results fit your expected:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	infile datalines dlm="," dsd missover;
	input Obs Firm Date:DDMMYY10. A B C D;
	format date DDMMYY10.;
	yy = year(date);
	datalines;
1,1,30/6/2014,1.6,.,3.6,.
2,1,31/12/2014,1.2,3.1,3.0,5.6
3,1,31/12/2015,.,4,2.3,5.1
4,1,31/12/2016,1.4,2.3,1.9,6.0
5,2,31/12/2014,6.2,5.7,7.2,8.0
6,2,30/12/2015,.,.,7.0,.
7,2,31/12/2015,.,.,7.0,8.2
8,2,31/3/2016,5.9,6.4,6.5,8.9
9,2,28/4/2016,5.9,6.4,6.5,8.9
10,2,30/9/2016,6.5,7.0,7,.
11,2,28/1/2017,5,6.4,6.5,8.9
12,2,28/2/2017,9,6.4,6.5,8.9
13,2,28/4/2017,8,6.4,6.5,8.9
;
run;


data temp1;
 set have;
     miss = nmiss(a,b,c,d);
run;
proc sort data=temp1; by firm descending yy miss; run;

data temp2;
 set temp1;
  by firm descending yy;
     if first.yy and last.yy or miss &amp;lt; 3 
        then flag_sel = 1;
        else flag_sel=0;
run;
proc sort data=temp2; by firm date flag_sel; run;

data want;
 set temp2 (where=(flag_sel=1));
  by firm yy;
     if last.yy;&lt;BR /&gt;     drop flag_sel yy;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 10 May 2020 14:24:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646506#M193422</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2020-05-10T14:24:18Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646527#M193437</link>
      <description>&lt;P&gt;If you want to allow selecting an observations with more than the actual minimum number of missing values just change the logic of finding the first date.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;  if not found and max(1,min_miss)&amp;gt;=nmiss then do;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 10 May 2020 16:34:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646527#M193437</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2020-05-10T16:34:11Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646633#M193471</link>
      <description>&lt;P&gt;Thank you for your solution.&amp;nbsp; It works very well!&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried using the same code (modified slightly) to eliminate duplicates for a quarterly data set but ran into some difficulties and would be grateful for your help. &amp;nbsp;&lt;/P&gt;&lt;P&gt;Unlike the other dataset that had yearly observations with different dates, the duplicates in this quarterly dataset have &lt;U&gt;identical dates&lt;/U&gt;, so I was not able to use MAX(.) efficiently in the code.&lt;/P&gt;&lt;P&gt;For the quarterly dataset, the rule to select the correct date, is that the observation must have more variables with non-missing values. For duplicates with the same number of non-missing variables, the rule is to select the observation where DATAFQTR has the same calendar year as DATADATE. DATAFQTR is however a character variable.&lt;/P&gt;&lt;P&gt;I have attached the code and a sample of the SAS datafile that highlights these problem for two firms. They are identified by their respective GVKEY’s.&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Notice that firm 206883 has duplicate observations (17 and 18) that differ only for DATAFQTR. &amp;nbsp;Here, observation 17 should be chosen since its DATAFQTR has the same calendar year as DATADATE (i.e. 2009). &amp;nbsp;For observations 21 and 22 for the same firm, observation 21 should be chosen since it has more variables with non-missing values. &amp;nbsp;Here, DATAFTQR has the same calendar date as DATADATE.&lt;/P&gt;&lt;P&gt;Firm 212151 has duplicate observations 49 and 50.&amp;nbsp; In this case, observation 49 should be chosen since it has more variables with non-missing values even if DATAFQTR does not have the same calendar date as DATAFQTR.&lt;/P&gt;&lt;P&gt;The variable DATAFQTR could possibly help to solve this problem but I am not sure how to use it here.&amp;nbsp; Thanks.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
	
	create table want as

	/****************************/
	/*obs with 1 record per year*/
	select *
	from have
	group by Firm, year(date)
	having count(year(date))=1
	/****************************/

	union all corr

	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and all records with &amp;gt; 1 missing values*/	
	select a.*
	from have as a right join
		(select Firm, max(date) as Date2 format=DDMMYY10. /* Min Max ??*/
		from have
		group by Firm, year(date)
		having count(year(date))&amp;gt;1 and min(nmiss(A,B,C,D)) &amp;gt; 1) as b
		on a.Firm=b.Firm and a.Date=b.Date2
	/*********************************************************************/

	union all corr

	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and at least one record with 0 or 1 missing value*/
	select a.*
	from have as a right join
		(select Firm, max(date) as Date2 format=DDMMYY10.
		from (select *
			  from have
			  group by Firm, year(date)
	     	  having count(year(date))&amp;gt;1 and min(nmiss(A,B,C,D)) &amp;lt;= 1)
	     where nmiss(A,B,C,D)&amp;lt;= 1
	     group by Firm, year(date)) as b
	     on a.Firm=b.Firm and a.Date=b.Date2
	/*********************************************************************/
	
	 order by obs;
	 
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 11 May 2020 11:09:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646633#M193471</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-11T11:09:50Z</dc:date>
    </item>
    <item>
      <title>Re: Removing Duplicate Dates with conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646641#M193474</link>
      <description>&lt;P&gt;Sorry, the wrong SAS code was attached. Please use this one instead. Thanks.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;	
	create table temp5 as
	/****************************/
	/*obs with 1 record per year*/
	select *
	from testfile
	group by gvkey, year(datadate), qtr(datadate)
	having count(qtr(datadate))=1
	/****************************/
	union all corr
	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and all records with &amp;gt; 1 missing values*/	
	select a.*
	from testfile as a right join
		(select gvkey, max(datadate) as Date2 format=DDMMYY10. /* Min Max ??*/
		from testfile
		group by gvkey, year(datadate), qtr(datadate)
		having count(qtr(datadate))&amp;gt;1 and min(nmiss(atq,cheq,ltq,revtq)) &amp;gt; 1) as b
		on a.gvkey=b.gvkey and a.datadate=b.Date2
	/*********************************************************************/
	union all corr
	/*********************************************************************/
	/*obs with &amp;gt;1 record per year and at least one record with 0 or 1 missing value*/
	select a.*
	from testfile as a right join
		(select gvkey, max(datadate) as Date2 format=DDMMYY10.
		from (select *
			  from testfile
			  group by gvkey, year(datadate), qtr(datadate)
	     	  having count(qtr(datadate))&amp;gt;1 and min(nmiss(atq,cheq,ltq,revtq)) &amp;lt;= 1)
	     where nmiss(atq,cheq,ltq,revtq)&amp;lt;= 1
	     group by gvkey, year(datadate), qtr(datadate)) as b
	     on a.gvkey=b.gvkey and a.datadate=b.Date2;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 11 May 2020 11:29:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-Duplicate-Dates-with-conditions/m-p/646641#M193474</guid>
      <dc:creator>sjm</dc:creator>
      <dc:date>2020-05-11T11:29:35Z</dc:date>
    </item>
  </channel>
</rss>

