<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: remove  duplicates with out sort in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748331#M235041</link>
    <description>&lt;P&gt;That's an interesting question for an interview.&amp;nbsp; Any other good questions they asked you?&lt;/P&gt;</description>
    <pubDate>Wed, 16 Jun 2021 12:59:48 GMT</pubDate>
    <dc:creator>Quentin</dc:creator>
    <dc:date>2021-06-16T12:59:48Z</dc:date>
    <item>
      <title>remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748282#M235011</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input Empname $ ;
datalines;
ram
sita
ram
arjun
ram
sita
;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The interviewer asked me how to remove duplicates without sorting.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Using the dataset above, he said not to change the order of the Empname values, but to remove the duplicates using only a DATA step method.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 16 Jun 2021 09:30:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748282#M235011</guid>
      <dc:creator>BrahmanandaRao</dc:creator>
      <dc:date>2021-06-16T09:30:26Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748283#M235012</link>
      <description>&lt;P&gt;You can do that in a single pass using the hash object like this&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input Empname $ ;
datalines;
ram
sita
ram
arjun
ram
sita
;
run;

data want;
   if _N_ = 1 then do;
      dcl hash h();
      h.definekey("Empname");
      h.definedone();
   end;

   set test;

   if h.add() = 0;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 16 Jun 2021 09:33:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748283#M235012</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2021-06-16T09:33:42Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748314#M235033</link>
      <description>&lt;P&gt;You can just get a count of each name and only output where count is 1. Don't have to use hash object if you don't want to.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input Empname $ ;
datalines;
ram
sita
ram
arjun
ram
sita
;
run;

proc sort data=test;
	by empname;
run;

*get a count of each name;
data nodups;
	set test;
	by empname;
	if first.empname then count=0;
	count+1;
	*only output where count=1. if count &amp;gt;1 then it's a duplicate;
	if count=1 then output;
run;

proc print data=nodups;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 16 Jun 2021 11:50:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748314#M235033</guid>
      <dc:creator>tarheel13</dc:creator>
      <dc:date>2021-06-16T11:50:41Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748315#M235034</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input Empname $ ;
datalines;
ram
sita
ram
arjun
ram
sita
;
run;
data want;
 set test;
 array x{999} $ _temporary_;
 if Empname not in x then do;n+1;x{n}=Empname;output;end;
 drop n;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 16 Jun 2021 11:51:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748315#M235034</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-06-16T11:51:54Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748331#M235041</link>
      <description>&lt;P&gt;That's an interesting question for an interview.&amp;nbsp; Any other good questions they asked you?&lt;/P&gt;</description>
      <pubDate>Wed, 16 Jun 2021 12:59:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748331#M235041</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2021-06-16T12:59:48Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748336#M235042</link>
      <description>&lt;P&gt;A lot of times interviewers have asked what is the difference between nodup and nodupkey.&lt;/P&gt;</description>
      <pubDate>Wed, 16 Jun 2021 13:09:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748336#M235042</guid>
      <dc:creator>tarheel13</dc:creator>
      <dc:date>2021-06-16T13:09:48Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748356#M235050</link>
      <description>&lt;P&gt;That would require sorting?&lt;/P&gt;</description>
      <pubDate>Wed, 16 Jun 2021 14:40:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748356#M235050</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2021-06-16T14:40:35Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748360#M235051</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/265860"&gt;@BrahmanandaRao&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input Empname $ ;
datalines;
ram
sita
ram
arjun
ram
sita
;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The interviewer asked me how to remove duplicates without sorting.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Using the dataset above, he said not to change the order of the Empname values, but to remove the duplicates using only a DATA step method.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;In general a HASH (or some other method of remembering what values you have seen before) will do this.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  if _n_=1 then do;
   declare hash h();
   h.definekey('empname');
   h.definedone();
  end;
  set test ;
  if h.find() then do;
    output;
    h.add();
  end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;But if the data is too large then a HASH will not work (a HASH needs to fit in memory), and neither will any other DATA-step-only method.&amp;nbsp; In that case sorting is probably your best method, either directly using PROC SORT or implicitly using PROC SQL code.&amp;nbsp; Just add a new variable to record the original order so it can be recreated.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data temp;
  row+1;
  set test;
run;
proc sql ;
create table want as
  select empname
  from temp
  group by empname
  having row=min(row)
  order by row
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 16 Jun 2021 14:45:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748360#M235051</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2021-06-16T14:45:01Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748612#M235109</link>
      <description>Hi PeterClemmensen&lt;BR /&gt;Thank you for your solution</description>
      <pubDate>Thu, 17 Jun 2021 05:21:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748612#M235109</guid>
      <dc:creator>BrahmanandaRao</dc:creator>
      <dc:date>2021-06-17T05:21:40Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748613#M235110</link>
      <description>Hi Sharp &lt;BR /&gt;Thank you for your solution</description>
      <pubDate>Thu, 17 Jun 2021 05:22:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748613#M235110</guid>
      <dc:creator>BrahmanandaRao</dc:creator>
      <dc:date>2021-06-17T05:22:14Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748651#M235133</link>
      <description>&lt;P&gt;It is so interesting .&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;1    proc sort data=sashelp.class out=x nodup;
2    by sex;
3    run;

NOTE: There were 19 observations read from the data set SASHELP.CLASS.
NOTE: 0 duplicate observations were deleted.
NOTE: The data set WORK.X has 19 observations and 5 variables.
NOTE: PROCEDURE SORT used (Total process time):
      real time           0.22 seconds
      cpu time            0.03 seconds


4
5    proc sort data=sashelp.class out=y nodupkey;
6    by sex;
7    run;

NOTE: There were 19 observations read from the data set SASHELP.CLASS.
NOTE: 17 observations with duplicate key values were deleted.
NOTE: The data set WORK.Y has 2 observations and 5 variables.
NOTE: PROCEDURE SORT used (Total process time):
      real time           0.03 seconds
      cpu time            0.01 seconds



&lt;/PRE&gt;
&lt;P&gt;It seems that "nodup" is equivalent to "noduprecs".&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Do you have any other interesting questions?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 17 Jun 2021 11:47:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748651#M235133</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-06-17T11:47:53Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748682#M235149</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Have other interesting question more ?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Many years ago, I was asked in an interview "In the macro language, what are the differences between a keyword parameter vs positional parameter, and when/why would you use one rather than the other." I thought it was a good open-ended question. It can reveal not only someone's understanding of the rules of the macro language, but also how they think about its use, design issues, users, etc.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Sometimes I ask a similar question about SQL vs DATA step.&amp;nbsp; &amp;nbsp;Also ask "Do you prefer long narrow datasets or short wide datasets, why?"&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;When I interview, I tend to be less interested in knowledge of specific SAS features, and more interested in how they think about / understand the SAS language(s). Of course it varies by role.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I had a mentor who told me once that when he interviews people for entry-level SAS roles, the most important criterion he uses to judge them is not whether they can answer his tougher questions, it's whether they get excited when they hear the answers, and ask follow-up questions during the interview so that they can learn more.&lt;/P&gt;</description>
      <pubDate>Thu, 17 Jun 2021 13:53:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748682#M235149</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2021-06-17T13:53:20Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748864#M235248</link>
      <description>Quentin,&lt;BR /&gt; "In the macro language, what are the differences between a keyword parameter vs positional parameter, and when/why would you use one rather than the other."&lt;BR /&gt;&lt;BR /&gt;That question is tough. I have no answer. But for me, I prefer keyword, not positional.</description>
      <pubDate>Fri, 18 Jun 2021 11:51:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748864#M235248</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-06-18T11:51:05Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748870#M235254</link>
      <description>&lt;P&gt;Agree, it's a hard one.&amp;nbsp; I got it wrong during the interview, but they still hired me. : )&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I think I said "keyword parameters are clearer, because in the call you can see the parameter name and the value."&amp;nbsp; My future boss pointed out that keyword parameters allow default values, which is a big design difference for macro developers.&amp;nbsp; I don't think I knew about default values when he interviewed me.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So it's an open-ended question which allowed him to assess my understanding of the macro language, and also started a discussion about macro programming, which included chatting about positional vs keyword parameters from both the perspective of macro developer and macro user.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;As a developer, I use keyword parameters about 99% of the time.&amp;nbsp; One thing I like about positional parameters is that as a user, I can still choose to pass values to a positional parameter in a keyword style.&amp;nbsp; I often wish SAS functions had that flexibility.&amp;nbsp; I have to look up the parameter order for tranwrd just about every time I use it.&amp;nbsp; : )&lt;/P&gt;</description>
      <pubDate>Fri, 18 Jun 2021 12:40:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748870#M235254</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2021-06-18T12:40:04Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748873#M235255</link>
      <description>&lt;P&gt;Your boss is lucky. Now you are absolutely a SAS expert. I think your boss made the right decision to pick you.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;P.S. One reason I like keyword&amp;nbsp;&lt;SPAN&gt;parameters more is that you can change the order of macro parameters without worrying about passing the wrong value, and you can also omit keyword&amp;nbsp;parameters you don't need when you invoke a macro.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;But for&amp;nbsp;positional parameters, you have to follow the order of the macro parameters and cannot skip any of them.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Jun 2021 13:02:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748873#M235255</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-06-18T13:02:54Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748894#M235264</link>
      <description>&lt;P&gt;Thanks for your kind words.&amp;nbsp; This was a boss about 20 years ago.&amp;nbsp; I was very lucky to work for him.&amp;nbsp; Your boss is lucky too! (assuming you have a boss. : )&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Agree, if there are more than one or two parameters, I find trying to remember the order of positional parameters too hard.&lt;/P&gt;</description>
      <pubDate>Fri, 18 Jun 2021 14:08:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/748894#M235264</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2021-06-18T14:08:23Z</dc:date>
    </item>
    <item>
      <title>Re: remove  duplicates with out sort</title>
      <link>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/749024#M235319</link>
      <description>&lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; &lt;BR /&gt;Sure. I have a boss, since I am a workman.</description>
      <pubDate>Sat, 19 Jun 2021 12:03:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/remove-duplicates-with-out-sort/m-p/749024#M235319</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-06-19T12:03:43Z</dc:date>
    </item>
  </channel>
</rss>

