<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Row by row comparison within group in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364892#M86600</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12151"&gt;@Jagadishkatam&lt;/a&gt;, thanks.&amp;nbsp;I have just tried your code with the following data but&amp;nbsp;the output still does not look correct. In particular, observations with the ID&amp;nbsp;22222222 and&amp;nbsp;22222223 are&amp;nbsp;selected, whereas they&amp;nbsp;should have not been selected.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
    format group industry id control;
    INPUT ID Industry Group $ Control;
    DATALINES; 
    11111111 2542 AB 1
    11111112 2542 AB 1
    22222222 8062 AB 1
    22222223 8062 AC 0
;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Wed, 07 Jun 2017 10:36:04 GMT</pubDate>
    <dc:creator>Yegen</dc:creator>
    <dc:date>2017-06-07T10:36:04Z</dc:date>
    <item>
      <title>Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364832#M86569</link>
      <description>&lt;P&gt;I am trying to move completely&amp;nbsp;to SAS (from other statistical softwares) but I am having a difficult time getting used to "row by row" operations. For example, in&amp;nbsp;this particular case,&amp;nbsp;I am trying to compare rows within a given group and keep&amp;nbsp;the rows that have not the identical&amp;nbsp;ID number but are within the same industry and group.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Here is my&amp;nbsp;sample:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
	INPUT ID Industry Group $ Control;
	DATALINES; 
	55203610 2542 AB 1
	16124130 8062 AB 1
	16124130 8062 AB 0
	80479512 2062 AB 1
	70321918 2062 AB 1
	17312410 6712 AB 1
	17311710 6712 AB 0
	74623123 2082 AC 1
	24245321 2082 AC 0
;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Here is what the output needs to&amp;nbsp;look like:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data work.want;
	INPUT ID Group $;
	DATALINES; 
	80479512 AB
	70321918 AB
	17311710 AB
	17312410 AB
	74623123 AC
	24245321 AC
;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is my code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=work.sample;
	by Group Industry ID;
quit;


DATA work.identified;
	set work.sample;
	by Group;
	if lag(ID)^=ID and lag(Industry)=Industry then keep=1; else keep=0;	
RUN;


DATA work.identified;
	set work.identified; 
	if keep=1;
Run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;With that&amp;nbsp;code, I am incorrectly identifying some of the&amp;nbsp;first observations within a given group and the lagged observations that have&amp;nbsp;a subsequent non-identical ID which&amp;nbsp;is within the same group and industry. For instance, with the code above, I have misidentified &lt;STRONG&gt;70321918&lt;/STRONG&gt;, &lt;STRONG&gt;17311710&lt;/STRONG&gt;, and &lt;STRONG&gt;24245321&lt;/STRONG&gt;.&lt;BR /&gt;&lt;BR /&gt;Any suggestions / help will be greatly appreciated.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 05:58:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364832#M86569</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T05:58:07Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364836#M86573</link>
      <description>&lt;P&gt;Next cofe will give you the wanted results:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
    format group industry id control;
    INPUT ID Industry Group $ Control;
    DATALINES; 
    55203610 2542 AB 1
    16124130 8062 AB 1
    16124130 8062 AB 0
    80479512 2062 AB 1
    70321918 2062 AB 1
    17312410 6712 AB 1
    17311710 6712 AB 0
    74623123 2082 AC 1
    24245321 2082 AC 0
;
RUN;
proc sort data=work.sample;
    by Group Industry ID;
quit;

DATA work.identified;
    set work.sample;
    by Group industry ID;
    retain lagID;
    if not (first.industry and last.industry);
    if first.id and last.id;
    if ID ^= lagID then output; 
    lagID=ID;
RUN; 
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 07 Jun 2017 06:30:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364836#M86573</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2017-06-07T06:30:44Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364839#M86575</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as select id,group from sample where industry in (select distinct industry from sample group by industry having count(Industry)&amp;gt;1) group by industry,id having count( Industry)&amp;lt;=1 order by group;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 07 Jun 2017 06:33:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364839#M86575</guid>
      <dc:creator>Jagadishkatam</dc:creator>
      <dc:date>2017-06-07T06:33:13Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364844#M86578</link>
      <description>&lt;P&gt;Thanks&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/88384"&gt;@Shmuel&lt;/a&gt;, this is a great way of identifying the observations! Although the code does exactly what I was looking for, I am not quiet sure why you had to create a new column of lagged variables that are not within the same group or industry. Could you please elaborate on that line?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 06:58:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364844#M86578</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T06:58:04Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364845#M86579</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12151"&gt;@Jagadishkatam&lt;/a&gt;, when I run your code on the following sample, the output does not look correct. In particular, the observation with the ID&amp;nbsp;17311711 should have not been selected. Do you know where the mistake is happening? I would also like to know how to write this code using PROC SQL. Thanks.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
    format group industry id control;
    INPUT ID Industry Group $ Control;
    DATALINES; 
    55203610 2542 AB 1
    16124130 8062 AB 1
    16124130 8062 AB 0
    80479512 2062 AB 1
    70321918 2062 AB 1 
    17312410 6712 AB 1
    17311710 6712 AB 0
    17311711 2082 AB 0
    74623123 2082 AC 1
    24245321 2082 AC 0
;
RUN;


proc sql;
  create table want as 
  select id,group 
  from sample 
  where industry in (select distinct industry from sample group by industry having count(Industry                     )&amp;gt;1) 
  group by industry,id 
  having count( Industry)&amp;lt;=1 
  order by group;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 07:05:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364845#M86579</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T07:05:02Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364847#M86580</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/144925"&gt;@Yegen&lt;/a&gt; wrote:&lt;BR /&gt;
&lt;P&gt;Thanks&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/88384"&gt;@Shmuel&lt;/a&gt;, this is a great way of identifying the observations! Although the code does exactly what I was looking for, I am not quiet sure why you had to create a new column of lagged variables that are not within the same group or industry. Could you please elaborate on that line?&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;LAG function gives sometimes false result when it is part of IF statement,&lt;/P&gt;
&lt;P&gt;therefore I retain the value into lagID for next itteration.&lt;/P&gt;
&lt;P&gt;You can drop this variable. I left it just for control.&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 07:12:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364847#M86580</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2017-06-07T07:12:23Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364852#M86583</link>
      <description>&lt;P&gt;Exactly, that's&amp;nbsp;where I was facing an issue. Thanks for this clarification &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/88384"&gt;@Shmuel&lt;/a&gt;, I have just added a few comments and a question to the code below:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.identified;
    set work.sample;
    by Group industry ID;
    retain lagID;
    if not (first.industry and last.industry); *Excludes unique observations within industry and group;
    if first.id and last.id; *Question: &lt;STRONG&gt;Not sure what this line exactly does. Does it state that if it's not a unique observation, then keep it? Then in the last two lines, the code checks whether or not the lagged observation is identical to the one considered?&lt;/STRONG&gt;;
    if ID ^= lagID then output;  *With the line below, this shifts the lagged value to the LagID column;
    lagID=ID;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 08:14:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364852#M86583</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T08:14:42Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364856#M86584</link>
      <description>&lt;P&gt;Relating to : &lt;STRONG&gt;&amp;nbsp;if first.id and last id;&lt;/STRONG&gt; &amp;nbsp; - &amp;nbsp;thus eliminating choosing from lines like:&lt;/P&gt;
&lt;PRE&gt;    16124130 8062 AB 1
    16124130 8062 AB 0&lt;/PRE&gt;
&lt;P&gt;where same id exists in same group and same industry in two rows.&lt;/P&gt;
&lt;P&gt;Without above IF selection, you will get the first row kept as ID differs from previous row ID (lagID).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Comment that IF staement and check results.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 08:26:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364856#M86584</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2017-06-07T08:26:03Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364858#M86585</link>
      <description>&lt;P&gt;I see. This makes A LOT of sense. So basically you are requiring&amp;nbsp;to remove the observations (which are within the same group and industry) whenever they have the same&amp;nbsp;ID (i.e., exactly as in the&amp;nbsp;exampe&amp;nbsp;you have given). Now I understand the code well! Thanks for all of your super&amp;nbsp;&lt;STRONG&gt;helpful&lt;/STRONG&gt; &lt;STRONG&gt;clarifications&lt;/STRONG&gt;&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/88384"&gt;@Shmuel&lt;/a&gt;.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 08:31:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364858#M86585</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T08:31:40Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364860#M86586</link>
      <description>&lt;P&gt;Thank you &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/144925"&gt;@Yegen&lt;/a&gt; for your response. I understood where i am going wrong. Please try this code&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as select id,group from sample where industry in 
(select Industry from (select count(Industry) as c, industry,id from sample group by id,industry having count(Industry)&amp;lt;=1) group by Industry having sum(c) &amp;gt;1) 
order by group;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 07 Jun 2017 08:55:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364860#M86586</guid>
      <dc:creator>Jagadishkatam</dc:creator>
      <dc:date>2017-06-07T08:55:57Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364892#M86600</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12151"&gt;@Jagadishkatam&lt;/a&gt;, thanks.&amp;nbsp;I have just tried your code with the following data but&amp;nbsp;the output still does not look correct. In particular, observations with the ID&amp;nbsp;22222222 and&amp;nbsp;22222223 are&amp;nbsp;selected, whereas they&amp;nbsp;should have not been selected.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
    format group industry id control;
    INPUT ID Industry Group $ Control;
    DATALINES; 
    11111111 2542 AB 1
    11111112 2542 AB 1
    22222222 8062 AB 1
    22222223 8062 AC 0
;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 07 Jun 2017 10:36:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364892#M86600</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T10:36:04Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364960#M86629</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA work.sample;
	INPUT ID Industry Group $ Control;
	DATALINES; 
	55203610 2542 AB 1
	16124130 8062 AB 1
	16124130 8062 AB 0
	80479512 2062 AB 1
	70321918 2062 AB 1
	17312410 6712 AB 1
	17311710 6712 AB 0
	74623123 2082 AC 1
	24245321 2082 AC 0
;
RUN;
proc sql;
select *
 from sample
  group by group,industry
   having count(distinct id) ne 1;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 07 Jun 2017 13:07:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/364960#M86629</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-06-07T13:07:11Z</dc:date>
    </item>
    <item>
      <title>Re: Row by row comparison within group</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/365173#M86702</link>
      <description>&lt;P&gt;That is such a novel&amp;nbsp;SQL code that identifies the correct observations! The distinct command is indeed very handy in this case. Thanks,&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 07 Jun 2017 20:30:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Row-by-row-comparison-within-group/m-p/365173#M86702</guid>
      <dc:creator>Yegen</dc:creator>
      <dc:date>2017-06-07T20:30:36Z</dc:date>
    </item>
  </channel>
</rss>

