<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Comparing a row in a dataset with the next row in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625210#M184256</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/107435"&gt;@harrylui&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Something like this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data testing;
infile datalines dlm="\";
input Name :$80. Category :$40.;
datalines;
A Cote\restaurant
A Cote\restaurant
7-ELEVEN\service
7ELEVEN\service
A BIG BIG\restaurant
A BIG\restaurant
A CUT ABOVEPARKSON PVL\restaurant
A CUT ABOVANGSAR VIL\restaurant
A DRIVING SCHOOL\service
A DRIVING\service
A LI SHAN XI DING JI\shop
A LI SHAN\shop
A PLUS BEAUTY MEDICAL\medical
A PLUS BEAUTY HOUSE\medical
;
run;

data testing2(drop=_:);
  set testing;
  /* LAG runs on every row, before Name/Category are overwritten below,
     so each call hands back the previous row's values as they were read in;
     a replacement made on one row is therefore not carried to later rows. */
  _prevName = lag(Name);
  _prevCategory = lag(Category);
  /* Generalized edit distances between this row and the previous row. */
  dif = compged(Name, _prevName);
  dif2 = compged(Category, _prevCategory);
  /* Both distances at or below 70: take over the previous row's values and flag the row. */
  if (dif le 70) and (dif2 le 70) then do;
    Name = _prevName;
    Category = _prevCategory;
    match = "ok";
  end;
run;
proc print;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;All the best&lt;/P&gt;&lt;P&gt;Bart&lt;/P&gt;</description>
    <pubDate>Mon, 17 Feb 2020 09:16:21 GMT</pubDate>
    <dc:creator>yabwon</dc:creator>
    <dc:date>2020-02-17T09:16:21Z</dc:date>
    <item>
      <title>Comparing a row in a dataset with the next row</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625201#M184253</link>
      <description>&lt;P&gt;Good day,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;I have a data set like the following :&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;data testing;&lt;BR /&gt;infile datalines dlm="09"x;&lt;BR /&gt;input Name :$80. Category :$40.;&lt;BR /&gt;datalines;&lt;BR /&gt;A Cote restaurant&lt;BR /&gt;A Cote restaurant&lt;BR /&gt;7-ELEVEN service &lt;BR /&gt;7ELEVEN service&lt;BR /&gt;A BIG BIG restaurant&lt;BR /&gt;A BIG restaurant&lt;BR /&gt;A CUT ABOVEPARKSON PVL restaurant&lt;BR /&gt;A CUT ABOVANGSAR VIL restaurant&lt;BR /&gt;A DRIVING SCHOOL service&lt;BR /&gt;A DRIVING service&lt;BR /&gt;A LI SHAN XI DING JI shop&lt;BR /&gt;A LI SHAN shop&lt;BR /&gt;A PLUS BEAUTY MEDICAL medical&lt;BR /&gt;A PLUS BEAUTY HOUSE medical&lt;BR /&gt;;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am writing a program to group similar merchants under one name&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;here is my program, but I am not able to solve it&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;data testing2;&lt;BR /&gt;set testing;&lt;BR /&gt;dif=compged(Name,lag(name));&lt;BR /&gt;dif2=compged(Category,lag(Category));&lt;BR /&gt;if dif&amp;lt;=70 and dif2 &amp;lt;=70 then match="ok";&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;what I am trying to do is&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;if N+1 name and&amp;nbsp;Category are both similar to N&amp;nbsp;&lt;/P&gt;
&lt;P&gt;then assign the same name to N+1 as N&lt;/P&gt;
&lt;P&gt;and keep comparing the data using the following logic:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;row_n to row_n+1&lt;/P&gt;
&lt;P&gt;row_n+1 to row_n+2&lt;/P&gt;
&lt;P&gt;row_n+2 to row_n+3&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;can someone help or give me some insight?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;what I am expecting is&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;A Cote&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; restaurant&lt;BR /&gt;A Cote&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; restaurant&lt;BR /&gt;7-ELEVEN&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; service &lt;BR /&gt;7-ELEVEN&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; service&lt;BR /&gt;A BIG BIG&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;restaurant&lt;BR /&gt;A BIG BIG&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;restaurant&lt;BR /&gt;A CUT ABOVEPARKSON PVL&amp;nbsp; &amp;nbsp; restaurant&lt;BR /&gt;A CUT ABOVANGSAR VIL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;restaurant&lt;BR /&gt;A DRIVING SCHOOL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; service&lt;BR /&gt;A DRIVING SCHOOL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; service&lt;BR /&gt;A LI SHAN XI DING JI&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;shop&lt;BR /&gt;A LI SHAN&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;shop&lt;BR /&gt;A PLUS BEAUTY MEDICAL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;medical&lt;BR /&gt;A PLUS BEAUTY HOUSE&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; medical&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;thanks in advance&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Harry&lt;/P&gt;</description>
      <pubDate>Mon, 17 Feb 2020 08:15:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625201#M184253</guid>
      <dc:creator>harrylui</dc:creator>
      <dc:date>2020-02-17T08:15:27Z</dc:date>
    </item>
    <item>
      <title>Re: Comparing a row in a dataset with the next row</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625208#M184255</link>
      <description>&lt;P&gt;HI&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/107435"&gt;@harrylui&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I suggest that you proceed in two main steps:&lt;/P&gt;
&lt;P&gt;1/ group similar categories and rename them accordingly (NB: in your entry dataset, I have introduced some misspellings in the categories to test the code)&lt;/P&gt;
&lt;P&gt;2/&amp;nbsp;group similar names inside these 'clean' categories and rename them accordingly&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have indicated the assumption I made to determine what is the 'true' value to impute. You can change them for example by sorting data by length of string, etc.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this helps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;My best,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data testing;
	infile datalines dlm="09"x;
	input Name :$80. Category :$40.;
	datalines;
A Cote	restaurant
A Cote	restaurant
7-ELEVEN	service 
7ELEVEN	service
A BIG BIG	restaurant
A BIG	restaurant
A CUT ABOVEPARKSON PVL	restaurat
A CUT ABOVANGSAR VIL	restaurant
A DRIVING SCHOOL	service
A DRIVING	services
A LI SHAN XI DING JI	shop
A LI SHAN	sho
A PLUS BEAUTY MEDICAL	medical
A PLUS BEAUTY HOUSE	medilal
;
run;

/* Assumption 1: the 'true' category is the first in alphabetical order,
   so the rows are ordered by category before grouping. */
proc sort data=testing out=testing1;
	by category;
run;

/* Step1 : group similar categories.
   New_category is retained from row to row and is refreshed only when the
   current Category is more than 100 COMPGED units away from the previous
   row's Category; otherwise the row keeps the retained value. */
data testing2 (drop=_prev_category _distance);
	set testing1;
	length New_category $ 40;
	retain New_category;
	/* LAG is called on every row so it always returns the previous row's Category. */
	_prev_category = lag(Category);
	_distance = compged(Category, _prev_category);
	if _distance gt 100 then New_category = Category;
run;

/* Step2: Sort data by these 'clean' categories */
proc sort data=testing2 out=testing3;
	/* Sort on New_category (the cleaned value), not the raw Category:
	   the following data step uses BY New_category, which needs the rows
	   in New_category order, and the descending Name order has to apply
	   within each cleaned group rather than within each raw spelling. */
	by New_category descending Name; /* Assumption 2: the 'true' name is the first in descending alphabetical order */
run;

/* Step3 : group similar names.
   New_Name is retained from row to row. It is reset to the current Name at
   the start of every New_category BY group, or when the current Name is more
   than 100 COMPGED units away from the previous row's Name. */
data testing4 (drop=_prev_name _distance);
	set testing3;
	length New_Name $ 80;
	by New_category;
	retain New_Name;
	/* LAG is called on every row so it always returns the previous row's Name. */
	_prev_name = lag(Name);
	_distance = compged(Name, _prev_name);
	if first.New_category or _distance gt 100 then New_Name = Name;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 17 Feb 2020 09:01:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625208#M184255</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-02-17T09:01:59Z</dc:date>
    </item>
    <item>
      <title>Re: Comparing a row in a dataset with the next row</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625210#M184256</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/107435"&gt;@harrylui&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Something like this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data testing;
infile datalines dlm="\";
input Name :$80. Category :$40.;
datalines;
A Cote\restaurant
A Cote\restaurant
7-ELEVEN\service
7ELEVEN\service
A BIG BIG\restaurant
A BIG\restaurant
A CUT ABOVEPARKSON PVL\restaurant
A CUT ABOVANGSAR VIL\restaurant
A DRIVING SCHOOL\service
A DRIVING\service
A LI SHAN XI DING JI\shop
A LI SHAN\shop
A PLUS BEAUTY MEDICAL\medical
A PLUS BEAUTY HOUSE\medical
;
run;

data testing2(drop=_:);
  set testing;
  /* LAG runs on every row, before Name/Category are overwritten below,
     so each call hands back the previous row's values as they were read in;
     a replacement made on one row is therefore not carried to later rows. */
  _prevName = lag(Name);
  _prevCategory = lag(Category);
  /* Generalized edit distances between this row and the previous row. */
  dif = compged(Name, _prevName);
  dif2 = compged(Category, _prevCategory);
  /* Both distances at or below 70: take over the previous row's values and flag the row. */
  if (dif le 70) and (dif2 le 70) then do;
    Name = _prevName;
    Category = _prevCategory;
    match = "ok";
  end;
run;
proc print;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;All the best&lt;/P&gt;&lt;P&gt;Bart&lt;/P&gt;</description>
      <pubDate>Mon, 17 Feb 2020 09:16:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625210#M184256</guid>
      <dc:creator>yabwon</dc:creator>
      <dc:date>2020-02-17T09:16:21Z</dc:date>
    </item>
    <item>
      <title>Re: Comparing a row in a dataset with the next row</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625509#M184373</link>
      <description>&lt;P&gt;hi all,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I tried both of your programs but still encounter an issue&lt;BR /&gt;&lt;BR /&gt;if three or more observations pass the condition test,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;nothing will be changed after the second observation.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I believe the program&amp;nbsp; sets lag(name) at the first initialization and, while the program is running, lag(name) never changes, which causes this problem. I built a crude program, but how can I enhance it to make it repeat?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;hopefully, i can get all the observations =A DRIVING SCHOOL if i pass the dif test.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;data testing;&lt;BR /&gt;infile datalines dlm="\";&lt;BR /&gt;input Name :$80. Category :$40.;&lt;BR /&gt;datalines;&lt;BR /&gt;A DRIVING SCHOOL\service&lt;BR /&gt;A DRIVING\service&lt;BR /&gt;A DRIVINGA\service&lt;BR /&gt;A DRIVINGb\service&lt;BR /&gt;A DRIVINGc\service&lt;/P&gt;
&lt;P&gt;;&lt;BR /&gt;run;&lt;BR /&gt;data testing2;&lt;BR /&gt;set testing;&lt;BR /&gt;_Name = lag(Name);&lt;BR /&gt;_Category = lag(Category);&lt;BR /&gt;dif=compged(Name, _Name);&lt;BR /&gt;dif2=compged(Category, _Category);&lt;BR /&gt;if dif&amp;lt;=100 and dif2 &amp;lt;=100 then &lt;BR /&gt;do;&lt;BR /&gt;Name = _Name;&lt;BR /&gt;Category = _Category;&lt;BR /&gt;match = "ok";&lt;/P&gt;
&lt;P&gt;/**/&lt;BR /&gt;/* _Name = lag(Name);*/&lt;BR /&gt;/* _Category = lag(Category);*/&lt;BR /&gt;end;&lt;BR /&gt;run;&lt;BR /&gt;proc print;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;data testing3;&lt;BR /&gt;set testing2;&lt;BR /&gt;drop &lt;BR /&gt;_Name&lt;BR /&gt;_Category;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;data testing4;&lt;BR /&gt;set testing3;&lt;BR /&gt;_Name = lag(Name);&lt;BR /&gt;_Category = lag(Category);&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;data testing5;&lt;BR /&gt;set testing4;&lt;BR /&gt;_Name = lag(Name);&lt;BR /&gt;_Category = lag(Category);&lt;BR /&gt;dif=compged(Name, _Name);&lt;BR /&gt;dif2=compged(Category, _Category);&lt;BR /&gt;if dif&amp;lt;=100 and dif2 &amp;lt;=100 then &lt;BR /&gt;do;&lt;BR /&gt;Name = _Name;&lt;BR /&gt;Category = _Category;&lt;BR /&gt;match = "ok";&lt;/P&gt;
&lt;P&gt;/**/&lt;BR /&gt;/* _Name = lag(Name);*/&lt;BR /&gt;/* _Category = lag(Category);*/&lt;BR /&gt;end;&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Tue, 18 Feb 2020 04:11:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625509#M184373</guid>
      <dc:creator>harrylui</dc:creator>
      <dc:date>2020-02-18T04:11:11Z</dc:date>
    </item>
    <item>
      <title>Re: Comparing a row in a dataset with the next row</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625792#M184501</link>
      <description>thank you</description>
      <pubDate>Wed, 19 Feb 2020 07:33:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Comparing-a-row-in-a-dataset-with-the-next-row/m-p/625792#M184501</guid>
      <dc:creator>harrylui</dc:creator>
      <dc:date>2020-02-19T07:33:22Z</dc:date>
    </item>
  </channel>
</rss>

