<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic String comparison-Jaccard distance in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233127#M42553</link>
    <description>&lt;PRE&gt;I have a Jaccard score from comparing two strings to check the similarity/dissimilarity using R. &lt;BR /&gt;I tried to replicate the same in SAS but couldn't achieve it. &lt;BR /&gt;Can you please let me know if there is a function/way to get a Jaccard score in SAS for  &lt;BR /&gt;comparing two strings "Krishna" and "Krishna Reddy"&lt;BR /&gt;&lt;BR /&gt;I tried to replicate in SAS with proc distance but no luck.&lt;BR /&gt;&lt;BR /&gt;in R&lt;BR /&gt;library(stringdist)&lt;BR /&gt;stringdist('krishna', 'krishna reddy', method='jaccard')&lt;BR /&gt;&lt;BR /&gt;result is 0.3636&lt;BR /&gt;&lt;BR /&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 04 Nov 2015 18:04:00 GMT</pubDate>
    <dc:creator>Krishnam</dc:creator>
    <dc:date>2015-11-04T18:04:00Z</dc:date>
    <item>
      <title>String comparison-Jaccard distance</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233127#M42553</link>
      <description>&lt;PRE&gt;I have a Jaccard score from comparing two strings to check the similarity/dissimilarity using R. &lt;BR /&gt;I tried to replicate the same in SAS but couldn't achieve it. &lt;BR /&gt;Can you please let me know if there is a function/way to get a Jaccard score in SAS for  &lt;BR /&gt;comparing two strings "Krishna" and "Krishna Reddy"&lt;BR /&gt;&lt;BR /&gt;I tried to replicate in SAS with proc distance but no luck.&lt;BR /&gt;&lt;BR /&gt;in R&lt;BR /&gt;library(stringdist)&lt;BR /&gt;stringdist('krishna', 'krishna reddy', method='jaccard')&lt;BR /&gt;&lt;BR /&gt;result is 0.3636&lt;BR /&gt;&lt;BR /&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 04 Nov 2015 18:04:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233127#M42553</guid>
      <dc:creator>Krishnam</dc:creator>
      <dc:date>2015-11-04T18:04:00Z</dc:date>
    </item>
    <item>
      <title>Re: String comparison-Jaccard distance</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233129#M42554</link>
      <description>&lt;P&gt;I can't find a quick way to get a Jaccard score, but SAS has two functions related to edit distance, COMPGED and COMPLEV, that may work for your purpose.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _null_;
   length x y $ 50;
   x = 'krishna';
   y = 'krishna reddy';
   compg = compged(x,y); 
   compl = complev(x,y);
   put compg= compl=;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The additional function Call Compcost can be used to assign different weights to operations used in COMPGED.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 04 Nov 2015 18:16:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233129#M42554</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2015-11-04T18:16:12Z</dc:date>
    </item>
    <item>
      <title>Re: String comparison-Jaccard distance</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233130#M42555</link>
      <description>Thanks! I am aware of these Levenshtein distance functions.&lt;BR /&gt;&lt;BR /&gt;I am specifically looking for Jaccard to achieve the mentioned example through SAS.</description>
      <pubDate>Wed, 04 Nov 2015 18:23:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233130#M42555</guid>
      <dc:creator>Krishnam</dc:creator>
      <dc:date>2015-11-04T18:23:48Z</dc:date>
    </item>
    <item>
      <title>Re: String comparison-Jaccard distance</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233237#M42583</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%macro kshingling
(string
,k=5
,out=&amp;amp;sysmacroname.
)
;

data &amp;amp;out.;
   string = strip(prxchange('s#\s# #',-1,symget('string')));
   do _n_ = 1 to lengthn(string)-&amp;amp;k.+1;
      ngram = substr(string,_n_,&amp;amp;k.);
	  output;
   end;
run;

%mend;

%macro jaccard
(string1
,string2
)
;

%kshingling(&amp;amp;string1.,k=2,out=s1)
%kshingling(&amp;amp;string2.,k=2,out=s2)

proc append base=s1 data=s2; run;

proc freq data=s1 noprint;
   tables string*ngram / out=s2;
run;

proc transpose data=s2 out=s1(drop=_name_ _label_); 
by string notsorted;
var count;
id ngram;
run;

proc stdize data=s1 out=s2 missing=0 reponly;
var _numeric_;
run;

proc distance data=s2 method=jaccard absent=0 out=s1; 
var anominal(_numeric_);
id string;
run;

proc sql;
select &amp;amp;string1. as jaccard
  into :jaccard
  from s1
 where string="&amp;amp;string2.";
quit;
%mend;

%jaccard(krishna,krishna reddy);run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This is put together quickly. &amp;nbsp;It does not match the results from the R package for your example, but it does match most other Jaccard similarity metrics I have used. &amp;nbsp;You can adjust the value of k to get different values. &amp;nbsp;I believe setting k=5 will give you approximately the result in R (0.333....)&lt;/P&gt;</description>
      <pubDate>Thu, 05 Nov 2015 14:30:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233237#M42583</guid>
      <dc:creator>FriedEgg</dc:creator>
      <dc:date>2015-11-05T14:30:06Z</dc:date>
    </item>
    <item>
      <title>Re: String comparison-Jaccard distance</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233778#M42710</link>
      <description>&lt;P&gt;Thank you!&lt;/P&gt;</description>
      <pubDate>Mon, 09 Nov 2015 05:57:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-comparision-Jaccard-distance/m-p/233778#M42710</guid>
      <dc:creator>Krishnam</dc:creator>
      <dc:date>2015-11-09T05:57:58Z</dc:date>
    </item>
  </channel>
</rss>

