<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic matching on the highest number digits, stopping if it finds a match in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852918#M337132</link>
    <description>&lt;P&gt;I have a data set with job titles and job codes that are 1 to 8 digits long. I have another data set with a study population that have job codes from 10 to 12 digits long. The first 1 to 8 digits will match the job codes from the first data set. My plan is to write code to match the first 8 digits. If it matches, done. If not, match the first 7 digits. If it matches, done. If not, match the first 6 digits. And keep going until it's done. My question is how to write this in SAS. Would the "or" operator achieve this way of matching on the highest number digits, stopping if it finds a match, and looking for the next number of digits if not? Should the code look like this?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc sql;
	create table job_titles as
	select A.*, B.job_title
	from study_population as A
	left join occupation_data as B
    	on A.industry = B.industry
		and A.seniority = A.seniority 
		and (upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,8))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,7))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,6))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,5))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,4))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,3))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,2))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,1)) );
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You'll notice that I am matching by industry and seniority first because the code will depend on those 2 things as well.&lt;/P&gt;</description>
    <pubDate>Mon, 09 Jan 2023 20:15:03 GMT</pubDate>
    <dc:creator>daufoi</dc:creator>
    <dc:date>2023-01-09T20:15:03Z</dc:date>
    <item>
      <title>matching on the highest number digits, stopping if it finds a match</title>
      <link>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852918#M337132</link>
      <description>&lt;P&gt;I have a data set with job titles and job codes that are 1 to 8 digits long. I have another data set with a study population that have job codes from 10 to 12 digits long. The first 1 to 8 digits will match the job codes from the first data set. My plan is to write code to match the first 8 digits. If it matches, done. If not, match the first 7 digits. If it matches, done. If not, match the first 6 digits. And keep going until it's done. My question is how to write this in SAS. Would the "or" operator achieve this way of matching on the highest number digits, stopping if it finds a match, and looking for the next number of digits if not? Should the code look like this?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc sql;
	create table job_titles as
	select A.*, B.job_title
	from study_population as A
	left join occupation_data as B
    	on A.industry = B.industry
		and A.seniority = A.seniority 
		and (upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,8))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,7))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,6))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,5))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,4))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,3))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,2))
			or upcase(A.job_code) = upcase(substr(B.Occupation_Code,1,1)) );
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You'll notice that I am matching by industry and seniority first because the code will depend on those 2 things as well.&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jan 2023 20:15:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852918#M337132</guid>
      <dc:creator>daufoi</dc:creator>
      <dc:date>2023-01-09T20:15:03Z</dc:date>
    </item>
    <item>
      <title>Re: matching on the highest number digits, stopping if it finds a match</title>
      <link>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852923#M337134</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;	and (upcase(A.job_code) =: upcase(substr(B.Occupation_Code,1,8))
			&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I think you can do just that in SAS (colon addition) but not 100% sure it works in SQL. Definitely test it out.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jan 2023 20:33:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852923#M337134</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2023-01-09T20:33:20Z</dc:date>
    </item>
    <item>
      <title>Re: matching on the highest number digits, stopping if it finds a match</title>
      <link>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852928#M337138</link>
      <description>&lt;P&gt;&lt;FONT face="courier new,courier"&gt;=:&lt;/FONT&gt; does not work in SQL, but &lt;FONT face="courier new,courier"&gt;eqt&lt;/FONT&gt; does work in SQL to see if the start of strings match.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;To &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/291526"&gt;@daufoi&lt;/a&gt; why do you need the upcase() function if the strings are digits?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;... and a.job_code eqt b.occupation_code&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 09 Jan 2023 20:48:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/852928#M337138</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2023-01-09T20:48:11Z</dc:date>
    </item>
    <item>
      <title>Re: matching on the highest number digits, stopping if it finds a match</title>
      <link>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/853073#M337202</link>
      <description>&lt;P&gt;So you want to find the best match. In that case, you will have to calculate the quality of the match in an inner query (code not tested):&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create table job_titles as
  select *
    from(
    select A.*,b.Jobtitle,
      case 
        when upcase(substr(A.jobcode,1,8))=upcase(substr(B.Occupation_Code,1,8)) then 8
        when upcase(substr(A.jobcode,1,7))=upcase(substr(B.Occupation_Code,1,7)) then 7        
        when upcase(substr(A.jobcode,1,6))=upcase(substr(B.Occupation_Code,1,6)) then 6
        when upcase(substr(A.jobcode,1,5))=upcase(substr(B.Occupation_Code,1,5)) then 5
        when upcase(substr(A.jobcode,1,4))=upcase(substr(B.Occupation_Code,1,4)) then 4
        when upcase(substr(A.jobcode,1,3))=upcase(substr(B.Occupation_Code,1,3)) then 3
        when upcase(substr(A.jobcode,1,2))=upcase(substr(B.Occupation_Code,1,2)) then 2
        when upcase(substr(A.jobcode,1,1))=upcase(substr(B.Occupation_Code,1,1)) then 1
        else 0 
        end as match        
	from study_population as A
	left join occupation_data as B
    	on A.industry = B.industry
	    and A.seniority = A.seniority 
     )
  where match&amp;gt;0
  group by industry,seniority
  having match=max(match)
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This will also put the MATCH column on the output, if you do not want that you will have to name the columns explicitly in the outer query, or use a DROP= option on the output table.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 10 Jan 2023 14:49:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/matching-on-the-highest-number-digits-stopping-if-it-finds-a/m-p/853073#M337202</guid>
      <dc:creator>s_lassen</dc:creator>
      <dc:date>2023-01-10T14:49:52Z</dc:date>
    </item>
  </channel>
</rss>

