<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic SAS assign variables to a category based on approximate matches of strings in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820185#M34806</link>
    <description>&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;I have this dataset&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data have;
input ID  Case_Dx
1	S72080	
2	812	
3	S72100	
4	813.2	
5	820.2	
6	808.4	
7	805.6
8	S5251	
9	S220	
10	S320
11	806
12	S5262	
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to add a column to the dataset that groups the 'Case_dx' column into group A, B,C.&lt;/P&gt;&lt;P&gt;The groups are defined as follows&lt;BR /&gt;GroupA= Anything that starts with 'S720', 'S721' or 'S722' (up to 8 characters)&lt;BR /&gt;GroupB= Anything that starts with 'S525' or 'S526'&amp;nbsp;(up to 8 characters)&lt;BR /&gt;GroupC= Anything that starts with '805', '806', 'S220', 'S320' or 'S221'&amp;nbsp;(up to 8 characters)&lt;/P&gt;&lt;P&gt;I usually use this&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc format;
    value $ Casetype
        'S720', 'S721', 'S722' =  'A'
		'S525', 'S526'  =  'B'
        '805', '806', 'S220', 'S320', 'S221'  =  'C';
	run;

data want;
set have;
Type= put(Case_DX, Casetype.);
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;But in this case it doesn't work because of the approximate matches.&lt;BR /&gt;How can I go about this?&lt;BR /&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 24 Jun 2022 05:30:54 GMT</pubDate>
    <dc:creator>Ad30</dc:creator>
    <dc:date>2022-06-24T05:30:54Z</dc:date>
    <item>
      <title>SAS assign variables to a category based on approximate matches of strings</title>
      <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820185#M34806</link>
      <description>&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;I have this dataset&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data have;
input ID  Case_Dx
1	S72080	
2	812	
3	S72100	
4	813.2	
5	820.2	
6	808.4	
7	805.6
8	S5251	
9	S220	
10	S320
11	806
12	S5262	
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to add a column to the dataset that groups the 'Case_dx' column into group A, B,C.&lt;/P&gt;&lt;P&gt;The groups are defined as follows&lt;BR /&gt;GroupA= Anything that starts with 'S720', 'S721' or 'S722' (up to 8 characters)&lt;BR /&gt;GroupB= Anything that starts with 'S525' or 'S526'&amp;nbsp;(up to 8 characters)&lt;BR /&gt;GroupC= Anything that starts with '805', '806', 'S220', 'S320' or 'S221'&amp;nbsp;(up to 8 characters)&lt;/P&gt;&lt;P&gt;I usually use this&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc format;
    value $ Casetype
        'S720', 'S721', 'S722' =  'A'
		'S525', 'S526'  =  'B'
        '805', '806', 'S220', 'S320', 'S221'  =  'C';
	run;

data want;
set have;
Type= put(Case_DX, Casetype.);
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;But in this case it doesn't work because of the approximate matches.&lt;BR /&gt;How can I go about this?&lt;BR /&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jun 2022 05:30:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820185#M34806</guid>
      <dc:creator>Ad30</dc:creator>
      <dc:date>2022-06-24T05:30:54Z</dc:date>
    </item>
    <item>
      <title>Re: SAS assign variables to a category based on approximate matches of strings</title>
      <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820205#M34807</link>
      <description>&lt;P&gt;You can use the SUBSTR function:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;BLOCKQUOTE&gt;
&lt;P&gt;data want;&lt;BR /&gt;set have;&lt;BR /&gt;if substr(Case_Dx,1,4) in ("S720","S721","S722") then Type="A";&lt;BR /&gt;else &lt;BR /&gt;if substr(Case_Dx,1,4) in ("S525","S526") then Type="B";&lt;BR /&gt;else&lt;BR /&gt;if substr(Case_Dx,1,3) in ("805","806") or substr(Case_Dx,1,4) in ("S220","S320","S221") then Type="C";&lt;BR /&gt;run;&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;</description>
      <pubDate>Fri, 24 Jun 2022 09:05:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820205#M34807</guid>
      <dc:creator>V_Altomonte</dc:creator>
      <dc:date>2022-06-24T09:05:52Z</dc:date>
    </item>
    <item>
      <title>Re: SAS assign variables to a category based on approximate matches of strings</title>
      <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820206#M34808</link>
      <description>&lt;P&gt;Something like below should work.&lt;/P&gt;
&lt;PRE&gt;  type= put(upcase(substr(case_dx,1,4)), $casetype.);
&lt;/PRE&gt;</description>
      <pubDate>Fri, 24 Jun 2022 09:16:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820206#M34808</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2022-06-24T09:16:30Z</dc:date>
    </item>
    <item>
      <title>Re: SAS assign variables to a category based on approximate matches of strings</title>
      <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820207#M34809</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/427810"&gt;@Ad30&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can use&amp;nbsp;&lt;SPAN&gt;the&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://documentation.sas.com/?docsetId=lrcon&amp;amp;docsetTarget=p00iah2thp63bmn1lt20esag14lh.htm&amp;amp;docsetVersion=9.4&amp;amp;locale=en#p0xgxvo2we2mqrn1kteecpv1opx0" target="_blank" rel="noopener nofollow noreferrer"&gt;IN operator&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;with the colon modifier (see &lt;A href="https://documentation.sas.com/doc/en/lrcon/9.4/p00iah2thp63bmn1lt20esag14lh.htm#p1vu0ts68u42xun141p7vt4ne29k" target="_blank" rel="noopener"&gt;Character Comparisons&lt;/A&gt;)&lt;FONT face="helvetica"&gt;:&lt;/FONT&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID Case_Dx $;
cards;
1 S72080
2 812
3 S72100
4 813.2
5 820.2
6 808.4
7 805.6
8 S5251
9 S220
10 S320
11 806
12 S5262
;

data want;
set have;
if Case_Dx in: ('S720' 'S721' 'S722') then Type = 'A';
else if Case_Dx in: ('S525' 'S526') then Type = 'B';
else if Case_Dx in: ('805' '806' 'S220' 'S320' 'S221') then Type = 'C';
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;If variable Case_Dx has a defined length of 8 characters (as is the case in the code above), the condition "&lt;SPAN&gt;up to 8 characters" is automatically satisfied. Otherwise insert&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;if length(Case_Dx)&amp;lt;=8 then&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;SPAN&gt;before the first IF statement in order to exclude strings like "S72012345" (9 characters) from the categorization.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jun 2022 09:22:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820207#M34809</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2022-06-24T09:22:53Z</dc:date>
    </item>
    <item>
      <title>Re: SAS assign variables to a category based on approximate matches of strings</title>
      <link>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820215#M34810</link>
      <description>&lt;PRE&gt;data have;
infile cards expandtabs;
input ID  Case_Dx :$40.;
cards;
1 S72080 
2 812 
3 S72100 
4 813.2 
5 820.2 
6 808.4 
7 805.6
8 S5251 
9 S220 
10 S320
11 806
12 S5262 
;

data want;
 set have;
 if  prxmatch('/^(S720|S721|S722)/',strip(Case_Dx)) then group='A';
 if  prxmatch('/^(S525|S526)/',strip(Case_Dx)) then group='B';
 if  prxmatch('/^(805|806|S220|S320|S221)/',strip(Case_Dx)) then group='C';
run;&lt;/PRE&gt;</description>
      <pubDate>Fri, 24 Jun 2022 11:19:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/SAS-assign-variables-to-a-category-based-on-approximate-matches/m-p/820215#M34810</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2022-06-24T11:19:23Z</dc:date>
    </item>
  </channel>
</rss>

