<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Split mixture of strings separated by multiple different delimiters in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513313#M138299</link>
    <description>&lt;P&gt;I will use code window more consistently in the future, however, it sometimes flattens out my code to one line.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Now I understand the logic. But&amp;nbsp;the code example you shared produced quite bizarre result.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Any further hints as to how to correct this further?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="have 1 vs have 2" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/24935iC95BFA4952A10F8B/image-size/large?v=v2&amp;amp;px=999" role="button" title="bizarre result.png" alt="have 1 vs have 2" /&gt;&lt;span class="lia-inline-image-caption" onclick="event.preventDefault();"&gt;have 1 vs have 2&lt;/span&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 15 Nov 2018 14:10:13 GMT</pubDate>
    <dc:creator>Cruise</dc:creator>
    <dc:date>2018-11-15T14:10:13Z</dc:date>
    <item>
      <title>Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513303#M138295</link>
      <description>&lt;P&gt;Hi All,&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Help is appreciated in creating distinct list of chemicals from 5 columns of strings where texts are separated by multiple different types of delimiters:&amp;nbsp;hyphen, comma, space, period, brackets, pound sign. Please see my unsuccessful attempt. I found other examples but they were concerned with only one type of delimiter.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
data chemicals(drop=waste:);
  length chemicals $3000;
  set have(keep=waste:);
  chemicals = catx(':',waste1,waste2,waste3,waste4,waste5);
 run;

data temp; set chemicals;
 do i=1 to countw(chemicals,' ');
 v1=scan(chemicals,i,' ','m');output;
 end;
 drop i;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Please see data I HAVE AND WANT below.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA HAVE;
LENGTH waste1 $600 waste2 $600 waste3 $600 waste4 $600 waste5 $600;
INFILE DATALINES DLM='#'; 
INPUT id waste1 $ waste2 $ waste3 $ waste4 $ waste5 $;
DATALINES;
1  #  Filter Cake-Celite, Carbon, Plasticizer    # (60% by wt) Toluene, Sulfonic                     # acid catalyst							  # Plasticizers, consisting of 26 TM			  # Trimillitate and DOA Adipate Plasticizers                             
2  #  Chromium sludge                            # Paint sludge                                      # Metals                                     # Chlorinated solvents                          #   
3  #  Trichlorethylene (F001)                    # Tetrachloroethene {(PCE or "perc.") F002}         # 1,1,1-Trichloroethane                      # Cyanide (F007)                                # Arsenic  (D003)
4  #  Chromium sludge                            # Paint sludge                                      # Metals                                     # Chlorinated solvents                          # 
5  #  Plant #1 Ester Sump: Residual organics.    # 2 Ethylhexanol, alcohols, diethylene glycol,      # ethylene glycol, plasticizer, adipic acid, # polyester                                     # Plant #2 polmyer solids
6  #  Cadmium, chromium, dichlorobenzene,        # Tetrachloroethylene. dieldrin.                    # Plating and painting wastes                #                                               # 
7  #  Toluene                                    # Xylene                                            # Ethyl Benzene                              #                                               # 
8  #  1,1,1-Trichloroethane {TCA (F001)}         #                                                   #                                            #                                               # 
9  #  Plating sludges                            # Starchy clay sludge                               # (D006, D007, D008 wastes)                  # Trichloroethylene, PVC sludge                 # Tetrachloroethylene, Vinyl chloride
10 #  Chlorinated Solvents (FOO1) (FOO1)         #                                                   #                                            #                                               # 
11 #  Toluene                                    # 1-2, Dichloroethylene                             # 1,1,1-Trichloroethane (FOO1) (FOO2)        # Trichloroethylene (TCE)                       # 
12 #  Fuel oil, gasoline, solvents               # Chlorinated hydrocarbons (FOO1 &amp;amp; FOO2)            #                                            #                                               # 
13 #  Chlorinated hydrocarbons and solvents      # (FOO1 &amp;amp; FOO2)                                     #                                            #                                               # 
14 #  Heavy Metals, Fe, Cr, Cu, Zn, Ni           # (D006) (D007)                                     #                                            #                                               # 
15 #  Waste solvents, phenols                    # Trichloroethylene (TCE)                           # Xylene, Dichlorobenzene (FOO1) (FOO2)      # Tetrachloroethylene (PCE or "perc.")          # 
16 #  1,1,1-Trichloroethane (FOO1 &amp;amp; FOO2)        # Methylene chloride                                #                                            #                                               # 
17 #  Tetrachloroethylene (PCE or "perc.")       # Trichloroethylene (TCE)                           # 1,1,1-Trichloroethane (TCA)                #                                               # 
18 #  Copper, lead (D008)                        # 1,1,1 Trichloroethane (F002)                      #                                            #                                               # 
19 #  Methylene chloride                         # 1,1,1-Trichloroethane, toluene, xylene,           # Ethyl benzene,  1,2-dichloropropane        # Aluminum, arsenic, copper, cadmium, chromium, # Lead, 1,1- dichloroethene, ethane and zinc
20 #  Inks                                       # Solvents (toluene) (FOO5)                         # Ethylbenzene                               # Ethylacetate                                  # 
21 #  1,1,1-Trichloroethane {(TCA) (F001 waste)} # Tetrachloroethylene  {(PCE or "perc.") (F001)}    #                                            #                                               #                                           
;

 data want(drop=waste:);
  length combined $3000;
  set have;
  combined = catx(':',waste1,waste2,waste3,waste4,waste5);
 run;

data want;
length Chemicals $50;
input Chemicals $;
cards;
Filter Cake-Celite
Carbon
Plasticizer
Chromium sludge
Trichlorethylene
Chromium sludge
Plant Ester Sump 
Residual organics
Ester Sump
Cadmium
Toluene
Trichloroethane 
Plating sludges
Chlorinated Solvents
Toluene
Fuel oil
Gasoline
Solvents
Chlorinated hydrocarbons
Solvents
Heavy Metals
Fe
Cr
Cu
Zn
Ni
Tetrachloroethylene
Copper
Lead
Methylene chrloride
Inks
Trichloroethane
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 13:36:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513303#M138295</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T13:36:47Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513306#M138296</link>
      <description>&lt;P&gt;The scan function allows a list of delimiters to be used:&lt;/P&gt;
&lt;P&gt;&lt;A href="http://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000214639.htm" target="_blank"&gt;http://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000214639.htm&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;E.g.&lt;/P&gt;
&lt;PRE&gt;want=scan(have,1,"!:,; ");&lt;/PRE&gt;</description>
      <pubDate>Thu, 15 Nov 2018 13:43:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513306#M138296</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2018-11-15T13:43:25Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513310#M138297</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/45151"&gt;@RW9&lt;/a&gt;&lt;/P&gt;
&lt;P&gt;Thanks for help. I just tried following which didn't work. What am I doing wrong here? It only outputs one column with the first string, which was for example 'Filter' of 'Filter Cake-Celite'.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;data d.have1(drop=waste:);&lt;BR /&gt; length chemicals $3000;&lt;BR /&gt; set d.have(keep=waste:);&lt;BR /&gt; chemicals = catx(':',waste1,waste2,waste3,waste4,waste5);&lt;BR /&gt; run;&lt;/P&gt;
&lt;P&gt;data d.have2; set d.have1;&lt;BR /&gt;var=scan(chemicals,1,"!:,-;#()");&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 13:56:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513310#M138297</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T13:56:08Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513311#M138298</link>
      <description>&lt;P&gt;Please use the code window - its the {i} above post, so code formatting is retained.&lt;/P&gt;
&lt;P&gt;You have only told it to take the first string with those delimiter.&amp;nbsp; It should not split the string you give, as space is not part of the delimiter list.&amp;nbsp; If you want to loop over them then you need a loop, this for instance outputs each string block as a new obs:&lt;/P&gt;
&lt;PRE&gt;data d.have2;
  set d.have1;
  do i=1 to countw(chemicals,"!:,-;#()");
    var=scan(chemicals,i,"!:,-;#()");
    output;
  end;
run;
&lt;/PRE&gt;
&lt;P&gt;Or array:&lt;/P&gt;
&lt;PRE&gt;data d.have2;
  set d.have1;&lt;BR /&gt;  num_iter=countw(chemicals,"!:,-;#()");&lt;BR /&gt;  array str{num_iter} $2000;
  do i=1 to num_iter;
    str{i}=scan(chemicals,i,"!:,-;#()");
  end;
run;&lt;/PRE&gt;
&lt;P&gt;Will create variables for each string block.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:06:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513311#M138298</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2018-11-15T14:06:39Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513313#M138299</link>
      <description>&lt;P&gt;I will use code window more consistently in the future, however, it sometimes flattens out my code to one line.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Now I understand the logic. But&amp;nbsp;the code example you shared produced quite bizarre result.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Any further hints as to how to correct this further?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="have 1 vs have 2" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/24935iC95BFA4952A10F8B/image-size/large?v=v2&amp;amp;px=999" role="button" title="bizarre result.png" alt="have 1 vs have 2" /&gt;&lt;span class="lia-inline-image-caption" onclick="event.preventDefault();"&gt;have 1 vs have 2&lt;/span&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:10:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513313#M138299</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T14:10:13Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513315#M138300</link>
      <description>&lt;P&gt;Yes, I know the code window isn't great, its been pointed out to the forum admins.&amp;nbsp; Its better than smiley faces all over though.&lt;/P&gt;
&lt;P&gt;I don't see your issue below.&amp;nbsp; You stipulate the list of delimiters, and it is correctly splitting on them.&amp;nbsp; So obs one is split because - is found, obs 2 to the comma, obs three to next comma.&amp;nbsp; Do you only want it to split on commas?&amp;nbsp; If so only put commas in the list of delimiters.&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:14:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513315#M138300</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2018-11-15T14:14:20Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513317#M138301</link>
      <description>This is why I said transpose first and then do scan. This approach is harder.</description>
      <pubDate>Thu, 15 Nov 2018 14:16:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513317#M138301</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-11-15T14:16:55Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513333#M138303</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc transpose data=d.have out=d.have_long;
by id;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;NOTE: No variables to transpose.&lt;BR /&gt;NOTE: There were 21 observations read from the data set D.HAVE.&lt;BR /&gt;NOTE: The data set D.HAVE_LONG has 0 observations and 3 variables.&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:40:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513333#M138303</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T14:40:16Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513335#M138304</link>
      <description>&lt;P&gt;"&lt;SPAN&gt;NOTE: No variables to transpose." - no var line in procedure call.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;&lt;A href="http://support.sas.com/documentation/cdl/en/proc/65145/HTML/default/viewer.htm#n1xno5xgs39b70n0zydov0owajj8.htm" target="_blank"&gt;http://support.sas.com/documentation/cdl/en/proc/65145/HTML/default/viewer.htm#n1xno5xgs39b70n0zydov0owajj8.htm&lt;/A&gt;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:42:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513335#M138304</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2018-11-15T14:42:43Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513346#M138306</link>
      <description>&lt;P&gt;Thanks. I finally transposed. But how does it help? I still end up with multiple columns with chemical names. I'm tempted to manually weed out using excel &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc transpose data=d.have out=d.have_long NAME=TRANSPOSED;
VAR WASTE1 WASTE2 WASTE3 WASTE4 WASTE5; 
ID ID;
run;
 &lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 15 Nov 2018 14:55:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513346#M138306</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T14:55:24Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513349#M138307</link>
      <description>You should now end up with one column to fix. &lt;BR /&gt;And then you’re using SCAN once.</description>
      <pubDate>Thu, 15 Nov 2018 15:05:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513349#M138307</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-11-15T15:05:22Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513353#M138310</link>
      <description>&lt;P&gt;Well, this demonstrates the basic principal.&amp;nbsp; You haven't mentioned all the rules though, some data is not split by comma, but includes it, some data in brackets is not used, I can only apply rules I know about:&lt;/P&gt;
&lt;PRE&gt;data have;
  length waste1 $600 waste2 $600 waste3 $600 waste4 $600 waste5 $600;
  infile datalines dlm='#'; 
  input id waste1 $ waste2 $ waste3 $ waste4 $ waste5 $;
datalines;
1  #  Filter Cake-Celite, Carbon, Plasticizer    # (60% by wt) Toluene, Sulfonic                     # acid catalyst                       # Plasticizers, consisting of 26 TM           # Trimillitate and DOA Adipate Plasticizers                             
2  #  Chromium sludge                            # Paint sludge                                      # Metals                                     # Chlorinated solvents                          #   
3  #  Trichlorethylene (F001)                    # Tetrachloroethene {(PCE or "perc.") F002}         # 1,1,1-Trichloroethane                      # Cyanide (F007)                                # Arsenic  (D003)
4  #  Chromium sludge                            # Paint sludge                                      # Metals                                     # Chlorinated solvents                          # 
5  #  Plant #1 Ester Sump: Residual organics.    # 2 Ethylhexanol, alcohols, diethylene glycol,      # ethylene glycol, plasticizer, adipic acid, # polyester                                     # Plant #2 polmyer solids
6  #  Cadmium, chromium, dichlorobenzene,        # Tetrachloroethylene. dieldrin.                    # Plating and painting wastes                #                                               # 
7  #  Toluene                                    # Xylene                                            # Ethyl Benzene                              #                                               # 
8  #  1,1,1-Trichloroethane {TCA (F001)}         #         
;
run;

proc transpose data=have out=want;
  by id;
  var waste:;
run;

data want;
  set want;
  length wrd $2000;
  do i=1 to countw(col1,",");
    wrd=scan(col1,i,",");
    output;
  end;
run;&lt;/PRE&gt;</description>
      <pubDate>Thu, 15 Nov 2018 15:06:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513353#M138310</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2018-11-15T15:06:55Z</dc:date>
    </item>
    <item>
      <title>Re: Split mixture of strings separated by multiple different delimiters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513361#M138314</link>
      <description>Thanks RW9 and Reeza!!!</description>
      <pubDate>Thu, 15 Nov 2018 15:24:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-mixture-of-strings-separated-by-multiple-different/m-p/513361#M138314</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-11-15T15:24:21Z</dc:date>
    </item>
  </channel>
</rss>

