<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: split several 'company name' variable in new variables based on brackets, quotation marks, etc. in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/split-several-company-name-variable-in-new-variables-based-on/m-p/547472#M151717</link>
    <description>&lt;P&gt;You can apply the same logic as in your previous thread to each variable and then merge the results, like this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input NAME1:$100. NAME2:$100. NAME_SHORT:$100. Previous_name:$100.;
infile datalines dlm="," missover;
datalines;
JUICE&amp;lt;BR&amp;gt;apple[footer],  HARDY(FRNS.)'A',  HARDY,  HARDY
juice &amp;lt;BR&amp;gt; apple,  juice (BR) apple, juice (BR) apple, juice (BR) apple 
juice&amp;lt;BODY&amp;gt; 'apple', juice(BODY), juice(BODY), juice(BODY)
&amp;lt;figure&amp;gt; "juice" LTD, ,"juice" LTD, "juice" LTD
ABB (ASEA BROWN BOVERI)
;
run;

data one;
   format NAME1 NAME1_inB NAME1_noB;
   set have;
   RegExID = prxparse('/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/');
   start=1;
   call prxnext(RegExID, start, length(NAME1), NAME1, pos, length);
   NAME1_noB=prxchange('s/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/ /', -1, NAME1);
      do while (pos &amp;gt; 0);
         NAME1_inB = substr(NAME1, pos+1, length-2);
         output;
         call prxnext(RegExID, start, length(NAME1), NAME1, pos, length);
      end;
   keep NAME1 NAME1_inB NAME1_noB;
run;

data two;
   format NAME2 NAME2_inB NAME2_noB;
   set have;
   RegExID = prxparse('/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/');
   start=1;
   call prxnext(RegExID, start, length(NAME2), NAME2, pos, length);
   NAME2_noB=prxchange('s/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/ /', -1, NAME2);
      do while (pos &amp;gt; 0);
         NAME2_inB = substr(NAME2, pos+1, length-2);
         output;
         call prxnext(RegExID, start, length(NAME2), NAME2, pos, length);
      end;
   keep NAME2 NAME2_inB NAME2_noB;
run;

data want;
   merge one two;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Sun, 31 Mar 2019 07:48:14 GMT</pubDate>
    <dc:creator>PeterClemmensen</dc:creator>
    <dc:date>2019-03-31T07:48:14Z</dc:date>
    <item>
      <title>split several 'company name' variable in new variables based on brackets, quotation marks, etc.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/split-several-company-name-variable-in-new-variables-based-on/m-p/547437#M151704</link>
      <description>&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a 'company names' dataset. For each company, I have four different company names (i.e., NAME1, NAME2, NAME_SHORT, Previous_name) and expect to process each of them. For each 'company name' variable, I expect to&amp;nbsp;&lt;SPAN&gt;find all strings between (),[],and {} (such as &amp;lt;BR&amp;gt;, [FONT],{BODY},'A',"JUICE") and split them into new variables (i.e., &amp;amp;COMPANY_NAME._inB, and &amp;amp;COMPANY_NAME._noB).&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For example&amp;nbsp;&lt;/P&gt;&lt;P&gt;table a&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;no&lt;/TD&gt;&lt;TD&gt;NAME1&lt;/TD&gt;&lt;TD&gt;NAME2&lt;/TD&gt;&lt;TD&gt;NAME_SHORT&lt;/TD&gt;&lt;TD&gt;Previous_name&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;JUICE&amp;lt;BR&amp;gt;apple[footer]&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;HARDY(FRNS.)'A'&lt;/TD&gt;&lt;TD&gt;HARDY&lt;/TD&gt;&lt;TD&gt;HARDY&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;juice &amp;lt;BR&amp;gt; apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;juice (BR) apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;juice (BR) apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;juice (BR) apple&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;juice&amp;lt;BODY&amp;gt; 'apple'&lt;/TD&gt;&lt;TD&gt;juice(BODY)&lt;/TD&gt;&lt;TD&gt;juice(BODY)&lt;/TD&gt;&lt;TD&gt;juice(BODY)&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;&amp;lt;figure&amp;gt; "juice" LTD&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;"juice" LTD&lt;/TD&gt;&lt;TD&gt;"juice" LTD&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Using 'NAME1' and 'NAME2' as an example, I expect to 
get&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NAME1&lt;/TD&gt;&lt;TD&gt;NAME1_inB&lt;/TD&gt;&lt;TD&gt;NAME1_noB&lt;/TD&gt;&lt;TD&gt;NAME2&lt;/TD&gt;&lt;TD&gt;NAME2_inB&lt;/TD&gt;&lt;TD&gt;NAME2_noB&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;JUICE&amp;lt;BR&amp;gt;apple[footer]&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;BR&lt;/TD&gt;&lt;TD&gt;JUICE apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;HARDY(FRNS.)'A'&lt;/TD&gt;&lt;TD&gt;FRNS.&lt;/TD&gt;&lt;TD&gt;HARDY&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;JUICE&amp;lt;BR&amp;gt;apple[footer]&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;footer&lt;/TD&gt;&lt;TD&gt;JUICE apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;HARDY(FRNS.)'A'&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;HARDY&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;juice &amp;lt;BR&amp;gt; apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;BR&lt;/TD&gt;&lt;TD&gt;juice&amp;nbsp; apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;juice (BR) apple&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;BR&lt;/TD&gt;&lt;TD&gt;juice apple&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;juice&amp;lt;BODY&amp;gt; 'apple'&lt;/TD&gt;&lt;TD&gt;BODY&lt;/TD&gt;&lt;TD&gt;juice&lt;/TD&gt;&lt;TD&gt;juice(BODY)&lt;/TD&gt;&lt;TD&gt;BODY&lt;/TD&gt;&lt;TD&gt;juice&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;juice&amp;lt;BODY&amp;gt; 'apple'&lt;/TD&gt;&lt;TD&gt;apple&lt;/TD&gt;&lt;TD&gt;juice&lt;/TD&gt;&lt;TD&gt;juice(BODY)&lt;/TD&gt;&lt;TD&gt;BODY&lt;/TD&gt;&lt;TD&gt;juice&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;&amp;lt;figure&amp;gt; "juice" LTD&lt;/TD&gt;&lt;TD&gt;figure&lt;/TD&gt;&lt;TD&gt;LTD&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;&amp;lt;figure&amp;gt; "juice" 
LTD&lt;/TD&gt;&lt;TD&gt;juice&lt;/TD&gt;&lt;TD&gt;LTD&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input NAME1 $100. NAME2 $100. NAME_SHORT $100. Previous_name $100.;
infile datalines dlm="," missover;
datalines;
JUICE&amp;lt;BR&amp;gt;apple[footer],  HARDY(FRNS.)'A',  HARDY,  HARDY
juice &amp;lt;BR&amp;gt; apple,  juice (BR) apple, juice (BR) apple, juice (BR) apple&amp;nbsp;
juice&amp;lt;BODY&amp;gt; 'apple', juice(BODY), juice(BODY), juice(BODY)
&amp;lt;figure&amp;gt; "juice" LTD, ,"juice" LTD, "juice" LTD
;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Edit:&lt;/P&gt;&lt;P&gt;At the current stage, I can find and split the variable with the following code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data step9.Patstat_gb_hrm_Step3;
Set step9.Patstat_gb_hrm_Step23;
Run;

%MACRO CompnayNameM(no=,Company_name=);

data step9.Patstat_gb_hrm_Step3;
   set step9.Patstat_gb_hrm_Step3;
   RegExID = prxparse('/&amp;lt;\w*&amp;gt;|\[\w*\]|\(\w*\)|\(\w*\)|"\w*"|''\w*''/');
   start=1;
   stop=length(&amp;amp;COMPANY_NAME._Step23);
   call prxnext(RegExID, start, stop, &amp;amp;COMPANY_NAME._Step23, pos, length);
      do while (pos &amp;gt; 0);
         &amp;amp;COMPANY_NAME._inB = substr(&amp;amp;COMPANY_NAME._Step23, pos+1, length-2);
         &amp;amp;COMPANY_NAME._noB = prxchange('s/&amp;lt;\w*&amp;gt;|\[\w*\]|\(\w*\)|\(\w*\)|"\w*"|''\w*''/ /', -1, &amp;amp;COMPANY_NAME._Step23);
         call prxnext(RegExID, start, stop, &amp;amp;COMPANY_NAME._Step23, pos, length);
      end;
	  drop RegExID pos length start stop;
run;

proc sql;
create table PATSTAT&amp;amp;no. as
select distinct
&amp;amp;COMPANY_NAME.,
&amp;amp;COMPANY_NAME._Step23,
&amp;amp;COMPANY_NAME._inB,
&amp;amp;COMPANY_NAME._noB
from step9.Patstat_gb_hrm_Step3
where &amp;amp;COMPANY_NAME._inB ne ''
;
quit;

%MEND CompnayNameM;

%CompnayNameM(no=1,Company_name=HRM_L2)
%CompnayNameM(no=2,Company_name=PERSON_NAME)
run;

&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;However, values like&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;3M INNOVATIVE PROPERTIES COMPANY (MINNESOTA MINING  &amp;amp;  MANUFACTURING INNOVATIVE PROPERTIES COMPANY)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;or&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;3RD ANGLE (U.K.)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;or&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;ABB (ASEA BROWN BOVERI)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;cannot be processed. Could you give me some suggestions about this?&lt;/P&gt;</description>
      <pubDate>Sun, 31 Mar 2019 00:25:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/split-several-company-name-variable-in-new-variables-based-on/m-p/547437#M151704</guid>
      <dc:creator>Alexxxxxxx</dc:creator>
      <dc:date>2019-03-31T00:25:16Z</dc:date>
    </item>
    <item>
      <title>Re: split several 'company name' variable in new variables based on brackets, quotation marks, etc.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/split-several-company-name-variable-in-new-variables-based-on/m-p/547472#M151717</link>
      <description>&lt;P&gt;You can apply the same logic as in your previous thread to each variable and then merge the results, like this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input NAME1:$100. NAME2:$100. NAME_SHORT:$100. Previous_name:$100.;
infile datalines dlm="," missover;
datalines;
JUICE&amp;lt;BR&amp;gt;apple[footer],  HARDY(FRNS.)'A',  HARDY,  HARDY
juice &amp;lt;BR&amp;gt; apple,  juice (BR) apple, juice (BR) apple, juice (BR) apple 
juice&amp;lt;BODY&amp;gt; 'apple', juice(BODY), juice(BODY), juice(BODY)
&amp;lt;figure&amp;gt; "juice" LTD, ,"juice" LTD, "juice" LTD
ABB (ASEA BROWN BOVERI)
;
run;

data one;
   format NAME1 NAME1_inB NAME1_noB;
   set have;
   RegExID = prxparse('/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/');
   start=1;
   call prxnext(RegExID, start, length(NAME1), NAME1, pos, length);
   NAME1_noB=prxchange('s/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/ /', -1, NAME1);
      do while (pos &amp;gt; 0);
         NAME1_inB = substr(NAME1, pos+1, length-2);
         output;
         call prxnext(RegExID, start, length(NAME1), NAME1, pos, length);
      end;
   keep NAME1 NAME1_inB NAME1_noB;
run;

data two;
   format NAME2 NAME2_inB NAME2_noB;
   set have;
   RegExID = prxparse('/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/');
   start=1;
   call prxnext(RegExID, start, length(NAME2), NAME2, pos, length);
   NAME2_noB=prxchange('s/&amp;lt;.*&amp;gt;|\[.*\]|\(.*\)|".*"|''.*''/ /', -1, NAME2);
      do while (pos &amp;gt; 0);
         NAME2_inB = substr(NAME2, pos+1, length-2);
         output;
         call prxnext(RegExID, start, length(NAME2), NAME2, pos, length);
      end;
   keep NAME2 NAME2_inB NAME2_noB;
run;

data want;
   merge one two;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 31 Mar 2019 07:48:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/split-several-company-name-variable-in-new-variables-based-on/m-p/547472#M151717</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2019-03-31T07:48:14Z</dc:date>
    </item>
  </channel>
</rss>

