<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extracting digits with hyphens from character text in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630469#M186654</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/86015"&gt;@Caetreviop543&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Just to be sure to understand well, could you please show the expected output?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Best,&lt;/P&gt;</description>
    <pubDate>Sun, 08 Mar 2020 08:21:23 GMT</pubDate>
    <dc:creator>ed_sas_member</dc:creator>
    <dc:date>2020-03-08T08:21:23Z</dc:date>
    <item>
      <title>Extracting digits with hyphens from character text</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630467#M186652</link>
      <description>&lt;P&gt;I have some messy prescription data that looks like this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Dose
15 mg
10-18-27 mg / mL
20-8 5 mg / 300 ml
12 (600 mgc spray)
130 (drugname)
12mg - 10 mg - 12 mg
5mg-10mg-6mg
10 mg (nmn)
12-10 mg / ml&lt;BR /&gt;10-10-20&amp;nbsp;30&amp;nbsp;mg&amp;nbsp;/&amp;nbsp;ml&lt;BR /&gt;10-20-&amp;nbsp;500&amp;nbsp;mg/mL&lt;BR /&gt;2-30-7-10-30&amp;nbsp;mg&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;My goal is to extract the first number, or if available, subsequent numbers after the hyphen. So I want to get rid of the characters, including /, as well as the number corresponding to the mg dosage (e.g, 5 mg), which follows a space, not a hyphen. I tried using prxchange, but couldn't come up with an expression which a) encompassed all the different cases and, b) understood that the 500 after 10-20- is part of 10-20- 500, not 500 mg/mL (it deleted 500 mg/mL, leaving 10-20-).&amp;nbsp; The desired output is:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Dose
15
10-18-27
20-8
12
130
12 - 10 - 12
5-10-6
10
12-10
10-10-20
10-20- 500&lt;BR /&gt;2-30-7-10-30&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I appreciate any advice!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 08 Mar 2020 14:37:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630467#M186652</guid>
      <dc:creator>Caetreviop543</dc:creator>
      <dc:date>2020-03-08T14:37:37Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting digits with hyphens from character text</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630469#M186654</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/86015"&gt;@Caetreviop543&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Just to be sure to understand well, could you please show the expected output?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Best,&lt;/P&gt;</description>
      <pubDate>Sun, 08 Mar 2020 08:21:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630469#M186654</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-03-08T08:21:23Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting digits with hyphens from character text</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630493#M186660</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/86015"&gt;@Caetreviop543&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;This is difficult because a number without a preceding or following hyphen can have a different meaning depending on the context. I have tried to build a sequence of functions that handles your test input. But there might be other combinations in a full data set where it wouldn't work.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	* Build the test data set HAVE from the in-stream lines that follow CARDS;
	* TRUNCOVER stops INPUT from moving to the next line when a line is shorter than the informat width;
	infile cards truncover;
	* Read each whole line, leading blanks included, into the 100-character variable Dose;
	input Dose $char100.;
	cards;
15 mg
10-18-27 mg / mL
20-8 5 mg / 300 ml
12 (600 mgc spray)
130 (drugname)
12mg - 10 mg - 12 mg
5mg-10mg-6mg
10 mg (nmn)
12-10 mg / ml10-10-20 30 mg / ml10-20- 500 mg/mL2-30-7-10-30 mg
;
run;

data want (drop= Dose i); set have;
	* For every slash-separated block of Dose, keep the leading run of
	  hyphen-joined numbers and write it out as one row in Dose_Clean;

	* Sized like Dose, which is read with $char100., so SCAN cannot truncate a long block;
	length Dose_Clean $100;

	do i = 1 to count(Dose,'/')+1;

		* Split string in blocks separated by slash;
		Dose_Clean = scan(Dose,i,'/');

		* Remove every parenthesised group before further tests and replacements.
		  The class [^)] stops at the first closing parenthesis, so text that sits
		  between two separate groups in the same block is kept;
		Dose_Clean = prxchange('s/\([^)]*\)//i', -1, Dose_Clean);

		* IF the block is number one in a line OR contains the abbreviation "mg"...;
		if i = 1 or index(lowcase(Dose_Clean),'mg') then do;

			* remove all upper- and lowercase letters, then collapse runs of blanks;
			Dose_Clean = compbl(compress(Dose_Clean,,'UL'));

			* remove spaces before and after a hyphen;
			Dose_Clean = tranwrd(Dose_Clean,' -','-');
			Dose_Clean = tranwrd(Dose_Clean,'- ','-');

			* keep only first "word" in block and skip blocks that end up blank;
			Dose_Clean = scan(Dose_Clean,1,' ');
			if Dose_Clean ne '' then output;
		end;
	end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 08 Mar 2020 16:23:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630493#M186660</guid>
      <dc:creator>ErikLund_Jensen</dc:creator>
      <dc:date>2020-03-08T16:23:34Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting digits with hyphens from character text</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630499#M186664</link>
      <description>&lt;P&gt;That worked for my data, thanks!&lt;/P&gt;</description>
      <pubDate>Sun, 08 Mar 2020 17:13:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630499#M186664</guid>
      <dc:creator>Caetreviop543</dc:creator>
      <dc:date>2020-03-08T17:13:42Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting digits with hyphens from character text</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630673#M186728</link>
      <description>&lt;P&gt;I discovered another way using prx expressions:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data new;
set old;

*declare the length before the first assignment to dose_new, a LENGTH statement placed later has no effect;
length dose_new $32;

*strip every m and g character, which removes the unit mg, then collapse repeated blanks;
dose_new=compbl(compress(dose, "mg"));

*delete space between hyphens;
dose_new=tranwrd(dose_new, " -", "-");
dose_new=tranwrd(dose_new, "- ", "-");

*prx expression to locate digit string. ? means character or digit are optional, \ before special characters, such as period. Reads as: optional decimal, followed by digit of any length, followed by optional comma,&lt;BR /&gt;followed by a digit of any length followed by an optional hyphen, then the sequence is repeated;
&lt;BR /&gt;digit_string=prxparse("/\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*-?\.?\d*,?\d*/");

*extract digit string based on prx expression using substring function;&lt;BR /&gt;*create two new variables, position and length of digit variable;
&lt;BR /&gt;call prxsubstr(digit_string, dose_new, position, length);&lt;BR /&gt;&lt;BR /&gt;*extract digit string based on position and length;
new = substrn(dose_new, position, length);
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 09 Mar 2020 16:22:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-digits-with-hyphens-from-character-text/m-p/630673#M186728</guid>
      <dc:creator>Caetreviop543</dc:creator>
      <dc:date>2020-03-09T16:22:46Z</dc:date>
    </item>
  </channel>
</rss>

