one way to do this.
\w+ = one or more "word" characters (a letter, a digit, or an underscore)
(?=...) is a zero-length lookahead assertion: it only checks what comes immediately after the word of interest, without making that text part of the match. In our case the word of interest is \w+, i.e. a run of letters, digits, or underscores.
(?=\sice cream) -- match the word only if it is followed by a whitespace character and then "ice cream"
[0-9\.]+ = one or more digits or dots, i.e. a number such as 3 or 1.5
(?=\sscoops) -- match the number only if it is followed by a whitespace character and then "scoops"
/* Sample input: one row per respondent.
     id       - numeric identifier (first field on the line)
     comments - free text, up to 500 characters (rest of the line)        */
data have;
  length id 8 comments $500;
  /* Accept either a tab or a blank after the id: editors and web pages
     often turn tabs into blanks, which breaks a tab-only delimiter.
     TRUNCOVER keeps a short line from pulling in the next record.        */
  infile datalines dlm='0920'x truncover;
  /* id uses list input (stops at the first tab/blank); comments uses
     formatted input so embedded blanks and commas stay in the value.     */
  input id comments $500.;
datalines;
1 On Monday I had 1.5 scoops of vanilla ice cream, Tuesday we had 2 scoops of chocolate ice cream, one day we also had 3 scoops of strawberry ice cream, but I can't remember when.
2 Over the week I had 4 scoops of raspberry ice cream, 3 scoops of vanilla ice cream
3 On 1/20/2019, I had 3 scoops of chocolate ice cream.
;
/* Pull every flavour and every scoop count out of COMMENTS.
   Output data set have1 keeps, per input row:
     id
     icecream - blank-separated list of each word directly followed by " ice cream"
     amount   - blank-separated list of each run of digits/dots directly followed by " scoops" */
data have1(keep= id icecream amount);
  length val val1 icecream amount $4200.;
  retain re re1;

  /* Read each observation exactly once per iteration. */
  set have;

  /* Compile both patterns a single time and keep the ids across rows. */
  if _n_ = 1 then do;
    re  = prxparse('/\w+(?=\sice cream)/');
    re1 = prxparse('/[0-9\.]+(?=\sscoops)/');
  end;

  stop = length(comments);

  /* Pass 1: flavours. CALL PRXNEXT advances START past each match and
     returns position 0 when nothing further is found.                  */
  start = 1;
  call prxnext(re, start, stop, trim(comments), pos, len);
  do while (pos > 0);
    val = substr(comments, pos, len);
    icecream = catx(" ", icecream, val);
    call prxnext(re, start, stop, trim(comments), pos, len);
  end;

  /* Pass 2: amounts, scanning the same text again from the beginning. */
  start = 1;
  call prxnext(re1, start, stop, trim(comments), pos, len);
  do while (pos > 0);
    val1 = substr(comments, pos, len);
    amount = catx(" ", amount, val1);
    call prxnext(re1, start, stop, trim(comments), pos, len);
  end;
run;
/* Build the names of the last array elements needed downstream:
     &maxice    -> "ice<n>"     where n = largest number of flavours in any row
     &maxamount -> "amount<n>"  where n = largest number of amounts in any row
   The count is floored at 1 so that "ice1-&maxice" is still a valid
   variable range when no row contains a match (or have1 is empty).      */
proc sql noprint;
  select cats("ice",    max(1, max(countw(icecream, " ")))),
         cats("amount", max(1, max(countw(amount,   " "))))
    into :maxice, :maxamount
    from have1;
quit;
/* Spread the blank-separated lists from have1 into one column per item:
     ice1-&maxice       character ($10 - longer flavour names are truncated)
     amount1-&maxamount numeric
   Relies on the macro variables maxice / maxamount holding the name of
   the last element of each range (e.g. ice3, amount3).                  */
data want;
  set have1;
  length ice1-&maxice $10.;
  array ice[*] $ ice1-&maxice;
  array amounts[*] amount1-&maxamount;

  /* Flavours and amounts are filled independently, so a row that has
     flavours but no amounts (or the reverse) is still populated.        */
  do i = 1 to countw(icecream, " ");
    ice[i] = scan(icecream, i, ' ');
  end;

  /* Convert explicitly; ?? yields a missing value instead of an error
     for tokens that are not valid numbers (e.g. a lone ".").            */
  do i = 1 to countw(amount, " ");
    amounts[i] = input(scan(amount, i, ' '), ?? 32.);
  end;

  drop i amount icecream;
run;
/* Print the reshaped table. The data set is named explicitly rather than
   relying on the most recently created one, and RUN ends the step so it
   executes without waiting for a later step boundary.                    */
proc print data=want;
run;
Obs
id
ice1
ice2
ice3
amount1
amount2
amount3
1
1
vanilla
chocolate
strawberry
1.5
2
3
2
2
raspberry
vanilla
4.0
3
.
3
3
chocolate
3.0
.
.
... View more