/* Run-length encode the dash-delimited sequence held in DNA into OUT_STRAND.
   Each run of identical consecutive elements is written as <count><element>,
   the count being omitted when the run has length 1, and the runs are joined
   with dashes (for example A-A-A-C-G-G becomes 3A-C-2G).
   Reads:  dataset HAVE, character variable DNA.
   Writes: dataset WANT, adding OUT_STRAND together with the working
           variables _I, STRAND_COUNT, CUR_STRAND and CUR_BASE.
   Limits: each element is held in a $1 variable, so only its first character
           is compared and written, and OUT_STRAND is truncated at 30 characters. */
data want;
    set have;
    length out_strand $30;
    length cur_strand cur_base $1;
    /* COUNTW and SCAN must use the same delimiter list. The default COUNTW
       list also splits on blanks and other punctuation, so it can disagree
       with a dash-only SCAN, leave CUR_STRAND blank, and keep the inner loop
       from ever finding a non-match. Dash plus blank is used in every call. */
    do _i = 1 to countw(dna, '- ');
        /* Element that starts the current run. */
        cur_strand = scan(dna, _i, '- ');
        /* Look ahead from offset 0 until an element differs from CUR_STRAND.
           SCAN returns blank past the last element, which ends the final run.
           On exit STRAND_COUNT is the offset of the first non-match, that is,
           the length of the run. */
        do strand_count = 0 by 1 until (cur_strand ne cur_base);
            cur_base = scan(dna, strand_count + _i, '- ');
        end;
        /* Append the run, prefixing the count only when it is greater than 1.
           CATS(strand_count) converts the number to text explicitly, which
           avoids the implicit numeric-to-character conversion note in the log. */
        out_strand = catx('-', out_strand,
                          cats(ifc(strand_count > 1, cats(strand_count), ''), cur_strand));
        /* Move to the last element of this run, and the DO increment then
           lands on the first element of the next run. */
        _i = strand_count + _i - 1;
    end;
run;
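This should do what you need: the step walks the dash-delimited string one element at a time and, for each element, keeps scanning forward until it reaches an element that does not match; the number of elements scanned is the length of that run.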
... View more