Art, thanks for your reply. I've attached the html file. Here is the code for the first program:

/* FILENAME is a global statement, so it is declared ahead of the step.   */
/* FILEREF streams the forum thread's HTML over HTTP.                     */
filename fileref url 'https://forum.mrmoneymustache.com/antimustachian-wall-of-shame-and-comedy/uber-driver/';

/* Step 1: read the page one line per observation and pull out the       */
/* per-post fields found on that line. Each field lives on its own HTML  */
/* line, so most variables are missing on most observations; the second  */
/* program collapses them into one row per post.                         */
data MMMdata.uberdrivermessagepagetest;
    infile fileref length=len;
    input record $varying10000. len;

    /* Append a raw copy of every line to a local text file. */
    file "c:\MMM\uberdrivermessagepagetest.txt" mod lrecl=10000;
    put record;

    *avatarflag=0;

    /* Reply number: text between the "Reply #" marker and " on:".        */
    /* The offset is taken from the marker itself instead of a literal    */
    /* 45, which did not match the marker as written here.                */
    /* NOTE(review): 45 equals the marker length if the quote character   */
    /* is spelled as the entity &#171; in the page source - confirm which */
    /* spelling the downloaded HTML actually uses.                        */
    if find(record,'<div class="smalltext">« <strong>Reply #') ne 0 then do;
        startreplynum = find(record,'<div class="smalltext">« <strong>Reply #')
                      + length('<div class="smalltext">« <strong>Reply #');
        /* Look for the end marker only after the start position. */
        endreplynum = find(record,' on:',startreplynum);
        lengthreplynum = endreplynum - startreplynum;
        /* SUBSTR rejects a zero or negative length, so skip it then. */
        if lengthreplynum > 0 then
            replynum = substr(record,startreplynum,lengthreplynum);
        replynumcheck = 1;
    end;

    /* Flag lines that carry an avatar list item. */
    if find(record,'<li class="avatar">') then avatarflag = 1;

    /* Poster name: text between the first "> and the following </a>. */
    if find(record,'title="View the profile of ') ne 0 then do;
        startposter = find(record,'">') + 2;
        endposter = find(record,'</a>',startposter);
        lengthposter = endposter - startposter;
        if lengthposter > 0 then
            poster = substr(record,startposter,lengthposter);
        postercheck = 1;
    end;

    /* Post group ("stache" level). */
    if find(record,'<li class="postgroup">') ne 0 then do;
        startstache = find(record,'li class="postgroup">') + 21;
        endstache = find(record,'</li>',startstache);
        lengthstache = endstache - startstache;
        if lengthstache > 0 then
            stache = substr(record,startstache,lengthstache);
        stachecheck = 1;
    end;

    /* Location. */
    if find(record,'<li class="postcount">Location: ') ne 0 then do;
        startlocation = find(record,'Location: ') + 10;
        endlocation = find(record,'</li>',startlocation);
        lengthlocation = endlocation - startlocation;
        if lengthlocation > 0 then
            location = substr(record,startlocation,lengthlocation);
        locationcheck = 1;
    end;

    /* Number of posts. */
    if find(record,'<li class="postcount">Posts: ') ne 0 then do;
        startnumposts = find(record,'Posts: ') + 7;
        endnumposts = find(record,'</li>',startnumposts);
        lengthnumposts = endnumposts - startnumposts;
        if lengthnumposts > 0 then
            numposts = substr(record,startnumposts,lengthnumposts);
        numpostscheck = 1;
    end;

    /* Post date/time: text between "on:</strong> " and the next " &#". */
    if find(record,'<div class="smalltext">« <strong>') ne 0 then do;
        startpostdatetime = find(record,'on:</strong> ') + 13;
        endpostdatetime = find(record,' &#',startpostdatetime);
        lengthpostdatetime = endpostdatetime - startpostdatetime;
        if lengthpostdatetime > 0 then
            postdatetime = substr(record,startpostdatetime,lengthpostdatetime);
        postdatetimecheck = 1;
    end;

    /* Signature: text between the double line break and </div>. */
    if find(record,'<div class="signature" ') ne 0 then do;
        startsignature = find(record,'<br /><br />') + 12;
        endsignature = find(record,'</div>',startsignature);
        lengthsignature = endsignature - startsignature;
        if lengthsignature > 0 then
            signature = substr(record,startsignature,lengthsignature);
        signaturecheck = 1;
    end;

    /* Age. */
    if find(record,'<li class="postcount">Age: ') ne 0 then do;
        startage = find(record,'Age: ') + 5;
        endage = find(record,'</li>',startage);
        lengthage = endage - startage;
        if lengthage > 0 then
            age = substr(record,startage,lengthage);
        agecheck = 1;
    end;

    /* The post-area div is used as the end-of-record marker. */
    if find(record, '<div class="postarea">') then endcheck = 1;
run;

And here is the code for the retain program:

/* Step 2: carry each extracted field forward line by line and write one */
/* observation every time the end-of-record marker is reached.           */
data MMMdata.uberdriverclean;
    set MMMdata.uberdrivermessagepagetest;

    format replynumtemp best12. avatarflagtemp best12.;
    retain replynumtemp avatarflagtemp postertemp stachetemp locationtemp
           numpoststemp postdatetimetemp signaturetemp agetemp;

    /* MISSING() works for numeric and character variables alike, so no  */
    /* implicit type conversion is triggered by comparing with "".       */
    /* replynum is character; convert it explicitly (?? suppresses the   */
    /* log noise when the text is not a number).                         */
    if not missing(replynum)     then replynumtemp     = input(replynum, ?? 12.);
    if not missing(avatarflag)   then avatarflagtemp   = avatarflag;
    if not missing(poster)       then postertemp       = poster;
    if not missing(stache)       then stachetemp       = stache;
    if not missing(location)     then locationtemp     = location;
    if not missing(numposts)     then numpoststemp     = numposts;
    if not missing(postdatetime) then postdatetimetemp = postdatetime;
    if not missing(signature)    then signaturetemp    = signature;
    if not missing(age)          then agetemp          = age;

    /* Only the end-of-record line produces an output row. The retained  */
    /* values are cleared afterwards so that optional fields (avatar,    */
    /* location, age, signature) do not leak into later posts.           */
    /* NOTE(review): a field that appears in the HTML after the postarea */
    /* marker of its own post will land on the following row - confirm   */
    /* the marker order against the page source.                         */
    if endcheck = 1 then do;
        output;
        call missing(replynumtemp, avatarflagtemp, postertemp, stachetemp,
                     locationtemp, numpoststemp, postdatetimetemp,
                     signaturetemp, agetemp);
    end;

    keep replynumtemp avatarflagtemp postertemp stachetemp locationtemp
         numpoststemp postdatetimetemp signaturetemp agetemp;
run;

Thank you!
... View more