I have a txt file which has the data in the below format.
#FILE_HEADER# MOnth|1901124578965
#MEMBERSHIP_PERSON# 9999944999|0009999|05|ABC|F|Medium|REGN|||07/Jun/2017|Pay Pal|||N|DISTIBUTOR|CURRENT|01|Y|Annual|2017|EXISTING|01/Jan/1901
#ADDRESS_LINE# 0059999999|Mr|Test||Test||M|03/Jan/1901|009999900|Online|abc00@abc.com.au|N|N|00244444||11 ABC Street|||SUBURB|3003|SA|Australia|N|N|00999900|Email|abc00@abc.com.au|N|N|||||||Y|Y|0000000001|YZx|0000000001|ZYX|PERSON|N|0000000000||||||0000000000|||||
#PAYMENT_LINE# 1234|123|35|First Last
#PAYMENT_LINE# 123|123|12|First Last
#MLS# 123|123
#SOC_YHC# 00099999|First||Last|01|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y|
#SOC_YHC# 00099999|First|F|Last|02|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y|
#SOC_MEMBERSHIP# Mr|First||Last|12 ABC Street|SUBURB SA 3003|||3003|C|Couples|996|PRODUCT||01/O1/1901|30.256%|225|211|145
#SIS# N
#FILE_FOOTER# 1901eExtractionOptIn_0000.txt|12345|1901124578965
The headers are not in the file.The file layout is below: The data provided above is in between the file header and file footer, and there are multiple rows of data between Header and Footer .
Every records starts with #MEMBERSHIP_PERSON. The end goal is to convert this data into a table (columns as below and row data as above)
#FILE_HEADER# <TAXGENERATIONMODE>|<BATCHPROCESSSTARTDATETIME>
#MEMBERSHIP_PERSON# <STATEMENT ID>|<MEMBERSHIP NUMBER>|<FUND NO>|<HEALTH FUND ID>|<MEMBERSHIP STATUS>|<STATEMENT COMPLEXITY>|<MEMBERSHIP REBATE STATUS>|<REBATE TIER EFFECTIVE DATE>|<REBATE TIER>|<DATE PAID TO>|<PAYMENT METHOD>|<ORGX TYPE>|<ORGX GROUP DPT>|<REBATE TYPE>|<PERSON CATEGORY>|<PERSON STATUS>|<PERSON SUFFIX NUMBER>|<MULTIPLE ADULTS>|<STATEMENT_TYPE>|<STATEMENT _YEAR>|<STATEMEMT_FREQ>|<STATEMENT GENERATE DATE>
#ADDRESS_LINE# <PERSON TITLE>|<PERSON GIVEN NAME>|<PERSON INITIAL>|<PERSON SURNAME>|<PERSON PREFERRED NAME>|<PERSON GENDER>|<PERSON DOB>|<MAILING CONTACT ID>|<MAILING CONTACT TYPE>|<MAILING CONTACT>|<ADDRESS ID>|<ADDRESSEE>|<ADDRESS LINE 1>|<ADDRESS LINE 2>|<ADDRESS LINE 3>|<CITY/TOWN>|<POSTCODE>|<STATE/PROVINCE/REGION>|<COUNTRY>|<ADDRESS UNKNOWN>|<ADDRESS INVALID>|<ONLINE NOTIFICATION CONTACT ID>|<ONLINE NOTIFICATION CONTACT TYPE>|<ONLINE NOTIFICATION CONTACT>|<ONLINE NOTIFICATION CONTACT UNKNOWN>|<ONLINE NOTIFICATION CONTACT INVALID>|<COURTESY NOTIFICATION CONTACT ID>|<COURTESY NOTIFICATION TYPE>|<COURTESY NOTIFICATION CONTACT>|<COURTESY NOTIFICATION CONTACT UNKNOWN>|<COURTESY NOTIFICATION CONTACT INVALID>|<COMBINED AUTHORITY>|<COMPANY REGISTRATION>|<COMMS CATEGORY>|<COMMS CATEGORY ENTITY>|<MEMBERSHIP STATUS>|<PERSON STATUS>
#PAYMENT_LINE# <PREMIUMS PAID>|<REBATE RECEIVED>|<REBATE CODE>|<OTHER BENEFICIARY>
#MLS# <TOTAL HOSPITAL DAYS>|<TOTAL HOLIDAY DAYS>
#SOC_LHC# <MEMBERSHIP NUMBER>|<GIVEN>|<INITIAL>|<SURNAME>|<SUFFIX>|<DOB>|<START-DATE>|<CERT-ENT-AGE>|<TOT-ABSENT-DAYS>|<ABSENT-DAYS-AS-AT>|<LIFE-LOADING>|<CONFIRM-IND>|<ISSUE-DATE>|<PROJ-AB-DAYS>|<BABYCARE EXCESS WAIVER FLAG>
#SOC_MEMBERSHIP# <TITLE>|<GIVEN>|<INITIAL>|<SURNAME>|<ADDRESS LINE 1>|<ADDRESS LINE 2>|<ADDRESS LINE 3>|<ADDRESS LINE 4>|<POSTCODE>|<MEMB-CODE>|<MEMB-TYPE LMEMTP>|<PACKAGE-CODE>|<PACKAGE-DESC>|<HOSP-CORE-CODE LHSPCC>|<COVER-DATE-FROM>|<CURR-REBATE>|<HOSP-OPT-ID>|<ANC-OPT-ID>|<NP-OPT-ID>
#SIS# <HAS SIS FLAG>
#FILE_FOOTER# <FILENAME>|<TOTALRECORDSCOUNT>|<BATCHPROCESSFINISHDATETIME>
Any ideas on this would be great .
I am using SAS EG to get this working.
Like this?
data _null_;
file "%sysfunc(pathname(work))\t.txt";
put '#FILE_HEADER# MOnth|1901124578965 ';
put '#MEMBERSHIP_PERSON# 9999944999|0009999|05|ABC|F|Medium|REGN|||07/Jun/2017|Pay Pal|||N|DISTIBUTOR|CURRENT|01|Y|Annual|2017|EXISTING|01/Jan/1901 ';
put '#ADDRESS_LINE# 0059999999|Mr|Test||Test||M|03/Jan/1901|009999900|Online|abc00@abc.com.au|N|N|00244444||11 ABC Street|||SUBURB|3003|SA|Australia|N|N|00999900|Email|abc00@abc.com.au|N|N|||||||Y|Y|0000000001|YZx|0000000001|ZYX|PERSON|N|0000000000||||||0000000000|||||';
put '#PAYMENT_LINE# 1234|123|35|First Last';
put '#PAYMENT_LINE# 123|123|12|First Last ';
put '#MLS# 123|123 ';
put '#SOC_YHC# 00099999|First||Last|01|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| ';
put '#SOC_YHC# 00099999|First|F|Last|02|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| ';
put '#SOC_MEMBERSHIP# Mr|First||Last|12 ABC Street|SUBURB SA 3003|||3003|C|Couples|996|PRODUCT||01/O1/1901|30.256%|225|211|145 ';
put '#SIS# N ';
put '#FILE_HEADER# MOnth|1901124578965 ';
put '#MEMBERSHIP_PERSON# 9999944999|0009999|05|ABC|F|Medium|REGN|||07/Jun/2017|Pay Pal|||N|DISTIBUTOR|CURRENT|01|Y|Annual|2017|EXISTING|01/Jan/1901 ';
put '#ADDRESS_LINE# 0059999999|Mr|Test||Test||M|03/Jan/1901|009999900|Online|abc00@abc.com.au|N|N|00244444||11 ABC Street|||SUBURB|3003|SA|Australia|N|N|00999900|Email|abc00@abc.com.au|N|N|||||||Y|Y|0000000001|YZx|0000000001|ZYX|PERSON|N|0000000000||||||0000000000|||||';
put '#PAYMENT_LINE# 1234|123|35|First Last';
put '#PAYMENT_LINE# 123|123|12|First Last ';
put '#MLS# 123|123 ';
put '#SOC_YHC# 00099999|First||Last|01|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| ';
put '#SOC_YHC# 00099999|First|F|Last|02|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| ';
put '#SOC_MEMBERSHIP# Mr|First||Last|12 ABC Street|SUBURB SA 3003|||3003|C|Couples|996|PRODUCT||01/O1/1901|30.256%|225|211|145 ';
put '#SIS# Y ';
run;
data want;
retain TAXGENERATIONMODE BATCHPROCESSSTARTDATETIME;
length TAXGENERATIONMODE BATCHPROCESSSTARTDATETIME STATEMENT_ID MEMBERSHIP_NUMBER FUND_NO PERSON_TITLE FLAG $20;
infile "%sysfunc(pathname(work))\t.txt";
input;
if _infile_ =:'#FILE_HEADER#' then do;
TAXGENERATIONMODE= scan(_infile_,2,'|# ');
BATCHPROCESSSTARTDATETIME=scan(_infile_,3,'|# ');
end;
if _infile_ =:'#MEMBERSHIP_PERSON#' then do until(0);
if _infile_ =:'#MEMBERSHIP_PERSON#' then do ;
STATEMENT_ID =scan(_infile_,2,'|# ');
MEMBERSHIP_NUMBER=scan(_infile_,3,'|# ');
FUND_NO =scan(_infile_,4,'|# ');
end;
if _infile_ =:'#ADDRESS_LINE#' then do ;
PERSON_TITLE =scan(_infile_,2,'|# ');
end;
if _infile_ =:'#PAYMENT_LINE#' then do ;
PREMIUM_PAID =input(scan(_infile_,2,'|# '),?? 32.);
end;
if _infile_ =:'#SIS#' then do ;
FLAG =scan(_infile_,2,'|# ');
output;
leave;
end;
else input;
end;
run;
TAXGENERATIONMODE | BATCHPROCESSSTARTDATETIME | STATEMENT_ID | MEMBERSHIP_NUMBER | FUND_NO | PERSON_TITLE | FLAG | PREMIUM_PAID |
---|---|---|---|---|---|---|---|
MOnth | 1901124578965 | 9999944999 | 0009999 | 05 | 0059999999 | N | 123 |
MOnth | 1901124578965 | 9999944999 | 0009999 | 05 | 0059999999 | Y | 123 |
The easiest is probably to change the first space to a "|":
Data members(keep=statement_id member_no) addresses(keep=member_no whats_this title) payments(keep=member_no premiums rebate) days(keep=member_no hospital_days holidays) ; infile cards dsd delimiter='|'; length linetype $30; input @; prx_id=prxparse('s/ /|/'); call prxchange(prx_id,1,_infile_); input linetype @; select(linetype); when('#MEMBERSHIP_PERSON#') do; input statement_id member_no; /* add the other variables as needed */ output members; end; when('#ADDRESS_LINE#') do; input whats_this title $; output addresses; end; when('#PAYMENT_LINE#') do; input premiums rebate; output payments; end; when('#MLS#') do; input hospital_days holidays; output days; end; otherwise input; end; retain member_no; cards; #FILE_HEADER# MOnth|1901124578965 #MEMBERSHIP_PERSON# 9999944999|0009999|05|ABC|F|Medium|REGN|||07/Jun/2017|Pay Pal|||N|DISTIBUTOR|CURRENT|01|Y|Annual|2017|EXISTING|01/Jan/1901 #ADDRESS_LINE# 0059999999|Mr|Test||Test||M|03/Jan/1901|009999900|Online|abc00@abc.com.au|N|N|00244444||11 ABC Street|||SUBURB|3003|SA|Australia|N|N|00999900|Email|abc00@abc.com.au|N|N|||||||Y|Y|0000000001|YZx|0000000001|ZYX|PERSON|N|0000000000||||||0000000000||||| #PAYMENT_LINE# 1234|123|35|First Last #PAYMENT_LINE# 123|123|12|First Last #MLS# 123|123 #SOC_YHC# 00099999|First||Last|01|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| #SOC_YHC# 00099999|First|F|Last|02|03/Jan/1901|01/Jan/1901|30|00000|01/Jul/1901|00|C|01/Jul/1901|Y| #SOC_MEMBERSHIP# Mr|First||Last|12 ABC Street|SUBURB SA 3003|||3003|C|Couples|996|PRODUCT||01/O1/1901|30.256%|225|211|145 #SIS# N ;run;
I used CALL PRXCHANGE for this, because it changes the string in place (which is faster than calling a function), and because it allows you to specify that you only want to change one occurence.
I output several datasets, as there obviously can be more lines of some types per person (e.g. payments). The key variables (in this case I assumed member_no) should then be on all the tables, and they should be retained (they must be in the first row read for that member, I assume that is always the membership-person line).
It should be reasonably easy to fill out the remaining line types and variables.
If you are wondering what the WHATS_THIS variable does, that is just a placeholder, because your data do not fit the specification. Title should be the first variable in the address line, but there is some undefined number first.
No need to change the input buffer. You can use a variable name for the DLM= option on the INFILE statement.
dlm=' ';
infile cards dsd dlm=dlm truncover;
input linetype :$30. @;
dlm='|';
Question:
Your data has a record type SOC_YHC, but your template does not described that type. Should such types be ignored?
Are any valid record types repeated between header and footer? If so, please provide an example of the wanted output. For example payment line is repeated in your data.
SOC_YHC needs to be captured as well .
It would a a static value : example
Column SOC_YHC and all the values under it would be the same , and the corresponding columns will have the values .
Example:
SOC_YHC | MEMBERSHIP NUMBER | GIVEN |
SOC_YHC | 12345 | Name1 |
SOC_YHC | 12346 | Name12 |
SOC_YHC | 12347 | Name13 |
SOC_YHC | 12348 | Name14 |
SOC_YHC | 12349 | Name15 |
SOC_YHC | 12350 | Name16 |
I take it that you want one dataset per record type, in which the MEMBERSHIP_PERSON vars are carried into each of the datasets, along with the vars specific to the record type. Here's and untested and unfinished sample:
Edit: @s_lassen made a good point on the risks of using of dlm='#|', (in case there's a # in the data fields), so I'd modify my code
from
infile dlm='#|' truncover ;
input rectype @;
to
chr='#';
infile xxx dlm=chr dsd truncover;
input @'#' rectype @;
chr='|';
The reason for the @'#' is the addition of the DSD in the infile statement, which makes the leading # a boundary between the first (null) field and the next field. But you need DSD to accommodate null fields in the rest of the record.
data address (keep=mem_: add_:)
payment (keep=mem_: MEM_:)
mls (keep=mem_: mls_:)
socyhc (keep=mem_: socyhc_:)
socmem (keep=mem_: socmem_:)
sis (keep=mem_: sis_:)
;
informat rectype $20. ;
/* FILE_HEADER vars */
informat fh_taxgenmode $8. fh_procstartdatetime 15.;
/* Informat for all the MEMBERSHIP_PERSON vars, in order */
informat
MEM_stmtid 15. MEM_NUMBER $15. MEM_FUNDNO 4. MEM_HEALTHFUNDID $5.
MEM_STATUS $2. MEM_stmtCOMPLEXITY $12. MEM_REBATE_STATUS $6. MEM_REBATE_TE_DATE date11.
MEM_REBATE TIER $15. MEM_DATE_PAIDTO date11. MEM_PAYMENT_METHOD $10. MEM_ORGX_TYPE $6.
MEM_ORGX_GROUP_DPT $6. MEM_REBATETYPE $5. MEM_PERSON_CATEGORY $15. MEM_PERSON_STATUS $15.
MEM_PERSON_SUFFIX_NUM 4. MEM_MULTIPLE_ADULTS $2. MEM_stmt_TYPE $12. MEM_stmt_year 4.
MEM_STATEMEM_T_FREQ $15. MEM_stmt_GENDATE date11.;
INFORMAT /*For the vars in the ADDRESS_LINE rectype*/
add_TITLE ?? add_GIVENNAME ?? add_INITIAL ?? add_SURNAME ?? add_PREFERRED_NAME ??
add_GENDER ?? add_DOB ?? add_MAIL_CONTACT_ID ?? add_MAIL_CONTACT_TYPE ?? add_MAIL_CONTACT ??
add_ADDRESSID ?? add_ADDRESSEE ?? add_ADDRESS_LINE1 ?? add_ADDRESS_LINE2 ?? add_ADDRESS_LINE3 ??
add_CITYTOWN ?? add_POSTCODE ?? add_STAT_PROV_REG ?? add_COUNTRY ??
add_ADDRESS_UNKNOWN ?? add_ADDRESS_INVALID ??
add_ONLINE_CONTACTID ?? add_ONLINE_CONTACTTYPE ?? add_ONLINE_CONTACT ?? add_ONLINE_CONTACT_UNK ?? add_ONLINE_CONTACT_INVALID ??
add_COURTESY_CONTACTID ?? add_COURTESY_CONTACTTYPE ?? add_COURTESY_CONTACT ?? add_COURTESY_CONTACT_UNK ?? add_COURTESY_CONTACT_INVALID ??
add_COMBINED_AUTHORITY ?? add_COMPANY_REGISTRATION ??
add_COMMS_CATEGORY ?? add_COMMS_CATEGORY_ENTITY ??add_MEMBERSHIP_STATUS ??
add_STATUS ??;
informat PAY_PREMIUMSPAID 8. PAY_REBATE_RECEIVED 8. PAY_REBATE_CODE 8. PAY_OTHER_BENEFICIARY $16.;
informat MLS_TOTAL_HOSPDAYS 3. MLS_TOTAL_HOLIDAYS3. ;
informat socyhc_... ... ;
informat socmem_... ... ;
informat SIS_FLAG $1.;
informat FF_FILENAME $40. FF_TOTALRECORDSCOUNT 8. FF_procstartdatetime 15.;
retain MEM_: ;
infile dlm='#|' truncover ;
input rectype @;
select (rectype);
when ('FILE_HEADER') input fh_taxgenmode -- fh_procstartdatetime;
when ('MEMBERSHIP_PERSON') input MEM_stmtid--MEM_stmt_gendate;
when ('ADDRESS_LINE') do; input add_title--add_status ; output address; end;
when ('PAYMENT_LINE') do; input PAY_premiumspaid--pay_other_beneficiary ; output payment; end;
when ('MLS') do; input MLS_total_hospdays--mls_total_holidays ; output mls; end;
when ('SOC_YHC') do; input .......; output socyhc; end;
when ('SOC_MEMBERSHIP') do; input .......;
when ('SIS') do; input sis_flag; output sis;end;
where ('FILE_FOOTER') input FF_FILENAME--FF_procstartdatetime ;
end;
run;
The crucial element is the construction of the informat statements. All varnames for a given INFORMAT statement start with the same characters, and the vars are listed in physical order.
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.