The SAS Output Delivery System and reporting techniques

Reading a RTF table into SAS data set without capturing footnotes in last OBS line

Reply
Super Contributor
Posts: 263

Reading a RTF table into SAS data set without capturing footnotes in last OBS line

Dear,

 

My RTF table look like this: I am using the the follwing Macro to convert the table into sas dataset. The ouput sas dataset contains the footnote in the last OBS. Please help where I need to modify in the code to remove the footnote in the last OBS of my output .

 

Table Header

____________________________________________________________________

                                                                              one                               two

                                                                        _______                  ______________

NS                                                                    0          1                         2              3

                                                                        n=1       n=2                    n=3            n=4

_____________________________________________________________________

ANY                                                                   1          2                          3                4

   Rel                                                                  0           2                         2                2

   NotR                                                                1           0                         1                2

____________________________________________________________________

note1;

note2:

source:

Programname

 

 

 

code:

 

%macro extract(loc=%str(),file=,subheader_num= 1,out=one);

%local opts;
%let opts=%sysfunc(getoption(ls,keyword) );

options linesize=200;
%let rtffile=%lowcase(%sysfunc(tranwrd(%upcase(&rtffile), %str(.RTF), %str())));
%local _i _params _param _param_exist_err;
%let _params=.RTFLOC.RTFFILE.DOUT.SUBHEADER_NUM;
%let _i=1;

%do %while(%scan(&_params,&_i,.)^=%str()) ;
%let _param=%scan(&_params,&_i,.);
%if %quote(&&&_param) = %str() %then %do ;
%put ERROR: (macro &SYSMACRONAME): Macro parameter %upcase(&_param) is required. Please specify it.;
%let _param_exist_err=%eval(&_param_exist_err+1);

%end;
%let _i=%eval(&_i+1) ;

%end;

%if &_param_exist_err>=1 %then %do;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %datatyp(&SUBHEADER_NUM) ne NUMERIC %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM should be a number, please correct it.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%else %if &SUBHEADER_NUM<0 %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM should be a positive number, please correct it.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%else %if &SUBHEADER_NUM=0 %then %do;
%let SUBHEADER_NUM=1;
%put USER MESSAGE: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM cannot be 0. In this case, macro assigned SUBHEADER_NUM=1.;
%end;


%if %sysfunc(fileexist(&rtfloc))=0 %then %do ;
%put ERROR: (macro &SYSMACRONAME): path/directory specified by macro parameter RTFLOC does not exist;
%put ERROR- (&rtfloc);
%put ERROR- Please check this path/directory.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %sysfunc(fileexist(&rtfloc\&rtffile..rtf))=0 %then %do ;
%put ERROR: (macro &SYSMACRONAME): File &rtffile..rtf (specified by macro parameter RTFFILE);
%put ERROR- does not exist in directory RTFLOC (&rtfloc).;
%put ERROR- Please check this RTF name and specify proper RTFFILE.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %length(&dout)>32 %then %do;
%put ERROR: (macro &SYSMACRONAME): Length of macro parameter DOUT (i.e. SAS dataset name) is longer 32 char-s.;
%put ERROR- (&dout);
%put ERROR- SAS datasets name cannot exceed 32 char-s. Please shorten macro parameter DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

 

%local letter1;
%let letter1= %substr(&dout, 1, 1);

%if %sysfunc(notalpha(&letter1))>0 and &letter1 ne _ %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should ;
%put ERROR- start with an English letter (A, B, C, . . ., Z) or underscore (_).;
%put ERROR- Your macro parameter DOUT (&dout) starts with &letter1..;
%put ERROR- Please make first letter either a letter or an underscore.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%if %index(&dout, %str( )) %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should not have blanks;
%put ERROR- (according to SAS rules about naming SAS datasets).;
%put ERROR- Please remove all blanks from DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%local other_lettes;
%let other_letters=%substr(&dout, 2, %length(&dout)-1);

%local remove_alphanum_dout;
%let remove_alphanum_dout=%sysfunc(compress(&dout, , %str(ad)));

%let remove_alphanum_dout=%sysfunc(tranwrd(&remove_alphanum_dout, %str(_), %str()));

%if %length(&remove_alphanum_dout)>0 %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should not should not contain any special char-s.;
%put ERROR- (according to SAS rules about naming SAS datasets).;
%put ERROR- Your macro parameter DOUT has the following special char-s: &remove_alphanum_dout..;
%put ERROR- Please remove all special char-s from DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

proc datasets library=work nolist nowarn;
delete &dout ;
quit;


%let tmpfile=%sysfunc(pathname(work))\&rtffile..rtf;
options noxwait;
x copy "&rtfloc\&RTFFile..rtf" "&tmpfile";

 

data __readrtf0;
infile "&tmpfile." missover length = l end = lastobs lrecl = 2000;
input string $varying2000. l;

if not missing(string) then do;
if index(string, '{\header\') then flag1=1;
if index(string, '{\footer\') then flag2=1;
if index(string, '\trowd') then flag3=1;
if index(string, '\row') then flag4=1;
if length(string)>=3 and substr(strip(string), 1, 3)='\cl' then flag5=1;
end;
run;

%local max1 max2 max3 max4 max5;
proc sql noprint;
create table __check_keywords as
select max(flag1) as flag1,
max(flag2) as flag2,
max(flag3) as flag3,
max(flag4) as flag4,
max(flag5) as flag5
from __readrtf0
;
quit;

%local keywords;
%let keywords=0;
proc sql noprint;
select count(*) into : keywords
from __check_keywords
where flag1=1 and flag2=1 and flag3=1 and flag4=1 and flag5=1
;
quit;
%let keywords=&keywords;

%if &keywords=0 %then %do;
%put ERROR: (macro &SYSMACRONAME): This macro works only on RTF documents created by SAS.;
%put ERROR- It looks like either of the following happened:;
%put ERROR- ;
%put ERROR- (1) This RTF was not created by SAS;
%put ERROR- (2) This RTF *was* created by SAS, but someone *edited* it after it was generated by SAS.;
%put ERROR- (3) A Word document was created by SAS and the user converted it to RTF.;
%put ERROR- ;
%put ERROR- In either of these cases, macro will not work on this RTF doc and will exit now.;
%put ERROR- ;
%put ERROR- FYI: here is an explanation:;
%put ERROR- When RTF doc is created by SAS, a certain RTF code is generated, and this macro works with this code.;
%put ERROR- For example, this code will contain such key words as \trowd, \row,{\header\, {\footer\, \cl.;
%put ERROR- However, if RTF doc is *not* created by SAS, the keywords mentioned above won_t be created, and therefore this macro won_t work.;
%goto exit;

%end;

data __readrtf1;
infile "&tmpfile." missover length = l end = lastobs lrecl = 2000;
input string $varying2000. l;
rownum = _n_;



retain c1-c99
dropme indent;

length c1-c99 $1000;
if _n_ = 1 then dropme = 0.5;


array c{99} $;

if index(string, '\trowd') then do;
count = 0;
indent = 0;
do i=1 to dim(c);
c{i} = '';
end;
end;

 

{ ...{\line} (Scenario 2)
RTF control word which signals that a row was split into 2+ lines
This will be line #1.
example: ... \cf1{P: Abnormal dreams/{\line}
Note: cfN=background color

{\line} (Scenario 3)
Note that there is no left curly bracket { !
This happens when a line is split into 2+ lines:
This will be line #2, 3, etc., but NOT the last line.
example: S: Psychiatric disorders/{\line}

... \cell} (Scenario 4)
Note that there is no left curly bracket { !
This happens when a line is split into 2+ lines.
This will be the last line.
example: V: VIVID DREAMING\cell}
;

if ( index(string, '{') and index(string, '\cell'))

or ( count(string,"{")>=2 and index(string, '{\line}' ))

or ( count(string,"{")=1 and index(string, '{\line}'))
or ( count(string,"{")=0 and index(string, '\cell}'))
then do;





first_bracket=index(string, '{');


if ( index(string, '{') and index(string, '\cell'))
or ( count(string,"{")>=2 and index(string, '{\line}' ))
then count + 1;



if (index(string, '{') and index(string, '\cell')) then do;
prep = substr(string, 1, index(string, '\cell')-1);


if first_bracket ne 0 then prep= substr(prep, first_bracket+1);
end;


else if count(string,"{")>=2 and index(string, '{\line}') then do;
%*** extract part of PREP which comes after the first curved/angled bracket {;
prep= substr(string, first_bracket+1);
%*** replace '{\line}' with blanks;
prep=tranwrd(prep, '{\line}', '');
end;

%*** (Scenario 1) \cell signals the end of text printed in the cell
(Scenario 2) a row was split into 2+ lines, and this will be line #1.;
if ( index(string, '{') and index(string, '\cell'))
or ( count(string,"{")>=2 and index(string, '{\line}' ))
then do;
c{count} = compress(prep, byte(13));
end;

%*** (Scenario 3) When a line is split into several lines:
This will be line #2, 3, etc., but NOT the last line: end with {\line};
else if ( count(string,"{")=1 and index(string, '{\line}')) then do;
%*** Do:
1. take c{count} from Scenario 2
2. extract part of STRING which comes before {\line}
3. take 1 + 2 and separate them by blanks;
c{count}=catx(' ', c{count}, substr(string, 1, index(string, '{\line}')-1) );
end;


%*** (Scenario 4) When a line is split into several lines:
This will be the last line: end with \cell};
else if ( count(string,"{")=0 and index(string, '\cell}')) then do;
%*** Do:
1. take c{count} from Scenario 3
2. extract part of STRING which comes before \cell}
3. take 1 + 2 and separate them by blanks;
c{count}=catx(' ', c{count}, substr(string, 1, index(string, '\cell}')-1) );
end;



if index(string, '{ ') then do;
indent_start=index(string, '{ ')+1;
%** if first symbols after '{ ' are \line then assign indent=0;
if length(string)>=5 and substr( strip(substr(string, indent_start)), 1,5)= '\line' then indent=0 ;
else do;
%*** VERIFY() function: find location of the first char which is not a space (' ');
other_char_start=verify( substr(string, indent_start), ' ')+indent_start;
indent=other_char_start-indent_start;
end;
end;


else do;
%*** if there are any digits/numbers after \li then assign sst;
sst = substr(string, index(string, '{\li') + 4);
%*** VERIFY() function: find location of the first char which is not a number;
if verify(sst, '-0123456789') > 1 then indent=input(substr(sst, 1, verify(sst, '-0123456789') - 1), best.);
end;
end;

%*** if indent> 0 and string has \li240, etc., then then replace it with '';
if indent>0 and not missing(sst) then do;
c{count} = strip(tranwrd(c{count}, '\li'||strip(put(indent, best.)), ''));
end;


if index(c{count}, '\line') then do;
if substr(strip(c{count}), 1, 5)='\line' then c{count}=strip(tranwrd(c{count}, '\line', ''));
else do;
c{count}=tranwrd(c{count}, '{\line}', '');
c{count}=tranwrd(c{count}, '\line', '');
end;
c{count}=compbl(c{count});
end;

c{count}=strip(c{count}); %*** get read of leading and trailing blanks;

end; %*** end of: if index(string, '{') and index(string, '\cell') then do;

 

if dropme =999 then dropme = 0; %*** This signals the beginning of table body (i.e. note titles, columns headers or footnotes),
and only rows with DROPME=0 will be kept.;


%* (1) Find where you encounter a footer (i.e. find '{\footer\' in STRING), set DROPME=1 to signal we_re in the title area.;
if substr(string, 1, 9)='{\header\' then dropme = 0.6;
else if index(string, '}}') and dropme=0.6 then dropme = 0.7;
else if substr(string, 1, 9)='{\footer\' and dropme=0.7 then dropme = 1;


%**********************************************************************************;
%*** Repeat steps below for each row in column headers (i.e. up to &subheader_num);
%**********************************************************************************;

%local s;
%do s=1 %to &subheader_num;

%*put ===> s=&s;
if index(string, '\trowd' )
and ( (dropme =1 and index(string, '}}' )) /* For the first iteration, DROPME=1.
Find next row where footer ends (i.e. find '}}' and '\trowd' in STRING) */
or
dropme=4+%sysevalf(&s*10-10) /* If column headers spread over several rows (i.e. if &subheader_num>1) */
)
then dropme = 2+%sysevalf(&s*10); %*** DROPME will be 12, 22, 32, etc., depending on &subheader_num;

%*** Find next row where column headers begin (i.e. find '\cl' in STRING) and assign DROPME=13, 23, 33, etc., depending on &subheader_num;
else if length(string)>=3 and substr(strip(string), 1, 3)='\cl' and dropme = 2+%sysevalf(&s*10) then dropme = 3+%sysevalf(&s*10); %** \clbrdrb, \cltxlrtb, etc.;

%*** If there are no more column headers in the next row (i.e. if &s=&subheader_num ), then assign DROPME=999;
%if &s=&subheader_num %then %do;
else if index(string, '\row' ) and dropme = 3+%sysevalf(&s*10) then dropme = 999;
%end;

%*** If there are more column headers, spreaded over next row, then assign DROPME=14, 24, 34, etc., depending on &subheader_num;
%else %do;
else if index(string, '\row' ) and dropme = 3+%sysevalf(&s*10) then dropme =4+%sysevalf(&s*10);
%end;

%end;



if not dropme and index(string, '\row') then do;
allblank = 1;
do i=1 to dim(c);
if compress(c{i}, ' \') ne '' then allblank = 0;
end;
/* %put ERROR: unquote;*/
if allblank=0 then output;
end;

%jump:

run;

/*%goto exit;*/

%*** Delete RTF file which you copied to temporary location;
x del "&tmpfile";



proc transpose data=__readrtf1(drop=count indent_start other_char_start) out=__chk;
var c:;
by rownum;
run;

%local dropper;
%let dropper=;
proc sql noprint;
select distinct _name_ into: dropper
separated by ' '
from __chk
where _name_ not in (select _name_ from __chk where col1 ne '')
;

%local numcols;
%let numcols=0;
proc sql noprint;
select distinct count(distinct _name_) into: numcols
from __chk
where col1 ne ''
;
quit;
%let numcols=&numcols; %*** to get rid of leading blanks;

%if &numcols>1 %then %do;
data __readrtf1;
set __readrtf1;
if missing(c1) then indent=1;
run;
%end;


proc sort data=__readrtf1(drop=count &dropper indent_start other_char_start string dropme prep first_bracket sst i allblank) out=__readrtf2;
by indent ;
run;

data __readrtf4 /*(index=(rownum))*/;
set __readrtf2;
by indent;
if first.indent then order2 + 1;
run;

%*** create order1, etc. from INDENT;
proc sql noprint;
create table __ind1 as
select indent, rownum
from __readrtf4
order by indent, rownum
;
quit;

data __ind2;
set __ind1;
by indent rownum;
retain order;
if first.indent then order+1;
run;

proc sort data=__ind2;
by rownum order;
run;

data __ind3;
set __ind2;
by rownum order;
retain order1;
if _n_=1 then order1=1;
else if order=1 then order1+1;
run;

proc sort data=__ind3;
by order1 indent rownum order;
run;


data __ind4;
set __ind3 (rename=(order=order_old));
by order1 indent rownum order_old;
retain order;
if first.order1 then order=0;
if first.indent then order+1;
drop order_old;
run;

%*** Assign variable level_order1, which is number of unique ORDER within each ORDER1;
proc sql noprint;
create table __ind5 as
select *, max(order) as level_order1
from __ind4
group by order1
order by order1, rownum, order
;
quit;

%*** how many are there distinct levels of ORDER? Create order1, order2, etc. based on this number of levels;
%local levels;
proc sql noprint;
select max(order) into : levels
from __ind5
;
quit;
%let levels=&levels; %** to get rid of leading blanks;
%*put levels=&levels;

%*** if there more than 1 levels, do this;
%if &levels>1 %then %do;

data __ind6;
set __ind5;
by order1 rownum order;
lag_order=lag(order);
lag_order1=lag(order1);

retain order2-order&levels;

array ord(*) order2- order&levels;
array lag_ord(*) lag_order2- lag_order&levels;

do i=1 to dim(ord);

if first.order1 then ord(i)=0;
if i ne 1 then do;
lag_ord(i-1)=lag(ord(i-1));
if lag_ord(i-1) ne ord(i-1) then ord(i)=0;
end;
if order=i+1 then ord(i)=ord(i)+1;
if level_order1=i+1 and lag_order > order then ord(i)=0; %*** if level_order1=i+1 means: if there are no more levels after this ord(i);
end;
drop i;

drop lag_:;
run;

proc sort data=__readrtf4;
by rownum;
run;

proc sort data=__ind6;
by rownum;
run;

data __readrtf5;
merge __readrtf4 (drop=order2 )
__ind6 (drop=order indent level_order1);
by rownum;
run;

%end; %** end of: &levels>1;

%else %do;
data __readrtf5;
set __readrtf4 (drop=order2 );
order1=_n_;
run;
%end;


%*** reassign rownum;
data &dout;
set __readrtf5 (drop=rownum);
rownum=_n_;
if indent ne 0 then indented='Y';
drop indent;
run;

%exit:

options &opts; %*** whatever system options you changes - restore them to what they used to be before running this macro;

proc datasets library=work nolist nowarn;
delete __readrtf: __chk __ind: __check_keywords;
quit;

%mend extract;

SAS Super FREQ
Posts: 8,641

Re: Reading a RTF table into SAS data set without capturing footnotes in last OBS line

Hi:
I would really recommend that you ask the author of this macro program for help in modifying the program.

cynthia
Post a Question
Discussion Stats
  • 1 reply
  • 161 views
  • 0 likes
  • 2 in conversation