BookmarkSubscribeRSS Feed
knveraraju91
Barite | Level 11

Dear,

 

My RTF table look like this: I am using the the follwing Macro to convert the table into sas dataset. The ouput sas dataset contains the footnote in the last OBS. Please help where I need to modify in the code to remove the footnote in the last OBS of my output .

 

Table Header

____________________________________________________________________

                                                                              one                               two

                                                                        _______                  ______________

NS                                                                    0          1                         2              3

                                                                        n=1       n=2                    n=3            n=4

_____________________________________________________________________

ANY                                                                   1          2                          3                4

   Rel                                                                  0           2                         2                2

   NotR                                                                1           0                         1                2

____________________________________________________________________

note1;

note2:

source:

Programname

 

 

 

code:

 

%macro extract(loc=%str(),file=,subheader_num= 1,out=one);

%local opts;
%let opts=%sysfunc(getoption(ls,keyword) );

options linesize=200;
%let rtffile=%lowcase(%sysfunc(tranwrd(%upcase(&rtffile), %str(.RTF), %str())));
%local _i _params _param _param_exist_err;
%let _params=.RTFLOC.RTFFILE.DOUT.SUBHEADER_NUM;
%let _i=1;

%do %while(%scan(&_params,&_i,.)^=%str()) ;
%let _param=%scan(&_params,&_i,.);
%if %quote(&&&_param) = %str() %then %do ;
%put ERROR: (macro &SYSMACRONAME): Macro parameter %upcase(&_param) is required. Please specify it.;
%let _param_exist_err=%eval(&_param_exist_err+1);

%end;
%let _i=%eval(&_i+1) ;

%end;

%if &_param_exist_err>=1 %then %do;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %datatyp(&SUBHEADER_NUM) ne NUMERIC %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM should be a number, please correct it.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%else %if &SUBHEADER_NUM<0 %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM should be a positive number, please correct it.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%else %if &SUBHEADER_NUM=0 %then %do;
%let SUBHEADER_NUM=1;
%put USER MESSAGE: (macro &SYSMACRONAME): Macro parameter SUBHEADER_NUM cannot be 0. In this case, macro assigned SUBHEADER_NUM=1.;
%end;


%if %sysfunc(fileexist(&rtfloc))=0 %then %do ;
%put ERROR: (macro &SYSMACRONAME): path/directory specified by macro parameter RTFLOC does not exist;
%put ERROR- (&rtfloc);
%put ERROR- Please check this path/directory.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %sysfunc(fileexist(&rtfloc\&rtffile..rtf))=0 %then %do ;
%put ERROR: (macro &SYSMACRONAME): File &rtffile..rtf (specified by macro parameter RTFFILE);
%put ERROR- does not exist in directory RTFLOC (&rtfloc).;
%put ERROR- Please check this RTF name and specify proper RTFFILE.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;


%if %length(&dout)>32 %then %do;
%put ERROR: (macro &SYSMACRONAME): Length of macro parameter DOUT (i.e. SAS dataset name) is longer 32 char-s.;
%put ERROR- (&dout);
%put ERROR- SAS datasets name cannot exceed 32 char-s. Please shorten macro parameter DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

 

%local letter1;
%let letter1= %substr(&dout, 1, 1);

%if %sysfunc(notalpha(&letter1))>0 and &letter1 ne _ %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should ;
%put ERROR- start with an English letter (A, B, C, . . ., Z) or underscore (_).;
%put ERROR- Your macro parameter DOUT (&dout) starts with &letter1..;
%put ERROR- Please make first letter either a letter or an underscore.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%if %index(&dout, %str( )) %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should not have blanks;
%put ERROR- (according to SAS rules about naming SAS datasets).;
%put ERROR- Please remove all blanks from DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

%local other_lettes;
%let other_letters=%substr(&dout, 2, %length(&dout)-1);

%local remove_alphanum_dout;
%let remove_alphanum_dout=%sysfunc(compress(&dout, , %str(ad)));

%let remove_alphanum_dout=%sysfunc(tranwrd(&remove_alphanum_dout, %str(_), %str()));

%if %length(&remove_alphanum_dout)>0 %then %do;
%put ERROR: (macro &SYSMACRONAME): Macro parameter DOUT (i.e. SAS dataset name) should not should not contain any special char-s.;
%put ERROR- (according to SAS rules about naming SAS datasets).;
%put ERROR- Your macro parameter DOUT has the following special char-s: &remove_alphanum_dout..;
%put ERROR- Please remove all special char-s from DOUT.;
%put ERROR- Macro will exit now.;
%goto exit;
%end;

proc datasets library=work nolist nowarn;
delete &dout ;
quit;


%let tmpfile=%sysfunc(pathname(work))\&rtffile..rtf;
options noxwait;
x copy "&rtfloc\&RTFFile..rtf" "&tmpfile";

 

data __readrtf0;
infile "&tmpfile." missover length = l end = lastobs lrecl = 2000;
input string $varying2000. l;

if not missing(string) then do;
if index(string, '{\header\') then flag1=1;
if index(string, '{\footer\') then flag2=1;
if index(string, '\trowd') then flag3=1;
if index(string, '\row') then flag4=1;
if length(string)>=3 and substr(strip(string), 1, 3)='\cl' then flag5=1;
end;
run;

%local max1 max2 max3 max4 max5;
proc sql noprint;
create table __check_keywords as
select max(flag1) as flag1,
max(flag2) as flag2,
max(flag3) as flag3,
max(flag4) as flag4,
max(flag5) as flag5
from __readrtf0
;
quit;

%local keywords;
%let keywords=0;
proc sql noprint;
select count(*) into : keywords
from __check_keywords
where flag1=1 and flag2=1 and flag3=1 and flag4=1 and flag5=1
;
quit;
%let keywords=&keywords;

%if &keywords=0 %then %do;
%put ERROR: (macro &SYSMACRONAME): This macro works only on RTF documents created by SAS.;
%put ERROR- It looks like either of the following happened:;
%put ERROR- ;
%put ERROR- (1) This RTF was not created by SAS;
%put ERROR- (2) This RTF *was* created by SAS, but someone *edited* it after it was generated by SAS.;
%put ERROR- (3) A Word document was created by SAS and the user converted it to RTF.;
%put ERROR- ;
%put ERROR- In either of these cases, macro will not work on this RTF doc and will exit now.;
%put ERROR- ;
%put ERROR- FYI: here is an explanation:;
%put ERROR- When RTF doc is created by SAS, a certain RTF code is generated, and this macro works with this code.;
%put ERROR- For example, this code will contain such key words as \trowd, \row,{\header\, {\footer\, \cl.;
%put ERROR- However, if RTF doc is *not* created by SAS, the keywords mentioned above won_t be created, and therefore this macro won_t work.;
%goto exit;

%end;

data __readrtf1;
infile "&tmpfile." missover length = l end = lastobs lrecl = 2000;
input string $varying2000. l;
rownum = _n_;



retain c1-c99
dropme indent;

length c1-c99 $1000;
if _n_ = 1 then dropme = 0.5;


array c{99} $;

if index(string, '\trowd') then do;
count = 0;
indent = 0;
do i=1 to dim(c);
c{i} = '';
end;
end;

 

{ ...{\line} (Scenario 2)
RTF control word which signals that a row was split into 2+ lines
This will be line #1.
example: ... \cf1{P: Abnormal dreams/{\line}
Note: cfN=background color

{\line} (Scenario 3)
Note that there is no left curly bracket { !
This happens when a line is split into 2+ lines:
This will be line #2, 3, etc., but NOT the last line.
example: S: Psychiatric disorders/{\line}

... \cell} (Scenario 4)
Note that there is no left curly bracket { !
This happens when a line is split into 2+ lines.
This will be the last line.
example: V: VIVID DREAMING\cell}
;

if ( index(string, '{') and index(string, '\cell'))

or ( count(string,"{")>=2 and index(string, '{\line}' ))

or ( count(string,"{")=1 and index(string, '{\line}'))
or ( count(string,"{")=0 and index(string, '\cell}'))
then do;





first_bracket=index(string, '{');


if ( index(string, '{') and index(string, '\cell'))
or ( count(string,"{")>=2 and index(string, '{\line}' ))
then count + 1;



if (index(string, '{') and index(string, '\cell')) then do;
prep = substr(string, 1, index(string, '\cell')-1);


if first_bracket ne 0 then prep= substr(prep, first_bracket+1);
end;


else if count(string,"{")>=2 and index(string, '{\line}') then do;
%*** extract part of PREP which comes after the first curved/angled bracket {;
prep= substr(string, first_bracket+1);
%*** replace '{\line}' with blanks;
prep=tranwrd(prep, '{\line}', '');
end;

%*** (Scenario 1) \cell signals the end of text printed in the cell
(Scenario 2) a row was split into 2+ lines, and this will be line #1.;
if ( index(string, '{') and index(string, '\cell'))
or ( count(string,"{")>=2 and index(string, '{\line}' ))
then do;
c{count} = compress(prep, byte(13));
end;

%*** (Scenario 3) When a line is split into several lines:
This will be line #2, 3, etc., but NOT the last line: end with {\line};
else if ( count(string,"{")=1 and index(string, '{\line}')) then do;
%*** Do:
1. take c{count} from Scenario 2
2. extract part of STRING which comes before {\line}
3. take 1 + 2 and separate them by blanks;
c{count}=catx(' ', c{count}, substr(string, 1, index(string, '{\line}')-1) );
end;


%*** (Scenario 4) When a line is split into several lines:
This will be the last line: end with \cell};
else if ( count(string,"{")=0 and index(string, '\cell}')) then do;
%*** Do:
1. take c{count} from Scenario 3
2. extract part of STRING which comes before \cell}
3. take 1 + 2 and separate them by blanks;
c{count}=catx(' ', c{count}, substr(string, 1, index(string, '\cell}')-1) );
end;



if index(string, '{ ') then do;
indent_start=index(string, '{ ')+1;
%** if first symbols after '{ ' are \line then assign indent=0;
if length(string)>=5 and substr( strip(substr(string, indent_start)), 1,5)= '\line' then indent=0 ;
else do;
%*** VERIFY() function: find location of the first char which is not a space (' ');
other_char_start=verify( substr(string, indent_start), ' ')+indent_start;
indent=other_char_start-indent_start;
end;
end;


else do;
%*** if there are any digits/numbers after \li then assign sst;
sst = substr(string, index(string, '{\li') + 4);
%*** VERIFY() function: find location of the first char which is not a number;
if verify(sst, '-0123456789') > 1 then indent=input(substr(sst, 1, verify(sst, '-0123456789') - 1), best.);
end;
end;

%*** if indent> 0 and string has \li240, etc., then then replace it with '';
if indent>0 and not missing(sst) then do;
c{count} = strip(tranwrd(c{count}, '\li'||strip(put(indent, best.)), ''));
end;


if index(c{count}, '\line') then do;
if substr(strip(c{count}), 1, 5)='\line' then c{count}=strip(tranwrd(c{count}, '\line', ''));
else do;
c{count}=tranwrd(c{count}, '{\line}', '');
c{count}=tranwrd(c{count}, '\line', '');
end;
c{count}=compbl(c{count});
end;

c{count}=strip(c{count}); %*** get read of leading and trailing blanks;

end; %*** end of: if index(string, '{') and index(string, '\cell') then do;

 

if dropme =999 then dropme = 0; %*** This signals the beginning of table body (i.e. note titles, columns headers or footnotes),
and only rows with DROPME=0 will be kept.;


%* (1) Find where you encounter a footer (i.e. find '{\footer\' in STRING), set DROPME=1 to signal we_re in the title area.;
if substr(string, 1, 9)='{\header\' then dropme = 0.6;
else if index(string, '}}') and dropme=0.6 then dropme = 0.7;
else if substr(string, 1, 9)='{\footer\' and dropme=0.7 then dropme = 1;


%**********************************************************************************;
%*** Repeat steps below for each row in column headers (i.e. up to &subheader_num);
%**********************************************************************************;

%local s;
%do s=1 %to &subheader_num;

%*put ===> s=&s;
if index(string, '\trowd' )
and ( (dropme =1 and index(string, '}}' )) /* For the first iteration, DROPME=1.
Find next row where footer ends (i.e. find '}}' and '\trowd' in STRING) */
or
dropme=4+%sysevalf(&s*10-10) /* If column headers spread over several rows (i.e. if &subheader_num>1) */
)
then dropme = 2+%sysevalf(&s*10); %*** DROPME will be 12, 22, 32, etc., depending on &subheader_num;

%*** Find next row where column headers begin (i.e. find '\cl' in STRING) and assign DROPME=13, 23, 33, etc., depending on &subheader_num;
else if length(string)>=3 and substr(strip(string), 1, 3)='\cl' and dropme = 2+%sysevalf(&s*10) then dropme = 3+%sysevalf(&s*10); %** \clbrdrb, \cltxlrtb, etc.;

%*** If there are no more column headers in the next row (i.e. if &s=&subheader_num ), then assign DROPME=999;
%if &s=&subheader_num %then %do;
else if index(string, '\row' ) and dropme = 3+%sysevalf(&s*10) then dropme = 999;
%end;

%*** If there are more column headers, spreaded over next row, then assign DROPME=14, 24, 34, etc., depending on &subheader_num;
%else %do;
else if index(string, '\row' ) and dropme = 3+%sysevalf(&s*10) then dropme =4+%sysevalf(&s*10);
%end;

%end;



if not dropme and index(string, '\row') then do;
allblank = 1;
do i=1 to dim(c);
if compress(c{i}, ' \') ne '' then allblank = 0;
end;
/* %put ERROR: unquote;*/
if allblank=0 then output;
end;

%jump:

run;

/*%goto exit;*/

%*** Delete RTF file which you copied to temporary location;
x del "&tmpfile";



proc transpose data=__readrtf1(drop=count indent_start other_char_start) out=__chk;
var c:;
by rownum;
run;

%local dropper;
%let dropper=;
proc sql noprint;
select distinct _name_ into: dropper
separated by ' '
from __chk
where _name_ not in (select _name_ from __chk where col1 ne '')
;

%local numcols;
%let numcols=0;
proc sql noprint;
select distinct count(distinct _name_) into: numcols
from __chk
where col1 ne ''
;
quit;
%let numcols=&numcols; %*** to get rid of leading blanks;

%if &numcols>1 %then %do;
data __readrtf1;
set __readrtf1;
if missing(c1) then indent=1;
run;
%end;


proc sort data=__readrtf1(drop=count &dropper indent_start other_char_start string dropme prep first_bracket sst i allblank) out=__readrtf2;
by indent ;
run;

data __readrtf4 /*(index=(rownum))*/;
set __readrtf2;
by indent;
if first.indent then order2 + 1;
run;

%*** create order1, etc. from INDENT;
proc sql noprint;
create table __ind1 as
select indent, rownum
from __readrtf4
order by indent, rownum
;
quit;

data __ind2;
set __ind1;
by indent rownum;
retain order;
if first.indent then order+1;
run;

proc sort data=__ind2;
by rownum order;
run;

data __ind3;
set __ind2;
by rownum order;
retain order1;
if _n_=1 then order1=1;
else if order=1 then order1+1;
run;

proc sort data=__ind3;
by order1 indent rownum order;
run;


data __ind4;
set __ind3 (rename=(order=order_old));
by order1 indent rownum order_old;
retain order;
if first.order1 then order=0;
if first.indent then order+1;
drop order_old;
run;

%*** Assign variable level_order1, which is number of unique ORDER within each ORDER1;
proc sql noprint;
create table __ind5 as
select *, max(order) as level_order1
from __ind4
group by order1
order by order1, rownum, order
;
quit;

%*** how many are there distinct levels of ORDER? Create order1, order2, etc. based on this number of levels;
%local levels;
proc sql noprint;
select max(order) into : levels
from __ind5
;
quit;
%let levels=&levels; %** to get rid of leading blanks;
%*put levels=&levels;

%*** if there more than 1 levels, do this;
%if &levels>1 %then %do;

data __ind6;
set __ind5;
by order1 rownum order;
lag_order=lag(order);
lag_order1=lag(order1);

retain order2-order&levels;

array ord(*) order2- order&levels;
array lag_ord(*) lag_order2- lag_order&levels;

do i=1 to dim(ord);

if first.order1 then ord(i)=0;
if i ne 1 then do;
lag_ord(i-1)=lag(ord(i-1));
if lag_ord(i-1) ne ord(i-1) then ord(i)=0;
end;
if order=i+1 then ord(i)=ord(i)+1;
if level_order1=i+1 and lag_order > order then ord(i)=0; %*** if level_order1=i+1 means: if there are no more levels after this ord(i);
end;
drop i;

drop lag_:;
run;

proc sort data=__readrtf4;
by rownum;
run;

proc sort data=__ind6;
by rownum;
run;

data __readrtf5;
merge __readrtf4 (drop=order2 )
__ind6 (drop=order indent level_order1);
by rownum;
run;

%end; %** end of: &levels>1;

%else %do;
data __readrtf5;
set __readrtf4 (drop=order2 );
order1=_n_;
run;
%end;


%*** reassign rownum;
data &dout;
set __readrtf5 (drop=rownum);
rownum=_n_;
if indent ne 0 then indented='Y';
drop indent;
run;

%exit:

options &opts; %*** whatever system options you changes - restore them to what they used to be before running this macro;

proc datasets library=work nolist nowarn;
delete __readrtf: __chk __ind: __check_keywords;
quit;

%mend extract;

3 REPLIES 3
Cynthia_sas
SAS Super FREQ
Hi:
I would really recommend that you ask the author of this macro program for help in modifying the program.

cynthia
yh_zhang
Calcite | Level 5

Hi:

  I wonder if the whole macro is as long as you show it. And it's so complex.




 

ChrisNZ
Tourmaline | Level 20

It would be better to add a step and remove the last row :

data TAB;

  set TAB obs=NOBS;

  if _N_ ne NOBS;

run;

 

sas-innovate-2024.png

Don't miss out on SAS Innovate - Register now for the FREE Livestream!

Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 3 replies
  • 1310 views
  • 1 like
  • 4 in conversation