SAS Programming

DATA Step, Macro, Functions and more
BookmarkSubscribeRSS Feed
chinna0369
Pyrite | Level 9

Hi,

 

I have a below code to check non ascii characters from datasets, xpt, define. When I am giving paths it is not taking that path. I am not sure what I am missing. Can anyone help me with this.

This is the code I have:

 

%let DataDIR =  ; * Terget Folder path ;
 
/* Define.xml check setting */
 
%let DefDIR = &DataDIR. ; * Folder path for Define.xml ;
*NOTE: if you want to no check for define.xml then set blank &DefDIR above. ;
%let DefDIR =  ;
 
***** - DO NOT CHANGE - contents of macro *****;
 
libname CURR "." ;
 
%macro GLB_Create_transval(lib);
  %local rc fileref makedir workpath;
  %let workpath = %sysfunc(pathname(work));
 
  %let rc=%sysfunc(filename(fileref));
  %let rc = %sysfunc(filename(fileref,&workpath.\&lib.));
  %put checking &lib. fileref    %eval(%sysfunc(fexist(&fileref));
  %if %eval(%sysfunc(fexist(&fileref))) = 0 %then %do;
    x mkdir "&workpath.\&lib.";
    %put %sysfunc(sleep(3)) ;
    %let rc=%sysfunc(filename(fileref));
    %let rc = %sysfunc(filename(fileref,&workpath.\&lib.));
  %end;
  %global v_vald;
    %let v_vald = &workpath.\&lib.;
    libname &lib. "&v_vald";
%mend GLB_Create_transval;
/* make validated library */
%glb_create_transval(TempOut);
 
%macro m_xpt_ds(__INLIB=,__OUTLIB=, __XPTFLG= /*Convert Flag*/);
data _NULL_;
  length path $200;
  path=pathname("&__inlib.");
  call symputx("__path",path);
run;
 
filename __xpt "&__path.";
 
data __LIST;
  length LIST $500;
  did = dopen("__xpt") ;
  do __i = 1 to dnum(did) ;
    LIST = strip(dread(did,__i));
    output;
  end;
run;
 
data __LIST;
  length AD DS $200;
  set __LIST;
  if scan(LIST,-1,".")^="xpt" then delete;
  DS=SUBSTR(LIST,1,length(LIST)-4);
  AD=cats("&__path.\",LIST);
  keep LIST DS __i AD;
run;
 
data _NULL_;
  set __LIST;
  %if &__XPTFLG.=Y %then %do;
    call execute("libname xptfile xport '"||strip(AD)||"';");
    call execute("Proc copy in=xptfile OUT=&__OUTLIB.;");
    call execute("run;");
  %end;
  %else %do;
    call execute("proc cimport lib=&__OUTLIB. INFILE='"||strip(AD)||"';");
    call execute("select "||strip(DS)||";");
    call execute("run;");
%end;
run;
 
proc datasets lib=work nolist;
  delete __:;
  quit;
run;
%mend m_xpt_ds;
 
%macro check_ASCII();
 
/* create ASCII list */
data ListofASCII;
  length ASCII_List $200;
  do i=32 to 126;
    ASCII_List=trim(ASCII_List)||byte(i);
  end;
  drop i;
run;
 
data CURR.CHK_ASCII;
  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20;
  call missing(of _all_);
  delete;
run;
 
 
%if "&DataDIR." ^= "" %then %do;
 
libname TLIB "&DataDIR." access=readonly;
* if datatype is sas7bdat ;
Proc copy in=TLIB OUT=TempOut; run;
* if datatype is xpt ;
%m_xpt_ds(__INLIB=TLIB,__OUTLIB=TempOut, __XPTFLG=Y);
 
/* Check for datasets */
	proc contents data=TempOut._all_ out=WORK.FILE(keep=MEMNAME) noprint ;run ; 
	proc sort data=WORK.FILE nodupkey;by MEMNAME;run;
	%macro chk(ds);
	data _tmp;
	  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20;
	  set TempOut.&DS.;
	  if _n_=1 then set WORK.ListofASCII ; *merge ASCII list;
	  array CH{*} _character_;
	  do i=1 to dim(CH);
	    nonASC=kcompress(CH{i},ASCII_List);
	    if not missing(nonASC) then do;
	     DS="&DS.";
	     OBS=_N_;
	     VARNAME=vname(CH{i});
	     VALUE=CH{i};
	     rank=rank(nonasc);
	     uni=unicodec(nonasc);
	     output;
	    end;
	  end;
	  keep DS OBS VARNAME VALUE nonASC rank uni;
	run;
	proc append base=CURR.CHK_ASCII data=_tmp;
	%mend chk;
	data _null_;
	  set sashelp.vtable(where=(upcase(libname)=upcase("TempOut"))) end=eof;
	  call symput('DS' || compress(put(_n_,8.)), compress(memname));
	  if eof then call symput('DSCNT',compress(put(_n_,8.)));
	run;
 
	%do i = 1 %to &dscnt;
	 %CHK(&&ds&i..);
	%end;
%end;
 
%if "&DefDIR." ^= ""%then %do;
 
/* Add non ASCII check for define.xml */
	data DefineASCII;
		set ListofASCII;
		ASCII_List=compress(ASCII_List,"&");
	run;
 
/* import from xml to sas */
  filename def "&DefDIR.\define.xml" lrecl=32767;
  data WORK.def;
    infile def firstobs=1 pad;
    input @1 _txt $char32767.;
  run;
 
/* Check for define.xml */
	data _tmp;
	  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20 rank_W N_len 8;
	  set WORK.def;
	  retain rank N_len 0;
	  if _n_=1 then set WORK.DefineASCII ; *merge ASCII list;
	  array CH{*} _character_;
	  do i=1 to dim(CH);
		CH{i}=tranwrd(CH{i},'&',"-");
		CH{i}=tranwrd(CH{i},'&lt;',"<");
		CH{i}=tranwrd(CH{i},'&gt;',">");
		CH{i}=tranwrd(CH{i},'&quot;','"');
		CH{i}=tranwrd(CH{i},'&apos;',"'");
	    nonASC=kcompress(CH{i},ASCII_List);
	    if not missing(nonASC) then do;
	     DS="DEFINE";
	     OBS=_N_;
	     VARNAME=vname(CH{i});
	     VALUE=CH{i};
		 if index(nonasc,"&") then do l=1 to count(CH{i},";");
		 	if l=1 then do; rank=0; N_len=0; end;
			rank_W=find(scan(CH{i},l,";"),"&");
			rank=sum(rank_W,rank);
			uni=cats(scan(substr(CH{i},sum(rank,N_len)),1,";"),";");
			nonasc=uni;
			N_len=length(nonasc)-1;
			output;
		 end;
	     else do;
		    rank=rank(nonasc);
			uni=unicodec(nonasc);
			output;
		 end;
	    end;
	  end;
	  keep DS OBS VARNAME VALUE nonASC rank uni;
	run;
	proc append base=CURR.CHK_ASCII data=_tmp;
%end;
%mend check_ASCII;
 
%check_ASCII();

This is the code I have updated. But not working.

 

 

 

%let DataDIR =/bdm/study01; * Terget Folder path ;
 
/* Define.xml check setting */
 
%let DefDIR = &DataDIR. ; * Folder path for Define.xml ;
*NOTE: if you want to no check for define.xml then set blank &DefDIR above. ;
/* %let DefDIR =  ; */
 
***** - DO NOT CHANGE - contents of macro *****;
 
libname CURR "/bdm/study01" ;
 
%macro GLB_Create_transval(lib);
  %local rc fileref makedir workpath;
  %let workpath = %sysfunc(pathname(work));
 
  %let rc=%sysfunc(filename(fileref));
  %let rc = %sysfunc(filename(fileref,&workpath.\&lib.));
  %put checking &lib. fileref    %eval(%sysfunc(fexist(&fileref));
  %if %eval(%sysfunc(fexist(&fileref))) = 0 %then %do;
    x mkdir "&workpath.\&lib.";
    %put %sysfunc(sleep(3)) ;
    %let rc=%sysfunc(filename(fileref));
    %let rc = %sysfunc(filename(fileref,&workpath.\&lib.));
  %end;
  %global v_vald;
    %let v_vald = &workpath.\&lib.;
    libname &lib. "&v_vald";
%mend GLB_Create_transval;
/* make validated library */
%glb_create_transval(TempOut);

libname TempOut "/bdm/study01/Results" ;

%macro m_xpt_ds(__INLIB=,__OUTLIB=, __XPTFLG= /*Convert Flag*/);
data _NULL_;
  length path $200;
  path=pathname("&__inlib.");
  call symputx("__path",path);
run;
 
filename __xpt "&__path.";
 
data __LIST;
  length LIST $500;
  did = dopen("__xpt") ;
  do __i = 1 to dnum(did) ;
    LIST = strip(dread(did,__i));
    output;
  end;
run;
 
data __LIST;
  length AD DS $200;
  set __LIST;
  if scan(LIST,-1,".")^="xpt" then delete;
  DS=SUBSTR(LIST,1,length(LIST)-4);
  AD=cats("&__path.\",LIST);
  keep LIST DS __i AD;
run;
 
data _NULL_;
  set __LIST;
  %if &__XPTFLG.=Y %then %do;
    call execute("libname xptfile xport '"||strip(AD)||"';");
    call execute("Proc copy in=xptfile OUT=&__OUTLIB.;");
    call execute("run;");
  %end;
  %else %do;
    call execute("proc cimport lib=&__OUTLIB. INFILE='"||strip(AD)||"';");
    call execute("select "||strip(DS)||";");
    call execute("run;");
%end;
run;
 
proc datasets lib=work nolist;
  delete __:;
  quit;
run;
%mend m_xpt_ds;
 
%macro check_ASCII();
 
/* create ASCII list */
data ListofASCII;
  length ASCII_List $200;
  do i=32 to 126;
    ASCII_List=trim(ASCII_List)||byte(i);
  end;
  drop i;
run;
 
data CURR.CHK_ASCII;
  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20;
  call missing(of _all_);
  delete;
run;
 
 
%if "&DataDIR." ^= "" %then %do;
 
libname TLIB "&DataDIR." access=readonly;
* if datatype is sas7bdat ;
Proc copy in=TLIB OUT=TempOut; run;
* if datatype is xpt ;
%m_xpt_ds(__INLIB=TLIB,__OUTLIB=TempOut, __XPTFLG=Y);
 
/* Check for datasets */
	proc contents data=TempOut._all_ out=WORK.FILE(keep=MEMNAME) noprint ;run ; 
	proc sort data=WORK.FILE nodupkey;by MEMNAME;run;
	%macro chk(ds);
	data _tmp;
	  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20;
	  set TempOut.&DS.;
	  if _n_=1 then set WORK.ListofASCII ; *merge ASCII list;
	  array CH{*} _character_;
	  do i=1 to dim(CH);
	    nonASC=kcompress(CH{i},ASCII_List);
	    if not missing(nonASC) then do;
	     DS="&DS.";
	     OBS=_N_;
	     VARNAME=vname(CH{i});
	     VALUE=CH{i};
	     rank=rank(nonasc);
	     uni=unicodec(nonasc);
	     output;
	    end;
	  end;
	  keep DS OBS VARNAME VALUE nonASC rank uni;
	run;
	proc append base=CURR.CHK_ASCII data=_tmp;
	%mend chk;
	data _null_;
	  set sashelp.vtable(where=(upcase(libname)=upcase("TempOut"))) end=eof;
	  call symput('DS' || compress(put(_n_,8.)), compress(memname));
	  if eof then call symput('DSCNT',compress(put(_n_,8.)));
	run;
 
	%do i = 1 %to &dscnt;
	 %CHK(&&ds&i..);
	%end;
%end;
 
%if "&DefDIR." ^= ""%then %do;
 
/* Add non ASCII check for define.xml */
	data DefineASCII;
		set ListofASCII;
		ASCII_List=compress(ASCII_List,"&");
	run;
 
/* import from xml to sas */
  filename def "&DefDIR.\define.xml" lrecl=32767;
  data WORK.def;
    infile def firstobs=1 pad;
    input @1 _txt $char32767.;
  run;
 
/* Check for define.xml */
	data _tmp;
	  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20 rank_W N_len 8;
	  set WORK.def;
	  retain rank N_len 0;
	  if _n_=1 then set WORK.DefineASCII ; *merge ASCII list;
	  array CH{*} _character_;
	  do i=1 to dim(CH);
		CH{i}=tranwrd(CH{i},'&amp;',"-");
		CH{i}=tranwrd(CH{i},'&lt;',"<");
		CH{i}=tranwrd(CH{i},'&gt;',">");
		CH{i}=tranwrd(CH{i},'&quot;','"');
		CH{i}=tranwrd(CH{i},'&apos;',"'");
	    nonASC=kcompress(CH{i},ASCII_List);
	    if not missing(nonASC) then do;
	     DS="DEFINE";
	     OBS=_N_;
	     VARNAME=vname(CH{i});
	     VALUE=CH{i};
		 if index(nonasc,"&") then do l=1 to count(CH{i},";");
		 	if l=1 then do; rank=0; N_len=0; end;
			rank_W=find(scan(CH{i},l,";"),"&");
			rank=sum(rank_W,rank);
			uni=cats(scan(substr(CH{i},sum(rank,N_len)),1,";"),";");
			nonasc=uni;
			N_len=length(nonasc)-1;
			output;
		 end;
	     else do;
		    rank=rank(nonasc);
			uni=unicodec(nonasc);
			output;
		 end;
	    end;
	  end;
	  keep DS OBS VARNAME VALUE nonASC rank uni;
	run;
	proc append base=CURR.CHK_ASCII data=_tmp;
%end;
%mend check_ASCII;
 
%check_ASCII();

These are the errors I am getting.

NOTE: Library TEMPOUT does not exist.

NOTE: Input library XPTFILE is sequential.
ERROR: Physical file does not exist,
/bdm/study01\adeff.xpt.
 
Adeff is not there but I am getting this error.
 
Can anyone check this macro and help me where I am missing and what I am missing.
 
Thanks,
Chinna

 

 

11 REPLIES 11
Tom
Super User Tom
Super User

If you are running on Unix then why did you use \  instead of / when generating the path:

/bdm/study01\adeff.xpt.

On Unix the \ is an escape character.  Since there is no need to escape the letter a you asked it to look for this file:

/bdm/study01adeff.xpt.

 

I did not to check all of your code, but in general SAS does not mind if you use / as the delimiter between directory levels on Windows.  So why not just use / all of the time.   If you want the generated paths to look "pretty" then check what operating system your code is running on and use the proper character.

%let slash=/;
%if "&syscp" - "WIN" %then %do;
  %let slash=\;
%end;

 

chinna0369
Pyrite | Level 9

Thank you! Updated \ to /. And it is working now. but getting below warning.

MPRINT(CHECK_ASCII): data _tmp;
MPRINT(CHECK_ASCII): length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $20 rank_W N_len 8;
MPRINT(CHECK_ASCII): set WORK.def;
MPRINT(CHECK_ASCII): retain rank N_len 0;
MPRINT(CHECK_ASCII): if _n_=1 then set WORK.DefineASCII ;
MPRINT(CHECK_ASCII): *merge ASCII list;
MPRINT(CHECK_ASCII): array CH{*} _character_;
MPRINT(CHECK_ASCII): do i=1 to dim(CH);
MPRINT(CHECK_ASCII): CH{i}=tranwrd(CH{i},'&amp;',"-");
MPRINT(CHECK_ASCII): CH{i}=tranwrd(CH{i},'&lt;',"<");
MPRINT(CHECK_ASCII): CH{i}=tranwrd(CH{i},'&gt;',">");
MPRINT(CHECK_ASCII): CH{i}=tranwrd(CH{i},'&quot;','"');
MPRINT(CHECK_ASCII): CH{i}=tranwrd(CH{i},'&apos;',"'");
MPRINT(CHECK_ASCII): nonASC=kcompress(CH{i},ASCII_List);
MPRINT(CHECK_ASCII): if not missing(nonASC) then do;
MPRINT(CHECK_ASCII): DS="DEFINE";
MPRINT(CHECK_ASCII): OBS=_N_;
MPRINT(CHECK_ASCII): VARNAME=vname(CH{i});
MPRINT(CHECK_ASCII): VALUE=CH{i};
MPRINT(CHECK_ASCII): if index(nonasc,"&") then do l=1 to count(CH{i},";");
MPRINT(CHECK_ASCII): if l=1 then do;
MPRINT(CHECK_ASCII): rank=0;
MPRINT(CHECK_ASCII): N_len=0;
MPRINT(CHECK_ASCII): end;
MPRINT(CHECK_ASCII): rank_W=find(scan(CH{i},l,";"),"&");
MPRINT(CHECK_ASCII): rank=sum(rank_W,rank);
MPRINT(CHECK_ASCII): uni=cats(scan(substr(CH{i},sum(rank,N_len)),1,";"),";");
MPRINT(CHECK_ASCII): nonasc=uni;
MPRINT(CHECK_ASCII): N_len=length(nonasc)-1;
MPRINT(CHECK_ASCII): output;
MPRINT(CHECK_ASCII): end;
MPRINT(CHECK_ASCII): else do;
MPRINT(CHECK_ASCII): rank=rank(nonasc);
MPRINT(CHECK_ASCII): uni=unicodec(nonasc);
MPRINT(CHECK_ASCII): output;
MPRINT(CHECK_ASCII): end;
MPRINT(CHECK_ASCII): end;
MPRINT(CHECK_ASCII): end;
MPRINT(CHECK_ASCII): keep DS OBS VARNAME VALUE nonASC rank uni;
MPRINT(CHECK_ASCII): run;

NOTE: Argument 1 to function UNICODEC('’’’’ '[12 of 1992 characters shown]) at line 285 column 146 is invalid.
DS=DEFINE OBS=37909 VARNAME=_txt
VALUE=Set to 'Y' if ADXM.COHORT=’On-Demand’ and COHTRANS=’Y’ and ASTDT>=COHCHGDT</TranslatedText> nonASC= rank=226 uni=
rank_W=. N_len=0 _txt=Set to 'Y' if ADXM.COHORT=’On-Demand’ and COHTRANS=’Y’ and ASTDT>=COHCHGDT</TranslatedText>
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=. _ERROR_=1 _N_=37909
NOTE: Argument 1 to function UNICODEC('’’’’’’ '[12 of 1988 characters shown]) at line 285 column 146 is invalid.
DS=DEFINE OBS=38319 VARNAME=_txt
VALUE=<TranslatedText>Select records from ADEX where EXDOSMOD="Y" and EXADJ ne "One acute TTP event" and ((ADXM.COHORT=’Prophylaxi
s’ and ASTDY>=1) OR (ADXM.COHORT=’On-Demand’ and COHTRANS=’Y’ and ASTDT>COHCHGDT)).</TranslatedText> nonASC= rank=226
uni= rank_W=. N_len=0
_txt=<TranslatedText>Select records from ADEX where EXDOSMOD="Y" and EXADJ ne "One acute TTP event" and ((ADXM.COHORT=’Prophylaxis
’ and ASTDY>=1) OR (ADXM.COHORT=’On-Demand’ and COHTRANS=’Y’ and ASTDT>COHCHGDT)).</TranslatedText>
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=. _ERROR_=1 _N_=38319
NOTE: Argument 1 to function UNICODEC('“””””””” '[12 of 1984 characters shown]) at line 285 column 146 is
invalid.
DS=DEFINE OBS=41354 VARNAME=_txt
VALUE=<TranslatedText>Select the last dose per subject from EX where EXDOSE>0, then connect EXTRT and EXDOSFRQ as 'EXTRT: EXDOSFRQ'.
Do not consider 'Resume' or missing dosing frequencies. Note, consider any EXTRT containing 'BAX930' or 'TAK-755' as the same treat
ment, and set it equal to “BAX930” in FDOSFRQ. For example if a subjects last dosing record has EXTRT=”TAK-755 (rADAMTS-13)”
and EXDOSFRQ=”EVERY 2 WEEKS” then FDOSFRQ=”BAX930: EVERY 2 WEEKS”.</TranslatedText> nonASC= rank=226 uni= rank_W=.
N_len=0
_txt=<TranslatedText>Select the last dose per subject from EX where EXDOSE>0, then connect EXTRT and EXDOSFRQ as 'EXTRT: EXDOSFRQ'.
Do not consider 'Resume' or missing dosing frequencies. Note, consider any EXTRT containing 'BAX930' or 'TAK-755' as the same treatm
ent, and set it equal to “BAX930” in FDOSFRQ. For example if a subjects last dosing record has EXTRT=”TAK-755 (rADAMTS-13)”
and EXDOSFRQ=”EVERY 2 WEEKS” then FDOSFRQ=”BAX930: EVERY 2 WEEKS”.</TranslatedText>
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=. _ERROR_=1 _N_=41354
NOTE: Argument 1 to function UNICODEC('””””“” '[12 of 1988 characters shown]) at line 285 column 146 is invalid.
DS=DEFINE OBS=41396 VARNAME=_txt
VALUE=<TranslatedText>Where either ACUHIS or SACUHIS are Y, then look at MH:MHSTDTC where MHCAT=”TTP EVENT HISTORY” and MHTERM=
”ACUTE TTP EVENT” or “SUB-ACUTE TTP EVENT” and pick the last event .</TranslatedText> nonASC= rank=226 uni= rank_W=.
N_len=0
_txt=<TranslatedText>Where either ACUHIS or SACUHIS are Y, then look at MH:MHSTDTC where MHCAT=”TTP EVENT HISTORY” and MHTERM=
”ACUTE TTP EVENT” or “SUB-ACUTE TTP EVENT” and pick the last event .</TranslatedText>
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=. _ERROR_=1 _N_=41396
WARNING: In a call to the CATS function, the buffer allocated for the result was not long enough to contain the concatenation of
all the arguments. The correct result would contain 46 characters, but the actual result might either be truncated to 20
character(s) or be completely blank, depending on the calling environment. The following note indicates the left-most
argument that caused truncation.
NOTE: Argument 1 to function CATS('PROC SORT DA'[12 of 45 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43596 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=72 uni= rank_W=1 N_len=0
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=11 _ERROR_=1
_N_=43596
WARNING: In a call to the CATS function, the buffer allocated for the result was not long enough to contain the concatenation of
all the arguments. The correct result would contain 46 characters, but the actual result might either be truncated to 20
character(s) or be completely blank, depending on the calling environment. The following note indicates the left-most
argument that caused truncation.
NOTE: Argument 1 to function CATS('PROC SORT DA'[12 of 45 characters shown],';') at line 285 column 235 is invalid.
WARNING: In a call to the CATS function, the buffer allocated for the result was not long enough to contain the concatenation of
all the arguments. The correct result would contain 46 characters, but the actual result might either be truncated to 20
character(s) or be completely blank, depending on the calling environment. The following note indicates the left-most
argument that caused truncation.
NOTE: Argument 1 to function CATS('PROC SORT DA'[12 of 45 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43630 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=74 uni= rank_W=1 N_len=0
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=13 _ERROR_=1
_N_=43630
WARNING: In a call to the CATS function, the buffer allocated for the result was not long enough to contain the concatenation of
all the arguments. The correct result would contain 46 characters, but the actual result might either be truncated to 20
character(s) or be completely blank, depending on the calling environment. The following note indicates the left-most
argument that caused truncation.
NOTE: Argument 1 to function CATS('PROC SORT DA'[12 of 45 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43664 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=73 uni=#xA; rank_W=1 N_len=3
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=12 _ERROR_=1
_N_=43664
WARNING: In a call to the CATS function, the buffer allocated for the result was not long enough to contain the concatenation of
all the arguments. The correct result would contain 46 characters, but the actual result might either be truncated to 20
character(s) or be completely blank, depending on the calling environment. The following note indicates the left-most
argument that caused truncation.
NOTE: Further warning from this call to CATS will be suppressed.
NOTE: Argument 1 to function CATS('PROC SORT DA'[12 of 45 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43698 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=73 uni=#xA; rank_W=1 N_len=3
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE SAFFL='Y' ;&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=12 _ERROR_=1
_N_=43698
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('ADAM.ADAE(EN'[12 of 30 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('DAM.ADAE(ENC'[12 of 29 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43736 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=93 uni= rank_W=1
N_len=0
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=14 _ERROR_=1
_N_=43736
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('ADAM.ADAE(EN'[12 of 30 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('DAM.ADAE(ENC'[12 of 29 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43774 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=93 uni= rank_W=1
N_len=0
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=14 _ERROR_=1
_N_=43774
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('A=ADAM.ADAE('[12 of 32 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('=ADAM.ADAE(E'[12 of 31 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('ADAM.ADAE(EN'[12 of 30 characters shown],';') at line 285 column 235 is invalid.
NOTE: Argument 1 to function CATS('DAM.ADAE(ENC'[12 of 29 characters shown],';') at line 285 column 235 is invalid.
DS=DEFINE OBS=43812 VARNAME=_txt
VALUE=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJI
D SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL"> nonASC= rank=93 uni= rank_W=1
N_len=0
_txt=<arm:ProgrammingCode Context="[SAS V9.4 / LINUX]&#xA;&#xA;PROC SORT DATA=ADAM.ADAE(ENCODING=ANY) OUT=AE;&#xA;BY STUDYID USUBJID
SUBJID;&#xA;WHERE TRTEMFL EQ &#34;Y&#34; AND SAFFL='Y';&#xA;RUN;&#xA;&#xA;PROC MEANS, PROC SQL">
ASCII_List=!"#$%'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ i=8 l=14 _ERROR_=1
_N_=43812
Tom
Super User Tom
Super User

Still too much code to actually look at.

Try removing all of the macro code and macro variables and just concentrate on getting the data step with UNICODEC() function call to work properly.

 

Note that is block of code where you are getting the notes generated.

MPRINT(CHECK_ASCII): else do;
MPRINT(CHECK_ASCII): rank=rank(nonasc);
MPRINT(CHECK_ASCII): uni=unicodec(nonasc);
MPRINT(CHECK_ASCII): output;
MPRINT(CHECK_ASCII): end;

Only makes sense if your SAS session is running with the system option ENCODING set to a single byte encoding such as WLATIN1.

 

So check what you have that option set to in your SAS session.

%put %sysfunc(getoption(encoding));
Ksharp
Super User

No need to make a dataset to include all these ASCII characters ,just use Perl Regular Expression " [[:ascii:]] "  is more than suffice.

 




data have;
input x $40.;
if _n_=1 then x=cats(x,'EE'x,'xx');
p=prxmatch('/[[:^ascii:]]/',x);
if p then flag=catx(' ','have none ascii character at ',p);
cards;
sadfas
sd 23 %
@!~&a *()
;
proc print;run;

Ksharp_0-1741314982896.png

 

ChrisNZ
Tourmaline | Level 20

What @Ksharp describes is the easiest way.

No need to use a convoluted logic that's hard to maintain.

One line of code does the job.

Use prxchange() instead of prxmatch() if you want to retain a list of the non-ASCII characters.

Tom
Super User Tom
Super User

I usually find it is more useful to search for non printable 7-bit ASCII bytes.  That way you can locate things like tabs, linefeeds and other invisible characters.

p=verify(x,collate(32,126));

 

Ksharp
Super User

Tom,

PRX already have corresponding expression for these blank/non-print characters.

 

Ksharp_0-1741415560356.png

Ksharp_1-1741415713526.png

 

And more there is a function NOTPRINT() also could find a non-printable character.

 

 

Tom
Super User Tom
Super User

Those are useful but not precise enough it the actual goal is to detect the characters that will cause trouble with transcoding (or HTMLencoding or URLencoding).

 

chinna0369
Pyrite | Level 9

@Ksharp  Thanks for you reply!

I have updated code as below just to see P and flag values. But I a getting below error. Is there anything I am missing. 

 

	data _tmp;
	  length DS $20 OBS 8 VARNAME $20 VALUE nonASC $2000 rank 8 uni $5000 rank_W N_len 8;
	  set WORK.def;
	  retain rank N_len 0;		
	  if _n_=1 then set WORK.DefineASCII ; *merge ASCII list; 
	  array CH{*} _character_;  
	  do i=1 to dim(CH);
		CH{i}=tranwrd(CH{i},'&amp;',"-");
		CH{i}=tranwrd(CH{i},'&lt;',"<");
		CH{i}=tranwrd(CH{i},'&gt;',">");
		CH{i}=tranwrd(CH{i},'&quot;','"');
		CH{i}=tranwrd(CH{i},'&apos;',"'");
		
		CH{i}=cats(CH{i},'EE'CH{i},'xx');
		p=prxmatch('/[[:^ascii:]]/',CH{i});
		if p then flag=catx(' ','have none ascii character at ',p);
		
	    nonASC=kcompress(CH{i},ASCII_List);
	    if not missing(nonASC) then do;
	     DS="DEFINE";
	     OBS=_N_;
	     VARNAME=vname(CH{i});
	     VALUE=CH{i};
		 if index(nonasc,"&") then do l=1 to count(CH{i},";");
		 	if l=1 then do; rank=0; N_len=0; end;
			rank_W=find(scan(CH{i},l,";"),"&");
			rank=sum(rank_W,rank);
			uni=cats(scan(substr(CH{i},sum(rank,N_len)),1,";"),";");
			nonasc=uni;
			N_len=length(nonasc)-1;
			output;
		 end;
	     else do;
		    rank=rank(nonasc);
			uni=unicodec(nonasc);
			output;
		 end;
	    end;
	  end;
	  keep DS OBS VARNAME VALUE nonASC rank uni p flag;
	run;

 

NOTE: Line generated by the invoked macro "CHECK_ASCII".
290 ,'&amp;',"-"); CH{i}=tranwrd(CH{i},'&lt;',"<"); CH{i}=tranwrd(CH{i},'&gt;',">"); CH{i}=tranwrd(CH{i},'&quot;','"');
290 ! CH{i}=tranwrd(CH{i},'&apos;',"'"); CH{i}=cats(CH{i},'EE'CH{i},'xx'); p=prxmatch('/[[:^ascii:]]/',CH{i}); if p
______
49 22
290 ! then
 
NOTE 49-169: The meaning of an identifier after a quoted string might change in a future SAS release. Inserting white space
between a quoted string and the succeeding identifier is recommended.
 
ERROR 22-322: Syntax error, expecting one of the following: !, !!, &, *, **, +, ',', -, /, <, <=, <>, =, >, ><, >=, AND, EQ, GE,
GT, LE, LT, MAX, MIN, NE, NG, NL, OR, ^=, |, ||, ~=.
 
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK._TMP may be incomplete. When this step was stopped there were 0 observations and 8 variables.
WARNING: Data set WORK._TMP was not replaced because this step was stopped.

 

Tom
Super User Tom
Super User

Why did you include the line from the demonstration code that was only there so it could insert a non-ascii character for the regular expression to detect?

CH{i}=cats(CH{i},'EE'CH{i},'xx');
Ksharp
Super User

Post your dataset WORK.def ,that would help you to solve this question.

And you could use HTMLDECODE() to replace with the following code.

                CH{i}=tranwrd(CH{i},'&amp;',"-");
		CH{i}=tranwrd(CH{i},'&lt;',"<");
		CH{i}=tranwrd(CH{i},'&gt;',">");
		CH{i}=tranwrd(CH{i},'&quot;','"');
		CH{i}=tranwrd(CH{i},'&apos;',"'");

Ksharp_0-1741416353846.png

 

sas-innovate-white.png

Special offer for SAS Communities members

Save $250 on SAS Innovate and get a free advance copy of the new SAS For Dummies book! Use the code "SASforDummies" to register. Don't miss out, May 6-9, in Orlando, Florida.

 

View the full agenda.

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 11 replies
  • 1149 views
  • 2 likes
  • 4 in conversation