BookmarkSubscribeRSS Feed
sas_Forum
Calcite | Level 5

If i have run the same code for 4000 Rec there is no error if i run the same for 400000 (4lac) Rec i am getting this error's:

ERROR: The text expression length (65548) exceeds maximum length (65534). The text expression has be

en truncated to 65534

       characters.

NOTE: Line generated by the macro variable "RECODES".

608         hhold eq 165961 then hhold=161860; i

                                               _

                                               180

ERROR: The text expression length (65546) exceeds maximum length (65534). The text expression has be

en truncated to 65534

       characters.

ERROR: DATA STEP Component Object failure.  Aborted during the COMPILATION phase.

ERROR 180-322: Statement is not valid or it is used out of proper order.

-----------------------------------------Error-2-----------------------------------------------------------

608                    key=_house{i};            call missing(hhold);            rc=ha.find();     
      if rc=0 then do;
608      !     found=1;              &recodes.              leave;            end;          end;   
      if not found then do;
                                                            _____
                                                            22
608      !        n+1;
ERROR 22-322: Syntax error, expecting one of the following: !, !!, &, *, **, +, -, /, ;, <, <=, <>,
=, >, ><, >=, AND, EQ, GE, GT,
              IN, LE, LT, MAX, MIN, NE, NG, NL, NOTIN, OR, ^=, |, ||, ~=. 

ERROR: The text expression length (65546) exceeds maximum length (65534). The text expression has be
en truncated to 65534
       characters.

---------------------------------- Error-3----------------

      hhold=n;            &recodes.          end;          do j=1 to dim(_house); 
          if not
                                                              ___
                                                              22
608      ! missing(_house{j}) then do;              key=_house{j};              ha.replace();      
       start=key;
608      ! output;            end;
ERROR 22-322: Syntax error, expecting one of the following: !, !!, &, *, **, +, -, /, ;, <, <=, <>,
=, >, ><, >=, AND, EQ, GE, GT,
              IN, LE, LT, MAX, MIN, NE, NG, NL, NOTIN, OR, ^=, |, ||, ~=. 

ERROR: DATA STEP Component Object failure.  Aborted during the COMPILATION phase.

-------------------ERROR-4---------------------

ERROR: The length of the value of the macro variable RECODES (1992851) exceeds the maximum length (6

5534). The value has been

       truncated to 65534 characters.

---------------xxxxxxxxxxxxx-------------------

art297
Opal | Level 21

Then I'll have to revise the code.  Using a macro variable won't work as you apparently have so many reverses that it exceeds the maximum allowable length for a macro variable.

art297
Opal | Level 21

The following version doesn't have the macro variable limitation:

data have;

  infile cards truncover;

  input (pan1 add1 pan2 pan3) (: $40.);

  cards;

aaa     bbb     ccc     ddd

qqq     rrr     www     aaa

rrr     ppp     mmm     lll

uuu     zzz     ffff    ppp

p       l       m       n  

m       aaa     .       .    

a       b       c       .   

jjjj    eee     rrr     ooo

.  .  .  .                            

e       f       .       .    

a       e       .       .    

g       h       .       .    

i       j       k       .   

i       j       k       a  

sss     www     .       .    

.        .      .       eee

;

run;

%macro findthem;

  proc datasets library=work nowarn;

    delete recodes;

  quit;

 

  filename recode temp;

   %let stopthis=0;

  /*Assign initial household numbers*/

  data for_recodes (keep=start hhold);

    if _n_ eq 1 then do;

      declare hash ha(hashexp: 16);

      ha.definekey('key');

      ha.definedata('hhold');

      ha.definedone();

    end;

    set have;

    array _house{*} $ 40 pan1--pan3;

    do i=1 to dim(_house);

      key=_house{i};

      call missing(hhold);

      rc=ha.find();

      if rc=0 then do;

        found=1;

        leave;

      end;

    end;

    if not found then do;

      n+1;

      hhold=n;

    end;

    do j=1 to dim(_house);

      if not missing(_house{j}) then do;

        key=_house{j};

        ha.replace();

        start=key;

        output;

      end;

    end;

  run;

  %do %while (&stopthis eq 0);

    /*Test for invalid assignments*/

    proc sort data=for_recodes out=test nodupkey;

      by start hhold;

    run;

     data test2 (drop=n);

      set test end=eof;

      by start;

      if not(first.start and last.start) then do;

        output;

        n+1;

      end;

      if eof and n lt 1 then do;

        call symput('stopthis',1);

      end;

    run;

     %if &stopthis eq 0 %then %do;

      proc append base=recodes data=test2 force;

      run;

             proc sql noprint;

        create table extra as

          select hhold as first,

             min(hhold) as second

               from recodes

                        group by start

        ;

      quit;

 

      data _null_;

        set extra;

        file recode;

        length recode $80;

        recode=catx(" ","if hhold eq",first,

          "then hhold=",second,";");

              put recode;

      run;

             /*Correct identified discrepancies*/

      data for_recodes (keep=start hhold);

        if _n_ eq 1 then do;

          declare hash ha(hashexp: 16);

          ha.definekey('key');

          ha.definedata('hhold');

          ha.definedone();

        end;

        set have;

        array _house{*} $ 40 pan1--pan3;

        do i=1 to dim(_house);

          key=_house{i};

          call missing(hhold);

          rc=ha.find();

          if rc=0 then do;

            found=1;

            %include recode;

            leave;

          end;

        end;

        if not found then do;

          n+1;

          hhold=n;

          %include recode;

        end;

        do j=1 to dim(_house);

          if not missing(_house{j}) then do;

            key=_house{j};

            ha.replace();

            start=key;

            output;

          end;

        end;

      run;

    %end;

  %end;

 

  /*Get Final Dataset*/

  data want (keep=pan1 pan2 pan3 add1 household);

    if _n_ eq 1 then do;

      declare hash ha(hashexp: 16);

      ha.definekey('key');

      ha.definedata('hhold');

      ha.definedone();

    end;

    set have;

    array _house{*} $ 40 pan1--pan3;

    do i=1 to dim(_house);

      key=_house{i};

      call missing(hhold);

      rc=ha.find();

      if rc=0 then do;

        found=1;

        %include recode;

        household=hhold;

        leave;

      end;

    end;

    if not found then do;

      n+1;

      hhold=n;

      %include recode;

      household=hhold;

    end;

    do j=1 to dim(_house);

      if not missing(_house{j}) then do;

        key=_house{j};

        ha.replace();

      end;

    end;

  run;

%mend findthem;

 

%findthem

sas_Forum
Calcite | Level 5

WARNING: Physical file does not exist, /sas_data/saswork/SAS_work/#LN00267.

ERROR: Cannot open %INCLUDE file RECODE.

WARNING: Physical file does not exist, /sas_data/saswork/SAS_work/#LN00267.

ERROR: Cannot open %INCLUDE file RECODE.

Hi Art Now i am getting this Error

art297
Opal | Level 21

Did you change something in the code?  The error is indicating that SAS can't find a file and that looks like an odd location for it to be looking.

It works fine for me.  Given the above error, I would simply hardcode the file then.  Change the line:

filename recode temp;

to

filename recode "/sas_data/recode.sas";

or some location where you know that you have write permission

art297
Opal | Level 21

Are u on a Unix box?  I found the following section of the documentation that describes how you have to use pathnames when using include on a Unix box: http://support.sas.com/documentation/cdl/en/hostunx/61879/HTML/default/viewer.htm#specpath.htm

Ksharp
Super User

HaHa......

I finally find another way to promote efficiency.

I believe it will fast a lot.

Please let me know how fast it will be .

data test;
input  ( pan1 pan2 pan3 add1  ) ($);
datalines;
aaa   bbb    ccc     ddd    
qqq   rrr       www   aaa   
rrr     ppp    mmm lll       
uuu   zzz    ffff      ppp     
p       l        m          n
. . . . 
jjjj    eee     rrr       ooo    
.    .   .   .             
sss   www  .    .             
 .         .        .        eee 

;
run;
data want(keep=pan1 pan2 pan3 add1 household);
 declare hash ha(hashexp : 20);
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','add1');
  ha.definedone();
 declare hash _ha(hashexp: 20);
  _ha.definekey('key');
  _ha.definedata('_household');
  _ha.definedone();

do until(last);
 set test end=last;
 count+1;
 ha.add();
end;

length key $ 40;
array h{4} $ 40 pan1 pan2 pan3 add1;
_rc=hi.first();
do while(_rc eq 0);
 household+1;_household=household; n=0;
 do i=1 to 4;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
    else n+1;
 end;   
 if n eq 4 then do;output; return;end;
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      if found then rx=ha.remove(key : _count);
      found=0;
      do j=1 to 4;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 4;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      x=0; _count=count; output;
                    end;
      rc=hi.next();
     end;
 end;  
_rc=hi.first();
end; 
run;

Ksharp

:smileylaugh:

Message was edited by: xia keshan

sas_Forum
Calcite | Level 5

Hi Ksharp Thqs for your help actually i have run on 30000 obs for the dataset Test but the final dataset wnat has only 269 obs only....

Ksharp
Super User

Sorry. The code above is wrong, but it is correct if you do not care the missing obs(i.e. all these four vaules are missing.)

If you do not care the missing obs, the code above is the fastest.

The following code is fixed, But not as efficient as the code above.

data test;
input  ( pan1 pan2 pan3 add1  ) ($);
datalines;
aaa   bbb    ccc     ddd    
qqq   rrr       www   aaa   
rrr     ppp    mmm lll       
uuu   zzz    ffff      ppp     
p       l        m          n      
jjjj    eee     rrr       ooo    
.    .   .   .             
sss   www  .    .             
 .         .        .        eee       
;
run;
data want(keep=pan1 pan2 pan3 add1 household);
if _n_ eq 1 then do;
 declare hash ha(hashexp : 20,ordered: 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','add1');
  ha.definedone();
 declare hash _ha(hashexp: 20);
  _ha.definekey('key');
  _ha.definedata('_household');
  _ha.definedone();

do until(last);
 set test end=last;
 count+1;
 ha.add();
end;
end;
length key $ 40;
array h{4} $ 40 pan1 pan2 pan3 add1;
set test;
count=_n_; _rc=ha.check();
if _rc eq 0 then do;
 household+1;_household=household; n=0;
 do i=1 to 4;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
    else n+1;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      if found then rx=ha.remove(key : _count);
      found=0;
      do j=1 to 4;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 4;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count;flag=1; 
                    end;
      rc=hi.next();
    end;
 end;  
 if n=4 and not flag then output;
  else if n=4 and flag then do; household+1;output;end;
end; 

run;

Ksharp

Message was edited by: xia keshan

Ksharp
Super User

Opps.

data test;
input  ( pan1 pan2 pan3 add1  ) ($);
datalines;
aaa   bbb    ccc     ddd    
qqq   rrr       www   aaa   
rrr     ppp    mmm lll       
. . . .
. . . .
uuu   zzz    ffff      ppp  
. . . . 
p       l        m          n      
jjjj    eee     rrr       ooo    
.    .   .   .             
sss   www  .    .             
 .         .        .        eee       
;
run;

data want(keep=pan1 pan2 pan3 add1 household);
if _n_ eq 1 then do;
 declare hash ha(hashexp : 20,ordered: 'a');
 declare hiter hi('ha');
  ha.definekey('count');
  ha.definedata('count','pan1','pan2','pan3','add1');
  ha.definedone();
 declare hash _ha(hashexp: 20);
  _ha.definekey('key');
  _ha.definedata('_household');
  _ha.definedone();

do until(last);
 set test end=last;
 count+1;
 ha.add();
end;
end;
length key $ 40;
array h{4} $ 40 pan1 pan2 pan3 add1;
set test;
count=_n_; _rc=ha.check();
if _rc eq 0 then do;
 household+1;_household=household; n=0;
 do i=1 to 4;
   if not missing(h{i}) then do; key=h{i}; _ha.replace();end;
    else n+1;
 end;   
 do until(x=1);
    x=1;
    rc=hi.first();
    do while(rc=0);
      if found then rx=ha.remove(key : _count);
      found=0;
      do j=1 to 4;
      key=h{j};rcc=_ha.check();
      if rcc =0 then found=1;
      end;
      if found then do;
                      do k=1 to 4;
                      if not missing(h{k}) then do; key=h{k};_ha.replace();end;
                      end;
                      output;x=0; _count=count; 
                    end;
      rc=hi.next();
    end;
 end;  
 if n=4 then output;
end; 
run;

Ksharp

sas_Forum
Calcite | Level 5

Hi Ksharp i have cheked the codes but not working for Missing values i have checked the 3 codes above u have posted....

DLing
Obsidian | Level 7

Finally had some free time to work on this a bit.  The attached code uses hash objects to eliminate sorting.  It uses a very simple macro to loop and propagate group changes until no more group id needs to be changed.  This loop until consistent is what makes this capable of navigating the data structure regardless of how deeply nested it needs to go.

The limitation of the code is machine limit:  amount of available memory to hold the hash table.  Other than that, it should be able to tackle very large data sets.

With this input data:

aa bb cc dd

ee ff gg hh

ii jj kk ll

aa ff gg dd

mm oo pp

pp rr rr ss

tt uu vv ww

xx yy zz dd

xx z1

1 2

3 4

5 6

7 8

9 10

11 12

13 14

15 16

1 3

5 7

9 11

13 15

1 5

9 13

1 9

It generates this which is correct by visual inspection:

   

k1k2k3k4grp
aabbccdd1
eeffgghh1
iijjkkll3
aaffggdd1
mmoopp4
pprrrrss4
.
ttuuvvww6
xxyyzzdd1
xxz11
127
347
567
787
9107
11127
13147
15167
137
577
9117
13157
157
9137
197

The OP will need to modify the names and length statements to adapt to the situation, but that should be relatively easy to do.

Modifying it to handle different number of key columns is also easy.

For true householding where it is "same value within same column" as opposed to this problem of "same value in any of the key columns" is also very easily derived by tweaking this code a tiny bit.

art297
Opal | Level 21

From my initial testing it appears to work, but the OP wasn't able to run a pure hash solution previously, which is why I posted an alternative.  However, as I'm still learning to use hash, I do have one question.  In the final step _key comes up as uninitialized.  Can you explain?

DLing
Obsidian | Level 7

The _key is uninitialized message is a precaution from the data step compiler.  I think it's saying at the point where it's compiling the hash object, that variable isn't initialized yet, so it's spitting out a note.  It's easily avoided by assigning a value to the variable earlier in the code prior to the declare hash, then it's happy.  With this code, it is initialized later so the code runs fine.  But the message is disturbing to see in the log.  I was just being lazy and only wanted to illustrate the concept, so I didn't clean the code and make it water tight... etc.

sas_Forum
Calcite | Level 5

Hi i am getting this ERROR:

478            do _i = 1 to 4;
479                grps[_i] = .;
480                if keys[_i] ^= ' ' then if hh.find(key: keys[_i])=0 then grps[_i] = _grp;       
* Fetch existing grps;
481            end;
482       
483            mingrp = min(of grps

  • );
    484       
    485            if mingrp > . then _grp = mingrp;                            * Update to this smalles
    t legit grp;
    486            else do;                                                    * Or grab a new grp;
    487                _grp = _nxtgrp;
    488                _nxtgrp + 1;
    489            end;
    490       
    491            do _i = 1 to 4;                                             * Update non blank keys t
    o this value;
    492                if keys[_i] ^= ' ' then do;
    493                    _key = keys[_i];
    494                    _rc = hh.replace();                                 * replace() will update o
    r add new keys;
    495                end;
    496            end;
    497       
    498            if eof then hh.output( dataset: 'keys' );
    499        run;
  • ERROR: Argument length greater than length of key variable _key at line 480 column 36.
    ERROR: Keys and data must be specified in the same order and have the same types as given in DefineK
    ey/DefineData at line 480
           column 36.
    ERROR: DATA STEP Component Object failure.  Aborted during the EXECUTION phase.

    SAS Innovate 2025: Save the Date

     SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

    Save the date!

    What is Bayesian Analysis?

    Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.

    Find more tutorials on the SAS Users YouTube channel.

    SAS Training: Just a Click Away

     Ready to level-up your skills? Choose your own adventure.

    Browse our catalog!

    Discussion stats
    • 124 replies
    • 3699 views
    • 4 likes
    • 7 in conversation