DATA Step, Macro, Functions and more

Help with macro that counts records (created by MikeZdeb)

Accepted Solution Solved
Reply
Regular Contributor
Posts: 186
Accepted Solution

Help with macro that counts records (created by MikeZdeb)

Hi,

I need help with a macro that was created by MikeZdeb for me. The macro currently counts the number of missing records, records with the value N/A and "non-missing records" as well as providing the total.

Below is the code provided by MikeZdeb (you can see this post for more info https://communities.sas.com/thread/36410?start=15&tstart=0)

First of all, I would be really greatful if someone could put comments in the code in order for me to properly understand what it is doing.

I would also like the macro output to have one more column wich would indicate the number of duplicates records.

I currently use the following sql commands to count the duplicates records and I don't think it is the most efficient way to do it.:

proc sql;
create table work.test as SELECT name, COUNT(name)
AS NumOfccurrences
FROM sasuser.blue_car_table

GROUP BY name HAVING ( COUNT(name) > 1 )
;quit;

proc sql;
select count(*) from work.test
;quit;

I would like to know the proper code to counts the number of duplicates records and how to add it in the existing MikeZdeb's macro (as a new column).

Thank you for your help and time.


data table1;

input name $ age @@ ;

cards;

John 45 N/A 30 . 15 Carl 25

;


data table2;

input color $ height;

cards;

Blue 110 N/A 120 . 100 Red .

;


proc format;

value nn low-high='1' other='3';

value $c 'N/A'='2' ' '='3' other='1';

run;


%macro tables(stuff);

proc datasets lib=work nolist;

delete tables;

quit;


%let j=1;

%do %while(%scan(&stuff,&j,/) ne);

%let dset=%scan(&stuff,&j,/);


ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

proc freq data=&dset;

tables _all_ / missing;

format _numeric_ nn. _character_ $c.;

run;

ods output close;


data temp (keep=table name not_: missing total);

length table name $32;

do until(last.table);

   set temp;

   by table notsorted;

   array t(*) f_:;

   select(t(_n_));

      when('1') not_missing = frequency;

      when('2') not_applicable = frequency;

      when('3') missing = frequency;

   end;

end;

total = sum(of not_:, missing);

name = scan(table,2);

table = "&dset";

run;


proc append base=tables data=temp;

run;

%let j=%eval(&j+1);

%end;


proc datasets lib=work nolist;

delete temp;

quit;

%mend;


ods listing close;

%tables(table1/table2/sashelp.class/sashelp.heart);

ods listing;


partial output of data set TABLES ...

                                    not_

table            name              missing    not_applicable    missing    total

table1           name                   2            1               1         4

table1           age                    4            .               .         4

z.table2         color                  1            .               .         1

z.table2         height                 1            .               .         1

sashelp.class    Name                  19            .               .        19

sashelp.class    Sex                   19            .               .        19

sashelp.class    Age                   19            .               .        19

sashelp.class    Height                19            .               .        19

sashelp.class    Weight                19            .               .        19

sashelp.heart    Status              5209            .               .      5209

sashelp.heart    DeathCause          1991            .            3218      5209

sashelp.heart    AgeCHDdiag          1449            .            3760      5209


Accepted Solutions
Solution
‎08-29-2012 04:38 PM
PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

I lied and did end up having time to write some comments as well as incorporate what I think you said you wanted.  Let me know if it now does what you want and if the comments are at the right level.  I used Mike's code, combined with a slight change to your proc sql code:

/* create some test data */

data table1;

input name $ age @@ ;

cards;

John 45 N/A 30 . 15 Carl 25

;

data table2;

input color $ height;

cards;

Blue 110 N/A 120 . 100 Red .

;

data class;

  set sashelp.class;

  if name in ('Carol','James') then call missing(age);

  if name in ('Jane','John') then output;

  output;

run;

/*end of test data creation */

/*create formats that can later be used to identify missing and N/A data*/

/*format nn is for numeric values*/

/*format $c is for character values*/

proc format;

  value nn low-high='1' other='3';

  value $c 'N/A'='2' ' '='3' other='1';

run;

%macro tables(stuff);

  /*the macro won't work unless the file work.tables doesn't exist*/

  /*so the proc datasets is used to delete it*/

  proc datasets lib=work nolist;

    delete tables;

  quit;

  %let j=1;

  /*loop through all of the identified datasets*/

  %do %while(%scan(&stuff,&j,/) ne);

    %let dset=%scan(&stuff,&j,/);

    /*run proc freq for each dataset, storing the output in work.temp*/

    ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

    proc freq data=&dset;

      tables _all_ / missing;

      format _numeric_ nn. _character_ $c.;

    run;

    ods output close;

    /*use proc sql to count the number of duplicates on variable name*/

    /*placing the result in a macro variable called &dups.*/

    proc sql;

      select sum(num) into :dups

        from (

          select COUNT(name) as num

            FROM &dset.

              GROUP BY name

                HAVING ( COUNT(name) > 1 )

             )

      ;

    quit;

    /*restructure work.temp to create the desired columns*/

    /*and incorporate &dups. as a column called duplicates*/

    data temp (keep=table name not_: missing duplicates total);

      length table name $32;

      do until(last.table);

        set temp;

        by table notsorted;

        array t(*) f_:;

        select(t(_n_));

          when('1') not_missing = frequency;

          when('2') not_applicable = frequency;

          when('3') missing = frequency;

        end;

      end;

      total = sum(of not_:, missing);

      name = scan(table,2);

      table = "&dset";

      if upcase(name) eq "NAME" then duplicates=&dups.;

    run;

    /*append work.temp to a dataset called tables*/

    proc append base=tables data=temp;

    run;

    %let j=%eval(&j+1);

  %end;

  /*cleanup by deleting work.temp .. the final desired dataset*/

  /*will be work.tables*/

  proc datasets lib=work nolist;

    delete temp;

  quit;

%mend;

ods listing close;

%tables(table1/table2/class/sashelp.heart);

ods listing;

View solution in original post


All Replies
Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Do you have any tips on what I should look for or any documentation I should read in order to solve this problem?

Thank you for your help and time.

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Anyone has an idea how to solve this?

Thank you.

PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

A hint and please don't take this the wrong way.  A number of users helped you the last time, but none of their posts were marked as either correct or helpful.

I think a first step would to go back to that post and give people credit for the work that they did!

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Yes I am aware of that and I would really like to give them credit, but the thing is that the green and yellow stars with correct answer or useful answer does not appear at the end of their reply.

I am also unable to mark the question as solved.

Any idea what is causing this?

Thank you for the reply.

PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

Yes and I will post the answer as a question of the Forum's administrator.  When one initially creates a discussion, you have the opportunity to check the box that indicates whether the discussion is or isn't a question.  The box MUST be checked, at the top of the page, to indicate that it is a question in order to be able to mark responses as being correct or helpful.

I don't think there is a way to change that unless one edits the discussion within 15 minutes after posting it.  There should be, as very few people probably realize the significance.

This post says: this question has not been answerd at the top of the page, thus I presume that you had the box checked when you submitted the question and that you have the opportunity to mark responses as being either correct or helpful.

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Thank you for the quick reply.

I am sorry I can't give credit to the awesome coders who helped me in the thread mentionned above.

This is my error when I created the thread.


PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

Okay, now that we have resolved the points' matter, I have to confuse you.  Unmark my posts as being helpful for, while they may have been helpful in addressing the points' matter, they definitely didn't help to answer your question.  Besides, I have been overly rewarded for my posts this past year, and would rather that people who really help are afforded the opportunity.

I looked at Mike's code and it leads me to a question for you.  How do you want to define duplicate records?  In your example it was only if there were multiple names within any one dataset and that was only if there was a variable called name in the dataset.  Duplicate records could be defined that way, or based on some other id, or based on whether the values for every variable were the same in a given dataset.

To answer your question, one would have to know which definition you would want to use.

As for adding comments to the code, for now, I'll leave that to someone else.

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Thank you again for the quick reply Arthur.

I would like to define duplicates as if it was only if there were multiple names within any one dataset and that was only if there was a variable called name in the dataset.

I would like the result to be the number of multiple records.

Hope you can help me with that.

Thank you for your time and help.

Solution
‎08-29-2012 04:38 PM
PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

I lied and did end up having time to write some comments as well as incorporate what I think you said you wanted.  Let me know if it now does what you want and if the comments are at the right level.  I used Mike's code, combined with a slight change to your proc sql code:

/* create some test data */

data table1;

input name $ age @@ ;

cards;

John 45 N/A 30 . 15 Carl 25

;

data table2;

input color $ height;

cards;

Blue 110 N/A 120 . 100 Red .

;

data class;

  set sashelp.class;

  if name in ('Carol','James') then call missing(age);

  if name in ('Jane','John') then output;

  output;

run;

/*end of test data creation */

/*create formats that can later be used to identify missing and N/A data*/

/*format nn is for numeric values*/

/*format $c is for character values*/

proc format;

  value nn low-high='1' other='3';

  value $c 'N/A'='2' ' '='3' other='1';

run;

%macro tables(stuff);

  /*the macro won't work unless the file work.tables doesn't exist*/

  /*so the proc datasets is used to delete it*/

  proc datasets lib=work nolist;

    delete tables;

  quit;

  %let j=1;

  /*loop through all of the identified datasets*/

  %do %while(%scan(&stuff,&j,/) ne);

    %let dset=%scan(&stuff,&j,/);

    /*run proc freq for each dataset, storing the output in work.temp*/

    ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

    proc freq data=&dset;

      tables _all_ / missing;

      format _numeric_ nn. _character_ $c.;

    run;

    ods output close;

    /*use proc sql to count the number of duplicates on variable name*/

    /*placing the result in a macro variable called &dups.*/

    proc sql;

      select sum(num) into :dups

        from (

          select COUNT(name) as num

            FROM &dset.

              GROUP BY name

                HAVING ( COUNT(name) > 1 )

             )

      ;

    quit;

    /*restructure work.temp to create the desired columns*/

    /*and incorporate &dups. as a column called duplicates*/

    data temp (keep=table name not_: missing duplicates total);

      length table name $32;

      do until(last.table);

        set temp;

        by table notsorted;

        array t(*) f_:;

        select(t(_n_));

          when('1') not_missing = frequency;

          when('2') not_applicable = frequency;

          when('3') missing = frequency;

        end;

      end;

      total = sum(of not_:, missing);

      name = scan(table,2);

      table = "&dset";

      if upcase(name) eq "NAME" then duplicates=&dups.;

    run;

    /*append work.temp to a dataset called tables*/

    proc append base=tables data=temp;

    run;

    %let j=%eval(&j+1);

  %end;

  /*cleanup by deleting work.temp .. the final desired dataset*/

  /*will be work.tables*/

  proc datasets lib=work nolist;

    delete temp;

  quit;

%mend;

ods listing close;

%tables(table1/table2/class/sashelp.heart);

ods listing;

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

Thank you very much Arthur!!!

Exactly what I needed and I learned a lot from your comment, everything is more clear to me now!

One last question : lets say the dataset can either have the variable "name", "id" or "dw_id" and I want to my sure that my code counts the number of duplicates even if it does not find the variable "name" (that it then look for "id"; if not found look for dw_id "id_name")... How do I properly code this?

Here is my failed attemp. I have put in bold the code I added (not working). Hope you understand what I am trying to do :

Your help and time are really appreciated.

/* create some test data */

data table1;

input name $ age @@ ;

cards;

John 45
N/A 30
. 15
Carl 25
Carl 30
John 30

;


data table2;

input id $ height;

cards;

Blue 110
N/A 120
. 100 Red
Blue 120
Blue 120
;

data table3;

input id_name $ height;

cards;

John 45
N/A 30
. 15
Bob 25
Bob 30
Bob 15
Bob 60
John 30
;


data class;

  set sashelp.class;

  if name in ('Carol','James') then call missing(age);

  if name in ('Jane','John') then output;

  output;

run;

/*end of test data creation */


/*create formats that can later be used to identify missing and N/A data*/

/*format nn is for numeric values*/

/*format $c is for character values*/

proc format;

  value nn low-high='1' other='3';

  value $c 'N/A'='2' ' '='3' other='1';

run;



%macro tables(stuff);


  /*the macro won't work unless the file work.tables doesn't exist*/

  /*so the proc datasets is used to delete it*/

  proc datasets lib=work nolist;

    delete tables;

  quit;


  %let j=1;


  /*loop through all of the identified datasets*/

  %do %while(%scan(&stuff,&j,/) ne);

    %let dset=%scan(&stuff,&j,/);

if upcase(name) eq "NAME" then variable="name";
if upcase(id) eq "ID" then variable="ID";
if upcase(id_name) eq "ID_NAME" then variable="ID_name";

    /*run proc freq for each dataset, storing the output in work.temp*/

    ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

    proc freq data=&dset;

      tables _all_ / missing;

      format _numeric_ nn. _character_ $c.;

    run;

    ods output close;


    /*use proc sql to count the number of duplicates on variable name*/

    /*placing the result in a macro variable called &dups.*/

    proc sql;

      select sum(num) into :dups

        from (

          select COUNT(variable) as num

            FROM &dset.

              GROUP BY variable

                HAVING ( COUNT(variable) > 1 )

             )

      ;

    quit;


    /*restructure work.temp to create the desired columns*/

    /*and incorporate &dups. as a column called duplicates*/

    data temp (keep=table name not_: missing duplicates total);

      length table name $32;

      do until(last.table);

        set temp;

        by table notsorted;

        array t(*) f_:;

        select(t(_n_));

          when('1') not_missing = frequency;

          when('2') not_applicable = frequency;

          when('3') missing = frequency;

        end;

      end;

      total = sum(of not_:, missing);

      name = scan(table,2);

      table = "&dset";

if upcase(name) eq "NAME" then duplicates=&dups.;
if upcase(id) eq "ID" then duplicates=&dups.;
if upcase(id_name) eq "ID_NAME" then duplicates=&dups.;

    run;


    /*append work.temp to a dataset called tables*/

    proc append base=tables data=temp;

    run;

    %let j=%eval(&j+1);

  %end;


  /*cleanup by deleting work.temp .. the final desired dataset*/

  /*will be work.tables*/

  proc datasets lib=work nolist;

    delete temp;

  quit;

%mend;


ods listing close;

%tables(table1/table2/table3);

ods listing;

PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

I think that the following accomplishes your latest request:

/* create some test data */

data table1;

input name $ age @@ ;

cards;

John 45 N/A 30 . 15 Carl 25 John 30

;

data table2;

input id $ height @@;

cards;

Blue 110 N/A 120 . 100 Red .

;

data class (rename=(name=id_name));

  set sashelp.class;

  if name in ('Carol','James') then call missing(age);

  if name in ('Jane','John') then output;

  output;

run;

/*end of test data creation */

/*create formats that can later be used to identify missing and N/A data*/

/*format nn is for numeric values*/

/*format $c is for character values*/

proc format;

  value nn low-high='1' other='3';

  value $c 'N/A'='2' ' '='3' other='1';

run;

%macro tables(stuff);

  /*the macro won't work unless the file work.tables doesn't exist*/

  /*so the proc datasets is used to delete it*/

  proc datasets lib=work nolist;

    delete tables;

  quit;

  %let j=1;

  /*loop through all of the identified datasets*/

  %do %while(%scan(&stuff,&j,/) ne);

    %let dset=%scan(&stuff,&j,/);

    /*run proc freq for each dataset, storing the output in work.temp*/

    ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

    proc freq data=&dset;

      tables _all_ / missing;

      format _numeric_ nn. _character_ $c.;

    run;

    ods output close;

    /*use proc sql to count the number of duplicates on variable name*/

    /*placing the result in a macro variable called &dups.*/

    data _null_;

      set temp;

      if upcase(substr(table,7)) in ("NAME","ID","ID_NAME") then do;

        call symput('name', substr(table,7));

        stop;

      end;

    run;

    proc sql;

      select sum(num) into :dups

        from (

          select COUNT(&name.) as num

            FROM &dset.

              GROUP BY &name.

                HAVING ( COUNT(&name.) > 1 )

             )

      ;

    quit;

    /*restructure work.temp to create the desired columns*/

    /*and incorporate &dups. as a column called duplicates*/

    data temp (keep=table name not_: missing duplicates total);

      length table name $32;

      do until(last.table);

        set temp;

        by table notsorted;

        array t(*) f_:;

        select(t(_n_));

          when('1') not_missing = frequency;

          when('2') not_applicable = frequency;

          when('3') missing = frequency;

        end;

      end;

      total = sum(of not_:, missing);

      name = scan(table,2);

      table = "&dset";

      if upcase(name) in ("NAME","ID","ID_NAME") then duplicates=&dups.;

    run;

    /*append work.temp to a dataset called tables*/

    proc append base=tables data=temp;

    run;

    %let j=%eval(&j+1);

  %end;

  /*cleanup by deleting work.temp .. the final desired dataset*/

  /*will be work.tables*/

  proc datasets lib=work nolist;

    delete temp;

  quit;

%mend;

ods listing close;

%tables(table1/table2/class/sashelp.heart);

ods listing;

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

This is working perfectly!

One small change I made to your code :

    proc sql;

      select sum(num - 1) into :dups

        from (

          select COUNT(&name.) as num

            FROM &dset.

              GROUP BY &name.

                HAVING ( COUNT(&name.) > 1 )

             )

      ;

    quit;

Here I want to get the number of duplicate value, so even if the name appear 4 times the first one is always a unique value and I don't want it in my count.

I.E

In this table :

data table3;

input id_name $ height;

cards;

John 45
N/A 30
. 15
Bob 25
Bob 30
Bob 15
Bob 60
John 30
;

the result would be :

name duplicates

John 2

Bob 4

So the result of the sum in the proc sql would be 6, but in fact there is 2 unique value and 4 duplicates. So the num - 1 does the trick.

Can't thank you enough for this, I learned a lot from your code and comments and now the macro is working just like I wanted.

Thank you very much!!!

Regular Contributor
Posts: 186

Re: Help with macro that counts records (created by MikeZdeb)

I have one last request and I would be really greatful if you could help me solve this problem (I think I will also learn a lot from it).

As of right now we manipulate the data and put the output for each dataset in the table work.temp, we then use the append procedure to put the work.temp of each dataset in the work.tables as a summary (hope I understood this correctly).

What I would like to do with this code is create differents datasets for different situation, not necessarly having all the information in one dataset.

E.G.

Lets says I have 4 datasets named table1,table2,table3,table4

Normally I would write this code :

ods listing close;

%tables(table1/table2/table3/table4);

ods listing;

And have all the info in one dataset named tables.

Is there an easy way to modify the macro so that I would be able to simply write someting like

%tables(summary_one(table1/table2),summary_two(table3/table4))   *Not sure this would be the most efficient writing structure

So that I would end up with two datasets :

- one named summary_one that would contain the info from table1 and table2

- one named summary_two that would contain the info from table3 and table4

I am not too sure of the level of complexity there is in order to modify the code so the end result would be something like this. Waste no time on this if it is too long or difficult. This is just a nice to have and would help me understand a little more about the macro procedures.

Again your help and time are really appreciated!

PROC Star
Posts: 7,364

Re: Help with macro that counts records (created by MikeZdeb)

I thought your last question was your last question!  I would do it as follows:

/* create some test data */

data table1;

input name $ age @@ ;

cards;

John 45 N/A 30 . 15 Carl 25 John 30

;

data table2;

input id $ height @@;

cards;

Blue 110 N/A 120 . 100 Red .

;

data class (rename=(name=id_name));

  set sashelp.class;

  if name in ('Carol','James') then call missing(age);

  if name in ('Jane','John') then output;

  output;

run;

/*end of test data creation */

/*create formats that can later be used to identify missing and N/A data*/

/*format nn is for numeric values*/

/*format $c is for character values*/

proc format;

  value nn low-high='1' other='3';

  value $c 'N/A'='2' ' '='3' other='1';

run;

%macro tables(table_name,stuff);

  /*the macro won't work unless the file work.tables doesn't exist*/

  /*so the proc datasets is used to delete it*/

  proc datasets lib=work nolist;

    delete &table_name.;

  quit;

  %let j=1;

  /*loop through all of the identified datasets*/

  %do %while(%scan(&stuff,&j,/) ne);

    %let dset=%scan(&stuff,&j,/);

    /*run proc freq for each dataset, storing the output in work.temp*/

    ods output onewayfreqs=temp (keep=table f_: freqSmiley Happy;

    proc freq data=&dset;

      tables _all_ / missing;

      format _numeric_ nn. _character_ $c.;

    run;

    ods output close;

    /*use proc sql to count the number of duplicates on variable name*/

    /*placing the result in a macro variable called &dups.*/

    data _null_;

      set temp;

      if upcase(substr(table,7)) in ("NAME","ID","ID_NAME") then do;

        call symput('name', substr(table,7));

        stop;

      end;

    run;

    proc sql;

      select sum(num) into :dups

        from (

          select COUNT(&name.) as num

            FROM &dset.

              GROUP BY &name.

                HAVING ( COUNT(&name.) > 1 )

             )

      ;

    quit;

    /*restructure work.temp to create the desired columns*/

    /*and incorporate &dups. as a column called duplicates*/

    data temp (keep=table name not_: missing duplicates total);

      length table name $32;

      do until(last.table);

        set temp;

        by table notsorted;

        array t(*) f_:;

        select(t(_n_));

          when('1') not_missing = frequency;

          when('2') not_applicable = frequency;

          when('3') missing = frequency;

        end;

      end;

      total = sum(of not_:, missing);

      name = scan(table,2);

      table = "&dset";

      if upcase(name) in ("NAME","ID","ID_NAME") then duplicates=&dups.;

    run;

    /*append work.temp to a dataset called tables*/

    proc append base=&table_name. data=temp;

    run;

    %let j=%eval(&j+1);

  %end;

  /*cleanup by deleting work.temp .. the final desired dataset*/

  /*will be work.tables*/

  proc datasets lib=work nolist;

    delete temp;

  quit;

%mend;

ods listing close;

%tables(summary_one,table1/table2);

%tables(summary_two,class/sashelp.heart);

ods listing;

☑ This topic is solved.

Need further help from the community? Please ask a new question.

Discussion stats
  • 16 replies
  • 540 views
  • 6 likes
  • 3 in conversation