<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to keep specific rows, by frequency in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917711#M361502</link>
    <description>&lt;P&gt;You might have ties, so this code keeps tied values at each extreme, as long as at least the 3 lowest and 3 highest observations are output:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc sort data=have;
  by descending freq;
run;

%let min_needed=3;  /* At least 3 highest and 3 lowest, including ties */

data want;
  set have nobs=nrows;
  by descending freq;
  output;

  /* If first 3 have been output then stop when ties are exhausted,
     then do the same from the bottom up */
  if _n_&amp;gt;=&amp;amp;min_needed and last.freq=1 then do p=nrows by -1;
    set have point=p;
    if p&amp;lt;nrows-&amp;amp;min_needed and freq^=lag(freq) then stop;
    output;
  end;
run;

proc print data=want;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;In the case of a very large dataset, this can be much faster because it doesn't pass through the middle observations.&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sat, 24 Feb 2024 03:53:58 GMT</pubDate>
    <dc:creator>mkeintz</dc:creator>
    <dc:date>2024-02-24T03:53:58Z</dc:date>
    <item>
      <title>How to keep specific rows, by frequency</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917666#M361475</link>
      <description>&lt;P&gt;Hello all, I am trying to write a code where I take my data, put it into a table, and then just make a table for the top 5 and bottom 5 values by frequency.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My code is:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;Proc Freq data=db1;
	table statefips / nocum;
run;

Proc Sort data=db1;
	by statefips;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;P class=""&gt;My Table&lt;/P&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;StateCode Frequency&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;26357&lt;/TD&gt;&lt;TD&gt;1.47&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4326&lt;/TD&gt;&lt;TD&gt;0.24&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;35139&lt;/TD&gt;&lt;TD&gt;1.97&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;15870&lt;/TD&gt;&lt;TD&gt;0.89&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;205357&lt;/TD&gt;&lt;TD&gt;11.49&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;31240&lt;/TD&gt;&lt;TD&gt;1.75&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;22006&lt;/TD&gt;&lt;TD&gt;1.23&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5115&lt;/TD&gt;&lt;TD&gt;0.29&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3871&lt;/TD&gt;&lt;TD&gt;0.22&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;106730&lt;/TD&gt;&lt;TD&gt;5.97&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;54726&lt;/TD&gt;&lt;TD&gt;3.06&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;8187&lt;/TD&gt;&lt;TD&gt;0.46&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;9065&lt;/TD&gt;&lt;TD&gt;0.51&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;75141&lt;/TD&gt;&lt;TD&gt;4.20&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;38683&lt;/TD&gt;&lt;TD&gt;2.16&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;19039&lt;/TD&gt;&lt;TD&gt;1.06&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;17154&lt;/TD&gt;&lt;TD&gt;0.96&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;24458&lt;/TD&gt;&lt;TD&gt;1.37&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;24895&lt;/TD&gt;&lt;TD&gt;1.39&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;7944&lt;/TD&gt;&lt;TD&gt;0.44&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;35711&lt;/TD&gt;&lt;TD&gt;2.00&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;40991&lt;/TD&gt;&lt;TD&gt;2.29&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;56479&lt;/TD&gt;&lt;TD&gt;3.16&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;33679&lt;/TD&gt;&lt;TD&gt;1.88&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;15439&lt;/TD&gt;&lt;TD&gt;0.86&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;35793&lt;/TD&gt;&lt;TD&gt;2.00&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6046&lt;/TD&gt;&lt;TD&gt;0.34&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;11315&lt;/TD&gt;&lt;TD&gt;0.63&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;15939&lt;/TD&gt;&lt;TD&gt;0.89&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;8566&lt;/TD&gt;&lt;TD&gt;0.48&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;51896&lt;/TD&gt;&lt;TD&gt;2.90&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;10674&lt;/TD&gt;&lt;TD&gt;0.60&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;110102&lt;/TD&gt;&lt;TD&gt;6.16&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;55570&lt;/TD&gt;&lt;TD&gt;3.11&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4396&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;68905&lt;/TD&gt;&lt;TD&gt;3.85&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;21246&lt;/TD&gt;&lt;TD&gt;1.19&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;22879&lt;/TD&gt;&lt;TD&gt;1.28&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;74577&lt;/TD&gt;&lt;TD&gt;4.17&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6383&lt;/TD&gt;&lt;TD&gt;0.36&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;26927&lt;/TD&gt;&lt;TD&gt;1.51&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5100&lt;/TD&gt;&lt;TD&gt;0.29&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;36552&lt;/TD&gt;&lt;TD&gt;2.04&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;139158&lt;/TD&gt;&lt;TD&gt;7.78&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;15825&lt;/TD&gt;&lt;TD&gt;0.89&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4237&lt;/TD&gt;&lt;TD&gt;0.24&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;48324&lt;/TD&gt;&lt;TD&gt;2.70&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;40337&lt;/TD&gt;&lt;TD&gt;2.26&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;9658&lt;/TD&gt;&lt;TD&gt;0.54&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;36436&lt;/TD&gt;&lt;TD&gt;2.04&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3579&lt;/TD&gt;&lt;TD&gt;0.20&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I really never learned how to pull specific values out of a table to keep, so I'm a little stumped. If anyone can help me with this, I would greatly appreciate it.&lt;/P&gt;</description>
      <pubDate>Fri, 23 Feb 2024 21:10:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917666#M361475</guid>
      <dc:creator>OwnerOfTwoCats</dc:creator>
      <dc:date>2024-02-23T21:10:03Z</dc:date>
    </item>
    <item>
      <title>Re: How to keep specific rows, by frequency</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917703#M361495</link>
      <description>&lt;P&gt;If your table is sorted by FREQ then just picking the top 3 and bottom 3 rows is not that hard. But.... how do you intend to deal with ties?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;In below data would you want to pick the row where category="D"?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  input category $ freq;
  datalines;
A 10
B 9
C 9
D 8
E 7
F 6
G 5
H 4
I 3
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Using above data the code to just pick the first and last 3 rows could look like below.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=have;
  by descending freq;
run;

data want;
  set have nobs=nrows;
  by descending freq;
  if _n_ &amp;lt;=3 or _n_&amp;gt;nrows-3;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;As you can see the row with the 3rd highest value hasn't been selected because it was row 4. ...and if you would have 3 rows with a 2nd highest value then even one of these wouldn't get selected.&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1708739959804.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/94063i397276F34E6A9F58/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Patrick_0-1708739959804.png" alt="Patrick_0-1708739959804.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;Proc Rank can help you dealing with ties. In below code code the rows with the three highest and three lowest values for Freq will get selected.&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc rank data=have out=inter ties=dense descending;
  var freq;
  ranks ranking;
run;
proc sql;
/*  create table want as*/
  select *
  from inter
  having ranking&amp;lt;=3 or ranking&amp;gt;=max(ranking)-2
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 24 Feb 2024 02:04:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917703#M361495</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2024-02-24T02:04:45Z</dc:date>
    </item>
    <item>
      <title>Re: How to keep specific rows, by frequency</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917711#M361502</link>
      <description>&lt;P&gt;You might have ties, so this code keeps tied values at each extreme, as long as at least the 3 lowest and 3 highest observations are output:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc sort data=have;
  by descending freq;
run;

%let min_needed=3;  /* At least 3 highest and 3 lowest, including ties */

data want;
  set have nobs=nrows;
  by descending freq;
  output;

  /* If first 3 have been output then stop when ties are exhausted,
     then do the same from the bottom up */
  if _n_&amp;gt;=&amp;amp;min_needed and last.freq=1 then do p=nrows by -1;
    set have point=p;
    if p&amp;lt;nrows-&amp;amp;min_needed and freq^=lag(freq) then stop;
    output;
  end;
run;

proc print data=want;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;In the case of a very large dataset, this can be much faster because it doesn't pass through the middle observations.&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 24 Feb 2024 03:53:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-keep-specific-rows-by-frequency/m-p/917711#M361502</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2024-02-24T03:53:58Z</dc:date>
    </item>
  </channel>
</rss>

