<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Emulate fct_lump from R / Change all except top n factors to "Other" based on total value in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774561#M246199</link>
    <description>&lt;P&gt;I am trying to emulate R functions in SAS so I know how to manipulate data in both. Below is the code to get to the data before applying the function I want to emulate.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;In R here is what it does: Takes the factors, in this case Seafood types, checks the total value of all the data, in this case Production, and changes all except the highest n factor levels to "Other". I included a picture from R at the end of this post to show. At the start there are 7 different Seafood types. The function changes them to Freshwater, Pelagic, Demersal, Other, Other, Other, Other. Since it is only 7 I could do this manually relatively easily, but I am sure I will run into a case where there are too many to do manually.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there a succinct way to do this in SAS?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;* Get data;
filename test1234 url "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-10-12/seafood-and-fish-production-thousand-tonnes.csv";

data production ;
  infile test1234 dsd truncover firstobs=2 ;
  input entity :$40. code :$8. year
        Pelagic Crustaceans Cephalopods Demersal Freshwater Molluscs Other_Marine;
run;

*clean up and filter;
proc sql;
	create table production3 as
	select *
	from production 
	where ENTITY not in ('Entity', 'World') and not missing(Code) 
	having year=max(year);
quit;

* pivot_longer;
proc transpose data=production3 out=long_production (rename = (_name_ = Seafood col1=Production));
	by Entity Year Code;
	var Crustaceans--Other_Marine;
run;

* Keep only rows with positive production (drops zero and missing);
proc sql;
	create table production_case2 as
	select *
	from long_production
	where Production &amp;gt; 0;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Top table is before the function (note the different Seafood levels), bottom table is after (note all the "other")&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Indescribled_0-1634314054931.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/64766i45818F7D1118948B/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Indescribled_0-1634314054931.png" alt="Indescribled_0-1634314054931.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;Showing totals to help understanding of the function I am trying to emulate. Freshwater, Pelagic, Demersal are the top 3 when looking at total production. All others should be changed to "Other"&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Indescribled_1-1634314844739.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/64767i91D719F2504AFC73/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Indescribled_1-1634314844739.png" alt="Indescribled_1-1634314844739.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 15 Oct 2021 16:22:47 GMT</pubDate>
    <dc:creator>Indescribled</dc:creator>
    <dc:date>2021-10-15T16:22:47Z</dc:date>
    <item>
      <title>Emulate fct_lump from R / Change all except top n factors to "Other" based on total value</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774561#M246199</link>
      <description>&lt;P&gt;I am trying to emulate R functions in SAS so I know how to manipulate data in both. Below is the code to get to the data before applying the function I want to emulate.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;In R here is what it does: Takes the factors, in this case Seafood types, checks the total value of all the data, in this case Production, and changes all except the highest n factor levels to "Other". I included a picture from R at the end of this post to show. At the start there are 7 different Seafood types. The function changes them to Freshwater, Pelagic, Demersal, Other, Other, Other, Other. Since it is only 7 I could do this manually relatively easily, but I am sure I will run into a case where there are too many to do manually.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there a succinct way to do this in SAS?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;* Get data;
filename test1234 url "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-10-12/seafood-and-fish-production-thousand-tonnes.csv";

data production ;
  infile test1234 dsd truncover firstobs=2 ;
  input entity :$40. code :$8. year
        Pelagic Crustaceans Cephalopods Demersal Freshwater Molluscs Other_Marine;
run;

*clean up and filter;
proc sql;
	create table production3 as
	select *
	from production 
	where ENTITY not in ('Entity', 'World') and not missing(Code) 
	having year=max(year);
quit;

* pivot_longer;
proc transpose data=production3 out=long_production (rename = (_name_ = Seafood col1=Production));
	by Entity Year Code;
	var Crustaceans--Other_Marine;
run;

* Keep only rows with positive production (drops zero and missing);
proc sql;
	create table production_case2 as
	select *
	from long_production
	where Production &amp;gt; 0;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Top table is before the function (note the different Seafood levels), bottom table is after (note all the "other")&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Indescribled_0-1634314054931.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/64766i45818F7D1118948B/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Indescribled_0-1634314054931.png" alt="Indescribled_0-1634314054931.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;Showing totals to help understanding of the function I am trying to emulate. Freshwater, Pelagic, Demersal are the top 3 when looking at total production. All others should be changed to "Other"&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Indescribled_1-1634314844739.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/64767i91D719F2504AFC73/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Indescribled_1-1634314844739.png" alt="Indescribled_1-1634314844739.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 15 Oct 2021 16:22:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774561#M246199</guid>
      <dc:creator>Indescribled</dc:creator>
      <dc:date>2021-10-15T16:22:47Z</dc:date>
    </item>
    <item>
      <title>Re: Emulate fct_lump from R / Change all except top n factors to "Other" based on total value</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774568#M246201</link>
      <description>A custom format driven by a data set can accomplish this, so not as succinct as the R code but doable.</description>
      <pubDate>Fri, 15 Oct 2021 16:34:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774568#M246201</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2021-10-15T16:34:50Z</dc:date>
    </item>
    <item>
      <title>Re: Emulate fct_lump from R / Change all except top n factors to "Other" based on total value</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774582#M246204</link>
      <description>&lt;P&gt;I think I figured out the random digit underlining, no commas or separator for thousands, millions and such. &lt;span class="lia-unicode-emoji" title=":thinking_face:"&gt;🤔&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 15 Oct 2021 17:55:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Emulate-fct-lump-from-R-Change-all-except-top-n-factors-to-quot/m-p/774582#M246204</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2021-10-15T17:55:42Z</dc:date>
    </item>
  </channel>
</rss>

