<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Calculating proportion of variable by another identifier in SAS Enterprise Guide</title>
    <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/587993#M34636</link>
    <description>&lt;P&gt;Hi - I am looking at bank lending data by bank within different industries. I want to create a table that has the banks with the largest proportion of their loans in one industry. I am looking for the top 20. This is the code I have and I am really stuck on how to actually calculate the industry proportion of loans by bank - the one piece I need.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;It all comes from one huge database.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;libname home "/Excel_Output";
libname s4a "/staging";

data industrycon_;
	set s4a.bank_201907	(keep =	mainbank
					mainbankid
					bankstate
					naics_sect_cd
					naics_sector
					balance
					)
					;
	if balgrs=0 then delete;
run;

proc sql;
	create table indcon_
	as
	select
		distinct mainbank,
		mainbankid as mainbankid,
		bankstate as bankstate,
		sum(balance) as balance_
	from industrycon_
	group by mainbank, mainbankid;
quit;

proc sql;
	create table indconman
	as
	select
		distinct mainbank,
		mainbankid as mainbankid,
		bankstate as bankstate,
		sum(balance) as balance_
	from industrycon_
	where naics_sector="Manufacturing"
	group by mainbank, mainbankid;
quit;




proc sql outobs=20;
	create table test
	as
	select distinct
	a.*,b.balgrs1_ as balgrs
from indconman as a 
	full join indcon_ as b
	on a.mainbank = b.mainbank
	order by balgrs desc;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Wed, 11 Sep 2019 19:32:15 GMT</pubDate>
    <dc:creator>claforest1</dc:creator>
    <dc:date>2019-09-11T19:32:15Z</dc:date>
    <item>
      <title>Calculating proportion of variable by another identifier</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/587993#M34636</link>
      <description>&lt;P&gt;Hi - I am looking at bank lending data by bank within different industries. I want to create a table that has the banks with the largest proportion of their loans in one industry. I am looking for the top 20. This is the code I have and I am really stuck on how to actually calculate the industry proportion of loans by bank - the one piece I need.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;It all comes from one huge database.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;libname home "/Excel_Output";
libname s4a "/staging";

data industrycon_;
	set s4a.bank_201907	(keep =	mainbank
					mainbankid
					bankstate
					naics_sect_cd
					naics_sector
					balance
					)
					;
	if balgrs=0 then delete;
run;

proc sql;
	create table indcon_
	as
	select
		distinct mainbank,
		mainbankid as mainbankid,
		bankstate as bankstate,
		sum(balance) as balance_
	from industrycon_
	group by mainbank, mainbankid;
quit;

proc sql;
	create table indconman
	as
	select
		distinct mainbank,
		mainbankid as mainbankid,
		bankstate as bankstate,
		sum(balance) as balance_
	from industrycon_
	where naics_sector="Manufacturing"
	group by mainbank, mainbankid;
quit;




proc sql outobs=20;
	create table test
	as
	select distinct
	a.*,b.balgrs1_ as balgrs
from indconman as a 
	full join indcon_ as b
	on a.mainbank = b.mainbank
	order by balgrs desc;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 11 Sep 2019 19:32:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/587993#M34636</guid>
      <dc:creator>claforest1</dc:creator>
      <dc:date>2019-09-11T19:32:15Z</dc:date>
    </item>
    <item>
      <title>Re: Calculating proportion of variable by another identifier</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/588156#M34647</link>
      <description>&lt;P&gt;This should do want you want roughly.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;** For each bank, find prop of total loans by sector **;
PROC SQL;
  CREATE TABLE bank0 AS SELECT
  a.*, 
  sum(balance) as indtot, 
  b.banktot,
  sum(balance)/banktot as prop  
  FROM industrycon_ a
  LEFT JOIN (select distinct mainbank, mainbank, sum(balance) as banktot from industrycon_ group by mainbank, mainbankid) b
    ON a.mainbank=b.mainbank and a.mainbankid=b,mainbankid  
  GROUP BY a.mainbank, a.mainbankid, a.naics_sect_cd, a.naics_sector;
QUIT;

** Sort each bank by prop of loans **;
PROC SORT data=bank0;
  BY mainbank mainbankid prop;
RUN;

** Get one record per bank with max prop **;
DATA bank1;
  SET bank0;
  BY mainbank mainbankid;
  IF last.mainbankid;
RUN;

** Re-sort by max prop**;
PROC SORT data=bank1;
  BY descending prop;
RUN;

** Find top 20 banks **;
DATA bank2;
  SET bank1;
  IF _n_&amp;gt;20 THEN DELETE;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;It does not account for ties, i.e. if a bank has loans exactly split between two industries or if 2 banks are tied on exactly the same proportion but if it is a huge data set it may not matter&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2019 10:05:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/588156#M34647</guid>
      <dc:creator>SwissC</dc:creator>
      <dc:date>2019-09-12T10:05:41Z</dc:date>
    </item>
    <item>
      <title>Re: Calculating proportion of variable by another identifier</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/588194#M34648</link>
      <description>&lt;P&gt;Thank you!! I edited it a little bit, but it gave me exactly what I needed. The dataset is massive. Millions and millions of rows.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2019 12:51:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Calculating-proportion-of-variable-by-another-identifier/m-p/588194#M34648</guid>
      <dc:creator>claforest1</dc:creator>
      <dc:date>2019-09-12T12:51:01Z</dc:date>
    </item>
  </channel>
</rss>

