<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Weight of Evidence and Information Value of Categorical Variable - HPBIN? in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/Weight-of-Evidence-and-Information-Value-of-Categorical-Variable/m-p/507420#M1443</link>
    <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a categorical variable Q1 taking the values from 1 to 4 and a binary variable Bad taking the values 1 and 0.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to calculate WoE and IV for the variable according to the existing categories in ascending order.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I know that HPBIN has an option to calculate those statistics, but it would also create other bins - I want to use the variable binned as it is. I have also tried to use the output of the Freq procedure, but it becomes inconvenient for the cases in which there are no events in some of the categories.&lt;/P&gt;</description>
    <pubDate>Thu, 25 Oct 2018 08:26:10 GMT</pubDate>
    <dc:creator>KonstantinV1</dc:creator>
    <dc:date>2018-10-25T08:26:10Z</dc:date>
    <item>
      <title>Weight of Evidence and Information Value of Categorical Variable - HPBIN?</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Weight-of-Evidence-and-Information-Value-of-Categorical-Variable/m-p/507420#M1443</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a categorical variable Q1 taking the values from 1 to 4 and a binary variable Bad taking the values 1 and 0.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to calculate WoE and IV for the variable according to the existing categories in ascending order.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I know that HPBIN has an option to calculate those statistics, but it would also create other bins - I want to use the variable binned as it is. I have also tried to use the output of the Freq procedure, but it becomes inconvenient for the cases in which there are no events in some of the categories.&lt;/P&gt;</description>
      <pubDate>Thu, 25 Oct 2018 08:26:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Weight-of-Evidence-and-Information-Value-of-Categorical-Variable/m-p/507420#M1443</guid>
      <dc:creator>KonstantinV1</dc:creator>
      <dc:date>2018-10-25T08:26:10Z</dc:date>
    </item>
    <item>
      <title>Re: Weight of Evidence and Information Value of Categorical Variable - HPBIN?</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Weight-of-Evidence-and-Information-Value-of-Categorical-Variable/m-p/507464#M1451</link>
      <description>&lt;P&gt;Do you want to combine these into other groups, or just use the original levels?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;

%let var=marital   ;


title "变量: &amp;amp;var";
proc sql;
create table woe_&amp;amp;var as
 select &amp;amp;var as group,
sum(good_bad='bad') as n_bad label='bad的个数',sum(good_bad='good') as n_good label='good的个数',
sum(good_bad='bad')/(select sum(good_bad='bad') from have ) as bad_dist  format=percent7.2 label='bad的占比',
sum(good_bad='good')/(select sum(good_bad='good') from have ) as good_dist  format=percent7.2 label='good的占比',
log(calculated Bad_Dist/calculated Good_Dist) as woe
from have
   group by &amp;amp;var
    order by woe;


select *,sum(  (Bad_Dist-Good_Dist)*woe  ) as iv
 from woe_&amp;amp;var ;

quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2018/1808-2018.pdf" target="_blank"&gt;https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2018/1808-2018.pdf&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 25 Oct 2018 12:50:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Weight-of-Evidence-and-Information-Value-of-Categorical-Variable/m-p/507464#M1451</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2018-10-25T12:50:35Z</dc:date>
    </item>
  </channel>
</rss>

