<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Need help with my logistic regression code in Statistical Procedures</title>
    <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852729#M42202</link>
    <description>&lt;P&gt;Hi, thanks for answering my question!&lt;BR /&gt;&lt;EM&gt;Prod&lt;/EM&gt;&amp;nbsp;is a new data file that contains the same information as the data file that the model was trained on. The only difference between the two files is that the column &lt;EM&gt;'loan status'&lt;/EM&gt; in &lt;EM&gt;prod&lt;/EM&gt; is empty.&lt;/P&gt;&lt;P&gt;I want to use the logistic regression model to make new predictions about the loan status (default or current) in the new &lt;EM&gt;prod&lt;/EM&gt; data. Essentially, I expect the previously empty &lt;EM&gt;'loan status'&lt;/EM&gt;&amp;nbsp; column to be filled with 0 or 1 in the final output &lt;EM&gt;prod&lt;/EM&gt; data file.&amp;nbsp;&lt;BR /&gt;However when I tried to use the score statement as described in your response, the model failed to predict anything.&lt;BR /&gt;I'm unsure if it is due to some problem with my data.&lt;/P&gt;&lt;P&gt;These are the columns of the data on which the model was trained:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;PROC SQL;
CREATE TABLE WORK.query AS
SELECT loanId , memberId , 'date'n , 
		purpose , isJointApplication , loanAmount ,
 		term , interestRate , monthlyPayment , 
 		grade , loanStatus , residentialState , 
 		yearsEmployment , homeOwnership , annualIncome , 
 		incomeVerified , dtiRatio , lengthCreditHistory , 
 		numTotalCreditLines , numOpenCreditLines , 
 		numOpenCreditLines1Year , revolvingBalance , revolvingUtilizationRate , 
 		numDerogatoryRec , numDelinquency2Years , numChargeoff1year , 
 		numInquiries6Mon , bad_good FROM WORK.MERGED_LABEL;
RUN;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;and these are the columns in prod data&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;PROC SQL;
CREATE TABLE WORK.query AS
SELECT  loanId , memberId , 'date'n , 
		purpose , isJointApplication , loanAmount ,
 		term , interestRate , monthlyPayment , 
 		grade , loanStatus , residentialState , 
 		yearsEmployment , homeOwnership , annualIncome , 
 		incomeVerified , dtiRatio , lengthCreditHistory , 
 		numTotalCreditLines , numOpenCreditLines , 
 		numOpenCreditLines1Year , revolvingBalance , revolvingUtilizationRate , 
 		numDerogatoryRec , numDelinquency2Years , numChargeoff1year , 
 		numInquiries6Mon, loanStatus FROM WORK.PROD;
RUN;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thank you for your help!&lt;/P&gt;</description>
    <pubDate>Mon, 09 Jan 2023 06:31:46 GMT</pubDate>
    <dc:creator>JulietteZ</dc:creator>
    <dc:date>2023-01-09T06:31:46Z</dc:date>
    <item>
      <title>Need help with my logistic regression code</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852635#M42188</link>
      <description>&lt;P&gt;I have a simulated dataset for personal loans; it contains borrowers' financial history and their requested loans. I'm trying to write a logistic regression model to assess loan status - current(0) or default(1).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have already split the dataset into 70% train and 30% test.&lt;/P&gt;&lt;P&gt;My code looks like:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;/*Logistic regression*/
ods graphics on;
proc logistic data=train outmodel=model.log plots=all;
	class purpose term grade yearsemployment homeownership incomeVerified;
	model bad_good (event='0') =purpose term grade yearsemployment homeownership incomeVerified
					date
					isJointApplication
					loanAmount
					interestRate
					monthlyPayment
					annualIncome
					dtiRatio
					lengthCreditHistory
					numTotalCreditLines
					numOpenCreditLines
					numOpenCreditLines1Year
					revolvingBalance
					revolvingUtilizationRate
					numDerogatoryRec
					numDelinquency2Years
					numChargeoff1year
					numInquiries6Mon					
/
	selection=stepwise
	details
	lackfit;
	score data= test out=score1;
	store log_model;
run;

/*Score model*/
proc logistic inmodel=model.log;
	score data=train out=score2 fitstat;
run;

proc logistic inmodel=model.log;
	score data=test out=score3 fitstat;
run;

/*confusion matrix*/
proc freq data=score2;
	tables f_bad_good*i_bad_good / nocol norow; 
run;

proc freq data=score3;
	tables f_bad_good*i_bad_good / nocol norow; 
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;My next step is to use this trained model to make predictions to a new prod data, update that data and store it. How would I do that?&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also I wonder if anyone could take a look at my code and see if there's anything I should improve on.&amp;nbsp; I'm new to SAS and statistics, any help is much appreciated!&lt;/P&gt;</description>
      <pubDate>Sat, 07 Jan 2023 07:42:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852635#M42188</guid>
      <dc:creator>JulietteZ</dc:creator>
      <dc:date>2023-01-07T07:42:30Z</dc:date>
    </item>
    <item>
      <title>Re: Need help with my logistic regression code</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852640#M42189</link>
      <description>"My next step is to use this trained model to make predictions to a new prod data, update that data and store it. "&lt;BR /&gt;You could use the SCORE statement to score your new dataset as in your code. Or try PROC PLM.&lt;BR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13684"&gt;@Rick_SAS&lt;/a&gt; wrote a blog about it before.&lt;BR /&gt;&lt;A href="https://blogs.sas.com/content/iml/2019/02/11/proc-plm-regression-models-sas.html" target="_blank"&gt;https://blogs.sas.com/content/iml/2019/02/11/proc-plm-regression-models-sas.html&lt;/A&gt;&lt;BR /&gt;&lt;A href="https://blogs.sas.com/content/iml/2020/12/02/score-external-logistic-model.html" target="_blank"&gt;https://blogs.sas.com/content/iml/2020/12/02/score-external-logistic-model.html&lt;/A&gt;&lt;BR /&gt;&lt;A href="https://blogs.sas.com/content/iml/2014/02/19/scoring-a-regression-model-in-sas.html" target="_blank"&gt;https://blogs.sas.com/content/iml/2014/02/19/scoring-a-regression-model-in-sas.html&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;" I wonder if anyone could take a look at my code and see if there's anything I should improve on."&lt;BR /&gt;I would use the penalized maximum likelihood method via the FIRTH option in the MODEL statement if your data set is not big enough. For example:&lt;BR /&gt;model y=x1 ................./ firth ..........</description>
      <pubDate>Sat, 07 Jan 2023 08:54:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852640#M42189</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2023-01-07T08:54:55Z</dc:date>
    </item>
    <item>
      <title>Re: Need help with my logistic regression code</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852644#M42190</link>
      <description>&lt;P&gt;What do you mean by "new prod data"? New product? Or new data for the same product? In any event, if you have new data and you previously used this code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc logistic inmodel=model.log;
	score data=test out=score3 fitstat;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;then you can score the data in a new data set this way:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc logistic inmodel=model.log;
	score data=new_data out=score4 fitstat;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PROC PLM also works in this case.&lt;/P&gt;</description>
      <pubDate>Sat, 07 Jan 2023 22:43:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852644#M42190</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2023-01-07T22:43:59Z</dc:date>
    </item>
    <item>
      <title>Re: Need help with my logistic regression code</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852729#M42202</link>
      <description>&lt;P&gt;Hi, thanks for answering my question!&lt;BR /&gt;&lt;EM&gt;Prod&lt;/EM&gt;&amp;nbsp;is a new data file that contains the same information as the data file that the model was trained on. The only difference between the two files is that the column &lt;EM&gt;'loan status'&lt;/EM&gt; in &lt;EM&gt;prod&lt;/EM&gt; is empty.&lt;/P&gt;&lt;P&gt;I want to use the logistic regression model to make new predictions about the loan status (default or current) in the new &lt;EM&gt;prod&lt;/EM&gt; data. Essentially, I expect the previously empty &lt;EM&gt;'loan status'&lt;/EM&gt;&amp;nbsp; column to be filled with 0 or 1 in the final output &lt;EM&gt;prod&lt;/EM&gt; data file.&amp;nbsp;&lt;BR /&gt;However when I tried to use the score statement as described in your response, the model failed to predict anything.&lt;BR /&gt;I'm unsure if it is due to some problem with my data.&lt;/P&gt;&lt;P&gt;These are the columns of the data on which the model was trained:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;PROC SQL;
CREATE TABLE WORK.query AS
SELECT loanId , memberId , 'date'n , 
		purpose , isJointApplication , loanAmount ,
 		term , interestRate , monthlyPayment , 
 		grade , loanStatus , residentialState , 
 		yearsEmployment , homeOwnership , annualIncome , 
 		incomeVerified , dtiRatio , lengthCreditHistory , 
 		numTotalCreditLines , numOpenCreditLines , 
 		numOpenCreditLines1Year , revolvingBalance , revolvingUtilizationRate , 
 		numDerogatoryRec , numDelinquency2Years , numChargeoff1year , 
 		numInquiries6Mon , bad_good FROM WORK.MERGED_LABEL;
RUN;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;and these are the columns in prod data&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;PROC SQL;
CREATE TABLE WORK.query AS
SELECT  loanId , memberId , 'date'n , 
		purpose , isJointApplication , loanAmount ,
 		term , interestRate , monthlyPayment , 
 		grade , loanStatus , residentialState , 
 		yearsEmployment , homeOwnership , annualIncome , 
 		incomeVerified , dtiRatio , lengthCreditHistory , 
 		numTotalCreditLines , numOpenCreditLines , 
 		numOpenCreditLines1Year , revolvingBalance , revolvingUtilizationRate , 
 		numDerogatoryRec , numDelinquency2Years , numChargeoff1year , 
 		numInquiries6Mon, loanStatus FROM WORK.PROD;
RUN;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thank you for your help!&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jan 2023 06:31:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852729#M42202</guid>
      <dc:creator>JulietteZ</dc:creator>
      <dc:date>2023-01-09T06:31:46Z</dc:date>
    </item>
    <item>
      <title>Re: Need help with my logistic regression code</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852774#M42206</link>
      <description>&lt;P&gt;I'm not going to scan through your SQL code to figure out the difference. Just tell me, yes or no, are the columns the same in the two SQL calls, and if not, what is the difference? Why are two SQL calls creating a data set with the same name (WORK.query)? What are you trying to say by showing us the two SQL calls?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;BLOCKQUOTE&gt;
&lt;P&gt;&lt;SPAN&gt;However when I tried to use the score statement as described in your response, the model failed to predict anything.&lt;/SPAN&gt;&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Never (that's NEVER, not even once more in the future) should you state something failed and then not explain and not provide evidence. There are two possibilities here:&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1. If there is an error or problem in the log, show us the &lt;FONT color="#FF0000"&gt;&lt;STRONG&gt;ENTIRE&lt;/STRONG&gt; &lt;/FONT&gt;log for PROC LOGISTIC. Please copy the log as text and paste it into the window that appears when you click on the &amp;lt;/&amp;gt; icon.&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2021-11-26 08_27_29-Reply to Message - SAS Support Communities — Mozilla Firefox.png" style="width: 859px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/66123iA4EF494F9CA0F6EE/image-size/large?v=v2&amp;amp;px=999" role="button" title="2021-11-26 08_27_29-Reply to Message - SAS Support Communities — Mozilla Firefox.png" alt="2021-11-26 08_27_29-Reply to Message - SAS Support Communities — Mozilla Firefox.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2. If the output is wrong, show us the incorrect output and explain what is wrong.&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jan 2023 12:17:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Need-help-with-my-logistic-regression-code/m-p/852774#M42206</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2023-01-09T12:17:36Z</dc:date>
    </item>
  </channel>
</rss>

