<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Impute Missing Values in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487389#M127000</link>
    <description>&lt;P&gt;PROC SQL with CASE expression to calculate the Average for missing values. I'm assuming your delay values are&amp;nbsp;hh:mm&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines dlm=',' dsd missover;
input Origin $ Destination $ Carrier $ Delay;
datalines;
EWR,DEL,UA,15
EWR,DEL,UA,60
NYC,DEL,CA,120
DEL,NYC,UA,180
NYC,DEL,CA,-15
MUM,NYC,B6,60
MUM,NYC,B6,.
EWR,DEL,UA,.
;
run;

proc sql;
select 	Origin , 
		Destination , 
		Carrier,
		Delay,
		case when Delay is null then avg(Delay) 
			else Delay end	as New_Delay
from have
group by Origin , Destination , Carrier
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note: Please provide your sample data in the form of Data Step, it saves our time and you can expect quick replies .&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 16 Aug 2018 12:53:17 GMT</pubDate>
    <dc:creator>SuryaKiran</dc:creator>
    <dc:date>2018-08-16T12:53:17Z</dc:date>
    <item>
      <title>Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487357#M126985</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is sample data just to make the question comprehensible.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Origi&lt;STRONG&gt;n Destination Carrier Delay&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;EWR&amp;nbsp; &amp;nbsp;DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; UA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;0:15&lt;/P&gt;&lt;P&gt;EWR&amp;nbsp; &amp;nbsp; DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;UA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;-1:00&lt;/P&gt;&lt;P&gt;NYC&amp;nbsp; &amp;nbsp; &amp;nbsp; DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; CA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2:00&lt;/P&gt;&lt;P&gt;DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; NYC&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; UA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 3:00&lt;/P&gt;&lt;P&gt;NYC&amp;nbsp; &amp;nbsp; &amp;nbsp;DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;CA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; -0:15&lt;/P&gt;&lt;P&gt;MUM&amp;nbsp; &amp;nbsp; &amp;nbsp;NYC&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; B6&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1:00&lt;/P&gt;&lt;P&gt;MUM&amp;nbsp; &amp;nbsp; &amp;nbsp;NYC&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; B6&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/P&gt;&lt;P&gt;EWR&amp;nbsp; &amp;nbsp; &amp;nbsp; DEL&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; UA&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Now we have to impute missing values in Delay with an average of delay that has to be specific for the carrier and (origin--&amp;gt;dest).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please help me on this.&amp;nbsp; &amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advance.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 11:57:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487357#M126985</guid>
      <dc:creator>Akshayvrata</dc:creator>
      <dc:date>2018-08-16T11:57:35Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487360#M126986</link>
      <description>&lt;P&gt;You could use the average of the delays for each Origin/Destination/Carrier combination in place of the missing values.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 12:02:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487360#M126986</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2018-08-16T12:02:41Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487369#M126991</link>
      <description>&lt;P&gt;Did you try to compute the average delay for each Origin / Destination / Carrier?&amp;nbsp; Show what you tried.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you already did this, does the problem lie with getting that average back into your original data set to replace missing values?&amp;nbsp; Again, show what you tried.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 12:15:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487369#M126991</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2018-08-16T12:15:00Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487389#M127000</link>
      <description>&lt;P&gt;PROC SQL with CASE expression to calculate the Average for missing values. I'm assuming your delay values are&amp;nbsp;hh:mm&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines dlm=',' dsd missover;
input Origin $ Destination $ Carrier $ Delay;
datalines;
EWR,DEL,UA,15
EWR,DEL,UA,60
NYC,DEL,CA,120
DEL,NYC,UA,180
NYC,DEL,CA,-15
MUM,NYC,B6,60
MUM,NYC,B6,.
EWR,DEL,UA,.
;
run;

proc sql;
select 	Origin , 
		Destination , 
		Carrier,
		Delay,
		case when Delay is null then avg(Delay) 
			else Delay end	as New_Delay
from have
group by Origin , Destination , Carrier
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note: Please provide your sample data in the form of Data Step, it saves our time and you can expect quick replies .&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 12:53:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487389#M127000</guid>
      <dc:creator>SuryaKiran</dc:creator>
      <dc:date>2018-08-16T12:53:17Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487390#M127001</link>
      <description>&lt;P&gt;Yeah!&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have made a variable named as flight_code in which I have concatenated origin, destination and carrier.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then using this I calculated mean by proc means by flight_code. I got the mean but I am unable to figure how to impute that into missing values..&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And one more problem I am facing is that mean is coming in a numeric format not in time format.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks!&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 12:53:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487390#M127001</guid>
      <dc:creator>Akshayvrata</dc:creator>
      <dc:date>2018-08-16T12:53:28Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487403#M127003</link>
      <description>&lt;P&gt;Great.&amp;nbsp; You're familiar with some of the useful tools.&amp;nbsp; Next step would be to change the BY statement in PROC MEANS.&amp;nbsp; Instead of concatenating three variables, use all three original variables in the BY statement:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;by origin destination carrier;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;(The order of variables in the BY statement must match the sorted order to the data.)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Following that, you should be able to merge the results back in (again, using all three variables in the BY statement when merging).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The time format can be applied later, in the same step that merges the results back in.&amp;nbsp; Use the same time format that is in use for your existing DELAY variable.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Aug 2018 13:26:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487403#M127003</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2018-08-16T13:26:22Z</dc:date>
    </item>
    <item>
      <title>Re: Impute Missing Values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487416#M127007</link>
      <description>&lt;P&gt;If I understood your question.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines dlm=',' dsd missover;
input Origin $ Destination $ Carrier $ Delay;
datalines;
EWR,DEL,UA,15
EWR,DEL,UA,60
NYC,DEL,CA,120
DEL,NYC,UA,180
NYC,DEL,CA,-15
MUM,NYC,B6,60
MUM,NYC,B6,.
EWR,DEL,UA,.
;
run;
proc sort data=have;
by Origin  Destination  Carrier;
run;
proc stdize data=have missing=mean reponly out=want;
by Origin  Destination  Carrier;
var delay;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 Aug 2018 13:47:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Impute-Missing-Values/m-p/487416#M127007</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2018-08-16T13:47:48Z</dc:date>
    </item>
  </channel>
</rss>

