BookmarkSubscribeRSS Feed
acordes
Rhodochrosite | Level 12

When I use the normal distribution and apply the same code logic, the CDF looks correct. 

But the inverse gaussian seems to have a parametrization that I cannot translate to the correct plug-in for the CDF. 

 

Works for normal distribution, but not for the inverse gaussian dist (commented lines). 

 

data WORK.have;
  infile datalines dsd truncover;
  input out_APZMTO:32. EDAD:32. _TYPE_:32. _FREQ_:32. amount:32.;
datalines4;
36,,0,3455,762502.05
36,1,1,1,14.85
36,2,1,12,319.21
36,3,1,26,1154.55
36,4,1,30,727.04
36,5,1,44,3923.28
36,6,1,38,1173.26
36,7,1,48,1531.13
36,8,1,43,1769.97
36,9,1,58,2238.34
36,10,1,58,4278.06
36,11,1,84,12114.76
36,12,1,196,31770.97
36,13,1,173,29070.71
36,14,1,130,24682.33
36,15,1,132,24051.46
36,16,1,168,31645.01
36,17,1,135,25100.97
36,18,1,149,26097.26
36,19,1,125,25800.34
36,20,1,107,18768.79
36,21,1,72,14674.27
36,22,1,89,24478.6
36,23,1,124,33427.97
36,24,1,131,37396.51
36,25,1,124,37955
36,26,1,122,36291.39
36,27,1,116,30372.11
36,28,1,103,28423.52
36,29,1,109,29962.6
36,30,1,99,26768.72
36,31,1,118,30332.69
36,32,1,106,33733.1
36,33,1,108,30746.45
36,34,1,102,34331.35
36,35,1,102,38201.46
36,36,1,73,29174.02
48,,0,8206,2102235.17
48,1,1,1,22.5
48,2,1,18,457.06
48,3,1,49,1854.96
48,4,1,56,2254.12
48,5,1,67,1946.5
48,6,1,55,1762.57
48,7,1,84,3352.96
48,8,1,70,2431.33
48,9,1,74,3802.55
48,10,1,102,6265.82
48,11,1,171,24198.12
48,12,1,282,45526.93
48,13,1,255,45520.94
48,14,1,230,38398.41
48,15,1,207,35372.05
48,16,1,224,41366.14
48,17,1,201,37845.88
48,18,1,191,32593.99
48,19,1,175,33280.73
48,20,1,157,26252.88
48,21,1,118,21057.52
48,22,1,173,43551.9
48,23,1,198,55577.1
48,24,1,212,62589.53
48,25,1,215,63742.76
48,26,1,186,46174.82
48,27,1,215,60954.3
48,28,1,190,51721.48
48,29,1,199,56238.08
48,30,1,192,52071.48
48,31,1,206,54004.67
48,32,1,189,51740.96
48,33,1,184,58890.72
48,34,1,240,81164.21
48,35,1,280,85248.34
48,36,1,293,93944.21
48,37,1,235,78480.88
48,38,1,224,71788.13
48,39,1,194,57125.38
48,40,1,214,73545.89
48,41,1,178,59174.8
48,42,1,175,58910.43
48,43,1,155,45958.2
48,44,1,182,59252.39
48,45,1,210,79901.32
48,46,1,150,57074.72
48,47,1,200,80289.84
48,48,1,130,57554.67
60,,0,2671,752973.36
60,1,1,1,25.32
60,2,1,4,108.72
60,3,1,8,212.01
60,4,1,11,286.19
60,5,1,20,1194.83
60,6,1,23,835.52
60,7,1,10,250.62
60,8,1,14,857.08
60,9,1,21,798.12
60,10,1,21,740.61
60,11,1,35,4062.02
60,12,1,54,8376.32
60,13,1,63,8808.42
60,14,1,61,11076.52
60,15,1,61,9662.05
60,16,1,51,8079.35
60,17,1,60,9869.83
60,18,1,45,8284.84
60,19,1,44,7441.36
60,20,1,48,8823.24
60,21,1,22,3946.96
60,22,1,39,8820.83
60,23,1,43,11302.04
60,24,1,61,17808.52
60,25,1,57,11451.55
60,26,1,62,13092.27
60,27,1,54,16471.61
60,28,1,53,13031.64
60,29,1,42,10698.35
60,30,1,49,12760.43
60,31,1,41,11839.69
60,32,1,48,13549.64
60,33,1,56,14955.85
60,34,1,66,21948.83
60,35,1,60,14491.75
60,36,1,64,24835.36
60,37,1,63,21083.67
60,38,1,47,14364.19
60,39,1,62,18092.91
60,40,1,49,15239.7
60,41,1,46,13236.22
60,42,1,31,10448.78
60,43,1,42,15600.66
60,44,1,49,17155.81
60,45,1,43,13370.65
60,46,1,54,15116.6
60,47,1,60,20706.52
60,48,1,58,21646.32
60,49,1,38,11715.94
60,50,1,59,21514.77
60,51,1,45,18584.31
60,52,1,47,19166.09
60,53,1,45,21027.81
60,54,1,42,15514.08
60,55,1,43,18157.71
60,56,1,48,19284.13
60,57,1,64,27124.37
60,58,1,62,31347.93
60,59,1,58,23238.35
60,60,1,44,19437.55
;;;;
run;

proc sgplot data=WORK.have;
by out_apzmto;
vbar edad / response=amount;
run;

proc fmm data=work.have plots=all;
where _type_=1 and out_apzmto in (36 48 60) ;
by out_apzmto;
model  edad  =    / distribution=gaussian kmax=5 kmin=2;
freq amount;
/*   parms(24 2, 36 3, 48 4, 60 5);  */
    ods select ParameterEstimates MixingProbs DensityPlot;
    ods output ParameterEstimates=PE0 MixingProbs=mixi;
run;

data pe_wide;
/* merge pe0(where=(Parameter^='Scale')) pe0(where=(Parameter='Scale') rename=(estimate=shape) drop=ilink); */
merge pe0(where=(Parameter^='Intercept')) pe0(where=(Parameter='Intercept') rename=(estimate=est) );
by out_APZMTO component;
/* keep out_APZMTO component estimate shape ilink; */
keep out_APZMTO component estimate est;
run;

proc sql;
create table pe_wide1 as
select a.*, b.Prob from 
WORK.PE_WIDE a left join WORK.MIXI b 
on a.Component=b.Component and a.out_APZMTO=b.out_APZMTO order by out_APZMTO, component;
run;


data WORK.PE_wide2;
set WORK.PE_wide1 end=eof;
array arr(3) $6. ('_fmm36' '_fmm48' '_fmm60');
retain cod arr_c;
format cod $500.;
by out_APZMTO component;
if first.out_APZMTO then do;
cod='';
arr_c+1;
end;
/* cod=catx('', cod, prob, " * cdf('wald',", 'temp(1),', shape, ',', ilink, ') +' ); */
cod=catx('', cod,  prob, " * cdf('normal',", 'temp(1),', est, ',', sqrt(estimate), ') +' );

if last.out_APZMTO then call symputx(arr(arr_c), quote(substr(cod, 1, length(cod)-1)));
run;

%put &_fmm48.;

proc sql;
select sum(amount) into :total36 from have where _type_=1 and out_apzmto=36;
select sum(amount) into :total48 from have where _type_=1 and out_apzmto=48;
select sum(amount) into :total60 from have where _type_=1 and out_apzmto=60;
quit;

data have1;
set have;
array helpa (3) _temporary_ (&total36. &total48. &total60.);
array temp (1) edad;
array temp2 (3) $500. (&_fmm36. &_fmm48. &_fmm60.);
format amount_share pred_cdf percent9.1 maturity percent9.0;
where _type_=1;
by out_apzmto edad;
if first.out_apzmto then acum_amount=0;
acum_amount+amount;
amount_share=acum_amount/helpa(whichn(out_apzmto, 36, 48, 60));
maturity=edad/out_apzmto;

/*here I want to unquote the formula, but it doesn't work */
pred_cdf=resolve(substr(temp2(out_apzmto/12-2), 2, length(temp2(out_apzmto/12-2))-2)); 

if out_apzmto=36 then pred_cdf= 
0.375327347 * cdf('normal', temp(1), 15.635497438 , 3.9918793772 ) 
+ 0.3922306778 * cdf('normal', temp(1), 26.175544333 , 3.1090404865 ) 
+ 0.1005071903 * cdf('normal', temp(1), 31.998863512 , 0.8964434876 ) 
+ 0.0450229677 * cdf('normal', temp(1), 34 , 0.0001 ) 
+ 0.0869118173 * cdf('normal', temp(1), 35.436121806 , 0.4959496162 );

if out_apzmto=48 then pred_cdf= 
0.1283200146 * cdf('normal', temp(1), 46.076814239 , 1.4242197152 ) 
+ 0.4126163318 * cdf('normal', temp(1), 37.291694824 , 4.5092171615 ) 
+ 0.4590636536 * cdf('normal', temp(1), 22.406224725 , 7.4331299613 );


if out_apzmto=60 then pred_cdf= 0.0593909406 * cdf('normal', temp(1), 14.371900802 , 2.1626772284 ) 
+ 0.5727265672 * cdf('normal', temp(1), 33.275424127 , 9.8667817795 ) 
+ 0.144608475 * cdf('normal', temp(1), 48.66603877 , 3.4019403213 ) 
+ 0.1019414322 * cdf('normal', temp(1), 54.269588212 , 2.2853175888 ) 
+ 0.1213325851 * cdf('normal', temp(1), 58.257180487 , 1.2243720918 );

run;
3 REPLIES 3
sbxkoenk
SAS Super FREQ

I moved this topic to "Statistical Procedures" board.

 

FMM = finite mixture models

Koen

StatDave
SAS Super FREQ

Not sure if this gets at what you need, but the following generates inverse gaussian data with known parameters, mean, and variance. The CDF at value 10 is computed using the known parameters. Some empirical quantiles are also computed. An inverse gaussian model is fit using PROC FMM and the results from the model used to compute the CDF at 10 and the quantiles for comparison. Note that the log link is used in this example. So, the code shows how the FMM results can be used in DATA step distribution functions such as CDF or QUANTILE.

data a; 
   call streaminit(23422);
   scale=.2; lambda=1/scale**2; 
   do x=1 to 10; do n=1 to 1000;
     mean=exp(1+.5*x);  *assumes log link;
     std=sqrt(mean**3*scale**2);
     y=rand('igauss',lambda,mean);
     cdf10=cdf('igauss',10,1/scale**2,mean);
     output; 
   end; end; run;
data b; set a; by x; if first.x; run;
proc print; run;
proc means data=a mean std q1 median q3; 
  class x; var y mean std; 
  run;
proc fmm data=a;
   model y=x / dist=ig link=log;
   output out=outigfm pred=p;
   ods output parameterestimates=pe(where=(effect ? 'Scale'));
   run;
data q; set outigfm; by x;
if _n_=1 then set pe;
cdf10=cdf('igauss',10,1/estimate,p);
q1=quantile('igauss',.25,1/estimate,p);
median=quantile('igauss',.5,1/estimate,p);
q3=quantile('igauss',.75,1/estimate,p);
if first.x;
run;
proc print data=q; var x cdf10--q3; run;

Rick_SAS
SAS Super FREQ

Arnie,

Here are two thoughts:
1. Pay attention to the ORDER and MEANING of the parameters for the inverse Gaussian distribution. The doc for PROC FMM uses IG(mean, scale) whereas the CDF and PDF functions use IG(shape. mean). So the order is reversed and shape = 1/scale. Please see the article "Fit a mixture of Weibull distributions in SAS" for a discussion of the parameterization that PROC FMM uses and how it differs from parameterization in Base SAS functions.

2. While debugging the program, use the PDF function. That will make it easier to visualize if the parameterization is correct. You can then switch to CDF after debugging.

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!

Submit your idea!

What is ANOVA?

ANOVA, or Analysis Of Variance, is used to compare the averages or means of two or more populations to better understand how they differ. Watch this tutorial for more.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 3 replies
  • 586 views
  • 4 likes
  • 4 in conversation