Re: PROC FMM, use estimates and mixing probabilities for CDF modeling ...

acordes · Posted 02-14-2023 08:05 AM

When I use the normal distribution and apply the same code logic, the CDF looks correct.

But the inverse gaussian seems to have a parametrization that I cannot translate to the correct plug-in for the CDF.

Works for normal distribution, but not for the inverse gaussian dist (commented lines).

data WORK.have;
  infile datalines dsd truncover;
  input out_APZMTO:32. EDAD:32. _TYPE_:32. _FREQ_:32. amount:32.;
datalines4;
36,,0,3455,762502.05
36,1,1,1,14.85
36,2,1,12,319.21
36,3,1,26,1154.55
36,4,1,30,727.04
36,5,1,44,3923.28
36,6,1,38,1173.26
36,7,1,48,1531.13
36,8,1,43,1769.97
36,9,1,58,2238.34
36,10,1,58,4278.06
36,11,1,84,12114.76
36,12,1,196,31770.97
36,13,1,173,29070.71
36,14,1,130,24682.33
36,15,1,132,24051.46
36,16,1,168,31645.01
36,17,1,135,25100.97
36,18,1,149,26097.26
36,19,1,125,25800.34
36,20,1,107,18768.79
36,21,1,72,14674.27
36,22,1,89,24478.6
36,23,1,124,33427.97
36,24,1,131,37396.51
36,25,1,124,37955
36,26,1,122,36291.39
36,27,1,116,30372.11
36,28,1,103,28423.52
36,29,1,109,29962.6
36,30,1,99,26768.72
36,31,1,118,30332.69
36,32,1,106,33733.1
36,33,1,108,30746.45
36,34,1,102,34331.35
36,35,1,102,38201.46
36,36,1,73,29174.02
48,,0,8206,2102235.17
48,1,1,1,22.5
48,2,1,18,457.06
48,3,1,49,1854.96
48,4,1,56,2254.12
48,5,1,67,1946.5
48,6,1,55,1762.57
48,7,1,84,3352.96
48,8,1,70,2431.33
48,9,1,74,3802.55
48,10,1,102,6265.82
48,11,1,171,24198.12
48,12,1,282,45526.93
48,13,1,255,45520.94
48,14,1,230,38398.41
48,15,1,207,35372.05
48,16,1,224,41366.14
48,17,1,201,37845.88
48,18,1,191,32593.99
48,19,1,175,33280.73
48,20,1,157,26252.88
48,21,1,118,21057.52
48,22,1,173,43551.9
48,23,1,198,55577.1
48,24,1,212,62589.53
48,25,1,215,63742.76
48,26,1,186,46174.82
48,27,1,215,60954.3
48,28,1,190,51721.48
48,29,1,199,56238.08
48,30,1,192,52071.48
48,31,1,206,54004.67
48,32,1,189,51740.96
48,33,1,184,58890.72
48,34,1,240,81164.21
48,35,1,280,85248.34
48,36,1,293,93944.21
48,37,1,235,78480.88
48,38,1,224,71788.13
48,39,1,194,57125.38
48,40,1,214,73545.89
48,41,1,178,59174.8
48,42,1,175,58910.43
48,43,1,155,45958.2
48,44,1,182,59252.39
48,45,1,210,79901.32
48,46,1,150,57074.72
48,47,1,200,80289.84
48,48,1,130,57554.67
60,,0,2671,752973.36
60,1,1,1,25.32
60,2,1,4,108.72
60,3,1,8,212.01
60,4,1,11,286.19
60,5,1,20,1194.83
60,6,1,23,835.52
60,7,1,10,250.62
60,8,1,14,857.08
60,9,1,21,798.12
60,10,1,21,740.61
60,11,1,35,4062.02
60,12,1,54,8376.32
60,13,1,63,8808.42
60,14,1,61,11076.52
60,15,1,61,9662.05
60,16,1,51,8079.35
60,17,1,60,9869.83
60,18,1,45,8284.84
60,19,1,44,7441.36
60,20,1,48,8823.24
60,21,1,22,3946.96
60,22,1,39,8820.83
60,23,1,43,11302.04
60,24,1,61,17808.52
60,25,1,57,11451.55
60,26,1,62,13092.27
60,27,1,54,16471.61
60,28,1,53,13031.64
60,29,1,42,10698.35
60,30,1,49,12760.43
60,31,1,41,11839.69
60,32,1,48,13549.64
60,33,1,56,14955.85
60,34,1,66,21948.83
60,35,1,60,14491.75
60,36,1,64,24835.36
60,37,1,63,21083.67
60,38,1,47,14364.19
60,39,1,62,18092.91
60,40,1,49,15239.7
60,41,1,46,13236.22
60,42,1,31,10448.78
60,43,1,42,15600.66
60,44,1,49,17155.81
60,45,1,43,13370.65
60,46,1,54,15116.6
60,47,1,60,20706.52
60,48,1,58,21646.32
60,49,1,38,11715.94
60,50,1,59,21514.77
60,51,1,45,18584.31
60,52,1,47,19166.09
60,53,1,45,21027.81
60,54,1,42,15514.08
60,55,1,43,18157.71
60,56,1,48,19284.13
60,57,1,64,27124.37
60,58,1,62,31347.93
60,59,1,58,23238.35
60,60,1,44,19437.55
;;;;
run;

proc sgplot data=WORK.have;
by out_apzmto;
vbar edad / response=amount;
run;

proc fmm data=work.have plots=all;
where _type_=1 and out_apzmto in (36 48 60) ;
by out_apzmto;
model  edad  =    / distribution=gaussian kmax=5 kmin=2;
freq amount;
/*   parms(24 2, 36 3, 48 4, 60 5);  */
    ods select ParameterEstimates MixingProbs DensityPlot;
    ods output ParameterEstimates=PE0 MixingProbs=mixi;
run;

data pe_wide;
/* merge pe0(where=(Parameter^='Scale')) pe0(where=(Parameter='Scale') rename=(estimate=shape) drop=ilink); */
merge pe0(where=(Parameter^='Intercept')) pe0(where=(Parameter='Intercept') rename=(estimate=est) );
by out_APZMTO component;
/* keep out_APZMTO component estimate shape ilink; */
keep out_APZMTO component estimate est;
run;

proc sql;
create table pe_wide1 as
select a.*, b.Prob from 
WORK.PE_WIDE a left join WORK.MIXI b 
on a.Component=b.Component and a.out_APZMTO=b.out_APZMTO order by out_APZMTO, component;
run;


data WORK.PE_wide2;
set WORK.PE_wide1 end=eof;
array arr(3) $6. ('_fmm36' '_fmm48' '_fmm60');
retain cod arr_c;
format cod $500.;
by out_APZMTO component;
if first.out_APZMTO then do;
cod='';
arr_c+1;
end;
/* cod=catx('', cod, prob, " * cdf('wald',", 'temp(1),', shape, ',', ilink, ') +' ); */
cod=catx('', cod,  prob, " * cdf('normal',", 'temp(1),', est, ',', sqrt(estimate), ') +' );

if last.out_APZMTO then call symputx(arr(arr_c), quote(substr(cod, 1, length(cod)-1)));
run;

%put &_fmm48.;

proc sql;
select sum(amount) into :total36 from have where _type_=1 and out_apzmto=36;
select sum(amount) into :total48 from have where _type_=1 and out_apzmto=48;
select sum(amount) into :total60 from have where _type_=1 and out_apzmto=60;
quit;

data have1;
set have;
array helpa (3) _temporary_ (&total36. &total48. &total60.);
array temp (1) edad;
array temp2 (3) $500. (&_fmm36. &_fmm48. &_fmm60.);
format amount_share pred_cdf percent9.1 maturity percent9.0;
where _type_=1;
by out_apzmto edad;
if first.out_apzmto then acum_amount=0;
acum_amount+amount;
amount_share=acum_amount/helpa(whichn(out_apzmto, 36, 48, 60));
maturity=edad/out_apzmto;

/*here I want to unquote the formula, but it doesn't work */
pred_cdf=resolve(substr(temp2(out_apzmto/12-2), 2, length(temp2(out_apzmto/12-2))-2)); 

if out_apzmto=36 then pred_cdf= 
0.375327347 * cdf('normal', temp(1), 15.635497438 , 3.9918793772 ) 
+ 0.3922306778 * cdf('normal', temp(1), 26.175544333 , 3.1090404865 ) 
+ 0.1005071903 * cdf('normal', temp(1), 31.998863512 , 0.8964434876 ) 
+ 0.0450229677 * cdf('normal', temp(1), 34 , 0.0001 ) 
+ 0.0869118173 * cdf('normal', temp(1), 35.436121806 , 0.4959496162 );

if out_apzmto=48 then pred_cdf= 
0.1283200146 * cdf('normal', temp(1), 46.076814239 , 1.4242197152 ) 
+ 0.4126163318 * cdf('normal', temp(1), 37.291694824 , 4.5092171615 ) 
+ 0.4590636536 * cdf('normal', temp(1), 22.406224725 , 7.4331299613 );


if out_apzmto=60 then pred_cdf= 0.0593909406 * cdf('normal', temp(1), 14.371900802 , 2.1626772284 ) 
+ 0.5727265672 * cdf('normal', temp(1), 33.275424127 , 9.8667817795 ) 
+ 0.144608475 * cdf('normal', temp(1), 48.66603877 , 3.4019403213 ) 
+ 0.1019414322 * cdf('normal', temp(1), 54.269588212 , 2.2853175888 ) 
+ 0.1213325851 * cdf('normal', temp(1), 58.257180487 , 1.2243720918 );

run;

sbxkoenk · Posted 02-14-2023 01:34 PM

I moved this topic to "Statistical Procedures" board.

FMM = finite mixture models

Koen

StatDave · Posted 02-15-2023 11:14 AM

Not sure if this gets at what you need, but the following generates inverse gaussian data with known parameters, mean, and variance. The CDF at value 10 is computed using the known parameters. Some empirical quantiles are also computed. An inverse gaussian model is fit using PROC FMM and the results from the model used to compute the CDF at 10 and the quantiles for comparison. Note that the log link is used in this example. So, the code shows how the FMM results can be used in DATA step distribution functions such as CDF or QUANTILE.

data a; 
   call streaminit(23422);
   scale=.2; lambda=1/scale**2; 
   do x=1 to 10; do n=1 to 1000;
     mean=exp(1+.5*x);  *assumes log link;
     std=sqrt(mean**3*scale**2);
     y=rand('igauss',lambda,mean);
     cdf10=cdf('igauss',10,1/scale**2,mean);
     output; 
   end; end; run;
data b; set a; by x; if first.x; run;
proc print; run;
proc means data=a mean std q1 median q3; 
  class x; var y mean std; 
  run;
proc fmm data=a;
   model y=x / dist=ig link=log;
   output out=outigfm pred=p;
   ods output parameterestimates=pe(where=(effect ? 'Scale'));
   run;
data q; set outigfm; by x;
if _n_=1 then set pe;
cdf10=cdf('igauss',10,1/estimate,p);
q1=quantile('igauss',.25,1/estimate,p);
median=quantile('igauss',.5,1/estimate,p);
q3=quantile('igauss',.75,1/estimate,p);
if first.x;
run;
proc print data=q; var x cdf10--q3; run;

Rick_SAS · Posted 02-16-2023 06:28 AM

Arnie,

Here are two thoughts:
1. Pay attention to the ORDER and MEANING of the parameters for the inverse Gaussian distribution. The doc for PROC FMM uses IG(mean, scale) whereas the CDF and PDF functions use IG(shape. mean). So the order is reversed and shape = 1/scale. Please see the article "Fit a mixture of Weibull distributions in SAS" for a discussion of the parameterization that PROC FMM uses and how it differs from parameterization in Base SAS functions.

2. While debugging the program, use the PDF function. That will make it easier to visualize if the parameterization is correct. You can then switch to CDF after debugging.

PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distribution

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distribution

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

Re: PROC FMM, use estimates and mixing probabilities for CDF modeling a inverse gaussian distributio

SAS Innovate 2025: Call for Content