/*
   Hi, here is my code for Shapley Value and sample data
   http://www.megaupload.com/?d=7WN5AS2W
   and info about Shapley Value:
   http://marketing.gfkamerica.com/website/articles/ShapelyValueRegression.pdf
   For 20 regressors it took ~4 h and for more regressors I can't perform
   the analysis... Is it possible to speed up my analysis?

   Shapley value regression: for every regressor, average its marginal
   contribution to R-square over all subsets of the other regressors.

   Speed-up strategy used below:
     1. X'X and X'y are computed ONCE; each subset model is solved from
        the corresponding sub-blocks, so no pass over the data is needed
        inside the loop.
     2. R-square is computed once per subset (2^k solves) and stored;
        the Shapley values are then obtained by table lookup instead of
        re-fitting every subset once per regressor (k * 2^k fits).
*/

/* declaring variables */
%let x1=V6015 V6016 V6017 V6018 V6019 V6020;

proc iml;
use sample_data;
read all var {y} into y;
read all var {&x1} into x;
close sample_data;

/* generating 0-1 matrix */
/* source of makro - Rick Wicklin, SAS Institute */
/* http://blogs.sas.com/content/iml/2011/01/05/creating-a-matrix-with-all-combinations-of-zeros-and-ones/ */
/* Returns a (2^n x n) matrix whose rows are all 0/1 patterns in binary
   counting order (column 1 is the slowest-changing digit). Row 1 is all
   zeros and the last row is all ones. */
start makro(n);
   rows = 2##n;
   x = j(rows, n);
   do j = 1 to n;
      PatLength = 2##(n-j);
      PatRepl   = 2##(j-1);
      pattern   = j(PatLength, 1, 0) // j(PatLength, 1, 1);
      x[, j]    = repeat(pattern, PatRepl);
   end;
   return(x);
finish;

k    = ncol(x);                 /* number of regressors (players)      */
nobs = nrow(x);
p    = k + 1;                   /* regressors + intercept              */

zero = makro(k);                /* one row per subset of regressors    */
nsub = nrow(zero);

/* attach the intercept column */
x = x || j(nobs, 1, 1);

/* cross-products, computed once and reused for every subset */
xpx = x` * x;
xpy = x` * y;

/* full model */
b = solve(xpx, xpy);

/* corrected sums of squares: SS - (sum)^2 / n.
   For a least-squares fit with intercept, yhat'yhat = b'X'y and
   sum(yhat) = sum(y), so the model SS needs only xpy and b. */
corr_term = sum(y)##2 / nobs;
OSK       = y` * y - corr_term;

/* R-square of every subset model (intercept always included).
   Row 1 of zero is the empty subset: intercept only, R-square = 0. */
r2 = j(nsub, 1, 0);
do s = 2 to nsub;
   idx   = loc(zero[s, ]) || p;          /* selected regressors + intercept */
   bs    = solve(xpx[idx, idx], xpy[idx, ]);
   r2[s] = (xpy[idx, ]` * bs - corr_term) / OSK;
end;
R2_all = r2[nsub];                       /* last row = all regressors */

/* matrix for Shapley Value (last row belongs to the intercept, stays 0) */
sv   = j(p, 1, 0);
size = zero[, +];                        /* number of regressors in each subset */

/* loop thru variables */
do j = 1 to k;
   /* subsets that do not contain variable j ... */
   rows_without = loc(zero[, j] = 0);
   /* ... and the same subsets with variable j added: in binary counting
      order, switching digit j on moves the row down by 2^(k-j) */
   rows_with = rows_without + 2##(k-j);

   /* gamma from equation: |S|! (k-|S|-1)! / k!, S = subset without j */
   m     = size[rows_without, ];
   gamma = fact(m) # fact(k - m - 1) / fact(k);

   /* shapley value for each variable */
   sv[j] = sum(gamma # (r2[rows_with, ] - r2[rows_without, ]));
end;

/* net effect for all variables:
   NE_i = b_i^2 + b_i * sum_{j ne i} r_ij b_j = b_i * (R b)_i  (r_ii = 1) */
/* NOTE(review): this identity is normally stated for standardized
   coefficients and Pearson correlations; the raw coefficients and the
   Spearman correlation of the original code are kept here - confirm. */
NE  = j(p, 1, 0);
x1  = x[, 1:k];
rho = corr(x1, "spearman");
bk  = b[1:k, ];
NE[1:k, ] = bk # (rho * bk);

a1  = sv[+, ];
a2  = NE[+, ];
sv1 = (sv / a1) # 100;
NE1 = (NE / a2) # 100;

rn = {&x1 const};
print R2_all;
print b[Rowname=rn] sv sv1 NE NE1;
quit;
... View more