/****************************************************************/ /* S A S S A M P L E L I B R A R Y */ /* NAME: REGDEMO */ /* TITLE: IML Regression Example */ /* DATA: */ /****************************************************************/ proc iml; /*---------REGRESSION ROUTINE------------*/ /* given X, and y, this fits y = X b + e */ /* by least squares. */ start reg; n = nrow(X); /* number of observations */ k = ncol(X); /* number of variables */ xpx = X`*X; /* cross-products, X'X */ xpy = X`*y; xpxi = inv(xpx); /* inverse crossproducts */ b = xpxi*xpy; /* parameter estimates */ yhat = X*b; /* predicted values */ resid = y-yhat; /* residuals */ sse = resid`*resid; /* sum of squared errors */ dfe = n-k; /* degrees of freedom error */ mse = sse/dfe; /* mean squared error */ rmse = sqrt(mse); /* root mean squared error */ covb = xpxi#mse; /* covariance of estimates */ stdb = sqrt(vecdiag(covb)); /* standard errors */ t = b/stdb; /* ttest for estimates=0 */ probt = 1-probf(t#t,1,dfe); /* significance probability */ print name b stdb t probt; s = diag(1/stdb); corrb = s*covb*s; /* correlation of estimates */ print ,"Covariance of Estimates", covb[r=name c=name], "Correlation of Estimates",corrb[r=name c=name]; if nrow(alpha)= 0 then return; /* is alpha specified? */ H = X*xpxi*X`; /* hat matrix */ vresid = (i(n)-H)*mse; /* covariance of residuals */ vpred = H#mse; /* covariance of yhat values */ hat= vecdiag(H); /* hat leverage values */ alpha=.05; t = tinv(1-alpha/2,dfe); lowerm=yhat-t#sqrt(hat*mse); /* lower confidence limit for mean */ upperm=yhat+t#sqrt(hat*mse); /* upper */ lower =yhat-t#sqrt(hat*mse+mse); /* lower confidence limit for indiv */ upper =yhat+t#sqrt(hat*mse+mse); /* upper */ print ,,"Predicted Values, Residuals, Leverage, and Conf. Limits" ,, id y yhat resid hat lowerm upperm lower upper; finish; /*---routine to test a linear combination of the estimates---*/ /* given L, this routine tests hypothesis that L b = 0. */ start test; dfn=nrow(l); /* degrees of freedom of test */ lb=l*b; /* lin. comb of coeffs tested */ vlb=l*xpxi*l`; /* variance of LB */ q=lb`*inv(vlb)*lb /dfn; /* Mean square for H0: LB=0 */ f=q/mse; prob=1-probf(f,dfn,dfe); print l[c=name], lb f dfn dfe prob; finish; /*---run it on population of U.S. for Decades beginning 1790---*/ X= { 1 1 1, 1 2 4, 1 3 9, 1 4 16, 1 5 25, 1 6 36, 1 7 49, 1 8 64 }; Y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443}; id={'1790','1800','1810','1820','1830','1840','1850','1860'}; NAME={"Intercept", "Decade", "Decade**2" }; alpha=.05; /* Error rate for confidence intervals */ reset fw=7; run reg; do; print ,"TEST Coef for Linear"; l= {0 1 0 } ; run test; print ,"TEST Coef for Linear,Quad"; l= {0 1 0,0 0 1} ; run test; print ,"TEST Linear+Quad = 0"; l= {0 1 1 } ; run test; end;