% LinearModelDemo.m clear all, close all % The purpose of this example is to demonstrate that with linear models % if the noise is i.i.d. Gaussian, the parameter estimates will also be % i.i.d. Gaussian. Here we use a model of the form % % y = X*b + e, e~N(0,sigma^2*I) (*******) % % where y is 2x1, beta 2x1, and X is 2x2. We keep things this dimension % for visualization purposes. % % The least squares solution b_hat is computed for a large number of % samples from the above linear model: % % b_hat = arg_min_b ||X*b-y||^2 = inv(X'*X)*X'*y. % % The mean of b_hat is b_true and its covariance is sigma^2*inv(X'*X). % %-------------------------------------------------------------------------- % Define the true parameters b and the matrix X in (*******). %-------------------------------------------------------------------------- disp('1. First we will randomly generate a true paramater vector b, ') disp(' regressor matrix X, and noise free data y. Hit any key to continue.'), pause b_true = 10*rand(2,1)-5; X = [ones(2,1), 20*rand(2,1)-10]; y_true = X*b_true; %-------------------------------------------------------------------------- % Compute Random draws from (*******) above using Monte Carlo simulation. %-------------------------------------------------------------------------- disp('2. Next we compute a bunch of random draws from the data distribution') disp(' given by y=X*b+e, e~N(0,sigma^2*I) and corresponding parameter ') disp(' estimates, which are random draws from the distribution defined by ') disp(' b_hat=argmin_b||y-Xb||^2. Hit any key to continue.'), pause ndraws = 1e4; sigma = 0.1; b_hat_sample = zeros(2,ndraws); y_sample = zeros(2,ndraws); for i = 1:ndraws y_sample(:,i)=y_true + sigma*randn(2,1); b_hat_sample(:,i) = X\y_sample(:,i); end %-------------------------------------------------------------------------- % Output the sample and analytic covariances to see that they are similar % and compute their relative errors, then plot the samples of y and b_hat. %-------------------------------------------------------------------------- disp('3. Now we perform a variety of normality tests: (i) compare the ') disp(' analytic and sample covariances, (ii) plot the data and parameter') disp(' samples, (iii) output normal quantile plots and histograms, and ') disp(' (iv) perform the chi^2 test. Hit any key to continue.'), pause CovTrue_b_hat = sigma^2*inv(X'*X) CovSample_b_hat = cov(b_hat_sample') figure(1) subplot(121) plot(y_sample(1,:),y_sample(2,:),'*') hold on, plot(y_true(1,1),y_true(2,1),'*r'), hold off xlabel('y_1'), ylabel('y_2') title('Samples from the data distribution.') subplot(122) plot(b_hat_sample(1,:),b_hat_sample(2,:),'*') hold on, plot(b_true(1,1),b_true(2,1),'*r'), hold off xlabel('b_1 estimate'), ylabel('b_2 estimate') title('Samples from the estimate distribution.') %-------------------------------------------------------------------------- % Perform various tests for normalcy. %-------------------------------------------------------------------------- % Plot the normal quantiles. figure(2) subplot(221) qqplot(y_sample(1,:)) title('y_1') subplot(222) qqplot(y_sample(2,:)) title('y_2') subplot(223) qqplot(b_hat_sample(1,:)) title('b_1') subplot(224) qqplot(b_hat_sample(2,:)) title('b_2') % Make some histograms and perform chi^2 test. figure(3) subplot(221) [h,p]=chi2gof(y_sample(1,:)); hist(y_sample(1,:),100) title(['y_1. Chi^2: ',num2str(h),', p = ',num2str(p)]) subplot(222) [h,p]=chi2gof(y_sample(2,:)); hist(y_sample(2,:),100) title(['y_2. Chi^2: ',num2str(h),', p = ',num2str(p)]) subplot(223) [h,p]=chi2gof(b_hat_sample(1,:)); hist(b_hat_sample(1,:),100) title(['b_1. Chi^2: ',num2str(h),', p = ',num2str(p)]) subplot(224) [h,p]=chi2gof(b_hat_sample(2,:)); hist(b_hat_sample(2,:),100) title(['b_2. Chi^2: ',num2str(h),', p = ',num2str(p)]) %-------------------------------------------------------------------------- % Create normalized histograms of parameter samples and make inference. %-------------------------------------------------------------------------- disp('4. Finally, we normalize the histograms to make them proper probability') disp(' density functions, and use the quantile function to compute confidence') disp(' intervals. Finally, these are compared to the analytic values.') disp(' Hit any key to continue.'), pause % b_1 figure(4) [nb_est1,tb1]=nhist_fn(b_hat_sample(1,:)); q=quantile(b_hat_sample(1,:),[0.025 0.975]); subplot(121) plot(tb1,nb_est1,'ok'), hold on jj = min( find( abs(tb1-q(1))==min(abs(tb1-q(1))) ) ); tb1_025 = tb1(jj); plot(tb1_025,0,'*r') mb1 = mean(b_hat_sample(1,:)); plot(mb1,0,'*m') jj = min( find( abs(tb1-q(2))==min(abs(tb1-q(2))) ) ); tb1_975 = tb1(jj); plot(tb1_975,0,'*g'), hold off legend('distribution','.025 quantile','mean','.975 quantile') mseb1 = CovTrue_b_hat(1,1); %sum((b_hat_sample(1,:)-mb1).^2)/(ndraws-2); fprintf('b_1 quantile: 0.025 quantile = %2.5f, mean = %2.5f 0.975 quantile = %2.5f\n',tb1_025,mb1,tb1_975) fprintf('b_1 normal theory: 0.025 quantile = %2.5f, mean = %2.5f 0.975 quantile = %2.5f\n',mb1-1.96*sqrt(mseb1),mb1,mb1+1.96*sqrt(mseb1)) disp('---------------------------------------') % b_2 [nb_est2,tb2]=nhist_fn(b_hat_sample(2,:)); q=quantile(b_hat_sample(2,:),[0.025 0.975]); subplot(122) plot(tb2,nb_est2,'ok'), hold on jj = min( find( abs(tb2-q(1))==min(abs(tb2-q(1))) ) ); tb2_025 = tb2(jj); plot(tb2_025,0,'*r') mb2 = mean(b_hat_sample(2,:)); plot(mb2,0,'*m') jj = min( find( abs(tb2-q(2))==min(abs(tb2-q(2))) ) ); tb2_975 = tb2(jj); plot(tb2_975,0,'*g'), hold off mseb2 = CovTrue_b_hat(2,2); %sum((b_hat_sample(2,:)-mb2).^2)/(ndraws-2); fprintf('b_2 quantile: 0.025 quantile = %2.5f, mean = %2.5f 0.975 quantile = %2.5f\n',tb2_025,mb2,tb2_975) fprintf('b_2 normal theory: 0.025 quantile = %2.5f, mean = %2.5f 0.975 quantile = %2.5f\n',mb2-1.96*sqrt(mseb2),mb2,mb2+1.96*sqrt(mseb2))