### Ridge regression and collinearity among covariates
#
# Demo script: compares the least-squares (LS) estimator with ridge estimators
# (MASS::lm.ridge) in a no-intercept linear model with two strongly correlated
# covariates and true coefficient vector beta_* = (1, 1).

library(MASS)

# ---- Strongly correlated covariates and a linear model ----

set.seed(5)
n <- 30

# Covariance of the two covariates: unit variances, correlation 0.8.
Sigma <- matrix(c(1, 0.8, 0.8, 1), 2, 2)

X <- mvrnorm(n, c(0, 0), Sigma)
y <- rnorm(n, mean = X[, 1] + X[, 2]) # beta_* = (1, 1)

plot(X)

# LS fit without intercept. coef() is used instead of `$coef`, which only
# works on an lm object through partial matching of `$coefficients`.
fit_ls <- lm(y ~ X - 1)
coef(fit_ls)
sum(coef(fit_ls)) # approx = 1 + 1 = 2

# Printing a ridgelm object shows its coefficients via the coef() method.
lm.ridge(y ~ X - 1, lambda = 10)

# ---- Simulation study ----
# Simulate the data N times and compute the LS and ridge estimators each time;
# this yields a sample of size N for beta^ls and for beta^ridge.

N <- 1000
beta_ls <- matrix(NA, nrow = N, ncol = 2)
beta_ridge1 <- matrix(NA, nrow = N, ncol = 2)
beta_ridge2 <- matrix(NA, nrow = N, ncol = 2)

set.seed(19)
for (i in seq_len(N)) {
  X <- mvrnorm(n, c(0, 0), Sigma)
  y <- rnorm(n, mean = X[, 1] + X[, 2])

  beta_ls[i, ] <- as.numeric(coef(lm(y ~ X - 1)))

  # The `coef` component of a ridgelm object is expressed on lm.ridge's
  # internally rescaled covariates; the coef() method converts it back to the
  # original scale, which is the scale of beta_ls and of beta_* = (1, 1).
  beta_ridge1[i, ] <- as.numeric(coef(lm.ridge(y ~ X - 1, lambda = 5)))
  beta_ridge2[i, ] <- as.numeric(coef(lm.ridge(y ~ X - 1, lambda = 30)))
}

plot(beta_ls, cex = 0.8)
points(1, 1, pch = 16, cex = 1.2, col = "red") # beta_*
# The LS estimator is unbiased, but its two coefficients are negatively
# correlated.

points(beta_ridge1, cex = 0.8, col = "darkorange")
colMeans(beta_ridge1)
# Recorded from an earlier run that stored the rescaled `$coef` component:
# [1] 0.9124180 0.8990887
# (original-scale values from coef() are expected to differ slightly)
# The ridge estimator is biased, but the marginal estimates have a smaller
# variance.

plot(beta_ls, cex = 0.8)
points(beta_ridge2, cex = 0.8, col = "darkorange")
points(1, 1, pch = 16, cex = 1.2, col = "red")
colMeans(beta_ridge2)
# Recorded from an earlier run that stored the rescaled `$coef` component:
# [1] 0.6381062 0.6337374
# (original-scale values from coef() are expected to differ slightly)

### A larger lambda gives a larger bias but a smaller variance of the
### coefficient estimates.