library(ISLR2) attach(Wage) ?Wage agelims <- range(age)+c(-5,5) age.grid <- seq(from = agelims[1], to = agelims[2]) plot(age, wage, xlim = agelims, cex = .5, col = "darkgrey") ### Lokalna regresija ## U praksi se velicina okoline tipicno zadaje preko ## tzv. span parametra. ## To je postotak tocaka iz skupa za trening ## koje koristimo za prilagodbu lok. lin. modela. ## U nasoj notaciji to dobijemo ako stavimo ## h_lambda(x)=udaljenost ## od x do span*n najblizeg susjeda od x u {x^(i):i=1,...,n} ## npr. u kNN metodi span = k/n -> span*n=k plot(age, wage, xlim = agelims, cex = .5, col = "darkgrey") title("Local Regression") fit <- loess(wage ~ age, span = 0.05, data = Wage, degree = 1) ## moze i kvadratna funkcija umjesto linearne, ## a za degree = 0 dobijemo NW procjenitelj fit2 <- loess(wage ~ age, span = 0.2, data = Wage, degree = 1) fit3 <- loess(wage ~ age, span = 0.7, data = Wage, degree = 1) lines(age.grid, predict(fit, data.frame(age = age.grid)), col = "darkred", lwd = 1.5) lines(age.grid, predict(fit2, data.frame(age = age.grid)), col = "darkblue", lwd = 1.5) lines(age.grid, predict(fit3, data.frame(age = age.grid)), col = "darkorange", lwd = 1.5) legend("topright", legend = c("Span = 0.05", "Span = 0.2", "Span= 0.7"), col = c("darkred", "darkblue", "darkorange"), lty = 1, lwd = 2, cex = .7) ## gore se koristi tricube jezgra, ## tj. K_lambda(x,t)=D((x-t)/h_lambda(x)) za D: tricubic=function(t){ (1 - abs(t)^3)^3 * (abs(t)<=1) } ## za fiksni x=10, t |-> K_lambda(x,t) je: t=seq(6,14, by=0.01) plot(t, tricubic((10-t)/3), type="l", ylab="") ## h_lambda(x)=3 lines(t, tricubic((10-t)/1), lty="dashed") ## h_lambda(x)=1 ### usporedba s NW procjeniteljem ### (problem pristranosti na rubovima domene) set.seed(1) x<- runif(50, max = 3) y <- 9 - x^2 + rnorm(50, sd = 0.3) plot(x, y) ## stvarna regresijska funkcija curve(9 - x^2, col = "grey", add = TRUE, lwd = 3) grid.x <- seq(from = 0, to = 3, length.out = 300) lok_lin <- loess(y ~ x, span=0.2, degree=1) ## 0.2*50=10 lines(grid.x, predict(lok_lin, newdata = grid.x), col="darkblue") NW <- loess(y ~ x, span=0.2, degree=0) lines(grid.x, predict(NW, newdata = grid.x), col="darkorange") ## lok.lin. regresija tipicno je glada od NW procjenitelja ## te ima manje problema s pristranosti na rubu (vidi desni rub!) ## probaj span=0.1 za NW -> manji problem na rubu, ## ali veci u unutrasnjosti NW <- loess(y ~ x, span=0.1, degree=0) plot(x, y) curve(9 - x^2, col = "grey", add = TRUE, lwd = 3) lines(grid.x, predict(NW, newdata = grid.x), col="darkorange")