- k-Nearest Neighbors (kNN)
- R exercise: create a simple kNN classifier
- knn() function from the class library
2017-01-24
Algorithm
- kNN classifies a test point by majority vote among the labels of its k nearest training points; the choice of k controls the fit (see the sketch after this list).
- Small k (e.g. k = 1): zero training error, but sensitive to "noise" (overfitting).
- Larger k gives smoother decision boundaries, but risks underfitting.
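One way to see the trade-off is to classify every point of a fine grid with class::knn and compare the boundaries for a small and a large k. This is a minimal sketch on assumed toy data (two Gaussian clouds, not the quadrant data used below):

```r
## Sketch: decision boundaries for small vs. large k on toy data
library(class)
set.seed(1)
x = rbind(matrix(rnorm(40, -0.5, 0.7), ncol = 2),   # class "a" cloud
          matrix(rnorm(40,  0.5, 0.7), ncol = 2))   # class "b" cloud
y = factor(rep(c("a", "b"), each = 20))
grid = expand.grid(x1 = seq(-2, 2, length.out = 100),
                   x2 = seq(-2, 2, length.out = 100))
par(mfrow = c(1, 2))
for (k in c(1, 15)) {
  pred = knn(x, grid, y, k = k)                     # classify every grid point
  plot(grid, col = c("red", "blue")[pred], pch = ".",
       main = paste("k =", k))
  points(x, col = c("red", "blue")[y], pch = 19)
}
```

With k = 1 the boundary chases individual points; with k = 15 it is much smoother.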
```r
set.seed(99)
## prepare the data set
train = data.frame(x1 = runif(20,-1,1), x2 = runif(20,-1,1), label = rep(NA,20))
## assign labels based on x, y coordinates
train[train$x1>0 & train$x2>0, "label"] = "red"    # Quadrant 1
train[train$x1<0 & train$x2>0, "label"] = "green"  # Quadrant 2
train[train$x1<0 & train$x2<0, "label"] = "orange" # Quadrant 3
train[train$x1>0 & train$x2<0, "label"] = "blue"   # Quadrant 4
## plot the points
plot(train[,1:2], col = train$label, pch = 19, asp = 1,
     xlim = c(-1,1), ylim = c(-1,1))
abline(h = 0, lty = 3)
abline(v = 0, lty = 3)
## add a point we want to predict
points(0.5, 0.5, pch = 10)

## knn classifier
p = c(0.5, 0.5) # test point

## Euclidean distance from point p to every row of data
distFunc = function(p, data){
  val = c()
  for (i in 1:nrow(data)){
    val = c(val, sqrt(sum((p - data[i,])^2)))
  }
  return(val)
}
# as.matrix(dist(rbind(p, train[,1:2])))[,1] # or use the built-in dist function

myKnn = function(p, trainData, labels, k){
  distV = distFunc(p, trainData)        ## distances to all training points
  klabels = labels[order(distV)][1:k]   ## labels of the k nearest neighbours
  ## majority vote and the k smallest distances
  return(list(vote = sort(table(klabels), decreasing = TRUE),
              dist = sort(distV)[1:k]))
}

p = c(0.1, -0.5) # test point
myKnn(p, train[,1:2], train[,3], k = 3)
```
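As the commented line above hints, the explicit loop in distFunc can also be vectorized. A sketch of the equivalent computation, using the p and train objects defined above:

```r
## Vectorized equivalent of distFunc (a sketch)
distFuncVec = function(p, data){
  ## each column of t(data) is one training point; subtract p column-wise
  sqrt(colSums((t(as.matrix(data)) - p)^2))
}
all.equal(distFunc(p, train[,1:2]), unname(distFuncVec(p, train[,1:2])))
```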
Usage: knn(train, test, cl, k, ...), where train holds the training predictors, test the case(s) to classify, cl the true classifications of the training set, and k the number of neighbours considered.
```r
library(class)
knn(train[,1:2], p, train[,3], k = 5, prob = TRUE)
```
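With prob = TRUE, knn() attaches the proportion of the k votes won by the winning class as an attribute of the returned factor, which can be read with attr():

```r
pred = knn(train[,1:2], p, train[,3], k = 5, prob = TRUE)
attr(pred, "prob")  ## fraction of the 5 votes for the predicted class
```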
```r
library(class)
library(ISLR)
## ?Smarket for details on the stock-market data
attach(Smarket)
train = (Year < 2005)            # logical index for the training years
train.X = cbind(Lag1, Lag2)[train,]
test.X  = cbind(Lag1, Lag2)[!train,]
train.Direction = Direction[train]
Direction.2005  = Direction[!train]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 1)
table(knn.pred, Direction.2005)  # confusion matrix
mean(knn.pred == Direction.2005) # test accuracy
```
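Since k = 1 tends to overfit, it is worth scanning a few values of k and comparing test accuracy. A minimal sketch using the objects defined above (the set of k values tried is an arbitrary choice; the seed is reset each time because knn() breaks ties at random):

```r
## Compare test accuracy across a few values of k (a sketch)
for (k in c(1, 3, 5, 10)) {
  set.seed(1)
  pred = knn(train.X, test.X, train.Direction, k = k)
  cat("k =", k, " accuracy =", mean(pred == Direction.2005), "\n")
}
```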