We need the following packages:
library(tidyverse)   # data manipulation and plotting
library(leaps)       # best subset selection (regsubsets)
library(glmnet)      # ridge and lasso regression
library(mlbench)     # Ozone data set
library(doMC)        # parallel backend (registerDoMC)
library(caret)       # tuning by cross-validation
library(class)       # k-nearest neighbors (knn)
library(reshape2)    # data reshaping
library(plotROC)     # ROC curves with ggplot2 (geom_roc)
library(kernlab)     # spam data set
library(bestglm)     # best subset selection for GLMs
We consider the Ozone data set from the mlbench package:
data("Ozone")
summary(Ozone)
## V1 V2 V3 V4 V5
## 1 : 31 1 : 12 1:52 Min. : 1.00 Min. :5320
## 3 : 31 2 : 12 2:52 1st Qu.: 5.00 1st Qu.:5700
## 5 : 31 3 : 12 3:52 Median : 9.00 Median :5770
## 7 : 31 4 : 12 4:53 Mean :11.53 Mean :5753
## 8 : 31 5 : 12 5:53 3rd Qu.:16.00 3rd Qu.:5830
## 10 : 31 6 : 12 6:52 Max. :38.00 Max. :5950
## (Other):180 (Other):294 7:52 NA's :5 NA's :12
## V6 V7 V8 V9
## Min. : 0.000 Min. :19.00 Min. :25.00 Min. :27.68
## 1st Qu.: 3.000 1st Qu.:49.00 1st Qu.:51.00 1st Qu.:49.73
## Median : 5.000 Median :65.00 Median :62.00 Median :57.02
## Mean : 4.869 Mean :58.48 Mean :61.91 Mean :56.85
## 3rd Qu.: 6.000 3rd Qu.:73.00 3rd Qu.:72.00 3rd Qu.:66.11
## Max. :11.000 Max. :93.00 Max. :93.00 Max. :82.58
## NA's :15 NA's :2 NA's :139
## V10 V11 V12 V13
## Min. : 111 Min. :-69.0 Min. :27.50 Min. : 0.0
## 1st Qu.: 890 1st Qu.:-10.0 1st Qu.:51.26 1st Qu.: 70.0
## Median :2125 Median : 24.0 Median :62.24 Median :110.0
## Mean :2591 Mean : 17.8 Mean :60.93 Mean :123.3
## 3rd Qu.:5000 3rd Qu.: 45.0 3rd Qu.:70.52 3rd Qu.:150.0
## Max. :5000 Max. :107.0 Max. :91.76 Max. :500.0
## NA's :15 NA's :1 NA's :14
We first delete the individuals with missing data as well as the first three variables, which contain the date of the observations:
donnees <- Ozone %>% na.omit()
donnees <- donnees[,-c(1:3)]
summary(donnees)
## V4 V5 V6 V7
## Min. : 1.00 Min. :5320 Min. : 0.000 Min. :19.00
## 1st Qu.: 5.00 1st Qu.:5690 1st Qu.: 3.000 1st Qu.:46.00
## Median : 9.00 Median :5760 Median : 5.000 Median :64.00
## Mean :11.37 Mean :5746 Mean : 4.867 Mean :57.61
## 3rd Qu.:16.00 3rd Qu.:5830 3rd Qu.: 6.000 3rd Qu.:73.00
## Max. :38.00 Max. :5950 Max. :11.000 Max. :93.00
## V8 V9 V10 V11
## Min. :25.00 Min. :27.68 Min. : 111 Min. :-69.00
## 1st Qu.:51.50 1st Qu.:49.64 1st Qu.: 869 1st Qu.:-14.00
## Median :61.00 Median :56.48 Median :2083 Median : 18.00
## Mean :61.11 Mean :56.54 Mean :2602 Mean : 14.43
## 3rd Qu.:71.00 3rd Qu.:66.20 3rd Qu.:5000 3rd Qu.: 43.00
## Max. :93.00 Max. :82.58 Max. :5000 Max. :107.00
## V12 V13
## Min. :27.50 Min. : 0.0
## 1st Qu.:51.26 1st Qu.: 60.0
## Median :60.98 Median :100.0
## Mean :60.69 Mean :122.2
## 3rd Qu.:70.88 3rd Qu.:150.0
## Max. :90.68 Max. :350.0
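A quick check of the number of complete observations that remain:
dim(donnees)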
We then split the data into a training set of 150 observations and a test set containing the remaining observations:
set.seed(1234)
perm <- sample(nrow(donnees))
ind.train <- perm[1:150]
ind.test <- perm[-c(1:150)]
train <- donnees[ind.train,]
test <- donnees[ind.test,]
The problem is to explain the daily maximum one-hour-average ozone reading (V4) by the other variables. We first consider the full linear model:
linear.model <- lm(V4~.,data=train)
summary(linear.model)
##
## Call:
## lm(formula = V4 ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8226 -2.7434 -0.6538 2.9229 14.2906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.7498441 43.3205526 0.433 0.66581
## V5 -0.0055941 0.0081420 -0.687 0.49318
## V6 0.1749939 0.1921661 0.911 0.36405
## V7 0.0867529 0.0270191 3.211 0.00164 **
## V8 0.1466358 0.0769024 1.907 0.05860 .
## V9 0.4107676 0.1463569 2.807 0.00572 **
## V10 -0.0012717 0.0004349 -2.924 0.00403 **
## V11 -0.0052064 0.0164637 -0.316 0.75230
## V12 -0.1574278 0.1330782 -1.183 0.23883
## V13 -0.0051887 0.0059843 -0.867 0.38739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.307 on 140 degrees of freedom
## Multiple R-squared: 0.7274, Adjusted R-squared: 0.7098
## F-statistic: 41.5 on 9 and 140 DF, p-value: < 2.2e-16
It seems that some variables are not necessary in the model. We perform an exhaustive best subset selection with the regsubsets function:
mod.sel <- regsubsets(V4~.,data=train)
summary(mod.sel)
## Subset selection object
## Call: regsubsets.formula(V4 ~ ., data = train)
## 9 Variables (and intercept)
## Forced in Forced out
## V5 FALSE FALSE
## V6 FALSE FALSE
## V7 FALSE FALSE
## V8 FALSE FALSE
## V9 FALSE FALSE
## V10 FALSE FALSE
## V11 FALSE FALSE
## V12 FALSE FALSE
## V13 FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## V5 V6 V7 V8 V9 V10 V11 V12 V13
## 1 ( 1 ) " " " " " " "*" " " " " " " " " " "
## 2 ( 1 ) " " " " "*" " " "*" " " " " " " " "
## 3 ( 1 ) " " " " "*" " " "*" "*" " " " " " "
## 4 ( 1 ) " " " " "*" "*" "*" "*" " " " " " "
## 5 ( 1 ) " " " " "*" "*" "*" "*" " " "*" " "
## 6 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" " "
## 7 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*"
## 8 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*"
For each size \(1,2,\dots,8\), we obtain the best model according to the \(R^2\). We now compare these models with the BIC and Mallows’s \(C_p\):
plot(mod.sel,scale="bic")
plot(mod.sel,scale="Cp")
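The values behind these plots can also be inspected numerically: the summary of a regsubsets object stores, for each model size, the value of each criterion.
crit <- summary(mod.sel)
crit$bic            # BIC of the best model of each size
crit$cp             # Mallows's Cp of the best model of each size
which.min(crit$bic) # size of the model minimizing BIC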
BIC selects \[V_4=\beta_0+\beta_1V_7+\beta_2V_9+\beta_3V_{10}+\varepsilon\] while Mallows’s \(C_p\) selects \[V_4=\beta_0+\beta_1V_5+\beta_2V_7+\beta_3V_8+\beta_4V_9+\beta_5V_{10}+\varepsilon.\] We thus fit both models and compare their prediction errors on the test set:
mod.bic <- lm(V4~V7+V9+V10,data=train)
mod.cp <- lm(V4~V5+V7+V8+V9+V10,data=train)
prev <- data.frame(Y=test$V4,lin=predict(linear.model,newdata=test),BIC=predict(mod.bic,newdata=test),Cp=predict(mod.cp,newdata=test))
prev %>% summarize(Err_lin=mean((Y-lin)^2),Err_BIC=mean((Y-BIC)^2),Err_Cp=mean((Y-Cp)^2))
The \(C_p\) model has the smallest estimated quadratic error.
We consider the model \[Y=\beta_0+\beta_1X_1+\dots+\beta_pX_p+\varepsilon\] where \(X_1,\dots,X_p\) are independent variables with distribution \(\mathcal N(0,1)\) and \(\varepsilon\) has distribution \(\mathcal N(0,0.5^2)\) (\(X=(X_1,\dots,X_p)\) and \(\varepsilon\) are independent).
We assume that \(p=105\), that \(n=1000\), and that only the first five variables enter the model:
n <- 1000
p <- 105
set.seed(1234)
X.mat <- matrix(rnorm(n*p),ncol=p)
eps <- rnorm(n,mean = 0,sd=0.5)
df <- data.frame(X.mat,eps)
df <- df %>% mutate(Y=X1+X2+X3+X4+X5+eps) %>% select(-eps)
mod.full <- lm(Y~.,data=df)
summary(mod.full)
##
## Call:
## lm(formula = Y ~ ., data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67736 -0.33472 0.00032 0.30319 1.45542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0130727 0.0166020 -0.787 0.43124
## X1 0.9846185 0.0165621 59.450 < 2e-16 ***
## X2 0.9962524 0.0166838 59.714 < 2e-16 ***
## X3 1.0185854 0.0162804 62.565 < 2e-16 ***
## X4 1.0069154 0.0164305 61.283 < 2e-16 ***
## X5 1.0075293 0.0171871 58.621 < 2e-16 ***
## X6 0.0162413 0.0167867 0.968 0.33355
## X7 -0.0380549 0.0165760 -2.296 0.02192 *
## X8 -0.0138780 0.0163996 -0.846 0.39765
## X9 0.0078099 0.0168906 0.462 0.64392
## X10 -0.0169844 0.0172212 -0.986 0.32428
## X11 -0.0219250 0.0165394 -1.326 0.18530
## X12 0.0035766 0.0164080 0.218 0.82750
## X13 -0.0173715 0.0165630 -1.049 0.29455
## X14 0.0015961 0.0161666 0.099 0.92138
## X15 0.0030420 0.0165937 0.183 0.85459
## X16 0.0123968 0.0167407 0.741 0.45918
## X17 -0.0140972 0.0160033 -0.881 0.37861
## X18 0.0047780 0.0157696 0.303 0.76197
## X19 0.0089718 0.0160524 0.559 0.57636
## X20 0.0120790 0.0166445 0.726 0.46821
## X21 -0.0052803 0.0168198 -0.314 0.75364
## X22 0.0055084 0.0169444 0.325 0.74519
## X23 -0.0119207 0.0165276 -0.721 0.47094
## X24 -0.0279466 0.0168081 -1.663 0.09673 .
## X25 0.0067445 0.0161983 0.416 0.67724
## X26 0.0115625 0.0164784 0.702 0.48306
## X27 -0.0031926 0.0162123 -0.197 0.84393
## X28 -0.0158039 0.0167512 -0.943 0.34571
## X29 -0.0393262 0.0158546 -2.480 0.01331 *
## X30 -0.0131434 0.0171851 -0.765 0.44458
## X31 -0.0187328 0.0162188 -1.155 0.24840
## X32 -0.0442897 0.0164643 -2.690 0.00728 **
## X33 -0.0335800 0.0160712 -2.089 0.03695 *
## X34 0.0087326 0.0158260 0.552 0.58123
## X35 0.0089432 0.0169050 0.529 0.59692
## X36 0.0031839 0.0163345 0.195 0.84550
## X37 -0.0044967 0.0170532 -0.264 0.79208
## X38 -0.0108821 0.0158859 -0.685 0.49351
## X39 -0.0135691 0.0166509 -0.815 0.41534
## X40 -0.0060755 0.0165664 -0.367 0.71390
## X41 -0.0066924 0.0162678 -0.411 0.68089
## X42 -0.0117674 0.0155517 -0.757 0.44945
## X43 -0.0198761 0.0165470 -1.201 0.22999
## X44 0.0036242 0.0165999 0.218 0.82723
## X45 0.0105387 0.0161889 0.651 0.51522
## X46 -0.0404653 0.0165887 -2.439 0.01491 *
## X47 -0.0300850 0.0168022 -1.791 0.07371 .
## X48 0.0124983 0.0163363 0.765 0.44444
## X49 0.0044487 0.0166952 0.266 0.78994
## X50 0.0240242 0.0164747 1.458 0.14512
## X51 -0.0048970 0.0163609 -0.299 0.76477
## X52 0.0307387 0.0166920 1.842 0.06588 .
## X53 -0.0051005 0.0169236 -0.301 0.76319
## X54 0.0151990 0.0163935 0.927 0.35411
## X55 -0.0354083 0.0167166 -2.118 0.03444 *
## X56 -0.0222455 0.0160800 -1.383 0.16688
## X57 0.0014254 0.0168241 0.085 0.93250
## X58 0.0073491 0.0164866 0.446 0.65588
## X59 -0.0160027 0.0160585 -0.997 0.31926
## X60 0.0160967 0.0173806 0.926 0.35463
## X61 -0.0166780 0.0168870 -0.988 0.32360
## X62 0.0141593 0.0167828 0.844 0.39908
## X63 0.0002043 0.0164197 0.012 0.99008
## X64 0.0275355 0.0160570 1.715 0.08672 .
## X65 0.0048604 0.0163827 0.297 0.76678
## X66 0.0168259 0.0161561 1.041 0.29795
## X67 -0.0121348 0.0169109 -0.718 0.47321
## X68 -0.0003431 0.0161142 -0.021 0.98302
## X69 0.0419336 0.0160549 2.612 0.00916 **
## X70 0.0238172 0.0160990 1.479 0.13938
## X71 0.0119947 0.0168301 0.713 0.47622
## X72 0.0030503 0.0168465 0.181 0.85636
## X73 -0.0104551 0.0160765 -0.650 0.51564
## X74 -0.0410657 0.0171878 -2.389 0.01709 *
## X75 -0.0170613 0.0160844 -1.061 0.28910
## X76 0.0257386 0.0166885 1.542 0.12336
## X77 0.0101337 0.0168627 0.601 0.54802
## X78 -0.0168289 0.0163639 -1.028 0.30403
## X79 -0.0074790 0.0162544 -0.460 0.64554
## X80 0.0133683 0.0167342 0.799 0.42458
## X81 -0.0058840 0.0174241 -0.338 0.73568
## X82 0.0253755 0.0166387 1.525 0.12759
## X83 0.0288952 0.0164084 1.761 0.07858 .
## X84 -0.0296538 0.0161642 -1.835 0.06691 .
## X85 0.0122871 0.0168487 0.729 0.46603
## X86 0.0169482 0.0161737 1.048 0.29497
## X87 -0.0026240 0.0166393 -0.158 0.87473
## X88 -0.0172478 0.0170835 -1.010 0.31295
## X89 -0.0396334 0.0167748 -2.363 0.01836 *
## X90 -0.0181130 0.0160121 -1.131 0.25827
## X91 -0.0075442 0.0163882 -0.460 0.64538
## X92 -0.0238149 0.0162008 -1.470 0.14192
## X93 0.0270102 0.0166067 1.626 0.10420
## X94 -0.0104033 0.0158777 -0.655 0.51250
## X95 -0.0134528 0.0168760 -0.797 0.42557
## X96 -0.0209762 0.0164563 -1.275 0.20276
## X97 0.0199452 0.0160869 1.240 0.21536
## X98 0.0308105 0.0167190 1.843 0.06568 .
## X99 -0.0049707 0.0167238 -0.297 0.76636
## X100 0.0077319 0.0161819 0.478 0.63290
## X101 0.0116617 0.0166005 0.702 0.48255
## X102 0.0002618 0.0171176 0.015 0.98780
## X103 0.0199205 0.0167189 1.191 0.23378
## X104 0.0143133 0.0165195 0.866 0.38647
## X105 -0.0166893 0.0169788 -0.983 0.32590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4932 on 894 degrees of freedom
## Multiple R-squared: 0.9592, Adjusted R-squared: 0.9544
## F-statistic: 199.9 on 105 and 894 DF, p-value: < 2.2e-16
To obtain the selected variables, we can look at the following graph:
m.back1 <- regsubsets(Y~.,data=df,method="backward",nvmax=30)
plot(m.back1,scale="bic")
We can also do this automatically:
a <- summary(m.back1)
number <- order(a$bic)[1]        # index of the model minimizing BIC
var.sel <- a$which[number,][-1]  # logical vector of selected variables (intercept removed)
var.sel1 <- names(var.sel)[var.sel] %>% paste(collapse="+")
form.back <- formula(paste("Y~",var.sel1,sep=""))  # e.g. Y~X1+X2+...
mod.back <- lm(form.back,data=df)
summary(mod.back)
##
## Call:
## lm(formula = form.back, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.63923 -0.34301 0.00179 0.32041 1.45661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002413 0.015749 -0.153 0.87828
## X1 0.992339 0.015807 62.777 < 2e-16 ***
## X2 0.991358 0.016097 61.588 < 2e-16 ***
## X3 1.010115 0.015562 64.907 < 2e-16 ***
## X4 1.006043 0.015830 63.552 < 2e-16 ***
## X5 1.008520 0.016242 62.093 < 2e-16 ***
## X29 -0.043358 0.015158 -2.860 0.00432 **
## X69 0.042714 0.015292 2.793 0.00532 **
## X74 -0.043792 0.016118 -2.717 0.00670 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4969 on 991 degrees of freedom
## Multiple R-squared: 0.954, Adjusted R-squared: 0.9537
## F-statistic: 2571 on 8 and 991 DF, p-value: < 2.2e-16
We have selected a model with 8 variables (the 5 relevant variables and 3 noise variables). We now generate a test sample to compare the prediction errors of the full and selected models:
m <- 300
p <- 105
set.seed(12345)
X.mat <- matrix(rnorm(m*p),ncol=p)
eps <- rnorm(m,mean = 0,sd=0.5)
df.test <- data.frame(X.mat,eps)
df.test <- df.test %>% mutate(Y=X1+X2+X3+X4+X5+eps) %>% select(-eps)
p.full <- predict(mod.full,newdata=df.test)
p.step <- predict(mod.back,newdata=df.test)
pred.df <- data.frame(full=p.full,step=p.step,obs=df.test$Y)
pred.df %>% summarise_at(1:2,~mean((obs-.)^2))
We observe that the selected model has a smaller MSE than the full model, so it is better for this criterion. As we have seen, this comes from the variance of the least squares estimates: when noise variables are present in the model, the variance of the least squares estimates increases, making them less accurate. To circumvent this problem, we can select variables (as we did here with the regsubsets function) or use regularized methods such as the lasso and ridge regression.
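This can be checked directly on the two fits above: the standard error of the coefficient of a relevant variable is larger in the full model than in the selected model. For instance, for X1:
# Standard error of the estimated coefficient of X1 in both models
summary(mod.full)$coefficients["X1","Std. Error"]
summary(mod.back)$coefficients["X1","Std. Error"]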
We consider the same problem as in exercise 1. We propose to fit ridge and lasso estimates and to compare these models with those of exercise 1.
The glmnet function requires the \(X\) matrix and the \(Y\) vector as inputs (we cannot use a formula). To compute the \(X\) matrix, we use the model.matrix function:
train.X <- model.matrix(V4~.,data=train)
test.X <- model.matrix(V4~.,data=test)
mod.R <- glmnet(train.X,train$V4,alpha=0)
mod.L <- glmnet(train.X,train$V4,alpha=1)
plot(mod.R)
plot(mod.L)
plot(mod.R,xvar="lambda")
plot(mod.L,xvar="lambda")
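The number of nonzero coefficients along the lasso path is stored in the fitted object:
mod.L$df  # number of nonzero coefficients for each value of lambda
The penalty parameter \(\lambda\) is then selected by cross-validation: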
lassoCV <- cv.glmnet(train.X,train$V4,alpha=1)
plot(lassoCV)
lassoCV$lambda.min
## [1] 0.1356943
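We can display the lasso coefficients at the selected \(\lambda\); the coefficients of the discarded variables are exactly zero:
coef(lassoCV,s="lambda.min")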
ridgeCV <- cv.glmnet(train.X,train$V4,alpha=0)
plot(ridgeCV)
ridgeCV$lambda
## [1] 6153.5728042 5606.9063567 5108.8042497 4654.9521610 4241.4190410
## [6] 3864.6230635 3521.3006022 3208.4779621 2923.4456231 2663.7347716
## [11] 2427.0959162 2211.4794046 2015.0176696 1836.0090536 1672.9030696
## [16] 1524.2869718 1388.8735184 1265.4898231 1153.0671951 1050.6318836
## [21] 957.2966428 872.2530475 794.7644909 724.1598041 659.8274430
## [26] 601.2101916 547.8003352 499.1352633 454.7934622 414.3908645
## [31] 377.5775222 344.0345758 313.4714924 285.6235490 260.2495401
## [36] 237.1296883 216.0637403 196.8692332 179.3799131 163.4442960
## [41] 148.9243552 135.6943260 123.6396161 112.6558134 102.6477814
## [46] 93.5288354 85.2199915 77.6492823 70.7511341 64.4657983
## [51] 58.7388345 53.5206383 48.7660123 44.4337742 40.4864002
## [56] 36.8897000 33.6125207 30.6264770 27.9057052 25.4266393
## [61] 23.1678067 21.1096426 19.2343201 17.5255962 15.9686705
## [66] 14.5500578 13.2574708 12.0797136 11.0065852 10.0287905
## [71] 9.1378604 8.3260780 7.5864122 6.9124563 6.2983727
## [76] 5.7388426 5.2290197 4.7644880 4.3412240 3.9555616
## [81] 3.6041604 3.2839767 2.9922373 2.7264152 2.4842080
## [86] 2.2635178 2.0624331 1.8792122 1.7122682 1.5601550
## [91] 1.4215551 1.2952681 1.1802001 1.0753543 0.9798228
## [96] 0.8927781 0.8134661 0.7412000 0.6753539 0.6153573
Here the selected parameter corresponds to the smallest value of the grid, so the optimal \(\lambda\) may lie outside of it. We need to change the grid:
ridgeCV <- cv.glmnet(train.X,train$V4,alpha=0,lambda=exp(seq(-4,4,length=100)))
plot(ridgeCV)
It’s better!
ridgeCV$lambda.min
## [1] 0.9604013
prev1 <- prev %>% mutate(ridge=as.vector(predict(ridgeCV,newx=test.X,s="lambda.min")),lasso=as.vector(predict(lassoCV,newx=test.X,s="lambda.min")))
prev1 %>% summarize(Err_lin=mean((Y-lin)^2),Err_BIC=mean((Y-BIC)^2),Err_Cp=mean((Y-Cp)^2),Err_ridge=mean((Y-ridge)^2),Err_lasso=mean((Y-lasso)^2))
prev1 %>% gather(key="Method",value="Pred",-Y) %>% group_by(Method) %>%
summarize(Err=mean((Pred-Y)^2)) %>% arrange(Err)
Ridge and lasso do not improve on the other methods for this dataset. This is not surprising, since we are not in a high-dimensional setting (only 9 input variables).
We consider the spam dataset from the kernlab package:
data(spam)
We split the data into a training dataset of size 3000 and a test dataset of size 1601.
set.seed(1234)
perm <- sample(nrow(spam))
train <- spam[perm[1:3000],]
test <- spam[perm[3001:4601],]
train.X <- model.matrix(type~.,data=train)
test.X <- model.matrix(type~.,data=test)
full.logit <- glm(type~.,data=train,family="binomial")
mod.back <- step(full.logit,direction="backward",k=log(nrow(train)),trace=0)
set.seed(1234)
cv.lasso <- cv.glmnet(train.X,train[,58],family="binomial",alpha=1)
plot(cv.lasso)
set.seed(1234)
cv.ridge <- cv.glmnet(train.X,train[,58],family="binomial",alpha=0)
plot(cv.ridge)
We have to change the grid: default values are too large.
set.seed(1234)
cv.ridge <- cv.glmnet(train.X,train[,58],family="binomial",alpha=0,lambda=exp(seq(-15,-2,length=100)))
plot(cv.ridge)
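We can check that the selected parameter now lies inside the grid:
cv.ridge$lambda.min
We now select the number of neighbors of the \(k\)-NN classifier by 10-fold cross-validation with caret: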
set.seed(1234)
ctrl1 <- trainControl(method="cv",number=10)
grid.k <- data.frame(k=seq(1,20,by=1))
registerDoMC(cores = 5)
sel.k <- train(type~.,data=train,method="knn",trControl=ctrl1,tuneGrid=grid.k)
plot(sel.k)
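The selected value of \(k\) can also be read off the caret object:
sel.k$bestTune  # value of k retained by cross-validation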
We choose the 1-NN rule. We now compute the predicted classes of the five classifiers on the test set and estimate their misclassification errors:
prev.full <- predict(full.logit,newdata=test,type="response") %>% round() %>% as.factor()
prev.full <- fct_recode(prev.full,nonspam="0",spam="1")
prev.back <- predict(mod.back,newdata=test,type="response") %>% round() %>% as.factor()
prev.back <- fct_recode(prev.back,nonspam="0",spam="1")
prev.lasso <- predict(cv.lasso,newx=test.X,type="class",s="lambda.min")
prev.ridge <- predict(cv.ridge,newx=test.X,type="class",s="lambda.min")
prev.knn <- knn(train.X,test.X,train$type,k=1)
prev <- data.frame(full=prev.full,back=prev.back,lasso=as.vector(prev.lasso),ridge=as.vector(prev.ridge),knn=prev.knn,Y=test$type)
prev %>% summarise_at(vars(1:5),~mean(.!=Y))
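Beyond the global error rate, a confusion table shows the two types of errors; for instance, for the lasso classifier:
table(prev$lasso,prev$Y)  # rows: predictions, columns: observed classes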
To compare the methods with ROC curves and AUC, we now compute scores (estimated probabilities of the class spam) rather than predicted classes:
prev.full <- predict(full.logit,newdata=test,type="response")
prev.back <- predict(mod.back,newdata=test,type="response")
prev.lasso <- predict(cv.lasso,newx=test.X,type="response") %>% as.vector()
prev.ridge <- predict(cv.ridge,newx=test.X,type="response") %>% as.vector()
prev.knn <- knn(train.X,test.X,train$type,k=1,prob=TRUE)
# knn returns the vote proportion of the winning class: convert it into a score for "spam"
score.knn <- attributes(prev.knn)$prob
score.knn <- ifelse(prev.knn=="spam",score.knn,1-score.knn)
score <- data.frame(full=prev.full,back=prev.back,lasso=prev.lasso,ridge=prev.ridge,knn=score.knn,Y=test$type)
score1 <- score %>% gather(key="Method",value="score",-Y)
score1 %>% group_by(Method) %>% summarize(AUC=pROC::auc(Y,score)) %>% arrange(desc(AUC))
ggplot(score1)+aes(d=Y,m=score,color=Method)+geom_roc()+theme_classic()