# iris data
library(xgboost)
library(Matrix)
library(rBayesianOptimization)
odd.n <- 2*(1:75)-1
iris_train <- iris[odd.n, ] # odd numbered rows for training data
iris_test <- iris[-odd.n, ] # even numbered rows for test data
# reshape the data into the form the {xgboost} package expects
train.mx <- sparse.model.matrix(Species ~., iris_train)
test.mx <- sparse.model.matrix(Species ~ ., iris_test)
dtrain <- xgb.DMatrix(train.mx, label = as.integer(iris_train$Species) - 1)
dtest <- xgb.DMatrix(test.mx, label = as.integer(iris_test$Species) - 1)
# make a function to maximize
xgb_holdout <- function(ex, mx, nr){
  model <- xgb.train(params = list(objective = "multi:softmax", num_class = 3, eval_metric = "mlogloss",
                                   eta = ex / 10, max_depth = mx),  # ex is searched over integers 2-5, so eta is 0.2-0.5
                     data = dtrain, nrounds = nr)
  t.pred <- predict(model, newdata = dtest)
  # holdout accuracy on the test set
  Pred <- sum(diag(table(getinfo(dtest, "label"), t.pred))) / nrow(iris_test)
  list(Score = Pred, Pred = Pred)
}
# Bayesian Optimization
opt_xgb <- BayesianOptimization(xgb_holdout,
                                bounds = list(ex = c(2L, 5L), mx = c(4L, 5L), nr = c(70L, 160L)),
                                init_points = 20, n_iter = 1, acq = "ei", kappa = 2.576,
                                eps = 0.0, verbose = TRUE)
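BayesianOptimization() returns a list whose Best_Par element holds the chosen hyperparameters, so the tuned model can be refit directly. A minimal sketch, reusing the objects defined above (the element names follow the bounds list):
best <- opt_xgb$Best_Par  # named vector with ex, mx, nr
final <- xgb.train(params = list(objective = "multi:softmax", num_class = 3, eval_metric = "mlogloss",
                                 eta = unname(best["ex"]) / 10, max_depth = unname(best["mx"])),
                   data = dtrain, nrounds = unname(best["nr"]))
mean(predict(final, newdata = dtest) == getinfo(dtest, "label"))  # holdout accuracy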
I want to make the code shorter!
“No packages? OK, develop it!!!”
devtools::install_github("ymattu/MlBayesOpt")
library(MlBayesOpt)
set.seed(123)
res <- svm_opt(
train_data = iris_train,
train_label = iris_train$Species,
test_data = iris_test,
test_label = iris_test$Species,
acq = "ucb"
)
elapsed = 0.00 Round = 1 gamma_opt = 6.e+04 cost_opt = 42.9050 Value = 0.3333
elapsed = 0.01 Round = 2 gamma_opt = 6.e+04 cost_opt = 12.0327 Value = 0.3333
elapsed = 0.00 Round = 3 gamma_opt = 7.e+04 cost_opt = 92.1573 Value = 0.3333
elapsed = 0.01 Round = 4 gamma_opt = 9.e+04 cost_opt = 18.3716 Value = 0.3333
elapsed = 0.01 Round = 5 gamma_opt = 8.e+04 cost_opt = 56.2588 Value = 0.3333
# [...]
elapsed = 0.00 Round = 19 gamma_opt = 2453.1625 cost_opt = 84.8863 Value = 0.3733
elapsed = 0.00 Round = 20 gamma_opt = 1.e+05 cost_opt = 62.2435 Value = 0.3333
elapsed = 0.01 Round = 21 gamma_opt = 1.e+04 cost_opt = 23.6688 Value = 0.5867
Best Parameters Found:
Round = 15 gamma_opt = 1.e+04 cost_opt = 79.5983 Value = 0.6133
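The best gamma/cost can then be plugged into an ordinary SVM fit. A hedged sketch, assuming the returned list follows the rBayesianOptimization convention (Best_Par named after the reported gamma_opt / cost_opt) and using {e1071} as the SVM backend:
library(e1071)
best_svm <- res$Best_Par
fit <- svm(Species ~ ., data = iris_train, kernel = "radial",
           gamma = best_svm["gamma_opt"], cost = best_svm["cost_opt"])
mean(predict(fit, iris_test) == iris_test$Species)  # holdout accuracy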
set.seed(123)
mod <- rf_opt(
train_data = iris_train,
train_label = iris_train$Species,
test_data = iris_test,
test_label = iris_test$Species,
mtry_range = c(1L, 4L)
)
elapsed = 0.01 Round = 1 num_trees_opt = 288.0000 mtry_opt = 4.0000 Value = 1.0000
elapsed = 0.03 Round = 2 num_trees_opt = 789.0000 mtry_opt = 3.0000 Value = 1.0000
elapsed = 0.02 Round = 3 num_trees_opt = 410.0000 mtry_opt = 3.0000 Value = 1.0000
elapsed = 0.04 Round = 4 num_trees_opt = 883.0000 mtry_opt = 4.0000 Value = 1.0000
elapsed = 0.03 Round = 5 num_trees_opt = 941.0000 mtry_opt = 3.0000 Value = 1.0000
# [...]
elapsed = 0.01 Round = 19 num_trees_opt = 329.0000 mtry_opt = 2.0000 Value = 1.0000
elapsed = 0.03 Round = 20 num_trees_opt = 955.0000 mtry_opt = 2.0000 Value = 1.0000
elapsed = 0.01 Round = 21 num_trees_opt = 101.0000 mtry_opt = 2.0000 Value = 1.0000
Best Parameters Found:
Round = 1 num_trees_opt = 288.0000 mtry_opt = 4.0000 Value = 1.0000
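Again the best parameters can be reused for a final fit. A hedged sketch, assuming {ranger} as the random-forest backend and the Best_Par names shown in the output above:
library(ranger)
best_rf <- mod$Best_Par
rf <- ranger(Species ~ ., data = iris_train,
             num.trees = best_rf["num_trees_opt"], mtry = best_rf["mtry_opt"])
mean(predict(rf, data = iris_test)$predictions == iris_test$Species)  # holdout accuracy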
set.seed(71)
res1 <- xgb_opt(train_data = iris_train,
train_label = iris_train$Species,
test_data = iris_test,
test_label = iris_test$Species,
objectfun = "multi:softmax",
classes = 3,
evalmetric = "merror"
)
elapsed = 0.02 Round = 1 eta_opt = 0.8729 max_depth_opt = 6.0000 nrounds_opt = 123.8761 subsample_opt = 0.2789 bytree_opt = 0.5343 Value = 0.7467
elapsed = 0.02 Round = 2 eta_opt = 0.5779 max_depth_opt = 6.0000 nrounds_opt = 144.4570 subsample_opt = 0.4523 bytree_opt = 0.4854 Value = 0.6933
elapsed = 0.01 Round = 3 eta_opt = 0.3202 max_depth_opt = 6.0000 nrounds_opt = 88.6309 subsample_opt = 0.1219 bytree_opt = 0.4910 Value = 0.7467
elapsed = 0.01 Round = 4 eta_opt = 0.5614 max_depth_opt = 4.0000 nrounds_opt = 76.5790 subsample_opt = 0.3092 bytree_opt = 0.6768 Value = 0.9600
elapsed = 0.02 Round = 5 eta_opt = 0.3955 max_depth_opt = 6.0000 nrounds_opt = 157.8434 subsample_opt = 0.3799 bytree_opt = 0.9856 Value = 0.9867
# [...]
elapsed = 0.01 Round = 19 eta_opt = 0.6466 max_depth_opt = 5.0000 nrounds_opt = 82.4813 subsample_opt = 0.1647 bytree_opt = 0.6055 Value = 0.9733
elapsed = 0.02 Round = 20 eta_opt = 0.7080 max_depth_opt = 4.0000 nrounds_opt = 94.7421 subsample_opt = 0.7886 bytree_opt = 0.9739 Value = 0.9867
elapsed = 0.02 Round = 21 eta_opt = 0.1000 max_depth_opt = 6.0000 nrounds_opt = 93.1294 subsample_opt = 0.6490 bytree_opt = 0.9329 Value = 1.0000
Best Parameters Found:
Round = 10 eta_opt = 0.4986 max_depth_opt = 5.0000 nrounds_opt = 94.3087 subsample_opt = 0.9980 bytree_opt = 0.9219 Value = 1.0000
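The same pattern works for the xgboost wrapper, reusing dtrain/dtest from the first example. A hedged sketch; the Best_Par element names are assumed from the printed output:
bp <- res1$Best_Par
final_xgb <- xgb.train(params = list(objective = "multi:softmax", num_class = 3, eval_metric = "merror",
                                     eta = bp["eta_opt"], max_depth = bp["max_depth_opt"],
                                     subsample = bp["subsample_opt"], colsample_bytree = bp["bytree_opt"]),
                       data = dtrain, nrounds = round(bp["nrounds_opt"]))
mean(predict(final_xgb, newdata = dtest) == getinfo(dtest, "label"))  # holdout accuracy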
Example: SVM
res <- svm_opt(
# dataset arguments (required)
train_data = iris_train,
train_label = iris_train$Species,
test_data = iris_test,
test_label = iris_test$Species,
# hyperparameter ranges (optional; defaults shown below)
gamma_range = c(10 ^ (-5), 10 ^ 5),
c_range = c(10 ^ (-2), 10 ^ 2),
# Bayesian optimization settings (optional; defaults shown below)
init_points = 20,
n_iter = 1,
acq = "ei",
kappa = 2.576,
eps = 0.0,
kernel = list(type = "exponential", power = 2)
)
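Whichever wrapper is used, the result is the list produced by rBayesianOptimization::BayesianOptimization(), so (assuming MlBayesOpt returns it unchanged) the interesting pieces are:
res$Best_Par    # named vector of the best hyperparameters (e.g. gamma_opt, cost_opt)
res$Best_Value  # best holdout accuracy found during the search
res$History     # every evaluated point, handy for inspecting the search path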
These slides were made with the {revealjs} package.
The slides and the Rmd file are published on GitHub (https://github.com/ymattu/Global-TokyoR-2).