# Multiclass XGBoost cross-validation on `dna_sj_n` ----
#
# Splits the data 75/25 into train/test, builds xgb.DMatrix objects, runs a
# baseline and a tuned 10-fold cross-validation on the training split, and
# prints a confusion matrix of the out-of-fold (OOF) predictions for each run.
#
# Assumptions (confirm against the code that creates `dna_sj_n`):
#   * `dna_sj_n` is a data frame / tibble whose column `X1` is the class label
#     and whose remaining columns are features coercible to numeric.
#   * After `as.numeric()`, labels are 0-based (0 .. num_class - 1), which is
#     what `multi:softprob` expects and what the `+ 1` shift below relies on.
#   * `xgb.DMatrix`, `xgb.cv` and `confusionMatrix` (presumably caret's) are
#     already attached by an earlier part of the file.

# Coerce a matrix-like object to a plain numeric matrix of the same shape.
# Dimnames are intentionally not carried over.
as_numeric_matrix <- function(x) {
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
}

# Build the OOF prediction frame: one probability column per class, plus
# `max_prob` (1-based index of the most probable class, last one wins on ties)
# and `label` (true class shifted from 0-based to 1-based).
build_oof_frame <- function(cv_pred, label_zero_based) {
  oof <- data.frame(cv_pred)
  oof$max_prob <- max.col(cv_pred, ties.method = "last")
  oof$label <- label_zero_based + 1
  oof
}

# Confusion matrix with predictions as `data` and truth as `reference`.
# Shared factor levels keep the table square even when a class is never
# predicted.
oof_confusion <- function(oof, n_classes) {
  class_levels <- seq_len(n_classes)
  confusionMatrix(
    data = factor(oof$max_prob, levels = class_levels),
    reference = factor(oof$label, levels = class_levels)
  )
}

dat <- dna_sj_n
dat$X1 <- as.numeric(dat$X1)

# The split and the CV fold assignment both draw from R's RNG, so seed it
# explicitly to make the run reproducible.
set.seed(88)

# Make split index: 75% of the rows go to the training set.
train_index <- sample(seq_len(nrow(dat)), floor(nrow(dat) * 0.75))

# Full data set ----
# Features are every column except the label; excluding by name avoids
# depending on `X1` being the first column.
data_variables <- as.matrix(dat[, setdiff(names(dat), "X1"), drop = FALSE])
# Keep the label as a one-column frame so `$X1` works after row subsetting.
data_label <- dat[, "X1", drop = FALSE]
# NOTE: `data` still contains the label column; it is kept only for any later
# code that refers to it and must not be used as a feature matrix.
data <- as.matrix(dat)
# Built from the features only, so the label does not leak into the predictors.
data_matrix <- xgb.DMatrix(as_numeric_matrix(data_variables), label = dat$X1)

# Train split ----
train_data <- data_variables[train_index, , drop = FALSE]
train_label <- data_label[train_index, , drop = FALSE]
train_dat <- dat[train_index, ]
test_dat <- dat[-train_index, ]
train_matrix <- xgb.DMatrix(
  as_numeric_matrix(train_data),
  label = train_label$X1
)

# Test split ----
test_data <- data_variables[-train_index, , drop = FALSE]
test_label <- data_label[-train_index, , drop = FALSE]
test_matrix <- xgb.DMatrix(
  as_numeric_matrix(test_data),
  label = test_label$X1
)

numberOfClasses <- length(unique(dat$X1))

# Baseline cross-validation ----
# `mlogloss` replaces the original `rmse`: rmse is a regression metric and is
# not meaningful for the per-class probabilities that `multi:softprob` emits.
xgb_params <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = numberOfClasses
)
nr <- 55 # number of XGBoost rounds
cv.nfold <- 10 # number of CV folds

# Run the cross-validation and keep the out-of-fold predictions.
ptm <- proc.time()
cvm0 <- xgb.cv(
  params = xgb_params,
  data = train_matrix,
  nrounds = nr,
  nfold = cv.nfold,
  verbose = FALSE,
  prediction = TRUE
)
proc.time() - ptm

p0 <- build_oof_frame(cvm0$pred, train_label$X1)
oof_confusion(p0, numberOfClasses)

# Tuned cross-validation ----
xgb_params <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = numberOfClasses
)
nr <- 55 # number of XGBoost rounds
cv.nfold <- 10 # number of CV folds

# Earlier candidate settings, kept for reference:
#   max_depth = 6, eta = 0.2, min_child_weight = 5.47, subsample = 0.608,
#   colsample_bytree = 1, gamma = 0, seed = 500
# Booster settings are merged into `params` rather than forwarded through
# `...`, which newer xgboost releases no longer accept.
xgb_params_best <- c(
  xgb_params,
  list(
    max_depth = 6,
    eta = 0.2,
    min_child_weight = 5,
    subsample = 0.6,
    colsample_bytree = 1,
    gamma = 0,
    seed = 88
  )
)

# Re-seed so this section is reproducible when run on its own.
set.seed(88)
ptm <- proc.time()
cvm_best <- xgb.cv(
  params = xgb_params_best,
  data = train_matrix,
  nrounds = nr,
  nfold = cv.nfold,
  verbose = FALSE,
  prediction = TRUE
)
proc.time() - ptm

p_best <- build_oof_frame(cvm_best$pred, train_label$X1)
oof_confusion(p_best, numberOfClasses)