Using devices such as Jawbone Up, Nike FuelBand, and Fitbit, it is now possible to collect a large amount of data about personal activity relatively inexpensively. These types of devices are part of the quantified self movement – a group of enthusiasts who take measurements about themselves regularly to improve their health, to find patterns in their behavior, or because they are tech geeks. One thing that people regularly do is quantify how much of a particular activity they do, but they rarely quantify how well they do it. In this project, your goal will be to use data from accelerometers on the belt, forearm, arm, and dumbbell of 6 participants. They were asked to perform barbell lifts correctly and incorrectly in 5 different ways. More information is available from the website here: http://groupware.les.inf.puc-rio.br/har (see the section on the Weight Lifting Exercise Dataset).
# ---- Packages ----
# One attach per line so each statement parses on its own.
library(dplyr)
library(xgboost)        # xgb.cv(), xgboost(), xgb.importance(), ...
library(caret)          # nearZeroVar(), confusionMatrix()
library(Ckmeans.1d.dp)  # presumably required by xgb.plot.importance() -- confirm
library(corrplot)
# ---- Load the raw data ----
# read.csv() already defaults to header = TRUE and sep = ","; text columns are
# kept as character rather than converted to factors.
train <- read.csv("pml-training.csv", stringsAsFactors = FALSE)
test <- read.csv("pml-testing.csv", stringsAsFactors = FALSE)
# ---- Inspect the raw data ----
# Dimensions of data
dim(train)
dim(test)

# Structure of data
str(train)
str(test)

# ---- Target outcome ----
# Extracted before the column filter below, because that filter drops `classe`.
outcome <- as.factor(train[, "classe"])
levels(outcome)

# Relabel the class levels as consecutive integers 1..num.class
num.class <- length(levels(outcome))
levels(outcome) <- seq_len(num.class)
head(outcome)

# ---- Keep only the sensor columns ----
# Filter columns on: belt, forearm, arm, dumbbell.
# "dumbb?ell" accepts both spellings. The previous pattern used "dumbell",
# which matched no column: the names(train) output recorded below contains no
# dumbbell measurements at all.
filter <- grepl("belt|arm|dumbb?ell", names(train))
train <- train[, filter]
# The same positional mask is applied to test; this assumes test has its
# feature columns in the same order as train -- TODO confirm.
test <- test[, filter]
# Defensive: make sure the outcome is not left among the predictors.
train$classe <- NULL
names(train)
# Output recorded with the earlier "belt|arm|dumbell" pattern (the dumbbell
# columns are therefore missing from this listing):
#>   [1] "roll_belt" "pitch_belt" "yaw_belt" "total_accel_belt"
#>   [5] "kurtosis_roll_belt" "kurtosis_picth_belt" "kurtosis_yaw_belt" "skewness_roll_belt"
#>   [9] "skewness_roll_belt.1" "skewness_yaw_belt" "max_roll_belt" "max_picth_belt"
#>  [13] "max_yaw_belt" "min_roll_belt" "min_pitch_belt" "min_yaw_belt"
#>  [17] "amplitude_roll_belt" "amplitude_pitch_belt" "amplitude_yaw_belt" "var_total_accel_belt"
#>  [21] "avg_roll_belt" "stddev_roll_belt" "var_roll_belt" "avg_pitch_belt"
#>  [25] "stddev_pitch_belt" "var_pitch_belt" "avg_yaw_belt" "stddev_yaw_belt"
#>  [29] "var_yaw_belt" "gyros_belt_x" "gyros_belt_y" "gyros_belt_z"
#>  [33] "accel_belt_x" "accel_belt_y" "accel_belt_z" "magnet_belt_x"
#>  [37] "magnet_belt_y" "magnet_belt_z" "roll_arm" "pitch_arm"
#>  [41] "yaw_arm" "total_accel_arm" "var_accel_arm" "avg_roll_arm"
#>  [45] "stddev_roll_arm" "var_roll_arm" "avg_pitch_arm" "stddev_pitch_arm"
#>  [49] "var_pitch_arm" "avg_yaw_arm" "stddev_yaw_arm" "var_yaw_arm"
#>  [53] "gyros_arm_x" "gyros_arm_y" "gyros_arm_z" "accel_arm_x"
#>  [57] "accel_arm_y" "accel_arm_z" "magnet_arm_x" "magnet_arm_y"
#>  [61] "magnet_arm_z" "kurtosis_roll_arm" "kurtosis_picth_arm" "kurtosis_yaw_arm"
#>  [65] "skewness_roll_arm" "skewness_pitch_arm" "skewness_yaw_arm" "max_roll_arm"
#>  [69] "max_picth_arm" "max_yaw_arm" "min_roll_arm" "min_pitch_arm"
#>  [73] "min_yaw_arm" "amplitude_roll_arm" "amplitude_pitch_arm" "amplitude_yaw_arm"
#>  [77] "roll_forearm" "pitch_forearm" "yaw_forearm" "kurtosis_roll_forearm"
#>  [81] "kurtosis_picth_forearm" "kurtosis_yaw_forearm" "skewness_roll_forearm" "skewness_pitch_forearm"
#>  [85] "skewness_yaw_forearm" "max_roll_forearm" "max_picth_forearm" "max_yaw_forearm"
#>  [89] "min_roll_forearm" "min_pitch_forearm" "min_yaw_forearm" "amplitude_roll_forearm"
#>  [93] "amplitude_pitch_forearm" "amplitude_yaw_forearm" "total_accel_forearm" "var_accel_forearm"
#>  [97] "avg_roll_forearm" "stddev_roll_forearm" "var_roll_forearm" "avg_pitch_forearm"
#> [101] "stddev_pitch_forearm" "var_pitch_forearm" "avg_yaw_forearm" "stddev_yaw_forearm"
#> [105] "var_yaw_forearm" "gyros_forearm_x" "gyros_forearm_y" "gyros_forearm_z"
#> [109] "accel_forearm_x" "accel_forearm_y" "accel_forearm_z" "magnet_forearm_x"
#> [113] "magnet_forearm_y" "magnet_forearm_z"

########################################################
# Low variance check
########################################################
# Only columns that caret flags as zeroVar are listed here; nothing is removed
# from train. (The original note mentioned a "default variance cutoff of 0.05";
# the code below does not set or use such a cutoff.)
nsv <- nearZeroVar(train, saveMetrics = TRUE, names = TRUE)
columnsToFilter <- nsv[nsv$zeroVar, ]
lowvarianceFilter <- row.names(columnsToFilter)
print(lowvarianceFilter)
# Recorded result of the original run: empty, i.e. no column was flagged.

############################
# Missing values
############################
# Replace NA with the sentinel -1 in both data sets.
train[is.na(train)] <- -1
test[is.na(test)] <- -1
I used the XGBoost package to build the prediction model for this project.
# ---- Model inputs ----
# xgboost works on numeric matrices, so coerce both data sets.
train.matrix <- as.matrix(train)
mode(train.matrix) <- "numeric"
test.matrix <- as.matrix(test)
mode(test.matrix) <- "numeric"

# Outcome as a one-column numeric matrix.
# xgboost expects multiclass labels in [0, num_class), hence the shift by one.
y <- as.matrix(as.integer(outcome) - 1)

# ---- xgboost parameters ----
param <- list(
  objective        = "multi:softprob",  # multiclass, returns class probabilities
  num_class        = num.class,         # number of classes
  eval_metric      = "merror",          # multiclass error rate
  nthread          = 8,                 # number of threads to be used
  max_depth        = 16,                # maximum depth of a tree
  eta              = 0.3,               # step size shrinkage
  gamma            = 0,                 # minimum loss reduction
  subsample        = 1,                 # share of rows used to grow each tree
  colsample_bytree = 1,                 # share of columns used per tree
  min_child_weight = 12                 # minimum sum of instance weight in a child
)
# ---- Cross-validation ----
# Fix the random seed so the fold assignment is reproducible.
set.seed(1234)

# 4-fold cross-validation over nround.cv boosting rounds. prediction = TRUE
# keeps the out-of-fold predictions for the confusion matrix further down.
# `params` is spelled out in full instead of relying on partial matching of
# `param=`.
# NOTE(review): NA was replaced by -1 before the matrix conversion, yet
# missing = NaN is passed here -- confirm this is the intended missing marker.
nround.cv <- 150
bst.cv <- xgb.cv(
  params = param, data = train.matrix, label = y, nfold = 4,
  nrounds = nround.cv, prediction = TRUE, verbose = TRUE, missing = NaN
)
#> [0] train-merror:0.062821+0.002672 test-merror:0.089848+0.006229
#> [1] train-merror:0.034485+0.001884 test-merror:0.060748+0.001447
#> [2] train-merror:0.024530+0.002142 test-merror:0.049791+0.004196
#> [3] train-merror:0.018652+0.001352 test-merror:0.040669+0.002216
#> [4] train-merror:0.013879+0.000187 test-merror:0.034757+0.003199
#> [5] train-merror:0.011229+0.000848 test-merror:0.029864+0.002982
#> [6] train-merror:0.008986+0.000522 test-merror:0.027266+0.003491
#> [7] train-merror:0.007713+0.000583 test-merror:0.025838+0.002934
#> [8] train-merror:0.006099+0.000988 test-merror:0.023698+0.003878
#> [9] train-merror:0.004842+0.000378 test-merror:0.021507+0.003711
#> [10] train-merror:0.004077+0.000563 test-merror:0.020284+0.004273
#> [11] train-merror:0.003160+0.000251 test-merror:0.018908+0.003604
#> [12] train-merror:0.002446+0.000229 test-merror:0.016767+0.002478
#> [13] train-merror:0.001937+0.000039 test-merror:0.015442+0.001902
#> [14] train-merror:0.001631+0.000147 test-merror:0.014729+0.002298
#> [15] train-merror:0.001342+0.000116 test-merror:0.013658+0.001364
#> [16] train-merror:0.001104+0.000086 test-merror:0.012486+0.001262
#> [17] train-merror:0.000968+0.000116 test-merror:0.012231+0.000577
#> [18] train-merror:0.000849+0.000068 test-merror:0.011824+0.000726
#> [19] train-merror:0.000730+0.000140 test-merror:0.011263+0.000103
#> [20] train-merror:0.000561+0.000102 test-merror:0.010753+0.000420
#> [21] train-merror:0.000527+0.000116 test-merror:0.010448+0.001032
#> [22] train-merror:0.000425+0.000086 test-merror:0.010499+0.000634
#> [23] train-merror:0.000374+0.000130 test-merror:0.010244+0.000933
#> [24] train-merror:0.000306+0.000118 test-merror:0.009581+0.001268
#> [25] train-merror:0.000306+0.000130 test-merror:0.009581+0.001013
#> [26] train-merror:0.000272+0.000147 test-merror:0.009276+0.001020
#> [27] train-merror:0.000187+0.000116 test-merror:0.008715+0.001059
#> [28] train-merror:0.000187+0.000116 test-merror:0.008868+0.001073
#> [29] train-merror:0.000153+0.000086 test-merror:0.008664+0.001034
#> [30] train-merror:0.000119+0.000065 test-merror:0.008460+0.001184
#> [31] train-merror:0.000136+0.000079 test-merror:0.008460+0.000950
#> [32] train-merror:0.000068+0.000056 test-merror:0.008307+0.000991
#> [33] train-merror:0.000034+0.000039 test-merror:0.008409+0.001110
#> [34] train-merror:0.000017+0.000034 test-merror:0.008256+0.000790
#> [35] train-merror:0.000017+0.000034 test-merror:0.008154+0.001142
#> [36] train-merror:0.000017+0.000034 test-merror:0.008307+0.000991
#> [37] train-merror:0.000000+0.000000 test-merror:0.008358+0.000865
#> [38] train-merror:0.000000+0.000000 test-merror:0.008409+0.000586
#> [39] train-merror:0.000000+0.000000 test-merror:0.008358+0.000600
#> [40] train-merror:0.000000+0.000000 test-merror:0.008205+0.000536
#> [41] train-merror:0.000000+0.000000 test-merror:0.008052+0.000790
#> [42] train-merror:0.000000+0.000000 test-merror:0.008001+0.000805
#> [43] train-merror:0.000000+0.000000 test-merror:0.007900+0.000674
#> [44] train-merror:0.000000+0.000000 test-merror:0.007746+0.001026
#> [45] train-merror:0.000000+0.000000 test-merror:0.007899+0.000787
#> [46] train-merror:0.000000+0.000000 test-merror:0.007848+0.000904
#> [47] train-merror:0.000000+0.000000 test-merror:0.007543+0.000865
#> [48] train-merror:0.000000+0.000000 test-merror:0.007746+0.000865
#> [49] train-merror:0.000000+0.000000 test-merror:0.007593+0.001109
#> [50] train-merror:0.000000+0.000000 test-merror:0.007593+0.001134
#> [51] train-merror:0.000000+0.000000 test-merror:0.007491+0.001109
#> [52] train-merror:0.000000+0.000000 test-merror:0.007491+0.001146
#> [53] train-merror:0.000000+0.000000 test-merror:0.007645+0.000919
#> [54] train-merror:0.000000+0.000000 test-merror:0.007543+0.000763
#> [55] train-merror:0.000000+0.000000 test-merror:0.007492+0.000733
#> [56] train-merror:0.000000+0.000000 test-merror:0.007339+0.000441
#> [57] train-merror:0.000000+0.000000 test-merror:0.007237+0.000513
#> [58] train-merror:0.000000+0.000000 test-merror:0.007288+0.000609
#> [59] train-merror:0.000000+0.000000 test-merror:0.007339+0.000666
#> [60] train-merror:0.000000+0.000000 test-merror:0.007237+0.000697
#> [61] train-merror:0.000000+0.000000 test-merror:0.007288+0.000674
#> [62] train-merror:0.000000+0.000000 test-merror:0.006982+0.000714
#> [63] train-merror:0.000000+0.000000 test-merror:0.007084+0.000788
#> [64] train-merror:0.000000+0.000000 test-merror:0.007186+0.000770
#> [65] train-merror:0.000000+0.000000 test-merror:0.007135+0.000781
#> [66] train-merror:0.000000+0.000000 test-merror:0.007186+0.000733
#> [67] train-merror:0.000000+0.000000 test-merror:0.007135+0.000781
#> [68] train-merror:0.000000+0.000000 test-merror:0.007084+0.000562
#> [69] train-merror:0.000000+0.000000 test-merror:0.006982+0.000674
#> [70] train-merror:0.000000+0.000000 test-merror:0.006829+0.000716
#> [71] train-merror:0.000000+0.000000 test-merror:0.006778+0.000787
#> [72] train-merror:0.000000+0.000000 test-merror:0.006727+0.000645
#> [73] train-merror:0.000000+0.000000 test-merror:0.006676+0.000887
#> [74] train-merror:0.000000+0.000000 test-merror:0.006676+0.000653
#> [75] train-merror:0.000000+0.000000 test-merror:0.006676+0.000902
#> [76] train-merror:0.000000+0.000000 test-merror:0.006880+0.000822
#> [77] train-merror:0.000000+0.000000 test-merror:0.006727+0.000832
#> [78] train-merror:0.000000+0.000000 test-merror:0.006727+0.001092
#> [79] train-merror:0.000000+0.000000 test-merror:0.006523+0.000832
#> [80] train-merror:0.000000+0.000000 test-merror:0.006575+0.000838
#> [81] train-merror:0.000000+0.000000 test-merror:0.006574+0.000713
#> [82] train-merror:0.000000+0.000000 test-merror:0.006574+0.000713
#> [83] train-merror:0.000000+0.000000 test-merror:0.006574+0.000694
#> [84] train-merror:0.000000+0.000000 test-merror:0.006523+0.000600
#> [85] train-merror:0.000000+0.000000 test-merror:0.006625+0.000857
#> [86] train-merror:0.000000+0.000000 test-merror:0.006421+0.000841
#> [87] train-merror:0.000000+0.000000 test-merror:0.006625+0.000677
#> [88] train-merror:0.000000+0.000000 test-merror:0.006472+0.000733
#> [89] train-merror:0.000000+0.000000 test-merror:0.006523+0.000726
#> [90] train-merror:0.000000+0.000000 test-merror:0.006268+0.000674
#> [91] train-merror:0.000000+0.000000 test-merror:0.006371+0.000770
#> [92] train-merror:0.000000+0.000000 test-merror:0.006371+0.000839
#> [93] train-merror:0.000000+0.000000 test-merror:0.006370+0.000631
#> [94] train-merror:0.000000+0.000000 test-merror:0.006370+0.000714
#> [95] train-merror:0.000000+0.000000 test-merror:0.006370+0.000632
#> [96] train-merror:0.000000+0.000000 test-merror:0.006370+0.000482
#> [97] train-merror:0.000000+0.000000 test-merror:0.006268+0.000674
#> [98] train-merror:0.000000+0.000000 test-merror:0.006217+0.000676
#> [99] train-merror:0.000000+0.000000 test-merror:0.006217+0.000540
#> [100] train-merror:0.000000+0.000000 test-merror:0.006319+0.000500
#> [101] train-merror:0.000000+0.000000 test-merror:0.006319+0.000500
#> [102] train-merror:0.000000+0.000000 test-merror:0.006319+0.000645
#> [103] train-merror:0.000000+0.000000 test-merror:0.006370+0.000632
#> [104] train-merror:0.000000+0.000000 test-merror:0.006268+0.000674
#> [105] train-merror:0.000000+0.000000 test-merror:0.006268+0.000674
#> [106] train-merror:0.000000+0.000000 test-merror:0.006370+0.000482
#> [107] train-merror:0.000000+0.000000 test-merror:0.006319+0.000471
#> [108] train-merror:0.000000+0.000000 test-merror:0.006268+0.000536
#> [109] train-merror:0.000000+0.000000 test-merror:0.006268+0.000586
#> [110] train-merror:0.000000+0.000000 test-merror:0.006217+0.000540
#> [111] train-merror:0.000000+0.000000 test-merror:0.006116+0.000644
#> [112] train-merror:0.000000+0.000000 test-merror:0.006268+0.000536
#> [113] train-merror:0.000000+0.000000 test-merror:0.006319+0.000372
#> [114] train-merror:0.000000+0.000000 test-merror:0.006268+0.000452
#> [115] train-merror:0.000000+0.000000 test-merror:0.006319+0.000372
#> [116] train-merror:0.000000+0.000000 test-merror:0.006319+0.000441
#> [117] train-merror:0.000000+0.000000 test-merror:0.006268+0.000536
#> [118] train-merror:0.000000+0.000000 test-merror:0.006217+0.000634
#> [119] train-merror:0.000000+0.000000 test-merror:0.006167+0.000609
#> [120] train-merror:0.000000+0.000000 test-merror:0.006116+0.000706
#> [121] train-merror:0.000000+0.000000 test-merror:0.006268+0.000536
#> [122] train-merror:0.000000+0.000000 test-merror:0.006217+0.000634
#> [123] train-merror:0.000000+0.000000 test-merror:0.006217+0.000513
#> [124] train-merror:0.000000+0.000000 test-merror:0.006217+0.000634
#> [125] train-merror:0.000000+0.000000 test-merror:0.006268+0.000805
#> [126] train-merror:0.000000+0.000000 test-merror:0.006217+0.000634
#> [127] train-merror:0.000000+0.000000 test-merror:0.006167+0.000631
#> [128] train-merror:0.000000+0.000000 test-merror:0.006217+0.000634
#> [129] train-merror:0.000000+0.000000 test-merror:0.006218+0.000634
#> [130] train-merror:0.000000+0.000000 test-merror:0.006421+0.000486
#> [131] train-merror:0.000000+0.000000 test-merror:0.006268+0.000674
#> [132] train-merror:0.000000+0.000000 test-merror:0.006268+0.000536
#> [133] train-merror:0.000000+0.000000 test-merror:0.006217+0.000540
#> [134] train-merror:0.000000+0.000000 test-merror:0.006217+0.000486
#> [135] train-merror:0.000000+0.000000 test-merror:0.006217+0.000540
#> [136] train-merror:0.000000+0.000000 test-merror:0.006167+0.000482
#> [137] train-merror:0.000000+0.000000 test-merror:0.006268+0.000609
#> [138] train-merror:0.000000+0.000000 test-merror:0.006167+0.000510
#> [139] train-merror:0.000000+0.000000 test-merror:0.006167+0.000510
#> [140] train-merror:0.000000+0.000000 test-merror:0.006167+0.000510
#> [141] train-merror:0.000000+0.000000 test-merror:0.006116+0.000441
#> [142] train-merror:0.000000+0.000000 test-merror:0.006167+0.000510
#> [143] train-merror:0.000000+0.000000 test-merror:0.006116+0.000334
#> [144] train-merror:0.000000+0.000000 test-merror:0.006167+0.000349
#> [145] train-merror:0.000000+0.000000 test-merror:0.006116+0.000441
#> [146] train-merror:0.000000+0.000000 test-merror:0.006116+0.000552
#> [147] train-merror:0.000000+0.000000 test-merror:0.006116+0.000552
#> [148] train-merror:0.000000+0.000000 test-merror:0.006065+0.000653
#> [149] train-merror:0.000000+0.000000 test-merror:0.006065+0.000653

tail(bst.cv$dt)
#>    train.merror.mean train.merror.std test.merror.mean test.merror.std
#> 1:                 0                0         0.006167        0.000349
#> 2:                 0                0         0.006116        0.000441
#> 3:                 0                0         0.006116        0.000552
#> 4:                 0                0         0.006116        0.000552
#> 5:                 0                0         0.006065        0.000653
#> 6:                 0                0         0.006065        0.000653

# Row index of the lowest mean CV test error. which.min() returns the first
# minimum; the row index is one higher than the zero-based round label printed
# in the log above.
min.merror.idx <- which.min(bst.cv$dt[, test.merror.mean])
min.merror.idx
#> [1] 149

# CV metrics at that round
bst.cv$dt[min.merror.idx, ]
#>    train.merror.mean train.merror.std test.merror.mean test.merror.std
#> 1:                 0                0         0.006065        0.000653

# Decode the out-of-fold predictions: one row per observation, one column per
# class, then take the most probable class (ties resolved to the last column).
pred.cv <- matrix(bst.cv$pred,
                  nrow = length(bst.cv$pred) / num.class,
                  ncol = num.class)
pred.cv <- max.col(pred.cv, "last")

# Confusion matrix. caret expects the predictions as `data` and the observed
# classes as `reference`. Both factors get the full set of levels so the call
# still works if some class is never predicted.
class.levels <- seq_len(num.class)
confusionMatrix(
  data = factor(pred.cv, levels = class.levels),
  reference = factor(y + 1, levels = class.levels)
)
# Output recorded from the original run, which passed the two arguments the
# other way round: in this table the rows are the TRUE classes and the columns
# are the PREDICTED classes. The overall statistics are unaffected.
#> Confusion Matrix and Statistics
#>
#>           Reference
#> Prediction    1    2    3    4    5
#>          1 5568   10    2    0    0
#>          2   16 3766   15    0    0
#>          3    0   24 3378   19    1
#>          4    0    1   16 3196    3
#>          5    0    1    3    8 3595
#>
#> Overall Statistics
#>
#>                Accuracy : 0.9939
#>                  95% CI : (0.9927, 0.995)
#>     No Information Rate : 0.2846
#>     P-Value [Acc > NIR] : < 2.2e-16
#>
#>                   Kappa : 0.9923
#>  Mcnemar's Test P-Value : NA
# ---- Final model ----
# Refit on the full training data, using the number of rounds selected by
# cross-validation (min.merror.idx). system.time() reports how long the fit
# takes. `params` is written out in full rather than relying on partial
# matching of `param=`.
system.time(
  bst <- xgboost(
    params = param, data = train.matrix, label = y,
    nrounds = min.merror.idx, verbose = 1, missing = NaN
  )
)
[0] train-merror:0.048721 [1] train-merror:0.024564 [2] train-merror:0.017124 [3] train-merror:0.013352 [4] train-merror:0.010346 [5] train-merror:0.007746 [6] train-merror:0.006523 [7] train-merror:0.004842 [8] train-merror:0.003822 [9] train-merror:0.003058 [10] train-merror:0.002650 [11] train-merror:0.002344 [12] train-merror:0.001835 [13] train-merror:0.001478 [14] train-merror:0.000866 [15] train-merror:0.000866 [16] train-merror:0.000663 [17] train-merror:0.000561 [18] train-merror:0.000561 [19] train-merror:0.000408 [20] train-merror:0.000255 [21] train-merror:0.000204 [22] train-merror:0.000204 [23] train-merror:0.000204 [24] train-merror:0.000153 [25] train-merror:0.000153 [26] train-merror:0.000153 [27] train-merror:0.000153 [28] train-merror:0.000153 [29] train-merror:0.000153 [30] train-merror:0.000102 [31] train-merror:0.000051 [32] train-merror:0.000051 [33] train-merror:0.000051 [34] train-merror:0.000051 [35] train-merror:0.000051 [36] train-merror:0.000051 [37] train-merror:0.000051 [38] train-merror:0.000051 [39] train-merror:0.000000 [40] train-merror:0.000000 [41] train-merror:0.000000 [42] train-merror:0.000000 [43] train-merror:0.000000 [44] train-merror:0.000000 [45] train-merror:0.000000 [46] train-merror:0.000000 [47] train-merror:0.000000 [48] train-merror:0.000000 [49] train-merror:0.000000 [50] train-merror:0.000000 [51] train-merror:0.000000 [52] train-merror:0.000000 [53] train-merror:0.000000 [54] train-merror:0.000000 [55] train-merror:0.000000 [56] train-merror:0.000000 [57] train-merror:0.000000 [58] train-merror:0.000000 [59] train-merror:0.000000 [60] train-merror:0.000000 [61] train-merror:0.000000 [62] train-merror:0.000000 [63] train-merror:0.000000 [64] train-merror:0.000000 [65] train-merror:0.000000 [66] train-merror:0.000000 [67] train-merror:0.000000 [68] train-merror:0.000000 [69] train-merror:0.000000 [70] train-merror:0.000000 [71] train-merror:0.000000 [72] train-merror:0.000000 [73] train-merror:0.000000 [74] 
train-merror:0.000000 [75] train-merror:0.000000 [76] train-merror:0.000000 [77] train-merror:0.000000 [78] train-merror:0.000000 [79] train-merror:0.000000 [80] train-merror:0.000000 [81] train-merror:0.000000 [82] train-merror:0.000000 [83] train-merror:0.000000 [84] train-merror:0.000000 [85] train-merror:0.000000 [86] train-merror:0.000000 [87] train-merror:0.000000 [88] train-merror:0.000000 [89] train-merror:0.000000 [90] train-merror:0.000000 [91] train-merror:0.000000 [92] train-merror:0.000000 [93] train-merror:0.000000 [94] train-merror:0.000000 [95] train-merror:0.000000 [96] train-merror:0.000000 [97] train-merror:0.000000 [98] train-merror:0.000000 [99] train-merror:0.000000 [100] train-merror:0.000000 [101] train-merror:0.000000 [102] train-merror:0.000000 [103] train-merror:0.000000 [104] train-merror:0.000000 [105] train-merror:0.000000 [106] train-merror:0.000000 [107] train-merror:0.000000 [108] train-merror:0.000000 [109] train-merror:0.000000 [110] train-merror:0.000000 [111] train-merror:0.000000 [112] train-merror:0.000000 [113] train-merror:0.000000 [114] train-merror:0.000000 [115] train-merror:0.000000 [116] train-merror:0.000000 [117] train-merror:0.000000 [118] train-merror:0.000000 [119] train-merror:0.000000 [120] train-merror:0.000000 [121] train-merror:0.000000 [122] train-merror:0.000000 [123] train-merror:0.000000 [124] train-merror:0.000000 [125] train-merror:0.000000 [126] train-merror:0.000000 [127] train-merror:0.000000 [128] train-merror:0.000000 [129] train-merror:0.000000 [130] train-merror:0.000000 [131] train-merror:0.000000 [132] train-merror:0.000000 [133] train-merror:0.000000 [134] train-merror:0.000000 [135] train-merror:0.000000 [136] train-merror:0.000000 [137] train-merror:0.000000 [138] train-merror:0.000000 [139] train-merror:0.000000 [140] train-merror:0.000000 [141] train-merror:0.000000 [142] train-merror:0.000000 [143] train-merror:0.000000 [144] train-merror:0.000000 [145] train-merror:0.000000 [146] 
train-merror:0.000000 [147] train-merror:0.000000 [148] train-merror:0.000000 user system elapsed 198.51 8.15 64.87
# ---- Predict the test set ----
# Raw output is one flat probability vector: num.class consecutive values per
# test case.
pred <- predict(bst, test.matrix, missing = NaN)
head(pred, 10)

# Reshape to one row per test case / one column per class, pick the most
# probable class (ties go to the last column) and map 1..5 to "A".."E".
class.prob <- matrix(pred, ncol = num.class, byrow = TRUE)
pred <- max.col(class.prob, ties.method = "last")
pred.char <- LETTERS[pred]
# ---- Feature importance ----
# Text dump of the trained trees, including split statistics
model <- xgb.dump(bst, with.stats = TRUE)

# Real feature names, taken from the training matrix columns
feature_names <- colnames(train.matrix)

# Importance table for the fitted booster
importance_matrix <- xgb.importance(feature_names, model = bst)

# Plot it
gp <- xgb.plot.importance(importance_matrix)
print(gp)
# ---- Save the predicted class letters ----
write.csv(x = pred.char, file = "answers.csv", quote = FALSE)