Machine Learning
The data are length measurements taken on 100 leaves of each of 3 grape varieties: malbec, merlot, and sauvignon blanc. The data were provided by the researcher João Peterson Pereira Gardin. The leaf area values were determined by image analysis of the scanner-digitized leaves using the EBImage package.
#-----------------------------------------------------------------------
# Packages.
library(lattice)
library(latticeExtra)
#-----------------------------------------------------------------------
# Data hosted on the web.
url <- "http://www.leg.ufpr.br/~walmes/data/areafoliarUva.txt"
uva <- read.table(url, header = TRUE, sep = "\t",
                  stringsAsFactors = FALSE)
uva$cult <- factor(uva$cult)
uva$id <- NULL
# Lateral vein length: mean of the right and left sides.
uva$nl <- with(uva, apply(cbind(nld, nle), 1, mean))
uva <- subset(uva, select = -c(nld, nle))
str(uva)
## 'data.frame': 300 obs. of 7 variables:
## $ cult: Factor w/ 3 levels "malbec","merlot",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ area: num 100.8 85.8 119.5 137 84.7 ...
## $ mc : num 12 11.5 12.5 15.5 10 12 15.5 17.5 13.5 13.3 ...
## $ nc : num 7.5 9 8.5 10 7 8.5 11 13 10 9.5 ...
## $ ml : num 12.8 10.5 13 14.4 11 12 14 14 12 15 ...
## $ cll : num 9.5 9.5 10.2 12 7.5 8.9 13.5 10.8 9.7 10.3 ...
## $ nl : num 6.95 7.75 8.8 9.5 6.75 ...
splom(uva[-(1:2)],
      groups = uva$cult,
      auto.key = TRUE,
      as.matrix = TRUE,
      cex = 0.2)
# IMPORTANT: standardize the variables to reduce scale problems.
uva$larea <- log10(uva$area)
uva$area <- NULL
uva[, -1] <- sapply(uva[, -1], FUN = scale)
str(uva)
## 'data.frame': 300 obs. of 7 variables:
## $ cult : Factor w/ 3 levels "malbec","merlot",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ mc : num -0.377 -0.519 -0.235 0.617 -0.945 ...
## $ nc : num -0.765 -0.191 -0.382 0.192 -0.956 ...
## $ ml : num 0.0462 -0.7287 0.1135 0.5852 -0.5603 ...
## $ cll : num -0.391 -0.391 -0.139 0.511 -1.113 ...
## $ nl : num -0.699 -0.3 0.225 0.574 -0.799 ...
## $ larea: num -0.109 -0.4 0.2 0.448 -0.425 ...
# Check that the means are 0 and the variances are 1.
# apply(uva[, -1], 2, mean, na.rm = TRUE)
# apply(uva[, -1], 2, sd, na.rm = TRUE)
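Running the (commented) checks above is a one-liner each; a minimal sketch, with rounding used only to make the output readable:
round(apply(uva[, -1], 2, mean, na.rm = TRUE), 3)  # All ~0 after scale().
round(apply(uva[, -1], 2, sd, na.rm = TRUE), 3)    # All 1 after scale().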
kernlab
library(kernlab)
# Call with only two classes, to simplify the learning problem.
da <- uva
levels(da$cult) <- c("malbec-merlot", "malbec-merlot", "sauvignonblanc")
table(da$cult)
##
## malbec-merlot sauvignonblanc
## 200 100
m0 <- ksvm(cult ~ ., data = da)
m0
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.374508321409246
##
## Number of Support Vectors : 209
##
## Objective Function Value : -175.957
## Training error : 0.253333
# Class and available methods.
class(m0)
## [1] "ksvm"
## attr(,"package")
## [1] "kernlab"
isS4(m0)
## [1] TRUE
methods(class = class(m0))
## [1] alphaindex alpha b coef cross error
## [7] fitted kcall kernelf kpar lev nSV
## [13] obj param plot predict prior prob.model
## [19] scaling show SVindex type xmatrix ymatrix
## see '?methods' for accessing help and source code
# Number of support vectors.
nSV(m0)
## [1] 209
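The other S4 accessors listed by methods() follow the same pattern; for example, a small sketch locating the support vectors among the training rows:
# Training-row indices of the support vectors (SVindex accessor).
head(SVindex(m0))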
# Classification of the training observations.
table(fitted(m0))
##
## malbec-merlot sauvignonblanc
## 240 60
# Classification performance.
abs(c("Acerto" = 1, "Erro" = 0) - error(m0))
## Acerto Erro
## 0.7466667 0.2533333
# ATTENTION: beware that a naive (random-guess) model would already get 1/2 right.
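For reference, the naive baselines can be read off the class proportions shown above; a minimal sketch:
# Naive baselines: random guessing over two classes gives 1/2; always
# predicting the majority class ("malbec-merlot") gives 2/3.
prop.table(table(da$cult))
max(prop.table(table(da$cult)))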
# SVM parameters.
param(m0)
## $C
## [1] 1
# Only works for binary classification with two predictors.
# plot(m0)
splom(~da[, -1] | da$cult,
      groups = fitted(m0),
      auto.key = list(title = "Classification"))
# Confusion matrix.
ct <- table(fitted(m0), da$cult)
prop.table(ct)
##
## malbec-merlot sauvignonblanc
## malbec-merlot 0.6066667 0.1933333
## sauvignonblanc 0.0600000 0.1400000
# Percentage of correct classifications (seen above).
100 * sum(diag(ct))/sum(ct)
## [1] 74.66667
#-----------------------------------------------------------------------
# Using kernel functions and doing arbitrary tuning.
# Now classifying the 3 grape varieties.
m0 <- ksvm(cult ~ .,
           data = uva,
           scaled = TRUE,            # Standardize to mean 0 and variance 1.
           type = "C-svc",           # Task type: classification, regression, etc.
           C = 50,                   # Penalty (cost) parameter.
           kernel = "rbfdot",        # Kernel function.
           kpar = list(sigma = 0.1), # Kernel function parameters.
           cross = 1)                # Number of folds for cross-validation.
##
## cross should be >1 no cross-validation done!
##
m0
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 50
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.1
##
## Number of Support Vectors : 230
##
## Objective Function Value : -4267.729 -5455.378 -3393.475
## Training error : 0.303333
# ATTENTION: in this confusion matrix the rows are the observed classes
# and the columns are the fitted ones, transposed relative to before.
ct <- table(uva$cult, fitted(m0))
prop.table(ct)
##
## malbec merlot sauvignonblanc
## malbec 0.19666667 0.05333333 0.08333333
## merlot 0.04666667 0.24666667 0.04000000
## sauvignonblanc 0.06666667 0.01333333 0.25333333
# Mosaic plot of the confusion matrix.
mosaicplot(ct,
           color = RColorBrewer::brewer.pal(n = nlevels(uva$cult),
                                            name = "Spectral"))
# Percentage of correct classifications.
100 * sum(diag(ct))/sum(ct)
## [1] 69.66667
1 - error(m0)
## [1] 0.6966667
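Since cross = 1 merely raises the warning above and skips cross-validation, a variant sketch with cross = 5 (same specification otherwise) also yields a cross-validation error, available through the cross() accessor:
# Sketch: same fit, but with 5-fold cross-validation enabled.
m_cv <- ksvm(cult ~ .,
             data = uva,
             scaled = TRUE,
             type = "C-svc",
             kernel = "rbfdot",
             kpar = list(sigma = 0.1),
             C = 50,
             cross = 5)
cross(m_cv)  # Cross-validated error estimate.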
#-----------------------------------------------------------------------
# More variations.
# To reduce the number of support vectors.
m0 <- ksvm(cult ~ .,
           data = uva,
           scaled = TRUE,
           type = "C-svc",
           kernel = "rbfdot",
           kpar = list(sigma = 0.01),
           C = 1e5,
           cross = 1)
##
## cross should be >1 no cross-validation done!
##
m0
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1e+05
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.01
##
## Number of Support Vectors : 198
##
## Objective Function Value : -7051232 -8357558 -4886157
## Training error : 0.23
1 - error(m0)
## [1] 0.77
# Linear (vanilla) kernel.
m0 <- ksvm(cult ~ .,
           data = uva,
           scaled = TRUE,
           type = "nu-svc",
           kernel = "vanilladot",
           nu = 0.5,
           cross = 1)
## Setting default kernel parameters
##
## cross should be >1 no cross-validation done!
##
m0
## Support Vector Machine object of class "ksvm"
##
## SV type: nu-svc (classification)
## parameter : nu = 0.5
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 225
##
## Objective Function Value : 42.5648 320.9918 16.3111
## Training error : 0.42
1 - error(m0)
## [1] 0.58
e1071
library(e1071)
# Formula-based specification.
m1 <- svm(cult ~ .,
          data = uva)
summary(m1)
##
## Call:
## svm(formula = cult ~ ., data = uva)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.1666667
##
## Number of Support Vectors: 263
##
## ( 98 77 88 )
##
##
## Number of Classes: 3
##
## Levels:
## malbec merlot sauvignonblanc
# str(m1)
# Number and coordinates of the support vectors.
m1$tot.nSV
## [1] 263
m1$nSV
## [1] 98 77 88
head(m1$SV)
## mc nc ml cll nl larea
## 1 -0.3771832 -0.7647780 0.04615744 -0.3910711 -0.6988537 -0.10946742
## 2 -0.5191593 -0.1908119 -0.72874853 -0.3910711 -0.2995563 -0.39990852
## 3 -0.2352071 -0.3821339 0.11354057 -0.1385344 0.2245216 0.20007331
## 4 0.6166496 0.1918322 0.58522247 0.5108456 0.5739069 0.44769609
## 5 -0.9450877 -0.9561001 -0.56029071 -1.1126045 -0.7986780 -0.42505850
## 6 -0.3771832 -0.3821339 -0.22337507 -0.6075311 0.1246973 0.03131424
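Prediction uses the standard predict() interface; a minimal sketch recycling the first training rows as if they were new observations:
# Sketch: predicted classes for "new" data (here, the first rows of uva).
predict(m1, newdata = head(uva))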
#-----------------------------------------------------------------------
# Tuning via grid search.
# tune() is an e1071 function that evaluates the model over several
# values of the hyperparameters.
# help(tune, h = "html")
# Set up.
x <- as.matrix(subset(uva, select = -cult))
y <- uva$cult
tune <- tune(method = svm,
             train.x = x,
             train.y = y,
             kernel = "radial",
             ranges = list(cost = 10^seq(-2, 5, l = 8),
                           gamma = 2^seq(-3, 3, l = 8)),
             tunecontrol = tune.control(cross = 5))
print(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost gamma
## 1000 0.125
##
## - best performance: 0.4033333
# ATTENTION: the accuracy of a naive model is 1/3.
# The error surface.
levelplot(error ~ log10(cost) + log2(gamma),
          data = tune$performances,
          contour = TRUE) +
    layer(panel.abline(v = log10(cost), h = log2(gamma), lty = 2),
          data = tune$best.model)
# Using the values optimized by cross-validation.
m2 <- svm(cult ~ .,
          data = uva,
          kernel = "radial",
          cost = tune$best.model$cost,
          gamma = tune$best.model$gamma)
summary(m2)
##
## Call:
## svm(formula = cult ~ ., data = uva, kernel = "radial", cost = tune$best.model$cost,
## gamma = tune$best.model$gamma)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1000
## gamma: 0.125
##
## Number of Support Vectors: 201
##
## ( 73 59 69 )
##
##
## Number of Classes: 3
##
## Levels:
## malbec merlot sauvignonblanc
yfit <- predict(m2)
ct <- table(yfit, y)
ct
## y
## yfit malbec merlot sauvignonblanc
## malbec 80 9 8
## merlot 8 86 3
## sauvignonblanc 12 5 89
sum(diag(ct))/sum(ct)
## [1] 0.85
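Note that 0.85 is accuracy on the training data; the cross-validated estimate stored by tune() is considerably lower, as a quick check shows:
# Cross-validated accuracy from the grid search:
# 1 - 0.4033333 = 0.5966667, well below the 0.85 training accuracy above.
1 - tune$best.performance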
The code below was refined from the material at http://rischanlab.github.io/SVM.html and works with the species data in the iris dataset.
#-----------------------------------------------------------------------
# Classifying the 3 iris species and evaluating different kernel
# functions (only two predictors are used, so the boundary can be drawn).
# Kernel function names.
ker <- c("linear", "polynomial", "radial", "sigmoid")
# Fit with each kernel option.
svm0 <- sapply(ker,
               simplify = FALSE,
               FUN = function(k) {
                   svm(Species ~ Petal.Length + Sepal.Length,
                       data = iris,
                       kernel = k)
               })
# lapply(svm0, summary)
# Number of support vectors.
colSums(sapply(svm0, getElement, "nSV"))
## linear polynomial radial sigmoid
## 49 60 57 64
# Confusion tables.
lapply(svm0,
       FUN = function(model) {
           ct <- table(iris$Species, predict(model))
           cat("Accuracy:", 100 * sum(diag(ct))/sum(ct), "\n")
           return(ct)
       })
## Accuracy: 97.33333
## Accuracy: 90
## Accuracy: 96
## Accuracy: 72.66667
## $linear
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 3
## virginica 0 1 49
##
## $polynomial
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 15 35
##
## $radial
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 2
## virginica 0 4 46
##
## $sigmoid
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 4 23 23
## virginica 0 14 36
# Generate a grid over the rectangle that contains the points.
grid <- with(iris,
             expand.grid(Sepal.Length = seq(min(Sepal.Length),
                                            max(Sepal.Length),
                                            l = 51),
                         Petal.Length = seq(min(Petal.Length),
                                            max(Petal.Length),
                                            l = 51)))
y <- lapply(svm0, FUN = predict, newdata = grid)
y <- lapply(y, as.data.frame)
names(y) <- ker
y <- plyr::ldply(y)
names(y) <- c("kernel", "y")
grid <- cbind(grid, y)
# str(grid)
# Display the classification boundary.
xyplot(Petal.Length ~ Sepal.Length | kernel,
       data = grid,
       groups = y,
       pch = 3,
       as.table = TRUE,
       aspect = 1,
       auto.key = list(columns = 3)) +
    as.layer(xyplot(Petal.Length ~ Sepal.Length,
                    data = iris,
                    pch = 19,
                    groups = Species))
caret
Adaptation based on the material at http://dataaspirant.com/2017/01/19/support-vector-machine-classifier-implementation-r-caret-package/.
Models available in caret: https://rdrr.io/cran/caret/man/models.html.
Support vector machine models: https://topepo.github.io/caret/train-models-by-tag.html#support-vector-machines.
library(caret)
packageVersion("caret")
## [1] '6.0.77'
# SVM models.
names(getModelInfo(model = "^svm"))
## [1] "svmBoundrangeString" "svmExpoString" "svmLinear"
## [4] "svmLinear2" "svmLinear3" "svmLinearWeights"
## [7] "svmLinearWeights2" "svmPoly" "svmRadial"
## [10] "svmRadialCost" "svmRadialSigma" "svmRadialWeights"
## [13] "svmSpectrumString"
# Set the seed for reproducibility.
set.seed(1234)
# Cross-validation specification.
trctrl <- trainControl(method = "repeatedcv",
                       number = 5,
                       repeats = 3,
                       returnResamp = "all",
                       savePredictions = "all")
# Fit.
svm_Linear <- train(cult ~ .,
                    data = uva,
                    method = "svmLinear",
                    trControl = trctrl)
# Fit results.
svm_Linear
## Support Vector Machines with Linear Kernel
##
## 300 samples
## 6 predictors
## 3 classes: 'malbec', 'merlot', 'sauvignonblanc'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 3 times)
## Summary of sample sizes: 240, 240, 240, 240, 240, 240, ...
## Resampling results:
##
## Accuracy Kappa
## 0.55 0.325
##
## Tuning parameter 'C' was held constant at a value of 1
# Structure.
# str(svm_Linear)
names(svm_Linear)
## [1] "method" "modelInfo" "modelType" "results"
## [5] "pred" "bestTune" "call" "dots"
## [9] "metric" "control" "finalModel" "preProcess"
## [13] "trainingData" "resample" "resampledCM" "perfNames"
## [17] "maximize" "yLimits" "times" "levels"
## [21] "terms" "coefnames" "xlevels"
# Shows that kernlab::ksvm() was called under the hood.
svm_Linear$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 253
##
## Objective Function Value : -133.5853 -139.6498 -105.8677
## Training error : 0.406667
# Confusion matrix.
confusionMatrix(svm_Linear$trainingData$.outcome,
                predict(svm_Linear))
## Confusion Matrix and Statistics
##
## Reference
## Prediction malbec merlot sauvignonblanc
## malbec 47 24 29
## merlot 20 64 16
## sauvignonblanc 20 13 67
##
## Overall Statistics
##
## Accuracy : 0.5933
## 95% CI : (0.5354, 0.6494)
## No Information Rate : 0.3733
## P-Value [Acc > NIR] : 9.81e-15
##
## Kappa : 0.39
## Mcnemar's Test P-Value : 0.5074
##
## Statistics by Class:
##
## Class: malbec Class: merlot Class: sauvignonblanc
## Sensitivity 0.5402 0.6337 0.5982
## Specificity 0.7512 0.8191 0.8245
## Pos Pred Value 0.4700 0.6400 0.6700
## Neg Pred Value 0.8000 0.8150 0.7750
## Prevalence 0.2900 0.3367 0.3733
## Detection Rate 0.1567 0.2133 0.2233
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 0.6457 0.7264 0.7113
svm_Linear$results
## C Accuracy Kappa AccuracySD KappaSD
## 1 1 0.55 0.325 0.06009252 0.09013878
str(svm_Linear$pred)
## 'data.frame': 900 obs. of 5 variables:
## $ pred : Factor w/ 3 levels "malbec","merlot",..: 3 1 2 3 1 1 3 3 2 2 ...
## $ obs : Factor w/ 3 levels "malbec","merlot",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ rowIndex: int 1 2 4 10 21 24 26 41 46 47 ...
## $ C : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Resample: chr "Fold1.Rep1" "Fold1.Rep1" "Fold1.Rep1" "Fold1.Rep1" ...
# Performance on each partition.
aggregate(cbind(acc = pred == obs) ~ Resample,
          data = svm_Linear$pred,
          FUN = mean)
## Resample acc
## 1 Fold1.Rep1 0.5666667
## 2 Fold1.Rep2 0.6000000
## 3 Fold1.Rep3 0.4666667
## 4 Fold2.Rep1 0.5833333
## 5 Fold2.Rep2 0.5166667
## 6 Fold2.Rep3 0.6333333
## 7 Fold3.Rep1 0.5666667
## 8 Fold3.Rep2 0.6000000
## 9 Fold3.Rep3 0.4833333
## 10 Fold4.Rep1 0.5000000
## 11 Fold4.Rep2 0.5666667
## 12 Fold4.Rep3 0.5166667
## 13 Fold5.Rep1 0.6166667
## 14 Fold5.Rep2 0.4333333
## 15 Fold5.Rep3 0.6000000
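The mean and standard deviation of these per-partition accuracies should reproduce the Accuracy and AccuracySD reported in svm_Linear$results; a sketch:
# Aggregate the per-fold accuracies (compare with svm_Linear$results).
acc <- aggregate(cbind(acc = pred == obs) ~ Resample,
                 data = svm_Linear$pred,
                 FUN = mean)
c(mean = mean(acc$acc), sd = sd(acc$acc))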
#-----------------------------------------------------------------------
# Tuning.
# Penalty parameter.
grid <- expand.grid(C = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75,
                          1, 1.25, 1.5, 1.75, 2, 5, 10, 20))
svm_Linear_Grid <- train(cult ~ .,
                         data = uva,
                         method = "svmLinear",
                         trControl = trctrl,
                         tuneGrid = grid)
svm_Linear_Grid
## Support Vector Machines with Linear Kernel
##
## 300 samples
## 6 predictors
## 3 classes: 'malbec', 'merlot', 'sauvignonblanc'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 3 times)
## Summary of sample sizes: 240, 240, 240, 240, 240, 240, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.01 0.4677778 0.2016667
## 0.05 0.4911111 0.2366667
## 0.10 0.5111111 0.2666667
## 0.25 0.5522222 0.3283333
## 0.50 0.5477778 0.3216667
## 0.75 0.5566667 0.3350000
## 1.00 0.5588889 0.3383333
## 1.25 0.5533333 0.3300000
## 1.50 0.5555556 0.3333333
## 1.75 0.5544444 0.3316667
## 2.00 0.5555556 0.3333333
## 5.00 0.5744444 0.3616667
## 10.00 0.5788889 0.3683333
## 20.00 0.5811111 0.3716667
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 20.
sc <- list(x = list(log = 2))
c("Performance média" =
plot(svm_Linear_Grid, scales = sc),
"Performances por partição" =
xyplot(svm_Linear_Grid, scales = sc) +
as.layer(plot(svm_Linear_Grid, scales = sc)))
#-----------------------------------------------------------------------
# Using the radial kernel.
# The C parameter is already evaluated over a grid.
svm_Radial <- train(cult ~ .,
                    data = uva,
                    method = "svmRadial",
                    trControl = trctrl,
                    tuneLength = 10)
svm_Radial
## Support Vector Machines with Radial Basis Function Kernel
##
## 300 samples
## 6 predictors
## 3 classes: 'malbec', 'merlot', 'sauvignonblanc'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 3 times)
## Summary of sample sizes: 240, 240, 240, 240, 240, 240, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.5577778 0.3366667
## 0.50 0.5644444 0.3466667
## 1.00 0.5611111 0.3416667
## 2.00 0.5700000 0.3550000
## 4.00 0.5688889 0.3533333
## 8.00 0.5577778 0.3366667
## 16.00 0.5500000 0.3250000
## 32.00 0.5411111 0.3116667
## 64.00 0.5311111 0.2966667
## 128.00 0.5255556 0.2883333
##
## Tuning parameter 'sigma' was held constant at a value of 0.6060484
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.6060484 and C = 2.
sc <- list(x = list(log = 2))
c("Performance média" =
plot(svm_Radial, scales = sc),
"Performances por partição" =
xyplot(svm_Radial, scales = sc) +
as.layer(plot(svm_Radial, scales = sc)))
svm_Radial$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 2
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 1
##
## Number of Support Vectors : 263
##
## Objective Function Value : -182.9919 -215.1982 -148.8929
## Training error : 0.253333
# Number of support vectors.
svm_Radial$finalModel@nSV
## [1] 263
# Accuracy (on the training data).
1 - error(svm_Radial$finalModel)
## [1] 0.7466667
# str(svm_Radial$finalModel)
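As done for the linear fit, the training-set confusion matrix of the final radial model can be obtained with caret; a minimal sketch:
# Sketch: confusion matrix of the tuned radial model on the training data.
confusionMatrix(predict(svm_Radial), uva$cult)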
#-----------------------------------------------------------------------
# Using the polynomial kernel.
# Grid of configurations for tuning.
grid <- expand.grid(C = 2^seq(-2, 5),
                    degree = c(2, 3),
                    scale = 1,
                    KEEP.OUT.ATTRS = FALSE)
dim(grid)
## [1] 16 3
trctrl <- trainControl(method = "cv",
                       number = 5,
                       returnResamp = "all",
                       savePredictions = "all")
# The C parameter is already evaluated over a grid.
svm_Poly <- train(cult ~ .,
                  data = uva,
                  method = "svmPoly",
                  trControl = trctrl,
                  tuneGrid = grid)
svm_Poly
## Support Vector Machines with Polynomial Kernel
##
## 300 samples
## 6 predictors
## 3 classes: 'malbec', 'merlot', 'sauvignonblanc'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 240, 240, 240, 240, 240
## Resampling results across tuning parameters:
##
## C degree Accuracy Kappa
## 0.25 2 0.6066667 0.410
## 0.25 3 0.5900000 0.385
## 0.50 2 0.6133333 0.420
## 0.50 3 0.5833333 0.375
## 1.00 2 0.6266667 0.440
## 1.00 3 0.5866667 0.380
## 2.00 2 0.6333333 0.450
## 2.00 3 0.5733333 0.360
## 4.00 2 0.6366667 0.455
## 4.00 3 0.5966667 0.395
## 8.00 2 0.6266667 0.440
## 8.00 3 0.6166667 0.425
## 16.00 2 0.6200000 0.430
## 16.00 3 0.6033333 0.405
## 32.00 2 0.6266667 0.440
## 32.00 3 0.6066667 0.410
##
## Tuning parameter 'scale' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = 1 and C = 4.
sc <- list(x = list(log = 2))
plot(svm_Poly, scales = sc)
xyplot(svm_Poly,
       scales = sc,
       type = c("p", "a"))
svm_Poly$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 4
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 1 offset = 1
##
## Number of Support Vectors : 208
##
## Objective Function Value : -335.2436 -410.6984 -263.2073
## Training error : 0.3
# Number of support vectors.
svm_Poly$finalModel@nSV
## [1] 208
# Accuracy (on the training data).
1 - error(svm_Poly$finalModel)
## [1] 0.7
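As a closing check, the linear and radial caret fits share the same repeated cross-validation scheme (15 resamples each), so their resampled accuracies are directly comparable; a sketch using caret::resamples() (svm_Poly is excluded because it used a different trainControl):
# Sketch: side-by-side summary of resampled performance.
rs <- resamples(list(linear = svm_Linear_Grid, radial = svm_Radial))
summary(rs)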