# 简单介绍一下R语言的几个分类算法,便于以后查找。

# install.packages("DMwR")
# install.packages("kernlab")
# install.packages("Hmisc")
# install.packages("corrplot")
# install.packages("mlogit")
# install.packages("randomForest")
# install.packages("e1071")

# Decision tree: fit one model on a single train/test split of iris.
# Seed the RNG so the random split below is reproducible.
set.seed(42)
# rpart supplies the decision-tree (recursive partitioning) model.
library(rpart)
data("iris")
# Draw 105 of the 150 row indices (70%) to serve as the training set.
ind <- sample(seq_len(150), size = 105)
# method = "class" requests a classification tree (the response is a factor).
m <- rpart(
  formula = Species ~ .,
  data = iris[ind, ],
  method = "class"
)
summary(m)
print(m)

# Draw the fitted tree so the split rules can be read off the plot.
rpart.plot::rpart.plot(m)
# Predict the held-out rows; column 5 (Species) is dropped from the inputs.
pre <- predict(m, newdata = iris[-ind, -5], type = "class")
# Confusion matrix: predicted class against the true class.
table(pre, iris[-ind, "Species"])
# Share of held-out rows that were classified correctly.
sum(pre == iris[-ind, "Species"]) / length(pre)
# Repeat the decision-tree experiment on 100 random 105/45 splits.
# result3[i] holds the share of held-out rows classified correctly in run i.
# Preallocate the accumulator rather than growing it from a scalar.
result3 <- numeric(100)
for (i in seq_len(100)) {
  # Fresh random split: 105 training rows, the remaining 45 held out.
  ind <- sample(1:150, 105)
  m <- rpart(formula = Species ~ ., data = iris[ind, ], method = "class")
  # Column 5 (Species) is dropped from the prediction inputs.
  pre <- predict(m, iris[-ind, -5], type = "class")
  # No table() call here: inside a loop its value is neither printed nor
  # stored, so it would be dead code.
  result3[i] <- sum(pre == iris$Species[-ind]) / length(pre)
}
# Average share of correct predictions over the 100 runs.
mean(result3)
# Confusion matrix of the last split only (`pre` and `ind` keep the values
# from the final loop iteration).
table(pre, iris[-ind, 5])

# Support vector machine (kernlab::ksvm) on 100 random 105/45 splits.
library(kernlab)
# result4[i] holds the share of held-out rows classified correctly in run i.
# Preallocate the accumulator rather than growing it from a scalar.
result4 <- numeric(100)
for (i in seq_len(100)) {
  # Fresh random split: 105 training rows, the remaining 45 held out.
  ind <- sample(1:150, 105)
  # `type` selects the task; "C-svc" is C-classification (ksvm also offers
  # regression types). C = 10 is the cost-of-constraint-violation parameter.
  k <- ksvm(Species ~ ., data = iris[ind, ], C = 10, type = "C-svc")
  pre <- predict(k, iris[-ind, ])
  result4[i] <- sum(pre == iris$Species[-ind]) / length(pre)
}
# Average share of correct predictions over the 100 runs.
mean(result4)
# Confusion matrix of the last split only.
table(pre, iris[-ind, 5])


# Random forest on 100 random 105/45 splits.
library(randomForest)
# result5[i] holds the share of held-out rows classified correctly in run i.
# Preallocate the accumulator rather than growing it from a scalar.
result5 <- numeric(100)
for (i in seq_len(100)) {
  # Fresh random split: 105 training rows, the remaining 45 held out.
  ind <- sample(1:150, 105)
  # Package defaults are used; options such as `ntree` or `proximity` can be
  # passed here if needed.
  k <- randomForest(Species ~ ., data = iris[ind, ])
  pre <- predict(k, iris[-ind, ])
  result5[i] <- sum(pre == iris$Species[-ind]) / length(pre)
}
# Average share of correct predictions over the 100 runs.
mean(result5)
# Confusion matrix of the last split only.
table(pre, iris[-ind, 5])


# Naive Bayes (e1071::naiveBayes) on 100 random 105/45 splits.
library(e1071)
# result6[i] holds the share of held-out rows classified correctly in run i.
# Preallocate the accumulator rather than growing it from a scalar.
result6 <- numeric(100)
for (i in seq_len(100)) {
  # Fresh random split: 105 training rows, the remaining 45 held out.
  ind <- sample(1:150, 105)
  k <- naiveBayes(Species ~ ., data = iris[ind, ])
  pre <- predict(k, iris[-ind, ])
  result6[i] <- sum(pre == iris$Species[-ind]) / length(pre)
}
# Average share of correct predictions over the 100 runs.
mean(result6)
# Confusion matrix of the last split only.
table(pre, iris[-ind, 5])

# Compare the models.
# Confusion matrix of the most recent predictions (`pre` and `ind` hold
# whatever the last executed split left behind).
table(pre, iris[-ind, 5])
# Long-format table with one row per (method, run). The `precision` column
# holds the share of correctly classified held-out rows stored in
# result3..result6. gl() labels consecutive runs of 100 values, so each
# result vector is expected to have length 100, in the order given below.
result <- data.frame(
  precision = c(result3, result4, result5, result6),
  method = gl(
    n = 4,
    k = 100,
    labels = c("decision", "SVM", "randomForest", "naiveBayes")
  )
)

# ggplot2 is not attached anywhere in the visible script, so its functions
# are called through the namespace; a bare ggplot() call would fail with
# "could not find function".
ggplot2::ggplot(
  data = result,
  mapping = ggplot2::aes(y = precision, x = method, fill = method)
) +
  ggplot2::geom_boxplot()

# [图片占位:此处原为一张插图]

# 更多推荐