R语言机器学习
简单介绍一下 R 语言的几个分类算法,便于以后查找。
·
简单介绍一下 R 语言的几个分类算法(决策树、支持向量机、随机森林、朴素贝叶斯),便于以后查找。
# install.packages("DMwR")
# install.packages("kernlab")
# install.packages("Hmisc")
# install.packages("corrplot")
# install.packages("mlogit")
# install.packages("randomForest")
# install.packages("e1071")
# install.packages("rpart.plot")
# install.packages("ggplot2")
# ---- Decision tree: one train/test split ----
# rpart supplies the recursive-partitioning tree fitted below.
library(rpart)
data("iris")

# Seed the RNG so the random split that follows is reproducible.
set.seed(42)

# Draw 105 of the 150 row indices (70%) for training; the remaining
# 45 rows are held out for evaluation.
ind <- sample(seq_len(150), size = 105)
holdout <- iris[-ind, ]

# Species is the response, so a classification tree is requested.
m <- rpart(Species ~ ., data = iris[ind, ], method = "class")
summary(m)
print(m)

# Draw the fitted tree to inspect the splitting rules.
rpart.plot::rpart.plot(m)

# Predict class labels for the held-out rows; column 5 (Species) is
# dropped from the predictor data.
pre <- predict(m, newdata = holdout[, -5], type = "class")

# Confusion matrix: predicted class (rows) against true class (columns).
table(pre, holdout$Species)

# Hold-out accuracy: share of held-out rows predicted correctly.
sum(holdout$Species == pre) / length(pre)
# ---- Decision tree: 100 random 70/30 splits ----
# Repeat the random train/test split and record the hold-out accuracy of
# every run, so the spread across splits can be inspected afterwards.
n_runs <- 100
n_obs <- nrow(iris)
n_train <- round(0.7 * n_obs) # 105 of the 150 iris rows

# Preallocate the accuracy vector instead of growing it from a scalar.
result3 <- numeric(n_runs)
for (i in seq_len(n_runs)) {
  # Fresh random training indices for this run.
  ind <- sample(n_obs, n_train)
  m <- rpart(formula = Species ~ ., data = iris[ind, ], method = "class")
  # Predict the held-out rows; column 5 (Species) is dropped from the input.
  pre <- predict(m, iris[-ind, -5], type = "class")
  # Accuracy: share of held-out rows whose predicted class is correct.
  result3[i] <- mean(pre == iris$Species[-ind])
}

# Average hold-out accuracy over all runs.
mean(result3)
# Confusion matrix of the final run only (pre/ind hold the last iteration).
table(pre, iris[-ind, 5])
# ---- Support vector machine: 100 random 70/30 splits ----
library(kernlab)

n_runs <- 100
n_obs <- nrow(iris)
n_train <- round(0.7 * n_obs) # 105 of the 150 iris rows

# Preallocate the accuracy vector instead of growing it from a scalar.
result4 <- numeric(n_runs)
for (i in seq_len(n_runs)) {
  # Fresh random training indices for this run.
  ind <- sample(n_obs, n_train)
  # `type` selects between classification and regression variants;
  # "C-svc" is C-classification, here with cost parameter C = 10.
  k <- ksvm(Species ~ ., data = iris[ind, ], C = 10, type = "C-svc")
  pre <- predict(k, iris[-ind, ])
  # Accuracy: share of held-out rows whose predicted class is correct.
  result4[i] <- mean(pre == iris$Species[-ind])
}

# Average hold-out accuracy over all runs.
mean(result4)
# Confusion matrix of the final run only (pre/ind hold the last iteration).
table(pre, iris[-ind, 5])
# ---- Random forest: 100 random 70/30 splits ----
library(randomForest)

n_runs <- 100
n_obs <- nrow(iris)
n_train <- round(0.7 * n_obs) # 105 of the 150 iris rows

# Preallocate the accuracy vector instead of growing it from a scalar.
result5 <- numeric(n_runs)
for (i in seq_len(n_runs)) {
  # Fresh random training indices for this run.
  ind <- sample(n_obs, n_train)
  # Package defaults are used; options such as `ntree = 100` or
  # `proximity = TRUE` could be passed here.
  k <- randomForest(Species ~ ., data = iris[ind, ])
  pre <- predict(k, iris[-ind, ])
  # Accuracy: share of held-out rows whose predicted class is correct.
  result5[i] <- mean(pre == iris$Species[-ind])
}

# Average hold-out accuracy over all runs.
mean(result5)
# Confusion matrix of the final run only (pre/ind hold the last iteration).
table(pre, iris[-ind, 5])
# ---- Naive Bayes: 100 random 70/30 splits ----
library(e1071)

n_runs <- 100
n_obs <- nrow(iris)
n_train <- round(0.7 * n_obs) # 105 of the 150 iris rows

# Preallocate the accuracy vector instead of growing it from a scalar.
result6 <- numeric(n_runs)
for (i in seq_len(n_runs)) {
  # Fresh random training indices for this run.
  ind <- sample(n_obs, n_train)
  k <- naiveBayes(Species ~ ., data = iris[ind, ])
  pre <- predict(k, iris[-ind, ])
  # Accuracy: share of held-out rows whose predicted class is correct.
  result6[i] <- mean(pre == iris$Species[-ind])
}

# Average hold-out accuracy over all runs.
mean(result6)
# Confusion matrix of the final run only (pre/ind hold the last iteration).
table(pre, iris[-ind, 5])
# ---- Compare the four classifiers ----
# ggplot(), aes() and geom_boxplot() live in ggplot2, which has to be
# attached before the plot call below can be resolved.
library(ggplot2)

# One label per classifier, in the same order as the accuracy vectors.
method_labels <- c("decision", "SVM", "randomForest", "naiveBayes")
accuracy_runs <- list(result3, result4, result5, result6)

# Long-format table with one row per run. The `precision` column holds the
# per-run hold-out accuracy; `method` is sized from the actual number of
# runs per classifier rather than a hard-coded count.
result <- data.frame(
  precision = unlist(accuracy_runs),
  method = factor(
    rep(method_labels, times = lengths(accuracy_runs)),
    levels = method_labels
  )
)

# Side-by-side boxplots of the accuracy distribution per classifier.
ggplot(
  data = result,
  mapping = aes(y = precision, x = method, fill = method)
) +
  geom_boxplot()
更多推荐
已为社区贡献1条内容
所有评论(0)