R语言实战:机器学习与数据分析源代码4

来源:互联网 发布:查看网络攻击的网站 编辑:程序博客网 时间:2024/06/10 18:38

本文辑录了《R语言实战——机器学习与数据分析》(电子工业出版社2016年出版)一书第9章至第11章之代码。本书引言请见如下链接:
http://blog.csdn.net/baimafujinji/article/details/51596171




内容简介:本书系统地介绍了统计分析和机器学习领域中最为重要和流行的多种技术及它们的基本原理,在详解有关算法的基础上,结合大量R语言实例演示了这些理论在实践中的使用方法。具体内容被分成三个部分,即R语言编程基础、基于统计的数据分析方法以及机器学习理论。统计分析与机器学习部分又具体介绍了包括参数估计、假设检验、极大似然估计、非参数检验方法(包括列联分析、符号检验、符号秩检验等)、方差分析、线性回归(包括岭回归和Lasso方法)、逻辑回归、支持向量机、聚类分析(包括K均值算法和EM算法)和人工神经网络等内容。同时,统计理论的介绍也为深化读者对于后续机器学习部分的理解提供了很大助益。知识结构和阅读进度的安排上既兼顾了循序渐进的学习规律,亦统筹考虑了夯实基础的必要性

网上书店地址

电子工业出版社官网
中国互动出版网China-pub
京东商城(1)
京东商城(2)


Chapter 9

P184~185

# P184-185: chi-square goodness-of-fit test.
# Tests whether the six observed counts are consistent with a uniform
# distribution (equal expected counts of 20 in each cell).
chisq.test(c(25, 18, 28, 20, 16, 13))

# Upper-tail p-value for a chi-square statistic of 42.252 on 6 degrees of
# freedom (FALSE spelled out; the original transcript used the unsafe `F`).
pchisq(42.252, 6, lower.tail = FALSE)

P186~188

# P186-188: chi-square tests of independence on contingency tables.

# 4x3 table: alcohol consumption (rows) by nicotine intake (columns).
alcohol.by.nicotine <- matrix(c(105,  7, 11,
                                 58,  5, 13,
                                 84, 37, 42,
                                 57, 16, 17),
                              nrow = 4, byrow = TRUE)
chisq.test(alcohol.by.nicotine)

# 2x4 table: aluminium exposure vs. Alzheimer's outcome categories.
aluminium.by.alzheimers <- matrix(c(112, 3, 5, 8,
                                    114, 9, 3, 2),
                                  nrow = 2, byrow = TRUE)
(a.by.a.test <- chisq.test(aluminium.by.alzheimers))

# Inspect the expected counts: several are below 5, so the asymptotic
# chi-square approximation is unreliable and R issues a warning.
a.by.a.test$expected

# A Monte-Carlo p-value sidesteps the small-expected-count problem.
chisq.test(aluminium.by.alzheimers, simulate.p.value = TRUE)

# Alternative remedy: collapse the two sparse columns into one (2x3 table)
# and re-run the asymptotic test.
aluminium.by.alzheimers <- matrix(c(112, 3, 13,
                                    114, 9,  5),
                                  nrow = 2, byrow = TRUE)
(a.by.a.test <- chisq.test(aluminium.by.alzheimers))

P189~190

# P189-190: hypergeometric probabilities and Fisher's exact test.

# P(X = 0..5) when drawing 5 from an urn of 31 "successes" and 16 "failures";
# 0..5 is the full support, so these probabilities sum to 1.
dhyper(c(0, 1, 2, 3, 4, 5), 31, 16, 5)

# Sum of the two most extreme tail probabilities quoted in the book.
0.002847571 + 0.036781124

# 2x2 handedness table; Fisher's exact test of independence.
handedness <- matrix(c(1, 30, 4, 12), nrow = 2, byrow = TRUE)
fisher.test(handedness)

P192~195

# P192-195: normal and binomial tail probabilities, sign-test style.

# Lower-tail normal probability for z = -2.232625.
pnorm(-2.232625)

# P(X <= 23) for X ~ Binomial(65, 0.5).
pbinom(23, 65, 0.5)

# Two-sided binomial tail: 2 * P(X <= 1) for X ~ Binomial(8, 0.5).
pbinom(1, 8, 0.5) * 2

# NOTE(review): `x` is not defined in this excerpt -- it comes from the
# book's earlier console session on this page. This call fails unless that
# vector is restored from the surrounding context.
binom.test(sum(x > 99), length(x), alternative = "less")

# Two-sided sign-test p-value: 2 * P(X <= 4) for X ~ Binomial(19, 0.5).
pbinom(4, 19, prob = 0.5, lower.tail = TRUE) * 2

P197~198

# P197-198: Wilcoxon signed-rank tests (one-sample and paired).

# One-sample test of H0: median = 8 against H1: median > 8.
# (`<-` used for assignment; the original transcript used `=`.)
x <- c(4.12, 5.81, 7.63, 9.74, 10.39, 11.92, 12.32, 12.89, 13.54, 14.45)
wilcox.test(x - 8, alternative = "greater")

# Paired data: x and y measured on the same nine subjects.
x <- c(1.83, 0.50, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
## wilcox.test(y - x, alternative = "less")
wilcox.test(x, y, paired = TRUE, alternative = "greater")

# Same test via the differences, using the normal approximation without
# continuity correction.
wilcox.test(y - x, alternative = "less",
            exact = FALSE, correct = FALSE)

P201

# P201: Wilcoxon rank-sum (Mann-Whitney) test, exact p-value.
# H1: the placebo group's distribution is shifted below the alcohol group's.
placebo <- c(0.90, 0.37, 1.63, 0.83, 0.95,
             0.78, 0.86, 0.61, 0.38, 1.97)
alcohol <- c(1.46, 1.45, 1.76, 1.44, 1.11,
             3.07, 0.98, 1.27, 2.56, 1.32)
wilcox.test(placebo, alcohol, alternative = "less", exact = TRUE)

P203~204

# P203-204: rank-sum test via the normal approximation.

# Upper-tail normal probability for z = 3.817159.
pnorm(3.817159, lower.tail = FALSE)

# Paired-page example: lap times before and after a modification.
before <- c(11.0, 11.2, 11.2, 11.2, 11.4,
            11.5, 11.6, 11.7, 11.8, 11.9, 11.9, 12.1)
after <- c(10.2, 10.3, 10.4, 10.6, 10.6,
           10.7, 10.8, 10.8, 10.9, 11.1, 11.1, 11.3)
# Normal approximation without continuity correction (exact = FALSE).
wilcox.test(before, after, alternative = "greater",
            exact = FALSE, correct = FALSE)

P206~207

# P206-207: Kruskal-Wallis test for three independent samples.

# p-value for the Kruskal-Wallis statistic 7.505538 on 2 df.
pchisq(df = 2, 7.505538, lower.tail = FALSE)

x <- c(4.2, 3.3, 3.7, 4.3, 4.1, 3.3)
y <- c(4.5, 4.4, 3.5, 4.2, 4.6, 4.2)
z <- c(5.6, 3.6, 4.5, 5.1, 4.9, 4.7)
kruskal.test(list(x, y, z))

Chapter 10

P211

# P211: load the RACV fuel-consumption data and plot consumption against
# vehicle mass with the least-squares line overlaid.
# NOTE(review): requires c:/racv.csv with columns lp100km and mass.kg;
# the file is not part of this excerpt.
cars <- read.csv("c:/racv.csv")
plot(lp100km ~ mass.kg, data = cars,
     xlab = "Mass (kg)", ylab = "Fuel consumption (l/100km)")
abline(lm(lp100km ~ mass.kg, data = cars))

P216~218

# P216-218: simple linear regression of plant height on age.
# Six ages (2..7 years), four plants measured at each age.
plants <- data.frame(
  age = rep(2:7, rep(4, 6)),
  height = c(5.6, 4.8, 5.3, 5.7, 6.2, 5.9, 6.4, 6.1,
             6.2, 6.7, 6.4, 6.7, 7.1, 7.3, 6.9, 6.9,
             7.2, 7.5, 7.8, 7.8, 8.9, 9.2, 8.5, 8.7)
)
plants.lm <- lm(height ~ age, data = plants)
summary(plants.lm)
plot(height ~ age, data = plants)
abline(plants.lm)

P219

# P219: fit the fuel-consumption model and show the four standard
# regression diagnostic plots in a 2x2 grid.
# NOTE(review): `cars` here is the data frame read from c:/racv.csv on
# P211, not R's built-in `cars` dataset.
cars.lm <- lm(lp100km ~ mass.kg, data = cars)
par(mfrow = c(2, 2))
plot(cars.lm)

P222

# P222: manually reproduce the "Residuals vs Leverage" diagnostic panel.
# NOTE(review): Std_Residuals and Leverage are built earlier in the book's
# session (standardized residuals and hat values of cars.lm) and are not
# defined in this excerpt -- confirm against the surrounding pages.
plot(Std_Residuals ~ Leverage, xlab = "Leverage",
     ylab = "Standardized residuals",
     xlim = c(0, 0.21), ylim = c(-2, 2), main = "Residuals vs Leverage")
abline(v = 0.0, h = 0.0, lty = 3, col = "gray60")
par(new = TRUE)
# Overlay a lowess smoother, as plot.lm does.
lines(lowess(Std_Residuals ~ Leverage), col = 'red')

P227

# P227: coefficient table and 95% confidence intervals for the
# fuel-consumption model fitted on P219.
summary(cars.lm)
confint(cars.lm)

P232~234

# P232-234: F-test of the regression and its equivalence to the t-test.

# 5% critical value of F(1, 22).
qf(0.05, 1, 22, lower.tail = FALSE)

# p-value of the observed F statistic.
pf(248.2238923, 1, 22, lower.tail = FALSE)

# Two-sided p-value of the equivalent t statistic (t^2 = F when df1 = 1).
2 * (1 - pt(15.75484, 22))

P237~238

# P237-238: confidence vs. prediction interval for mean height at age 4,
# using the plants.lm model fitted on P216-218.
predict(plants.lm,
        newdata = data.frame(age = 4),
        interval = "confidence")
# The prediction interval is wider: it covers a single new observation,
# not just the mean response.
predict(plants.lm,
        newdata = data.frame(age = 4),
        interval = "prediction")

Chapter 11

P244

# P244: multiple regression of phosphate absorption index on extractable
# iron and aluminium (13 soil samples).
# (`<-` used for assignment; the original transcript used `=`.)
pai <- c(4, 18, 14, 18, 26, 26, 21, 30, 28, 36, 65, 62, 40)
iron <- c(61, 175, 111, 124, 130, 173, 169, 169, 160, 244, 257, 333, 199)
aluminium <- c(13, 21, 24, 23, 64, 38, 33, 61, 39, 71, 112, 88, 54)

# Scatterplot matrix of the three variables.
pairs(~ pai + iron + aluminium,
      labels = c("Phosphate \nAbsorption Index",
                 "Amount of \nExtractable Iron",
                 "Amount of \nExtractable Aluminium"),
      main = "Scatterplot Matrices")

soil.lm <- lm(pai ~ iron + aluminium)
summary(soil.lm)

P248~250

# P248-250: p-values for the soil model's overall F-test and the
# individual two-sided t-tests on 10 residual degrees of freedom.
pf(92.03, 2, 10, lower.tail = FALSE)
2 * (1 - pt(2.109, 10))
2 * (1 - pt(3.797, 10))
2 * (1 - pt(4.894, 10))

P251~252

# P251-252: prediction and confidence intervals for the soil model at
# iron = 150, aluminium = 40, then the same intervals reconstructed by hand
# (point estimate 23.51929, t crit 2.228139, leverage term 0.08958766,
# residual standard error 4.379375 -- values quoted from the book).
predict(soil.lm, newdata = data.frame(iron = 150, aluminium = 40),
        interval = "prediction")
qt(0.025, 10, lower.tail = FALSE)
soil.lm$residuals
# Prediction interval: sqrt(1 + leverage) includes the new observation's
# own error variance.
23.51929 - 2.228139 * sqrt(1 + 0.08958766) * 4.379375
23.51929 + 2.228139 * sqrt(1 + 0.08958766) * 4.379375
predict(soil.lm, newdata = data.frame(iron = 150, aluminium = 40),
        interval = "confidence")
# Confidence interval: sqrt(leverage) only -- uncertainty in the mean.
23.51929 - 2.228139 * sqrt(0.08958766) * 4.379375
23.51929 + 2.228139 * sqrt(0.08958766) * 4.379375

P256~257

# P256-257: Phillips curve -- linear model vs. reciprocal model.
x <- c(6.8, 5.5, 5.5, 6.7, 5.5, 5.7, 5.2, 4.5, 3.8, 3.8, 3.6, 3.5)
y <- c(4.2, 3.5, 3.4, 3.0, 3.4, 2.8, 2.8, 3.6, 4.3, 5.0, 6.1, 6.7)
phillips.lm.1 <- lm(y ~ x)
coef(phillips.lm.1)

# Reciprocal model: regress y on 1/x.
x.rec <- 1 / x
phillips.lm.2 <- lm(y ~ x.rec)
coef(phillips.lm.2)

par(mfrow = c(1, 2))
plot(y ~ x, xlim = c(3.5, 7), ylim = c(2.8, 7),
     main = "Phillips Curve (Linear Model)")
# BUG FIX: the original called abline(phillips.lm, ...), but no object of
# that name exists; the linear fit is stored in phillips.lm.1.
abline(phillips.lm.1, col = "red")
plot(y ~ x, xlim = c(3.5, 7), ylim = c(2.8, 7),
     main = "Phillips Curve (Reciprocal Model)")
par(new = TRUE)
# Overlay the fitted reciprocal curve (coefficients from phillips.lm.2).
curve(-0.2594 + 20.5879 * (1 / x), xlim = c(3.5, 7), ylim = c(2.8, 7),
      col = "red", ylab = "", xlab = "")

P259~261

# P259-261: compare the linear fit with a cubic polynomial fit on the
# plants data from P216-218 (data frame defined there, not here).
plants.lm.1 <- lm(height ~ age, data = plants)
# I() protects ^ from formula interpretation so age^2/age^3 are literal powers.
plants.lm.3 <- lm(height ~ age + I(age^2) + I(age^3), data = plants)
summary(plants.lm.3)
plot(height ~ age, data = plants, xlab = "Age (y)", ylab = "Height (ft)")
curve(predict(plants.lm.1,
              newdata = data.frame(age = x)), add = TRUE, col = "red")
curve(predict(plants.lm.3,
              newdata = data.frame(age = x)), add = TRUE, col = "blue")
# Nested-model F-test: does the cubic improve significantly on the line?
anova(plants.lm.1, plants.lm.3)

P264~265

# P264-265: AIC/BIC for the soil model (fitted on P244), verified against
# the closed-form log-likelihood expressions.
n <- 13   # number of observations
p <- 3    # number of regression coefficients (intercept + 2 slopes)
rss <- sum(soil.lm$residuals * soil.lm$residuals)

AIC(soil.lm)
# AIC by hand: n + n*log(2*pi) + n*log(rss/n) + 2*(p + 1);
# the +1 counts the error variance as an estimated parameter.
n + n * log(2 * pi) + n * log(rss / n) + 2 * (p + 1)

BIC(soil.lm)
# BIC is AIC with penalty k = log(n).
AIC(soil.lm, k = log(n))
n + n * log(2 * pi) + n * log(rss / n) + log(n) * (p + 1)

# extractAIC drops the constant terms and does not count the variance.
extractAIC(soil.lm)
n * log(rss / n) + 2 * p
extractAIC(soil.lm, k = log(n))
n * log(rss / n) + log(n) * p

# Load the cement heat-evolution data for the next section.
# NOTE(review): c:/cement.csv is not part of this excerpt.
heat <- read.csv("c:/cement.csv")

P266~268

# P266-268: stepwise model selection on the cement data loaded on P264.

# Start from the full model; step() defaults to bidirectional search.
heat.lm1 <- lm(y ~ x1 + x2 + x3 + x4, data = heat)
step(heat.lm1, ~.)
step(heat.lm1, ~., direction = "backward")

# Start from the intercept-only model and search forward over x1..x4.
heat.lm2 <- lm(y ~ 1, data = heat)
step(heat.lm2, ~. + x1 + x2 + x3 + x4)
step(heat.lm2, ~. + x1 + x2 + x3 + x4, direction = "forward")
4 0
原创粉丝点击