From Statistics for Engineering
Class date(s):
 12, 19, 26 January 2012















[edit] Materials
[edit] Code used in class
Code used to illustrate how the q-q plot is constructed:
# Construct a normal quantile-quantile (q-q) plot by hand, then reproduce it
# with R's built-in functions.
N <- 10

# What are the quantiles from the theoretical normal distribution?
# Each of the N ordered points is assigned the probability (i - 0.5) / N, so
# P stays strictly between 0 and 1 and qnorm() returns finite values.
index <- seq_len(N)
P <- (index - 0.5) / N
theoretical.quantity <- qnorm(P)

# Our sampled data:
yields <- c(86.2, 85.7, 71.9, 95.3, 77.1, 71.4, 68.9, 78.9, 86.9, 78.4)
# The hand-made plot pairs one sample value with each theoretical quantile.
stopifnot(length(yields) == N)
mean.yield <- mean(yields)  # 80.07
sd.yield <- sd(yields)      # 8.35

# What are the quantiles for the sampled data?
# Standardize to z-values so they share a scale with the theoretical quantiles.
yields.z <- (yields - mean.yield) / sd.yield
yields.z
yields.z.sorted <- sort(yields.z)

# Compare the values in text:
yields.z.sorted
theoretical.quantity

# Compare them graphically: points from a normal distribution fall close to
# the 45-degree line (intercept 0, slope 1).
plot(theoretical.quantity, yields.z.sorted, asp = 1)
abline(a = 0, b = 1)

# Built-in R functions to do all the above for you:
qqnorm(yields)
qqline(yields)
# A better q-q plot function comes from the add-on "car" package; see
# http://connectmv.com/tutorials/r-tutorial/extending-r-with-packages/
library("car")
qqPlot(x = yields)
Code used to illustrate the central limit theorem's reduction in variance:
# Demonstrate how averaging in subgroups reduces the spread of the data.

# Average consecutive, non-overlapping subgroups of `size` values.
# Any trailing values that do not fill a complete subgroup are dropped.
subgroup.means <- function(values, size) {
  n.groups <- length(values) %/% size
  vapply(
    seq_len(n.groups),
    function(i) mean(values[((i - 1) * size + 1):(i * size)]),
    numeric(1)
  )
}

# Show the 3 plots side by side
layout(matrix(c(1, 2, 3), 1, 3))

# Sample the population:
N <- 100
x <- rnorm(N, mean = 80, sd = 5)
mean(x)
sd(x)

# Plot the raw data. All three plots reuse the raw data's range on the y-axis
# so that the spreads can be compared directly.
x.range <- range(x)
plot(x, ylim = x.range, main = "Raw data")

# Subgroups of 2
subsize <- 2
x.2 <- subgroup.means(x, subsize)
plot(x.2, ylim = x.range, main = "Subgroups of 2")

# Subgroups of 4
subsize <- 4
x.4 <- subgroup.means(x, subsize)
plot(x.4, ylim = x.range, main = "Subgroups of 4")