## Tuesday, January 10, 2017

### Using R to work through Sokal and Rohlf's Biometry: Chapter 6 (The Normal Probability Distribution), sections 6.3-6.5

Previously in this series:

### Section 6.3 A Model for the Normal Distribution

#This section describes what produces a normal distribution and
#a heuristic showing how it is related to the binomial distribution.

### Section 6.4 Applications of the Normal Distribution

#This section describes how to do what we did to get Table 6.1,
#but with a different set of data back from Section 4.2.

mean.bw<-109.9
sd.bw<-13.593

(sdeviate.bw<-(151-mean.bw)/sd.bw)

pnorm(sdeviate.bw) #This value is more precise
pnorm(3.02) #But the book uses this rounded value so we will too here.
#We can do some of the calculations the book does on pp. 103-104.
#Only a very few individuals are higher than 151 oz.
1-pnorm(3.02)
#If we want to look both directions we can double this number since the distribution is symmetrical.
2*(1-pnorm(3.02))

#By default, lower.tail=TRUE in pnorm.
pnorm(3.02,
lower.tail=FALSE)
#It is the same as our earlier value of
1-pnorm(3.02)
#Here is the default setting for comparison.
pnorm(3.02,
lower.tail=TRUE)
#This simply tells use which direction we want to look at,
#the upper or lower tail of the distribution from our value of standard deviate.

### Section 6.5 Fitting a Normal Distribution to Observed Data

#These data are again from Section 4.2.
classmark<-seq(from=59.5, to=171.5, by=8)
frequencies<-c(2,6,39,385,888,1729,2240,2007,1233,641,201,74,14,5,1)
samplesize<-sum(frequencies) #This confirms that we entered the data correctly, and gets our sample size.
#Multiply classmark and frequencies to get the sums for each class.
classsums<-classmark*frequencies

#To look at all this stuff together, combine it into a dataset.
birthweights<-data.frame(cbind(classmark, frequencies, classsums))

#Add on a row of the next class up which contains 0 individuals.
(birthweights<-rbind(birthweights, c(179.5, 0, 0)))

#On page 104, equation 6.2 is like equation 6.1 above but with sample size (n) and i (class intervals).
normal.manual.applied<-function(mean, sd, n, i){
curve(((1/(sd*sqrt(2*pi)))*exp((-((x-mean)/sd)^2)/2))*n*i,
-4, 4, #go from -4 to +4 standard deviations.
ylab="freq",
xlab="Y",
type = "l")
}

normal.manual.applied(mean=0, sd=1, n=1000, i=0.5)
#This gives the curve that Table 6.1 also has (same class intervals of 0.5, 1000 samples, and mean=0 with sd=1).
#Let's do this with the birthweights data.

birthweights.mean<-109.8996
birthweights.sd<-13.5942

#Like above we need the lower boundaries of the class marks.
birthweights\$boundaries<-birthweights\$classmark-4

#Get the expected frequencies for the class boundaries with the mean and sd of our dataset.
birthweights\$pnorm.results<-pnorm(birthweights\$boundaries,
mean=birthweights.mean,
sd=birthweights.sd)

#Then, take the difference of the first row minus the next row.
#The last row will not have anything, which is why we needed to add the lower boundary of
#the next class mark, which has a frequency of zero.  Thus, this calculation generates a vector of length 14.
#We need 15, so we just add a zero on for the last difference as they do in Table 6.2
birthweights\$expected.freqs<-c(abs(diff(birthweights\$pnorm.results)),0) #add a zero on for the last difference

#Multipy the frequencies by the sample size to get the expected frequencies for a sample of this size.
#Round as in the table.
birthweights\$expected.freqs.values<-round(birthweights\$expected.freqs*samplesize, 1)

#We can even add the plus and minus signs using ifelse and sign() to see in which direction the differences are.
birthweights\$departure.signs<-ifelse(sign(birthweights\$frequencies-birthweights\$expected.freqs.values)==-1,
"-", #if -1, then write "-"
"+") #else if not -1, write "+"

#View the table to confirm it has the same data as Table 6.2.
birthweights