This is part 2 of a previous post on linear models, so you'll need to start with the previous code to get the dataframe and variables used in this post.

#############################

#####Linear models: ANOVA####

#############################

#ANOVA is a type of linear model with a categorical independent variable

#and a particular analysis for partitioning variance. (Hence the name,

#Analysis O VAriance.)

#Use the same formulas and function as before but with categorical variables.

state.anova<-lm(wingchordr~state,

data=titmouse.na.omit)

summary(state.anova)

#This is the same thing as a t-test with only two levels.

t.test(titmouse.na.omit$wingchordr~as.numeric(titmouse.na.omit$state),

var.equal=TRUE)

titmouse.na.omit$species<-as.factor(titmouse.na.omit$species)

#Change species to a factor so we can compare these three titmouse populations.

crest.a<-lm(crestlength~species,

data=titmouse.na.omit)

summary(crest.a)

anova(crest.a)

#Base R (without packages) has this anova function, but does it do what we want?

?anova

#not super obvious which type of ANOVA though.

#Let's use an easier package, car, which we used earlier.

#It doesn't hurt to call a library twice so we'll put it here anyway in case we want to run this code separately.

library(car) #You'll note it doesn't give any messages in the R console if we've already run it in this session.

Anova(crest.a, type="III")

#Different types of ANOVA.

Anova(crest.a, type=3) #You can use numbers (without quotes) or Roman numerals (with quotes)

Anova(crest.a, type=2)

?Anova #Help lets us know that Type II and Type III do not exactly correspond to those in SAS, but have similar principles.

#It also lets us know that anova() in base R (without capitalization) is a type I test.

#More information on the different types as implemented in R compared with other standard programs (SAS and SPSS).

#Let's look at the spread of boxplots/points in each group, then at residuals.

plot(titmouse.na.omit$crestlength~titmouse.na.omit$species,

xlab="Species",

ylab="Crest length (mm)")

#Just to see what the points themselves look like.

plot(titmouse.na.omit$crestlength~jitter(as.numeric(titmouse.na.omit$species),

factor=0.05), #Add a bit of jitter so you can see all the points.

xlab="Species",

ylab="Crest length (mm)",

pch=21,

bg="black",

xaxt="n") #Stops x-axis from receiving numbered labels (the factors' numbers).

axis(side=1, #Add in the custom x-axis

at=1:3, #at positions 1 through 3

labels=c("Black-crested",

"hybrid",

"Tufted")) #with appropriate labels.

#What about the residuals? (section 8.3.2 of textbook)

plot(residuals(crest.a)~titmouse.na.omit$species,

xlab="Species",

ylab="residuals")

#Interactions in ANOVA

#Code this the same way you would code

crest.sp.z.a<-lm(crestlength~species*zone,

data=titmouse.na.omit)

summary(crest.sp.z.a)

Anova(crest.sp.z.a,

type="III")

#Unplanned comparisions, i.e., post hoc tests.

#Fisher's LSD

library(agricolae)

crest.LSD<-LSD.test(crest.a, #linear model that contains what you want to test

"species", #groups to test

p.adj="none", #none for standard Fisher's LSD

alpha=0.05, #alpha=0.05

console=TRUE #show your output.

)

#plot the group letters.

plot(titmouse.na.omit$crestlength~titmouse.na.omit$species,

xlab="Species",

ylab="Crest length (mm)",

ylim=c(15,25))

for (i in 1:3) { #Another loop, this time with three repeats.

text(x=i, #Different function, put i where you'd put 1:3.

y=24, #Place all at 24 on the y axis.

labels=crest.LSD$groups[3][i,])#label with the group results from the Fisher's LSD list.

}

#Bonferroni adjustment

pairwise.t.test(titmouse.na.omit$crestlength,

g=titmouse.na.omit$species,

p.adjust.method="bonf")

#Same results in a different format.

crest.lsd.bonf<-LSD.test(crest.a, #linear model

"species", #groups to test

p.adj="bonf", #Bonferroni correction

alpha=0.05, #alpha=0.05

console=TRUE #show your output.

)

#Tukey's Honest Significant Differences

library(multcomp)

crest.tukey<-glht(crest.a,

linfct=mcp(species="Tukey"))

summary(crest.tukey)

#There is also a base R function TukeyHDS() but I find this one simpler to read.

#Tukey's HSD not mentioned in lecture, but a good one.

#Use the same function to get Dunnett's test, which compares specifically to one level (often a control).

#Here our data are a bit weird for this because there's no reason to compare to what R thinks is the control

#which is the first factor (Black-crested, because it's alphabetical).

crest.dunnett<-glht(crest.a,

linfct=mcp(species="Dunnett"))

summary(crest.dunnett)

#A priori comparisions are planned comparisions and must be coded before the analysis.

#This is the clearest explanation of contrasts in R that I've found:

#http://www.uvm.edu/~dhowell/StatPages/More_Stuff/R/ContrastsInR.html

levels(titmouse.na.omit$species)#Check the ordering of your factor levels.

#In the matrix, the first one will be Black-crested, second will be hybrid, and third will be Tufted.

#Let's look at R-provided contrast matrices.

#There are functions to generate each matrix, then you use contrasts() to set it for your factor.

(sp.treatment<-contr.treatment(levels(titmouse.na.omit$species),

contrasts=TRUE))

apply(sp.treatment,2,sum) #check that column sums are zero (orthogonal contrasts are independent).

#It's not for contr.treatment (not orthogonal).

contrasts(titmouse.na.omit$species)<-sp.treatment #Apply this constrast matrix to your variable.

crest.contr1<-lm(crestlength~species,

data=titmouse.na.omit,

contrasts=TRUE)

Anova(crest.contr1, type='III')

summary(crest.contr1)

#You'll note this is the default for R. It compares to the first ("base") level.

sp.treatment<-contr.treatment(levels(titmouse.na.omit$species),

base=2, #Change the base to 2 to compare to 'hybrid'.

contrasts=TRUE)

apply(sp.treatment,2,sum) #check that column sums are zero (orthogonal contrasts are independent).

#It's not for contr.treatment (not orthogonal).

contrasts(titmouse.na.omit$species)<-sp.treatment

crest.contr2<-lm(crestlength~species,

data=titmouse.na.omit,

contrasts=TRUE)

Anova(crest.contr2, type='III')

summary(crest.contr2)

#Now it compares everything to level 2 (hybrids).

#A different example:

sp.helmert<-contr.helmert(levels(titmouse.na.omit$species),

contrasts=TRUE)

apply(sp.helmert,2,sum) #check that column sums are zero (orthogonal contrasts are independent).

contrasts(titmouse.na.omit$species)<-sp.helmert

crest.contr3<-lm(crestlength~species,

data=titmouse.na.omit,

contrasts=TRUE)

Anova(crest.contr3, type='III')

summary(crest.contr3)

#You'll notice the ANOVA tables are the same, but the summary with contrasts are different.

#More types of contrasts and their definitions are here:

#http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm

?contr.treatment #or contr.helmert, contr.poly, contr.sum, contr.SAS will all get you to the contrast matrix function help file.

#Custom contrasts are also possible.

#You make a matrix with an additional row of 1s for a constant/intercept (it's removed later).

(cmat.solve<-rbind(c(1,1,1), #rbind binds together rows.

'hybrid vs. parental'=c(1,-2,1), #label each comparison and make each list sum to zero.

'BCTI vs. TUTI'=c(-1,0,1)))

library(MASS) #invoke the MASS library to use fractions().

#Doesn't change your result, just makes it easier to read.

#This library comes installed with R, just needs called.

(test.solve<-fractions(solve(cmat.solve))) #Inverse of the matrix.

contrasts(titmouse.na.omit$species)<-test.solve[,2:3] #Select all but the intercept and apply it to contrasts().

apply(contrasts(titmouse.na.omit$species), 2, sum) # check to make sure sums are zero.

#If zero, then orthogonal and this method works. It's essentially zero with rounding error.

crest.contr4<-lm(crestlength~species,

data=titmouse.na.omit,

contrasts=TRUE)

Anova(crest.contr4, type='III')

summary(crest.contr4)

## No comments:

## Post a Comment

Comments and suggestions welcome.