This is the last multivariate code post from the quantitative methods class. Next week will be the final set of code (on non-normal data) from that class, which I co-taught with Dr. Nicola Koper.

Get the data import code and data here.

######################################

####Principle Componenents Analysis###

######################################

#R has two methods for performing PCA.

#Generally they will give similar results and the differences involve the computation and a few summary functions.

pca.morphology<-prcomp(titmouse.na.omit[,1:7],
#specifying columns by number can be dangerous if you change your
data format.

scale=TRUE, #correlation
matrix used instead of covariance matrix, which is only appropriate if
everything in same units.

retx=TRUE) #required to get PC scores for each individual.

summary(pca.morphology)

(pca.eigenvalues<-pca.morphology$sdev^2) #Get eigenvalues.

screeplot(pca.morphology) #Plot eigenvalues.

biplot(pca.morphology)

pca.morphology$rotation #eigenvectors. Again the signs are arbitrary so don't worry

#if they differ but absolute values are the same between different programs or versions of R.

(loadings.pca<-cor(pca.morphology$x, titmouse.na.omit[,1:7], method="pearson"))

#Pearson's correlation of components with original variables. Easier to interpret.

#Eigenvectors are how you get PCs, so also a sort of weight, just harder to think about.

#If not obvious, probably not interpretable. Interpret loadings above |0.33|.

#PC1 has shorter wing and tail, smaller mass, shorter and shallower bills (size) and longer crest.

#PC1 explains 32.4% of variance.

#PC2 is longer wing and tail, shorter and shallower bills (shape) and explains 20.5% of variance.

#plotting PCA

defaultpalette<-palette() #save the default palette before we mess with it.

gray<-gray.colors(7, start=1, end=0)

palette(gray)

#creates standard hybrid index gray palette that has white (1) through black (7)

#for colors. Can be used to label figures with indexsum+1.

palette() #shows palette hex codes

pscores<-data.frame(pca.morphology$x) #puts PCA scores in a data frame

pscores$indexsum<-titmouse.na.omit$indexsum # adds indexsum values to PC scores for individual birds

pscores$indexcolor<-as.factor(pscores$indexsum+1)

pscores$zone<-as.factor(titmouse.na.omit$zone) # adds zone age

pscores$zonenum<-as.factor(titmouse.na.omit$zone) # makes zone age number column

pscores$zonenum<- sub("old",21,pscores$zonenum)

pscores$zonenum<- sub("young",23,pscores$zonenum)

pscores$zonenum<-as.numeric(pscores$zonenum)

str(pscores)

#plotting principal components

plot(x=pscores$PC1,y=pscores$PC2,

asp=1,

mar=c(2,2,2,2)+0.1,

bg=pscores$indexcolor,

col=7,

pch=pscores$zonenum,

cex=2,

lwd=2,

xlab="PC1 Increasing values: shorter wing and tail, shorter and shallower bill, less weight",

ylab="PC2 Increasing values: longer wing and tail, shorter and shallower bill",

cex.axis=1,

cex.lab=0.6)

legend(title="Hybrid index values",

title.adj=0,

x=-5.5,

y=3.76,

ncol=1,

xjust=0,

bty="n",

x.intersp=0.75,

y.intersp=1,

text.width=0.5,

legend = c(0,1,2,3,4,5,6),

pt.lwd=2,

pt.bg=gray,

pch=c(21,21,21,21,21,21),

cex=0.8,

pt.cex=1.25)

# specifies colors, labels for legend, and symbols, and location for legend and size (cex)

legend(title="Zone age",

title.adj=0,

x=-5.5,

y=0,

ncol=1,

bty="n",

x.intersp=1,

y.intersp=1,

text.width=0.5,

legend=c("Old","Young"),

pt.bg="gray",

pch=c(21,23),

cex=0.8,

pt.cex=1.25,

pt.lwd=2)

### add ellipses for old/young

#make some logical objects to keep your selections cleaner in the code below.

tuftedmo<-pscores$indexsum<1 & pscores$zone=='old'

bcmo<-pscores$indexsum>5 & pscores$zone=='old'

tuftedmy<-pscores$indexsum<1 & pscores$zone=='young'

bcmy<-pscores$indexsum>5 & pscores$zone=='young'

library(car) #required for dataEllipse although we've called it earlier so it's unnecessary here.

dataEllipse(pscores[tuftedmo, ]$PC1,

pscores[tuftedmo, ]$PC2,

levels=0.68,

lty=1,

plot.points=FALSE,

add=TRUE,col="gray",

center.pch=FALSE)

dataEllipse(pscores[bcmo, ]$PC1,

pscores[bcmo, ]$PC2,

levels=0.68,lty=1,

plot.points=FALSE,

add=TRUE,

col="black",

center.pch=FALSE)

dataEllipse(pscores[tuftedmy, ]$PC1,

pscores[tuftedmy, ]$PC2,

levels=0.68,

lty=3,

plot.points=FALSE,

add=TRUE,

col="gray",

center.pch=FALSE)

dataEllipse(pscores[bcmy, ]$PC1,

pscores[bcmy, ]$PC2,

levels=0.68,lty=3,

plot.points=FALSE,

add=TRUE,

col="black",

center.pch=FALSE)

#These give black lines for Black-crested Titmouse and gray lines for Tufted Titmouse.

#Birds from the young zone have diamond symbols and dotted lines,

#while birds from the older zone have solid lines and round symbols.

#Return palette() to default after using the gray palette for this plot.

palette(defaultpalette)

palette()

## Tuesday, April 28, 2015

## Tuesday, April 21, 2015

### Beginning R: selected multivariate statistics, part 2 of 3 (MANOVA and DFA)

This is the second part of a quick run-through of multivariate statistics I did for the quantitative methods class I'm co-teaching with Dr. Nicola Koper.

Go here for a link to the data and data import code.

######################################

## Multivariate ANOVA (MANOVA) and ##

#Discriminant Function Analysis (DFA)#

######################################

##MANOVA##

##########

#Codes adapted from help file and here.

birdsize.manova<-lm(cbind(titmouse.na.omit$wingchordr, titmouse.na.omit$mass)~titmouse.na.omit$species,

data=titmouse.na.omit)

#A few ways to do this.

summary(manova(birdsize.manova), test="Wilks")

#manova gives type I ANOVA.

#manova by itself gives the univariate ANOVAs but without significance testing.

manova(birdsize.manova)

#the 'car' package will also give you type II and type III MANOVAs.

library(car)

Anova(birdsize.manova, type=3, test="Wilks")

summary(Manova(birdsize.manova)) #gives you all the different test statistics.

#Default is type II without specifying test type.

#To look at which dependent variables are contributing, do all the univariate ANOVAs,

#or maybe the DFA (next up).

##########

###DFA####

##########

#Code adapted from four sources.

#use MANOVA first. Remember DFA and MANOVA are two versions of the same analysis.

dfa.manova<-lm(cbind(titmouse.na.omit$mass,

titmouse.na.omit$wingchordr,

titmouse.na.omit$taillength)~titmouse.na.omit$species,

data=titmouse.na.omit)

library(candisc)

#note this masks your cancor that is used in the canonical correlation function,

#so we'll need to detach the library once we are done with the DFA if planning on using CCA again.

candiscrimin<-candisc(dfa.manova) #This function takes the MANOVA model and runs a canonical discriminant analysis on it.

candiscrimin #get the test statistics for each discriminant function. Only the first one is significant.

#CanRsq is how much of the total relationship between groups and predictors.

#DF1 accounts for about 42.2% of the total relationship (overlapping variance)

#as in CCA (square root of this value would be canonical correlation),

#and 97.3% of between-groups variability.

#Annoyingly, the "eigenvalue" here is the "eigenvalue" used in SAS, I believe,

#which is not defined as in Tabachnick and Fidell (2007) but some sort of inverted matrix thing

#that we won't worry about.

#(birdsize.dfa below shows a slightly different number, about 98%, but I believe they use slightly different calculation methods).

summary(candiscrimin,

means=TRUE, #mean scores for each group

scores=TRUE, #the scores that will be plotted

coef=c("structure")) #loading matrix.

plot(candiscrimin)

#More info on understanding output: http://www.ats.ucla.edu/stat/sas/output/sas_discrim.htm

#Is in SAS, but same procedure.

detach(package:candisc, unload=TRUE)

#detach the package since we use cancor() that's not from the candisc package

#in our CCA function.

#Use the more specific discriminant function analysis in lda() to get classifications.

library(MASS)

birdsize.dfa <- lda(species~ mass+wingchordr+taillength,

data = titmouse.na.omit,

prior = c(1/3, 1/3, 1/3))

#lda stands for linear discriminant analysis.

#prior, from help: "the prior probabilities of class membership.

#If unspecified, the class proportions for the training set are used.

#If present, the probabilities should be specified in the order of the factor levels."

#Because we don't want the birds weighted by frequency of each type,

#just use 1/k (number of groups) k times in the order of the factor levels.

#You can find out your the order of levels in your factor using levels().

levels(titmouse.na.omit$species)

plot(birdsize.dfa, dimen=1, type="b") #Plots the 1st discriminant function scores in a histogram with a density line.

plot(birdsize.dfa) #Plots your discriminant functions with group names as the symbols

print(birdsize.dfa) # Proportion of trace is the proportion of between-groups variance explained by adding in each DF.

#It's similar to what's calculated above in candisc but this method uses slightly different calculations.

predict(birdsize.dfa)$x #The individual DF scores.

#Let's test our classifications.

dfa.predict <- predict(birdsize.dfa,

prior=c(1/3,1/3,1/3))

#If you add a newdata= argument then you can a subset of your data as training data and another set as validation data,

#or predict from a totally new dataset.

#Store the results in a new list.

#The predicted groups are a factor.

dfa.predict$class

#Get out the probabilities.

dfa.predict$posterior # posterior probabilities of group classifications

dfa.predict$x # DF scores, same as you got earlier.

#If the newdata argument is not specified, it classifies the original dataset and classification rates may be high.

#produce the loadings manually. This is similar to the loadings generated by candisc().

cor(dfa.predict$x, titmouse.na.omit[,c("mass",

"wingchordr",

"taillength")])

(dfa.accuracy<-table("actually was"=titmouse.na.omit$species,

"classified as"=dfa.predict$class)) # accuracy of classification

sum(dfa.accuracy[row(dfa.accuracy) == col(dfa.accuracy)]) / sum(dfa.accuracy) #proportion of correctly classified rows.

accuracy.table<-diag(prop.table(dfa.accuracy,1)) #proportion of correctly classified individuals in each group.

#hybrids seem pretty hard to classify based on body size and tail length.

#Do jackknifing to test classifications with "leave-one-out".

birdsize.dfa.jk <- lda(species~ mass+wingchordr+taillength,

data = titmouse.na.omit,

prior = c(1/3, 1/3, 1/3),

CV=TRUE) #CV=TRUE specifies cross-validation (aka jackknifing)

birdsize.dfa.jk$class #show the new jackknifed classifications.

(dfa.accuracy.jk<-table("actually was"=titmouse.na.omit$species,

"jk classified as"=birdsize.dfa.jk$class)) # accuracy of classification

jackknifing.accuracy.table<-diag(prop.table(dfa.accuracy.jk,1))

chisq.test(dfa.accuracy.jk) #better than random

The last of the multivariate code will be principle components analysis (PCA) next week!

Go here for a link to the data and data import code.

######################################

## Multivariate ANOVA (MANOVA) and ##

#Discriminant Function Analysis (DFA)#

######################################

##MANOVA##

##########

#Codes adapted from help file and here.

birdsize.manova<-lm(cbind(titmouse.na.omit$wingchordr, titmouse.na.omit$mass)~titmouse.na.omit$species,

data=titmouse.na.omit)

#A few ways to do this.

summary(manova(birdsize.manova), test="Wilks")

#manova gives type I ANOVA.

#manova by itself gives the univariate ANOVAs but without significance testing.

manova(birdsize.manova)

#the 'car' package will also give you type II and type III MANOVAs.

library(car)

Anova(birdsize.manova, type=3, test="Wilks")

summary(Manova(birdsize.manova)) #gives you all the different test statistics.

#Default is type II without specifying test type.

#To look at which dependent variables are contributing, do all the univariate ANOVAs,

#or maybe the DFA (next up).

##########

###DFA####

##########

#Code adapted from four sources.

#use MANOVA first. Remember DFA and MANOVA are two versions of the same analysis.

dfa.manova<-lm(cbind(titmouse.na.omit$mass,

titmouse.na.omit$wingchordr,

titmouse.na.omit$taillength)~titmouse.na.omit$species,

data=titmouse.na.omit)

library(candisc)

#note this masks your cancor that is used in the canonical correlation function,

#so we'll need to detach the library once we are done with the DFA if planning on using CCA again.

candiscrimin<-candisc(dfa.manova) #This function takes the MANOVA model and runs a canonical discriminant analysis on it.

candiscrimin #get the test statistics for each discriminant function. Only the first one is significant.

#CanRsq is how much of the total relationship between groups and predictors.

#DF1 accounts for about 42.2% of the total relationship (overlapping variance)

#as in CCA (square root of this value would be canonical correlation),

#and 97.3% of between-groups variability.

#Annoyingly, the "eigenvalue" here is the "eigenvalue" used in SAS, I believe,

#which is not defined as in Tabachnick and Fidell (2007) but some sort of inverted matrix thing

#that we won't worry about.

#(birdsize.dfa below shows a slightly different number, about 98%, but I believe they use slightly different calculation methods).

summary(candiscrimin,

means=TRUE, #mean scores for each group

scores=TRUE, #the scores that will be plotted

coef=c("structure")) #loading matrix.

plot(candiscrimin)

#More info on understanding output: http://www.ats.ucla.edu/stat/sas/output/sas_discrim.htm

#Is in SAS, but same procedure.

detach(package:candisc, unload=TRUE)

#detach the package since we use cancor() that's not from the candisc package

#in our CCA function.

#Use the more specific discriminant function analysis in lda() to get classifications.

library(MASS)

birdsize.dfa <- lda(species~ mass+wingchordr+taillength,

data = titmouse.na.omit,

prior = c(1/3, 1/3, 1/3))

#lda stands for linear discriminant analysis.

#prior, from help: "the prior probabilities of class membership.

#If unspecified, the class proportions for the training set are used.

#If present, the probabilities should be specified in the order of the factor levels."

#Because we don't want the birds weighted by frequency of each type,

#just use 1/k (number of groups) k times in the order of the factor levels.

#You can find out your the order of levels in your factor using levels().

levels(titmouse.na.omit$species)

plot(birdsize.dfa, dimen=1, type="b") #Plots the 1st discriminant function scores in a histogram with a density line.

plot(birdsize.dfa) #Plots your discriminant functions with group names as the symbols

print(birdsize.dfa) # Proportion of trace is the proportion of between-groups variance explained by adding in each DF.

#It's similar to what's calculated above in candisc but this method uses slightly different calculations.

predict(birdsize.dfa)$x #The individual DF scores.

#Let's test our classifications.

dfa.predict <- predict(birdsize.dfa,

prior=c(1/3,1/3,1/3))

#If you add a newdata= argument then you can a subset of your data as training data and another set as validation data,

#or predict from a totally new dataset.

#Store the results in a new list.

#The predicted groups are a factor.

dfa.predict$class

#Get out the probabilities.

dfa.predict$posterior # posterior probabilities of group classifications

dfa.predict$x # DF scores, same as you got earlier.

#If the newdata argument is not specified, it classifies the original dataset and classification rates may be high.

#produce the loadings manually. This is similar to the loadings generated by candisc().

cor(dfa.predict$x, titmouse.na.omit[,c("mass",

"wingchordr",

"taillength")])

(dfa.accuracy<-table("actually was"=titmouse.na.omit$species,

"classified as"=dfa.predict$class)) # accuracy of classification

sum(dfa.accuracy[row(dfa.accuracy) == col(dfa.accuracy)]) / sum(dfa.accuracy) #proportion of correctly classified rows.

accuracy.table<-diag(prop.table(dfa.accuracy,1)) #proportion of correctly classified individuals in each group.

#hybrids seem pretty hard to classify based on body size and tail length.

#Do jackknifing to test classifications with "leave-one-out".

birdsize.dfa.jk <- lda(species~ mass+wingchordr+taillength,

data = titmouse.na.omit,

prior = c(1/3, 1/3, 1/3),

CV=TRUE) #CV=TRUE specifies cross-validation (aka jackknifing)

birdsize.dfa.jk$class #show the new jackknifed classifications.

(dfa.accuracy.jk<-table("actually was"=titmouse.na.omit$species,

"jk classified as"=birdsize.dfa.jk$class)) # accuracy of classification

jackknifing.accuracy.table<-diag(prop.table(dfa.accuracy.jk,1))

chisq.test(dfa.accuracy.jk) #better than random

The last of the multivariate code will be principle components analysis (PCA) next week!

## Tuesday, April 14, 2015

### Beginning R: selected multivariate statistics, part 1 of 3 (CCA)

Here's some more code from the University of Manitoba class in quantitative methods that I'm co-teaching with my post-doc mentor, Dr. Nicola Koper. The class is now almost over, but I still have a few more code posts from it. I gave the lecture for this class on multivariate statistics as well.

#Start by setting your workspace and bringing in your data,

#and checking to make sure the variables are imported correctly

#with numbers as numeric, categories as factors, etc.

######################################################################################

setwd("C:\\Users\\YourUserName\\")

#Your directory here.

#Or use the menu...

#Session>Set Working Directory>To Source File Location [or you can choose the directory]

#Import your data. (This has a few more variables but is otherwise the same as in previous posts.)

titmouse<- read.table(file="blog_banding_data_expanded.csv", header=TRUE, sep=",")

str(titmouse) #shows the data types; always do this check.

titmouse$species<-titmouse$indexsum # create a new column with indexsum data copied.

#Create a categorical species variable.

titmouse$species<-sub(0,"Tufted",titmouse$species)

titmouse$species<-sub(6,"Black-crested",titmouse$species)

titmouse[titmouse$species>0&titmouse$species<6,]$species = "hybrid"

titmouse$species<-as.factor(titmouse$species)

#Our strategy for missing data will be to just eliminate NAs.

#so create a new dataframe with all the variables that we'll use from titmouse.

titmouse.na.omit<-data.frame(na.omit(titmouse[,c("wingchordr",

"taillength",

"mass",

"crestlength",

"billlength",

"billdepth",

"billwidth",

"indexsum",

"species",

"state",

"zone")]))

#Over in Environment we see that we have 87 rows of 11 variables.

#We could probably use up to 8 variables in our multivariate analysis.

######################################

#######Checking for assumptions#######

######################################

##Multivariate normality, collinearity, and homoscedasticity

#using scatterplots

#####

#scatterplots and density for univariate normality

pairs(~mass+wingchordr+taillength,

data=titmouse.na.omit)

#another method that's a bit fancier.

library(car)

scatterplotMatrix(~mass+wingchordr+taillength|species, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.

data=titmouse.na.omit,

smoother=FALSE, #default is TRUE, draws a nonparametric smoother.

reg.line=FALSE, #default is TRUE, draws a regression line.

diagonal="density", #puts a density kernel line on the diagonal for each group.

ellipse=TRUE,

levels=c(0.5,0.95), #dataEllipse quartiles

pch=c(2,3,1), #rearrange the default points a bit

col=c(1,1,1)) #changed the colors to all black.

#All are oval-shaped data clouds with no huge skewness and no obvious increases in variance

#Do without groups as well.

scatterplotMatrix(~mass+wingchordr+taillength, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.

data=titmouse.na.omit,

smoother=FALSE, #default is TRUE, draws a nonparametric smoother.

reg.line=FALSE, #default is TRUE, draws a regression line.

diagonal="density", #puts a density kernel line on the diagonal for each variable.

# ellipse=TRUE,

# levels=c(0.5,0.95), #dataEllipse quartiles

pch=c(2,3,1), #rearrange the default points a bit

col=c(1,1,1)) #changed the colors to all black.

#Mahalanbois distances

#####

#Code adapted from various sources (and more information available at those links).

md.tcwm<-mahalanobis(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")],

colMeans(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")]),

cov(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")]))

#pick out the variables we'll be using in some of the later analyses.

#You should do this for each analysis.

plot(density(md.tcwm),

main="Squared Mahalanobis distances")

rug(md.tcwm)

#Next we'll get critical values, which are from the chi-squared distribution.

k<-4 #number of variables

critical<-qchisq(0.999, df = 4) #alpha=0.001 for this test (Tabachnick and Fidell 2007).

chi2<-qchisq(ppoints(length(titmouse.na.omit$taillength)), df = k)

#Generates the theoretical chi-squared values spaced along like the actual points.

#Do a qqplot.

qqplot.data<-qqplot(chi2,

md.tcwm,

main = expression("Q-Q plot of Mahalanobis" * ~D^2 *

" vs. quantiles of" * ~ chi[4]^2), #Expression allows the fancy subscripts and greek letters.

ylab=expression("Mahalanobis " * ~D^2),

xlab=expression("Theoretical " * ~ chi[4]^2 * " quantiles"))

abline(0, 1, col = 'gray')

abline(h=critical, col="black")

text(x=4,

y=critical+1,

cex=0.5,

labels=c("Critical value: outliers above this line"))

md.tcwm.outliers<-md.tcwm>critical #generate true/false column for multivariate outlier status.

titmouse.na.omit$md.tcwm.outliers<-md.tcwm.outliers #add column to dataframe

titmouse.na.omit[titmouse.na.omit$md.tcwm.outliers==TRUE,] #bring up the outliers

hist(titmouse.na.omit$wingchordr) #Hmm, no huge gap between the small wing measurement and the rest of the data. Probably just a runt?

######################################

#Canonical correlation analysis (CCA)#

#with bonus extra exciting thrills: learn more about using functions and lists!#

###########################################################################

#The CCA code is only slightly modified from a previous post on writing functions and using lists,

#but I'm including it again here so you can have an example with a real dataset

#instead of a randomly generated one.

#First test out canonical correlation.

vars<-as.matrix(cbind(titmouse.na.omit[,c("taillength","crestlength")]))

withs<-as.matrix(cbind(titmouse.na.omit[,c("mass","wingchordr")]))

results<-cancor(vars, withs)

results #view the results

#Okay, we get some results.

#But what about when we want to run this with different variables?

#It might be easier, if we're going to need to do the same set of commands

#repeatedly, to make a function. Functions automate a process.

#For example, let's say we always want to add two variables (a and b).

#Every time, we could just do this:

a<-1

b<-2

c<-a+b

#But what about next time when we need a=2 and b=5?

#We'd have to re-set the variables or make new ones.

#It'd be easier to create an adding function.

adding<-function(a,b){ #name the function ('adding')

#function with the variables you can input in parentheses (here a and b)

#followed by a curly bracket.

a+b #This tells you what the function will do with your input variables.

} #Close the curly brackets.

adding(a=2,b=5)

adding(a=3,b=2)

#This is a very simple example, but the concept holds with added complexity.

#Take a process and automate it. Now we'll automate our CCA code from above.

canonical.corr.simple<-function (var, with, dataframe) {

#function name and the variables we can input.

#The next four lines are the same thing we did earlier to get CCA results,

#but with the standardized variable names to match what's in the parentheses after function ().

var.original<-as.matrix(cbind(dataframe[,var]))

with.original<-as.matrix(cbind(dataframe[,with]))

results.cancor<-cancor(var.original,with.original)

results.cancor

}

#Now you can run it using input variables's names.

var.variables<-c("taillength", "crestlength")

with.variables<-c("mass", "wingchordr")

function.results<-canonical.corr.simple(var=var.variables,

with=with.variables,

dataframe=titmouse.na.omit)

function.results

function.results$cor #You can access different pieces of the function results this way.

function.results$cor[1]

#Use this list notation to get very specific pieces of the results.

#This is useful because now you can run the same function

#more than once using different input objects.

#However, you'll notice that if you try to access any objects

#internal to the function, you get an error.

#This is annoying if we want to keep our results,

#especially once we make a more complicated function where we need results

#that are not in the final printout.

var.original

#Error: object 'var.original' not found

#This means that the internal objects haven't been created in our workspace.

#To access them for later analysis, you'll need to return these values as a list.

canonical.corr.listreturn<-function (var, with, dataframe) {

var.original<-as.matrix(cbind(dataframe[,var]))

with.original<-as.matrix(cbind(dataframe[,with]))

results.cancor<-cancor(var.original,with.original)

#Create the list using the list function with your internal function objects.

results.list<-list(results.cancor,

var.original,

with.original)

#Use return to get it out of the function into the environment.

return(results.list)

}

#Test out the function!

results.yeah<-canonical.corr.listreturn(var=var.variables,

with=with.variables,

dataframe=titmouse.na.omit)

#If you do not assign your results to an object,

#it will just print them and you can't access the list.

#Sometimes I forget this and try to ask for "results.list"

#in the function, but that won't work if you

#don't assign your results to an object like I did here.

#So then type out your object name. It is now a list with three main elements.

results.yeah

#Yep. Lots of info. But how to we get individual pieces of the list?

#First it helps to know the structure of the list.

str(results.yeah)

#So, how do we get stuff out of this structure?

results.yeah[1]

#This is the whole first element.

#You can see its structure, too:

str(results.yeah[1])

#It has five elements, and more within each of those.

#To go farther into the list, use the square brackets.

#You need two around each except for the last element.

results.yeah[[1]][1]

#We got $cor! Woohoo! To get only the first $cor, try this:

results.yeah[[1]][[1]][1]

#Now just test out which pieces of the list you want using the data from str().

results.yeah[[1]][2]

#For example, the second element at this level is $xcoef.

#Sometimes I have to experiment with it a little to find what I want, but just remember

#it's got a structure, so you will be able to find what you need.

#Once you've messed around with that a bit to see

#how you can get to the various pieces you want,

#let's move on to making a more complete CCA function to use.

#This one will have spots for transformed and untransformed variables

#in case you need to transform any of your data and

#will output additional statistics to include in your work.

titmouse.na.omit.transforms<-titmouse.na.omit

titmouse.na.omit.transforms$lntaillength<-log(titmouse.na.omit$taillength)

#Remember, R uses the natural log.

#Now we have example data with one transformed variable. (It didn't need it, just an example here.)

#Select the columns you want.

var.variables<-c("lntaillength","crestlength") #note I've selected one transformed variable.

with.variables<-c("mass","wingchordr")

#These are your variables. You are correlating the with and var variate

#for the with and var variables. That is, the two var variables will be combined into a single

#variate. The same thing is done to the with variables (they are combined into a with variate

#that extracts as much variance as possible from the variables).

#Select columns for untransformed variables.

var.variables.untransformed<-c("taillength","crestlength")

with.variables.untransformed<-c("mass","wingchordr")

#note I've selected both untransformed variables.

#You need to do this so that you can get loadings of the variables on the variates

#for interpretation of the variates.

#Select any subsetting. Here I will use data from either place1 or place2

#(all the data), but you can use this to select data from particular sites or groups.

subsetting.all<-c(titmouse.na.omit.transforms$zone=='old'|titmouse.na.omit.transforms$zone=='young')

#That vertical line is "or".

#Make some variable names for when we get to graphing just to keep track.

graphing.variable.names<-c("var-variate= size proxies", "with-variate= feathers!")

#Make sure you have downloaded and installed the packages 'yacca' and 'CCP'

#if you don't already have them.

canonical.corr<- function (var,

with,

var.untransformed,

with.untransformed,

variablenames,

dataframe,

subsetting) {

var.original<-as.matrix(cbind(dataframe[subsetting,var])) #subsetting selects rows

with.original<-as.matrix(cbind(dataframe[subsetting,with]))

results.cancor<-cancor(var.original,with.original)

rho<-cancor(var.original,with.original)$cor

N<-dim(var.original)[1]

p<-dim(var.original)[2]

q<-dim(with.original)[2]

#These four lines are needed for the below two, to get a Wilks' lambda.

library(CCP)

cancorresults<-p.asym(rho,N,p,q,tstat="Wilks")

library(yacca)

results.yacca<-cca(var.original,with.original)

#easiest version of cancor! gives correlations.

#Use CCP library for additional test stats if needed, because cca() will only give Rao's

#(related to and same value as Wilks).

#To see structural correlations (loadings) for how X and Y variables change with variates,

#get redundancy. Interpret loadings that are |r| equal to or greater than 0.33.

#If input variables are transformed, then need to do a Pearson's correlation on original variable

#and appropriate canonical variate.

var_expl<-cbind('x.redundancy'=results.yacca$xvrd, 'y.redundancy'=results.yacca$yvrd)

#Canonical redundancies for x variables

#(i.e., total fraction of x variance accounted for by y variables, through each canonical variate)

#yvrd = Canonical redundancies for y variables (i.e., total fraction of y variance accounted for by x variables, through each canonical variate).

#y is with, so y.redundancy is amount of with variance accounted for by var variance.

#x is var, so x.redundancy is amount of var variance accounted for by with variance.

var_expl #prints

cancor.variates<-cbind('canonical.correlation'=results.yacca$corr,

#report this

'overlappingvariance.corrsq'=results.yacca$corrsq,

#canonical correlation with THIS % overlapping variance

'variance.in.x.from.x.cc'=results.yacca$xcanvad,

#amount of x variate accounted for by x variables

'variance.in.y.from.y.cc'=results.yacca$ycanvad

# amount of y variate accounted for by y variables

)

CC1x<-as.matrix(results.yacca$canvarx)

CC1x

CC1y<-as.matrix(results.yacca$canvary)

with.loadings<-cor(CC1y,dataframe[subsetting,with.untransformed],method="pearson")

# this gives feather length (y, with) loadings for untransformed data.

#Used in interpreting graph because of transformation.

var.loadings<-cor(CC1x,dataframe[subsetting,var.untransformed],method="pearson")

# this gives size proxy (x, var) loadings for untransformed data.

#is same as loadings in the original redundancy analysis because there was no transformation.

Yplot<-data.frame(CC1x[,1])

Xplot<-data.frame(CC1y[,1])

graphing<-cbind(Xplot, Yplot) #This gives you data you can plot.

colnames(graphing)<-variablenames #This gives you names of your variables.

results.list<-list(cancorresults,

var_expl,

var.loadings,

with.loadings,

graphing,

cancor.variates)

return(results.list) #Return gets this out of your function and creates the list.

}

#Now use that complicated function to put in your data.

long.results<-canonical.corr(var=var.variables,

with=with.variables,

var.untransformed=var.variables.untransformed,

with.untransformed=with.variables.untransformed,

variablenames=graphing.variable.names,

dataframe=titmouse.na.omit.transforms,

subsetting=subsetting.all)

long.results #look at our results here. You can see the range of results we extracted.

str(long.results) #and the structure.

#First, is it significant?

long.results[[1]]$p.value[1] #This gets the first variate. Yup.

long.results[[1]]$p.value[2] #gets the second. Nope.

long.results[[1]]$p.value #gets all the variates' p-values.

long.results[[1]] #shows you all the degrees of freedom and Wilks' lambda test statistic that you need to report.

long.results[[2]] #shows the redundancy analysis. How much of X is explained by y?

#This is what I commented in the function, but I'll repeat here:

#total fraction of y or x variance accounted for by x or y variables,

#through each canonical variate).

#y is with, so y.redundancy is amount of with variance accounted for by var variance.

#x is var, so x.redundancy is amount of var variance accounted for by with variance.

#Loadings for each variate. Interpret loadings higher than |0.33| for significant variates.

long.results[[3]] #Loadings for var variables on each canonical variate (CV1 and CV2 in this case)

long.results[[4]] #Loadings for with variables on each canonical variate (CV1 and CV2 in this case)

#How about graphing? Here's a very simple plot. It uses the variable names we made earlier

#and plots CCI of each variate.

plot(long.results[[5]],

pch=21, bg="black")

#It's better to plot with labels describing what each axis says (use the loadings),

#so modify your x- and y-axis labels.

plot(long.results[[5]],

xlab="Wing chord (-0.99) and mass (-0.42) decrease",

ylab="Tail length (-0.98) decreases")

long.results[[6]] #Finally some more interpretative stats.

#The first two are the canonical correlation

#and overlapping variance (correlation squared=the eigenvalue)).

#The last two are the variance explained by the variables within its own variate.

#variance.in.x.from.x.cc is how much of the "var" variables (x) is explained by its variate.

#Same for y ("with") variance.in.y.from.y.cc.

#Only CV1 was significant from the Wilks' lambda test so we do not interpret CV2.

#Body size and feather length variates were significantly correlated

#(Wilks' lambda=0.45, n=87, p<0.001; Canonical correlation=0.74, 55.3% overlapping variance).

#Loadings show that tail length (-0.98) decreases

#as wing chord (-0.99) and mass (-0.42) decrease.

#(This seems a counterintuitive way to label the axes but it's an arbitrary orientation.)

#The crest length loading is below the |0.33| cutoff at 0.28 so we do not interpret it.

#Wing chord and mass together explain 32.2% of the feather length variation;

#Feather length variation explains 28.5% of body size variation

#(but you'd assume that's probably not a causal relationship so it might not be appropriate to report).

#Wing chord and mass explain 51.6% of the variance in the body size variate,

#and tail length and crest length explain 58.2% of the variance in the feather length variate.

#Use this last function (canonical.corr) one to get the complete analysis. The previous functions

#were to demonstrate how to develop a function.

#If you decide you need additional statistics for your CCA,

#you can read the help files for the CCA functions within it

#if you need to extract additional statistics for your particular analysis;

#just write them into the function and add them to the results list that is returned.

#That's all for CCA; next time we'll do discriminant function analysis (DFA) and multivariate ANOVA (MANOVA).

#Start by setting your workspace and bringing in your data,

#and checking to make sure the variables are imported correctly

#with numbers as numeric, categories as factors, etc.

######################################################################################

setwd("C:\\Users\\YourUserName\\")

#Your directory here.

#Or use the menu...

#Session>Set Working Directory>To Source File Location [or you can choose the directory]

#Import your data. (This has a few more variables but is otherwise the same as in previous posts.)

titmouse<- read.table(file="blog_banding_data_expanded.csv", header=TRUE, sep=",")

str(titmouse) #shows the data types; always do this check.

titmouse$species<-titmouse$indexsum # create a new column with indexsum data copied.

#Create a categorical species variable.

titmouse$species<-sub(0,"Tufted",titmouse$species)

titmouse$species<-sub(6,"Black-crested",titmouse$species)

titmouse[titmouse$species>0&titmouse$species<6,]$species = "hybrid"

titmouse$species<-as.factor(titmouse$species)

#Our strategy for missing data will be to just eliminate NAs.

#so create a new dataframe with all the variables that we'll use from titmouse.

titmouse.na.omit<-data.frame(na.omit(titmouse[,c("wingchordr",

"taillength",

"mass",

"crestlength",

"billlength",

"billdepth",

"billwidth",

"indexsum",

"species",

"state",

"zone")]))

#Over in Environment we see that we have 87 rows of 11 variables.

#We could probably use up to 8 variables in our multivariate analysis.

######################################

#######Checking for assumptions#######

######################################

##Multivariate normality, collinearity, and homoscedasticity

#using scatterplots

#####

#scatterplots and density for univariate normality

pairs(~mass+wingchordr+taillength,

data=titmouse.na.omit)

#another method that's a bit fancier.

library(car)

scatterplotMatrix(~mass+wingchordr+taillength|species, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.

data=titmouse.na.omit,

smoother=FALSE, #default is TRUE, draws a nonparametric smoother.

reg.line=FALSE, #default is TRUE, draws a regression line.

diagonal="density", #puts a density kernel line on the diagonal for each group.

ellipse=TRUE,

levels=c(0.5,0.95), #dataEllipse quartiles

pch=c(2,3,1), #rearrange the default points a bit

col=c(1,1,1)) #changed the colors to all black.

#All are oval-shaped data clouds with no huge skewness and no obvious increases in variance

#Do without groups as well.

scatterplotMatrix(~mass+wingchordr+taillength, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.

data=titmouse.na.omit,

smoother=FALSE, #default is TRUE, draws a nonparametric smoother.

reg.line=FALSE, #default is TRUE, draws a regression line.

diagonal="density", #puts a density kernel line on the diagonal for each variable.

# ellipse=TRUE,

# levels=c(0.5,0.95), #dataEllipse quartiles

pch=c(2,3,1), #rearrange the default points a bit

col=c(1,1,1)) #changed the colors to all black.

#Mahalanbois distances

#####

#Code adapted from various sources (and more information available at those links).

md.tcwm<-mahalanobis(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")],

colMeans(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")]),

cov(titmouse.na.omit[,c("taillength","crestlength","wingchordr","mass")]))

#pick out the variables we'll be using in some of the later analyses.

#You should do this for each analysis.

plot(density(md.tcwm),

main="Squared Mahalanobis distances")

rug(md.tcwm)

#Next we'll get critical values, which are from the chi-squared distribution.

k<-4 #number of variables

critical<-qchisq(0.999, df = 4) #alpha=0.001 for this test (Tabachnick and Fidell 2007).

chi2<-qchisq(ppoints(length(titmouse.na.omit$taillength)), df = k)

#Generates the theoretical chi-squared values spaced along like the actual points.

#Do a qqplot.

qqplot.data<-qqplot(chi2,

md.tcwm,

main = expression("Q-Q plot of Mahalanobis" * ~D^2 *

" vs. quantiles of" * ~ chi[4]^2), #Expression allows the fancy subscripts and greek letters.

ylab=expression("Mahalanobis " * ~D^2),

xlab=expression("Theoretical " * ~ chi[4]^2 * " quantiles"))

abline(0, 1, col = 'gray')

abline(h=critical, col="black")

text(x=4,

y=critical+1,

cex=0.5,

labels=c("Critical value: outliers above this line"))

md.tcwm.outliers<-md.tcwm>critical #generate true/false column for multivariate outlier status.

titmouse.na.omit$md.tcwm.outliers<-md.tcwm.outliers #add column to dataframe

titmouse.na.omit[titmouse.na.omit$md.tcwm.outliers==TRUE,] #bring up the outliers

hist(titmouse.na.omit$wingchordr) #Hmm, no huge gap between the small wing measurement and the rest of the data. Probably just a runt?

######################################

#Canonical correlation analysis (CCA)#

#with bonus extra exciting thrills: learn more about using functions and lists!#

###########################################################################

#The CCA code is only slightly modified from a previous post on writing functions and using lists,

#but I'm including it again here so you can have an example with a real dataset

#instead of a randomly generated one.

#First test out canonical correlation.

vars<-as.matrix(cbind(titmouse.na.omit[,c("taillength","crestlength")]))

withs<-as.matrix(cbind(titmouse.na.omit[,c("mass","wingchordr")]))

results<-cancor(vars, withs)

results #view the results

#Okay, we get some results.

#But what about when we want to run this with different variables?

#It might be easier, if we're going to need to do the same set of commands

#repeatedly, to make a function. Functions automate a process.

#For example, let's say we always want to add two variables (a and b).

#Every time, we could just do this:

a<-1

b<-2

c<-a+b

#But what about next time when we need a=2 and b=5?

#We'd have to re-set the variables or make new ones.

#It'd be easier to create an adding function.

adding<-function(a,b){ #name the function ('adding')

#function with the variables you can input in parentheses (here a and b)

#followed by a curly bracket.

a+b #This tells you what the function will do with your input variables.

} #Close the curly brackets.

adding(a=2,b=5)

adding(a=3,b=2)

#This is a very simple example, but the concept holds with added complexity.

#Take a process and automate it. Now we'll automate our CCA code from above.

canonical.corr.simple<-function (var, with, dataframe) {

#function name and the variables we can input.

#The next four lines are the same thing we did earlier to get CCA results,

#but with the standardized variable names to match what's in the parentheses after function ().

var.original<-as.matrix(cbind(dataframe[,var]))

with.original<-as.matrix(cbind(dataframe[,with]))

results.cancor<-cancor(var.original,with.original)

results.cancor

}

#Now you can run it using input variables's names.

var.variables<-c("taillength", "crestlength")

with.variables<-c("mass", "wingchordr")

function.results<-canonical.corr.simple(var=var.variables,

with=with.variables,

dataframe=titmouse.na.omit)

function.results

function.results$cor #You can access different pieces of the function results this way.

function.results$cor[1]

#Use this list notation to get very specific pieces of the results.

#This is useful because now you can run the same function

#more than once using different input objects.

#However, you'll notice that if you try to access any objects

#internal to the function, you get an error.

#This is annoying if we want to keep our results,

#especially once we make a more complicated function where we need results

#that are not in the final printout.

var.original

#Error: object 'var.original' not found

#This means that the internal objects haven't been created in our workspace.

#To access them for later analysis, you'll need to return these values as a list.

canonical.corr.listreturn<-function (var, with, dataframe) {

var.original<-as.matrix(cbind(dataframe[,var]))

with.original<-as.matrix(cbind(dataframe[,with]))

results.cancor<-cancor(var.original,with.original)

#Create the list using the list function with your internal function objects.

results.list<-list(results.cancor,

var.original,

with.original)

#Use return to get it out of the function into the environment.

return(results.list)

}

#Test out the function!

results.yeah<-canonical.corr.listreturn(var=var.variables,

with=with.variables,

dataframe=titmouse.na.omit)

#If you do not assign your results to an object,

#it will just print them and you can't access the list.

#Sometimes I forget this and try to ask for "results.list"

#in the function, but that won't work if you

#don't assign your results to an object like I did here.

#So then type out your object name. It is now a list with three main elements.

results.yeah

#Yep. Lots of info. But how to we get individual pieces of the list?

#First it helps to know the structure of the list.

str(results.yeah)

#So, how do we get stuff out of this structure?

results.yeah[1]

#This is the whole first element.

#You can see its structure, too:

str(results.yeah[1])

#It has five elements, and more within each of those.

#To go farther into the list, use the square brackets.

#You need two around each except for the last element.

results.yeah[[1]][1]

#We got $cor! Woohoo! To get only the first $cor, try this:

results.yeah[[1]][[1]][1]

#Now just test out which pieces of the list you want using the data from str().

results.yeah[[1]][2]

#For example, the second element at this level is $xcoef.

#Sometimes I have to experiment with it a little to find what I want, but just remember

#it's got a structure, so you will be able to find what you need.

#Once you've messed around with that a bit to see

#how you can get to the various pieces you want,

#let's move on to making a more complete CCA function to use.

#This one will have spots for transformed and untransformed variables

#in case you need to transform any of your data and

#will output additional statistics to include in your work.

titmouse.na.omit.transforms<-titmouse.na.omit

titmouse.na.omit.transforms$lntaillength<-log(titmouse.na.omit$taillength)

#Remember, R uses the natural log.

#Now we have example data with one transformed variable. (It didn't need it, just an example here.)

#Select the columns you want.

var.variables<-c("lntaillength","crestlength") #note I've selected one transformed variable.

with.variables<-c("mass","wingchordr")

#These are your variables. You are correlating the with and var variate

#for the with and var variables. That is, the two var variables will be combined into a single

#variate. The same thing is done to the with variables (they are combined into a with variate

#that extracts as much variance as possible from the variables).

#Select columns for untransformed variables.

var.variables.untransformed<-c("taillength","crestlength")

with.variables.untransformed<-c("mass","wingchordr")

#note I've selected both untransformed variables.

#You need to do this so that you can get loadings of the variables on the variates

#for interpretation of the variates.

#Select any subsetting. Here I will use data from either place1 or place2

#(all the data), but you can use this to select data from particular sites or groups.

subsetting.all<-c(titmouse.na.omit.transforms$zone=='old'|titmouse.na.omit.transforms$zone=='young')

#That vertical line is "or".

#Make some variable names for when we get to graphing just to keep track.

graphing.variable.names<-c("var-variate= size proxies", "with-variate= feathers!")

#Make sure you have downloaded and installed the packages 'yacca' and 'CCP'

#if you don't already have them.

canonical.corr<- function (var,

with,

var.untransformed,

with.untransformed,

variablenames,

dataframe,

subsetting) {

var.original<-as.matrix(cbind(dataframe[subsetting,var])) #subsetting selects rows

with.original<-as.matrix(cbind(dataframe[subsetting,with]))

results.cancor<-cancor(var.original,with.original)

rho<-cancor(var.original,with.original)$cor

N<-dim(var.original)[1]

p<-dim(var.original)[2]

q<-dim(with.original)[2]

#These four lines are needed for the below two, to get a Wilks' lambda.

library(CCP)

cancorresults<-p.asym(rho,N,p,q,tstat="Wilks")

library(yacca)

results.yacca<-cca(var.original,with.original)

#easiest version of cancor! gives correlations.

#Use CCP library for additional test stats if needed, because cca() will only give Rao's

#(related to and same value as Wilks).

#To see structural correlations (loadings) for how X and Y variables change with variates,

#get redundancy. Interpret loadings that are |r| equal to or greater than 0.33.

#If input variables are transformed, then need to do a Pearson's correlation on original variable

#and appropriate canonical variate.

var_expl<-cbind('x.redundancy'=results.yacca$xvrd, 'y.redundancy'=results.yacca$yvrd)

#Canonical redundancies for x variables

#(i.e., total fraction of x variance accounted for by y variables, through each canonical variate)

#yvrd = Canonical redundancies for y variables (i.e., total fraction of y variance accounted for by x variables, through each canonical variate).

#y is with, so y.redundancy is amount of with variance accounted for by var variance.

#x is var, so x.redundancy is amount of var variance accounted for by with variance.

var_expl #prints

cancor.variates<-cbind('canonical.correlation'=results.yacca$corr,

#report this

'overlappingvariance.corrsq'=results.yacca$corrsq,

#canonical correlation with THIS % overlapping variance

'variance.in.x.from.x.cc'=results.yacca$xcanvad,

#amount of x variate accounted for by x variables

'variance.in.y.from.y.cc'=results.yacca$ycanvad

# amount of y variate accounted for by y variables

)

CC1x<-as.matrix(results.yacca$canvarx)

CC1x

CC1y<-as.matrix(results.yacca$canvary)

with.loadings<-cor(CC1y,dataframe[subsetting,with.untransformed],method="pearson")

# this gives feather length (y, with) loadings for untransformed data.

#Used in interpreting graph because of transformation.

var.loadings<-cor(CC1x,dataframe[subsetting,var.untransformed],method="pearson")

# this gives size proxy (x, var) loadings for untransformed data.

#is same as loadings in the original redundancy analysis because there was no transformation.

Yplot<-data.frame(CC1x[,1])

Xplot<-data.frame(CC1y[,1])

graphing<-cbind(Xplot, Yplot) #This gives you data you can plot.

colnames(graphing)<-variablenames #This gives you names of your variables.

results.list<-list(cancorresults,

var_expl,

var.loadings,

with.loadings,

graphing,

cancor.variates)

return(results.list) #Return gets this out of your function and creates the list.

}

#Now use that complicated function to put in your data.

long.results<-canonical.corr(var=var.variables,

with=with.variables,

var.untransformed=var.variables.untransformed,

with.untransformed=with.variables.untransformed,

variablenames=graphing.variable.names,

dataframe=titmouse.na.omit.transforms,

subsetting=subsetting.all)

long.results #look at our results here. You can see the range of results we extracted.

str(long.results) #and the structure.

#First, is it significant?

long.results[[1]]$p.value[1] #This gets the first variate. Yup.

long.results[[1]]$p.value[2] #gets the second. Nope.

long.results[[1]]$p.value #gets all the variates' p-values.

long.results[[1]] #shows you all the degrees of freedom and Wilks' lambda test statistic that you need to report.

long.results[[2]] #shows the redundancy analysis. How much of X is explained by y?

#This is what I commented in the function, but I'll repeat here:

#total fraction of y or x variance accounted for by x or y variables,

#through each canonical variate).

#y is with, so y.redundancy is amount of with variance accounted for by var variance.

#x is var, so x.redundancy is amount of var variance accounted for by with variance.

#Loadings for each variate. Interpret loadings higher than |0.33| for significant variates.

long.results[[3]] #Loadings for var variables on each canonical variate (CV1 and CV2 in this case)

long.results[[4]] #Loadings for with variables on each canonical variate (CV1 and CV2 in this case)

#How about graphing? Here's a very simple plot. It uses the variable names we made earlier

#and plots CCI of each variate.

plot(long.results[[5]],

pch=21, bg="black")

#It's better to plot with labels describing what each axis says (use the loadings),

#so modify your x- and y-axis labels.

plot(long.results[[5]],

xlab="Wing chord (-0.99) and mass (-0.42) decrease",

ylab="Tail length (-0.98) decreases")

long.results[[6]] #Finally some more interpretative stats.

#The first two are the canonical correlation

#and overlapping variance (correlation squared=the eigenvalue)).

#The last two are the variance explained by the variables within its own variate.

#variance.in.x.from.x.cc is how much of the "var" variables (x) is explained by its variate.

#Same for y ("with") variance.in.y.from.y.cc.

#Only CV1 was significant from the Wilks' lambda test so we do not interpret CV2.

#Body size and feather length variates were significantly correlated

#(Wilks' lambda=0.45, n=87, p<0.001; Canonical correlation=0.74, 55.3% overlapping variance).

#Loadings show that tail length (-0.98) decreases

#as wing chord (-0.99) and mass (-0.42) decrease.

#(This seems a counterintuitive way to label the axes but it's an arbitrary orientation.)

#The crest length loading is below the |0.33| cutoff at 0.28 so we do not interpret it.

#Wing chord and mass together explain 32.2% of the feather length variation;

#Feather length variation explains 28.5% of body size variation

#(but you'd assume that's probably not a causal relationship so it might not be appropriate to report).

#Wing chord and mass explain 51.6% of the variance in the body size variate,

#and tail length and crest length explain 58.2% of the variance in the feather length variate.

#Use this last function (canonical.corr) one to get the complete analysis. The previous functions

#were to demonstrate how to develop a function.

#If you decide you need additional statistics for your CCA,

#you can read the help files for the CCA functions within it

#if you need to extract additional statistics for your particular analysis;

#just write them into the function and add them to the results list that is returned.

#That's all for CCA; next time we'll do discriminant function analysis (DFA) and multivariate ANOVA (MANOVA).

Subscribe to:
Posts (Atom)