# = 1 Preparation and Tree Plot ===========================================
# Install the ape package only if it is not available yet; later code calls
# its functions with the ape:: prefix.
if (!requireNamespace("ape", quietly = TRUE)) {
  install.packages("ape")
}
# Package information:
#  library(help = ape)       # basic information
#  browseVignettes("ape")    # available vignettes
#  data(package = "ape")     # available datasets

# We change the graphics parameters from time to time, let's define the
# default so we can recreate a sane state:

# dev.off() throws an error when only the null device is open, so we close
# the current device only if one actually exists.
if (!is.null(dev.list())) {
  dev.off()
}
# Store only the settable parameters: par() without arguments also returns
# read-only parameters, and restoring those with par(PAR) triggers a warning
# for each of them.
PAR <- par(no.readonly = TRUE)
# = 2 SPECIES REFERENCE TREE ==============================================
# Before we do any kind of phylogenetic analysis of genes from several species,
# we MUST have a reference tree of the taxonomic relationships in hand. This
# context is absolutely required for the interpretation of our tree.
# We have the tax-ids in our database, and the NCBI has the species tree - we
# just need some way to extract the subtree that corresponds to our taxa of
# interest. Here's how to use the taxize package for this.
# Install taxize on first use; its functions are called with the taxize::
# prefix below.
if (!requireNamespace("taxize", quietly = TRUE)) {
  install.packages("taxize")
}
# Package information:
#  library(help = taxize)       # basic information
#  browseVignettes("taxize")    # available vignettes
#  data(package = "taxize")     # available datasets

# Species of interest: all taxonomy IDs stored in our database, plus the
# additional ID "83333". The outer parentheses print the result.
(mySOI <- c(myDB$taxonomy$ID, "83333"))

# Fetch the classification of every ID from the "ncbi" database and inspect
# what came back.
myClass <- taxize::classification(mySOI, db = "ncbi")
str(myClass)
myClass[[1]]

# Convert the classifications into a tree and plot it.
fungiTree <- taxize::class2tree(myClass, check = TRUE)
plot(fungiTree)

# The tree produced by taxize:: contains full length species names, but it
# would be more convenient if it had bicodes instead. Also, the actual tree
# is only one element of the returned list(), which will cause problems later:
str(fungiTree)

# We therefore simplify: keep only the $phylo element.
fungiTree <- fungiTree$phylo
str(fungiTree)
# The species names are now in the vector fungiTree$tip.label (before the
# simplification above, this was $phylo$tip.label of the list).
# Note that the species at the bottom of the clade descending from node
# 17 is now plotted at the top.
par(PAR) # reset graphics state

# ... or we can rearrange the tree so it corresponds as well as possible to a
# predefined tip ordering. Here we use the ordering that taxize:: has inferred
# from the NCBI taxonomic classification.

# Number of tips in the gene tree.
nOrg <- length(apsTree$tip.label)

# Draw the reference tree with margins and a root edge, then put the stored
# node labels onto its internal nodes.
plot(fungiTree, no.margin = FALSE, root.edge = TRUE)
ape::nodelabels(
  text = fungiTree$node.label,
  cex = 0.5,
  adj = 0.2,
  bg = "#D4F2DA"
)
# These are the fungi tree tips ...
fungiTree$tip.label
# ... and their order is determined by the edge-list that is stored in
fungiTree$edge
# which edges join the tips?
ape::tiplabels(cex = 0.5, frame = "rect")
# As you can see, the tips are in column 2 and they are ordered from bottom
# to top. Each tip number is the index of the species in the tip.label
# vector. So we can take column 2, subset it to the rows that point to tips,
# and use it to get a vector of species in the order of the tree ...
#
# Tips of fungiTree are numbered 1 to the number of its own tip labels, so we
# filter with the tip count of fungiTree itself. Using the tip count of
# another tree would select internal nodes (giving NA labels) or drop tips
# whenever the two trees do not have the same number of tips.
sel <- fungiTree$edge[, 2] <= length(fungiTree$tip.label)
(oSp <- fungiTree$tip.label[fungiTree$edge[sel, 2]])
# Now, here are the genes of the apsTree tips ...
apsTree$tip.label
# ... and the "constraint" we need for reordering, according to the help page
# of ape::rotateConstr(), is "a vector specifying the order of the tips as they
# should appear (from bottom to top)". Thus we need to add the "MBP1_" prefix
# to our vector of species codes ...
# NOTE(review): this assumes the tip labels of fungiTree are already bicodes
# (e.g. "ESCCO") that match the suffixes of the apsTree tip labels - confirm.
oSp <- gsub("^", "MBP1_", oSp)
# ... except for the E. coli entry, which is labelled "KILA_ESCCO". The
# pattern must be spelled "MBP1_ESCCO": the former "MBP1_ESSCO" could never
# match the prefixed code, so the label was silently left unchanged.
(oSp <- gsub("MBP1_ESCCO", "KILA_ESCCO", oSp))
# Then we can plot the two trees side by side to compare them: first the
# fungi tree in the left panel ...
par(PAR) # reset graphics state
layout(matrix(1:2, nrow = 1, ncol = 2))
plot(fungiTree, no.margin = TRUE, root.edge = TRUE)
ape::nodelabels(
  text = fungiTree$node.label,
  cex = 0.5,
  adj = 0.2,
  bg = "#D4F2DA"
)
# ... and then the re-organized apsTree in the right panel, with its tips
# rotated towards the order given in oSp.
plot(
  ape::rotateConstr(apsTree, constraint = oSp),
  no.margin = TRUE,
  root.edge = TRUE
)
par(PAR) # reset graphics state
# As you can see, the reordering is not perfect, since the topologies are
# different, mostly due to the unresolved nodes in the reference tree. One
# could play with that ...
# Task: Study the two trees and consider their similarities and differences.
# What do you expect? What do you find? Note that this is not a "mixed"
# gene tree yet, since it contains only a single gene for the species
# we considered. All of the branch points in this tree are speciation
# events. Thus the gene tree should have the same topology as the
# species tree. Does it? Are the differences important? How many
# branches would you need to remove and reinsert elsewhere to get the
# same topology as the species tree?
# In order to quantify how different these two trees are, we need to compute
# tree distances.
# == 3.3 Computing tree distances ==========================================
# Many superb phylogeny tools are contributed by the phangorn package.
# Install the phangorn package only if it is not available yet.
if (!requireNamespace("phangorn", quietly = TRUE)) {
  install.packages("phangorn")
}
# Package information:
#  library(help = phangorn)       # basic information
#  browseVignettes("phangorn")    # available vignettes
#  data(package = "phangorn")     # available datasets
# To compare two trees, they must have the same tip labels. We delete "MBP1_" or
# "KILA_" from the existing tip labels in a copy of our APSES domain tree.