# BIN-PHYLO-Tree_analysis.R # # Purpose: A Bioinformatics Course: # R code accompanying the BIN-PHYLO-Tree_analysis unit. # # Version: 1.0 # # Date: 2017 10. 31 # Author: Boris Steipe (boris.steipe@utoronto.ca) # # Versions: # 1.0 First 2017 version # 0.1 First code copied from 2016 material. # # # TODO: # # # == DO NOT SIMPLY source() THIS FILE! ======================================= # # If there are portions you don't understand, use R's help system, Google for an # answer, or ask your instructor. Don't continue if you don't understand what's # going on. That's not how it works ... # # ============================================================================== # = 1 ___Section___ if (!require(Rphylip, quietly=TRUE)) { install.packages("Rphylip") library(Rphylip) } # Package information: # library(help = Rphylip) # basic information # browseVignettes("Rphylip") # available vignettes # data(package = "Rphylip") # available datasets # Read the species tree that you have created at the phyloT Website: fungiTree <- read.tree("fungiTree.txt") plot(fungiTree) # The tree produced by phyloT contains full length species names, but it would # be more convenient if it had bicodes instead. str(fungiTree) # The species names are in a vector $tip.label of this list. We can use bicode() # to shorten them - but note that they have underscores as word separators. Thus # we will use gsub("-", " ", ...) to replace the underscores with spaces. for (i in seq_along(fungiTree$tip.label)) { fungiTree$tip.label[i] <- biCode(gsub("_", " ", fungiTree$tip.label[i])) } # Plot the tree plot(fungiTree, cex=1.0, root.edge=TRUE, no.margin=TRUE) nodelabels(text=orgTree$node.label, cex=0.6, adj=0.2, bg="#D4F2DA") # Note that you can use the arrow buttons in the menu above the plot to scroll # back to plots you have created earlier - so you can reference back to the # species tree. # = 1 Tree Analysis # 1.1 Visualizing your tree # The trees that are produced by Rphylip are stored as an object of class # "phylo". This is a class for phylogenetic trees that is widely used in the # community, practically all R phylogenetics packages will options to read and # manipulate such trees. Outside of R, a popular interchange format is the # Newick_format that you have seen above. It's easy to output your calculated # trees in Newick format and visualize them elsewhere. # The "phylo" class object is one of R's "S3" objects and methods to plot and # print it have been defined with the Rphylip package, and the package ape that # Rphylip has loaded. You can simply call plot() and R knows what to # do with and how to plot it. The underlying function is # plot.phylo(), and documentation for its many options can by found by typing: ?plot.phylo # We load the APSES sequence tree that you produced in the # BIN-PHYLO-Tree_building unit: load(file = "APSEStreeRproml.RData") plot(apsTree) # default type is "phylogram" plot(apsTree, type="unrooted") plot(apsTree, type="fan", no.margin = TRUE) # rescale to show all of the labels: # record the current plot parameters by assigning them to a variable ... (tmp <- plot(apsTree, type="fan", no.margin = TRUE, plot=FALSE)) # ... and adjust the plot limits for a new plot: plot(apsTree, type="fan", x.lim = tmp$x.lim * 1.8, y.lim = tmp$y.lim * 1.8, cex = 0.8, no.margin = TRUE) # Inspect the tree object str(apsTree) apsTree$tip.label apsTree$edge apsTree$edge.length # show the node / edge and tip labels on a plot plot(apsTree) nodelabels() edgelabels() tiplabels() # show the number of nodes, edges and tips Nnode(apsTree) Nedge(apsTree) Ntip(apsTree) # Finally, write the tree to console in Newick format write.tree(apsTree) # = 1.1 Rooting Trees # In order to analyse the tree, it is helpful to root it first and reorder its # clades. Contrary to documentation, Rproml() returns an unrooted tree. is.rooted(apsTree) # You can root the tree with the command root() from the "ape" package. ape is # automatically installed and loaded with Rphylip. plot(apsTree) # add labels for internal nodes and tips nodelabels(cex=0.5, frame="circle") tiplabels(cex=0.5, frame="rect") # The outgroup of the tree is tip "11" in my sample tree, it may be a different # number in yours. Substitute the correct node number below for "outgroup". apsTree <- root(apsTree, outgroup = 11, resolve.root = TRUE) plot(apsTree) is.rooted(apsTree) # This tree _looks_ unchanged, beacuse when the root trifurcation was resolved, # an edge of length zero was added to connect the MRCA (Most Recent Common # Ancestor) of the ingroup. # The edge lengths are stored in the phylo object: apsTree$edge.length # ... and you can assign a small arbitrary value to the edge # to show how it connects to the tree without having an # overlap. apsTree$edge.length[1] <- 0.1 plot(apsTree, cex=0.7) nodelabels(text="MRCA", node=12, cex=0.5, adj=0.1, bg="#ff8866") # This procedure does however not assign an actual length to a root edge, and # therefore no root edge is visible on the plot. Why? , you might ask. I ask # myself that too. We'll just add a length by hand. apsTree$root.edge <- mean(apsTree$edge.length) * 1.5 plot(apsTree, cex=0.7, root.edge=TRUE) nodelabels(text="MRCA", node=12, cex=0.5, adj=0.8, bg="#ff8866") # = 1.1 Rotating Clades # To interpret the tree, it is useful to rotate the clades so that they appear # in the order expected from the cladogram of species. # We can either rotate around individual internal nodes ... layout(matrix(1:2, 1, 2)) plot(apsTree, no.margin=TRUE, root.edge=TRUE) nodelabels(node=17, cex=0.7, bg="#ff8866") plot(rotate(apsTree, node=17), no.margin=TRUE, root.edge=TRUE) nodelabels(node=17, cex=0.7, bg="#88ff66") # Note that the species at the bottom of the clade descending from node # 17 is now plotted at the top. layout(matrix(1), widths=1.0, heights=1.0) # ... or we can plot the tree so it corresponds as well as possible to a # predefined tip ordering. Here we use the ordering that phyloT has returned # for the species tree. # (Nb. we need to reverse the ordering for the plot. This is why we use the # expression [nOrg:1] below instead of using the vector directly.) nOrg <- length(apsTree$tip.label) layout(matrix(1:2, 1, 2)) plot(fungiTree, no.margin=TRUE, root.edge=TRUE) nodelabels(text=fungiTree$node.label, cex=0.5, adj=0.2, bg="#D4F2DA") plot(rotateConstr(apsTree, apsTree$tip.label[nOrg:1]), no.margin=TRUE, root.edge=TRUE) add.scale.bar(length=0.5) layout(matrix(1), widths=1.0, heights=1.0) # Study the two trees and consider their similarities and differences. What do # you expect? What do you find? Note that this is not a "mixed" gene tree yet, # since it contains only a single gene for the species we considered. All of the # branch points in this tree are speciation events. # [END]