2017-11-01 13:44:24 +00:00
|
|
|
# BIN-PHYLO-Tree_building.R
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
|
|
|
# Purpose: A Bioinformatics Course:
|
2017-11-01 13:44:24 +00:00
|
|
|
# R code accompanying the BIN-PHYLO-Tree_building unit.
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
2017-11-01 13:44:24 +00:00
|
|
|
# Version: 1.0
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
2017-11-01 13:44:24 +00:00
|
|
|
# Date: 2017 10. 31
|
2017-09-12 20:09:20 +00:00
|
|
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
|
|
|
#
|
|
|
|
# Versions:
|
2017-11-01 13:44:24 +00:00
|
|
|
# 1.0 First 2017 version
|
2017-09-12 20:09:20 +00:00
|
|
|
# 0.1 First code copied from 2016 material.
|
2017-11-01 13:44:24 +00:00
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
|
|
|
# TODO:
|
2017-11-02 03:30:01 +00:00
|
|
|
# Add MrBayes
|
|
|
|
# https://cran.r-project.org/web/packages/phangorn/vignettes/IntertwiningTreesAndNetworks.html
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
|
|
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
2017-11-01 13:44:24 +00:00
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
# If there are portions you don't understand, use R's help system, Google for an
|
|
|
|
# answer, or ask your instructor. Don't continue if you don't understand what's
|
|
|
|
# going on. That's not how it works ...
|
2017-11-01 13:44:24 +00:00
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
# ==============================================================================
|
|
|
|
|
|
|
|
|
2017-11-02 03:30:01 +00:00
|
|
|
#TOC> ==========================================================================
|
2017-11-23 12:19:31 +00:00
|
|
|
#TOC>
|
2017-11-02 03:30:01 +00:00
|
|
|
#TOC> Section Title Line
|
|
|
|
#TOC> -------------------------------------------------------
|
|
|
|
#TOC> 1 Calculating Trees 43
|
|
|
|
#TOC> 1.1 PROMLPATH ... 64
|
|
|
|
#TOC> 1.1.1 ... on the Mac 69
|
|
|
|
#TOC> 1.1.2 ... on Windows 80
|
|
|
|
#TOC> 1.1.3 ... on Linux 94
|
|
|
|
#TOC> 1.1.4 Confirming PROMLPATH 99
|
2017-11-23 12:19:31 +00:00
|
|
|
#TOC> 1.2 Building a maximum likelihood tree 108
|
|
|
|
#TOC>
|
2017-11-02 03:30:01 +00:00
|
|
|
#TOC> ==========================================================================
|
|
|
|
|
2017-11-01 13:44:24 +00:00
|
|
|
|
2017-11-02 03:30:01 +00:00
|
|
|
# = 1 Calculating Trees ===================================================
|
2017-11-01 13:44:24 +00:00
|
|
|
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
# Follow the instructions found at phylip's home on the Web to install. If you
|
|
|
|
# are on a Windows computer, take note of the installation directory.
|
|
|
|
|
|
|
|
# After you have installed Phylip on your computer, install the R package that
|
|
|
|
# provides an interface to the Phylip functions.
|
|
|
|
|
|
|
|
# Install Rphylip only if it is missing, then attach it.
# requireNamespace() tests whether the package is available without attaching
# it as a side effect of the test; the unconditional library() call afterwards
# stops with an error if the installation did not succeed.
if (! requireNamespace("Rphylip", quietly = TRUE)) {
  install.packages("Rphylip")
}
library(Rphylip)
|
2017-10-29 03:05:53 +00:00
|
|
|
# Package information:
|
|
|
|
# library(help = Rphylip) # basic information
|
|
|
|
# browseVignettes("Rphylip") # available vignettes
|
|
|
|
# data(package = "Rphylip") # available datasets
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
# This will install Rphylip, as well as its dependency, the package "ape".
|
|
|
|
|
2017-10-29 03:05:53 +00:00
|
|
|
|
2017-11-02 03:30:01 +00:00
|
|
|
# == 1.1 PROMLPATH ... =====================================================
|
2017-09-12 20:09:20 +00:00
|
|
|
# The next part may be tricky. You will need to figure out where
|
|
|
|
# on your computer Phylip has been installed and define the path
|
|
|
|
# to the proml program that calculates a maximum-likelihood tree.
|
|
|
|
|
2017-11-23 12:19:31 +00:00
|
|
|
# === 1.1.1 ... on the Mac
|
2017-09-12 20:09:20 +00:00
|
|
|
# On the Mac, the standard installation places a phylip folder
|
|
|
|
# in the /Applications directory. That folder contains all the
|
|
|
|
# individual phylip programs as <name>.app files. These are not
|
|
|
|
# the actual executables, but "app" files are actually directories
|
|
|
|
# that contain the required resources for a program to run.
|
|
|
|
|
|
|
|
# The executable is in a subdirectory and you can point Rphylip
|
|
|
|
# directly to that subdirectory to find the program it needs:
|
|
|
|
# PROMLPATH <- "/Applications/phylip-3.695/exe/proml.app/Contents/MacOS"
|
|
|
|
|
2017-11-23 12:19:31 +00:00
|
|
|
# === 1.1.2 ... on Windows
|
2017-09-12 20:09:20 +00:00
|
|
|
# On Windows you need to know where the programs have been installed, and you
|
|
|
|
# need to specify a path that is correct for the Windows OS. Find the folder
|
|
|
|
# that is named "exe", and right-click to inspect its properties. The path
|
|
|
|
# should be listed among them.
|
|
|
|
|
|
|
|
# If the path looks like "C:\Users\Meng\Programs\phylip-3.695\exe", then your
|
|
|
|
# assignment has to be
|
|
|
|
# PROMLPATH <- "C:/Users/Meng/Programs/phylip-3.695/exe"
|
|
|
|
# (Note: "/", not "\")
|
|
|
|
|
|
|
|
# I have heard that your path must not contain spaces, and it is prudent to
|
|
|
|
# avoid other special characters as well.
|
|
|
|
|
2017-11-23 12:19:31 +00:00
|
|
|
# === 1.1.3 ... on Linux
|
2017-09-12 20:09:20 +00:00
|
|
|
# If you are running Linux I trust you know what to do. It's probably
|
|
|
|
# something like
|
|
|
|
# PROMLPATH <- "/usr/local/phylip-3.695/bin"
|
|
|
|
|
2017-11-23 12:19:31 +00:00
|
|
|
# === 1.1.4 Confirming PROMLPATH
|
2017-09-12 20:09:20 +00:00
|
|
|
# Confirm that the settings are right.
|
|
|
|
# NOTE: the PROMLPATH assignments in sections 1.1.1 - 1.1.3 are commented out.
# Uncomment and adapt the one for your operating system before running these
# lines, otherwise PROMLPATH is not defined. Run the lines one at a time and
# inspect what each one prints.
PROMLPATH # returns the path
|
|
|
|
list.dirs(PROMLPATH) # returns the directories in that path
|
2017-11-01 13:44:24 +00:00
|
|
|
list.files(PROMLPATH) # lists the files [1] "proml" "proml.command"
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
# If "proml" is NOT among the files that the last command returns, you
|
2017-11-01 13:44:24 +00:00
|
|
|
# can't continue. Ask on the mailing list for advice.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-11-23 12:19:31 +00:00
|
|
|
# == 1.2 Building a maximum likelihood tree ================================
|
2017-11-01 13:44:24 +00:00
|
|
|
# Now read the mfa file you have saved in the BIN-PHYLO-Data_preparation unit,
|
|
|
|
# as a "proseq" object with the read.protein() function of the Rphylip package:
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-11-01 13:44:24 +00:00
|
|
|
# The file name is a relative path, so it is resolved against the current
# working directory (check with getwd() if the file is not found).
apsIn <- read.protein("APSESphyloSet.mfa")
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
# ... and you are ready to build a tree.
|
|
|
|
|
2017-11-02 03:30:01 +00:00
|
|
|
# There are many fast options in PHYLIP - we will use the most _accurate_ one
|
|
|
|
# that it has: proml, a maximum-likelihood tree building program for protein
|
|
|
|
# data.
|
|
|
|
|
2017-09-12 20:09:20 +00:00
|
|
|
# Building maximum-likelihood trees can eat as much computer time
|
|
|
|
# as you can throw at it. Calculating a tree of 48 APSES domains
|
|
|
|
# with default parameters of Rproml() runs for more than half a day
|
|
|
|
# on my computer. But we have only twelve sequences here, so the
|
2017-11-02 03:30:01 +00:00
|
|
|
# process will take us about 5 to 10 minutes. Run this, and enjoy a good cup
|
|
|
|
# of coffee while you are waiting.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
# Rproml() is handed the sequences in apsIn and the directory in which the
# proml executable lives (PROMLPATH); the result is kept in apsTree so that
# it can be plotted and saved below.
apsTree <- Rproml(apsIn, path=PROMLPATH)
|
|
|
|
|
|
|
|
# A quick first look:
|
|
|
|
|
|
|
|
# Draws the tree stored in apsTree on the active graphics device
# (presumably via the plot method that "ape" provides for trees - confirm).
plot(apsTree)
|
|
|
|
|
2017-11-01 13:44:24 +00:00
|
|
|
# save your tree:
|
|
|
|
# The file name is a relative path, so the tree is written to the current
# working directory; load("APSEStreeRproml.RData") restores apsTree under
# the same name in a later session.
save(apsTree, file = "APSEStreeRproml.RData")
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-11-01 13:44:24 +00:00
|
|
|
# If this did not work, ask for advice.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# [END]
|