maintenance, update file locations, minor bugs

This commit is contained in:
hyginn 2020-09-26 01:07:49 +10:00
parent 092494d9de
commit 866a1c0e8f

View File

@ -1,20 +1,15 @@
# tocID <- "BIN-ALI-MSA.R" # tocID <- "BIN-ALI-MSA.R"
# #
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work wih this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
#
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-MSA unit. # R code accompanying the BIN-ALI-MSA unit.
# #
# Version: 1.2 # Version: 1.3
# #
# Date: 2017 10 - 2019 01 # Date: 2017-10 - 2020-09
# Author: Boris Steipe (boris.steipe@utoronto.ca) # Author: Boris Steipe (boris.steipe@utoronto.ca)
# #
# Versions: # Versions:
# 1.3 2020 updates
# 1.2 Change from require() to requireNamespace(), # 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout # use <package>::<function>() idiom throughout
# 1.1 Added fetchMSAmotif() # 1.1 Added fetchMSAmotif()
@ -35,25 +30,25 @@
#TOC> ========================================================================== #TOC> ==========================================================================
#TOC> #TOC>
#TOC> Section Title Line #TOC> Section Title Line
#TOC> ------------------------------------------------------------------ #TOC> ------------------------------------------------------------------
#TOC> 1 Preparations 54 #TOC> 1 Preparations 55
#TOC> 2 Aligning full length MBP1 proteins 96 #TOC> 2 Aligning full length MBP1 proteins 97
#TOC> 2.1 Preparing Sequences 107 #TOC> 2.1 Preparing Sequences 108
#TOC> 2.2 Compute the MSA 132 #TOC> 2.2 Compute the MSA 133
#TOC> 3 Analyzing an MSA 153 #TOC> 3 Analyzing an MSA 154
#TOC> 4 Comparing MSAs 224 #TOC> 4 Comparing MSAs 225
#TOC> 4.1 Importing an alignment to msa 233 #TOC> 4.1 Importing an alignment to msa 234
#TOC> 4.1.1 importing an .aln file 242 #TOC> 4.1.1 importing an .aln file 243
#TOC> 4.1.2 Creating an MsaAAMultipleAlignment object 273 #TOC> 4.1.2 Creating an MsaAAMultipleAlignment object 274
#TOC> 4.2 More alignments 324 #TOC> 4.2 More alignments 325
#TOC> 4.3 Computing comparison metrics 336 #TOC> 4.3 Computing comparison metrics 337
#TOC> 5 Profile-Profile alignments 473 #TOC> 5 Profile-Profile alignments 475
#TOC> 6 Sequence Logos 546 #TOC> 6 Sequence Logos 548
#TOC> 6.1 Subsetting an alignment by motif 555 #TOC> 6.1 Subsetting an alignment by motif 557
#TOC> 6.2 Plot a Sequence Logo 604 #TOC> 6.2 Plot a Sequence Logo 606
#TOC> #TOC>
#TOC> ========================================================================== #TOC> ==========================================================================
@ -211,9 +206,9 @@ for (i in seq_along(highScoringRanges$lengths)) {
# threshold (experiment with that a bit), but once we are satisfied, we can use # threshold (experiment with that a bit), but once we are satisfied, we can use
# the boundaries to print the MSA alignments separately for domains. # the boundaries to print the MSA alignments separately for domains.
# Unfortunately the msa package provides no native way to extract blocks of the # Unfortunately the msa package provides no native way to extract blocks of the
# alignment for further processing, but your .utilities file loads a function to # alignment for further processing, but your .utilities.R script contains a
# write alignment objects and sequence sets to .aln formatted output. Have a # function to write alignment objects and sequence sets to .aln formatted
# look: # output. Have a look:
writeALN writeALN
@ -245,7 +240,7 @@ for (i in seq_along(highScoringRanges$lengths)) {
# - adjust the sequence names # - adjust the sequence names
# - convert to msaAAMultipleAlignment object # - convert to msaAAMultipleAlignment object
# === 4.1.1 importing an .aln file # === 4.1.1 importing an .aln file
# The seqinr package has a function to read CLUSTAL W formatted .aln files ... # The seqinr package has a function to read CLUSTAL W formatted .aln files ...
if (! requireNamespace("seqinr", quietly=TRUE)) { if (! requireNamespace("seqinr", quietly=TRUE)) {
@ -391,6 +386,7 @@ boxplot(list(CLUSTAL.W = msaWScores,
CLUSTAL.O = msaOScores, CLUSTAL.O = msaOScores,
T.COFFEE = msaTScores, T.COFFEE = msaTScores,
MUSCLE = msaMScores), MUSCLE = msaMScores),
cex.axis = 0.8,
col = c("#7D556488", "#74628F88", "#5E78A188", "#3DAEB288")) col = c("#7D556488", "#74628F88", "#5E78A188", "#3DAEB288"))
# CLUSTAL W and CLUSTAL O don't look all that different. T-Coffee tends to have # CLUSTAL W and CLUSTAL O don't look all that different. T-Coffee tends to have
@ -503,7 +499,7 @@ DECIPHER::AlignProfiles(msaW@unmasked, msaM@unmasked)
m <- as.character(msaM) m <- as.character(msaM)
w <- as.character(msaW)[names(m)] w <- as.character(msaW)[names(m)]
(ppa <- DECIPHER::AlignProfiles(msa::AAStringSet(w), msa::AAStringSet(m))) (ppa <- DECIPHER::AlignProfiles(AAStringSet(w), AAStringSet(m)))
# Conveniently, AlignProfiles() returns an AAStringSet, so we can use our # Conveniently, AlignProfiles() returns an AAStringSet, so we can use our
# writeALN function to show it. Here is an arbitrary block, from somewhere in # writeALN function to show it. Here is an arbitrary block, from somewhere in
@ -609,12 +605,12 @@ writeALN(fetchMSAmotif(msaM, wing))
# == 6.2 Plot a Sequence Logo ============================================== # == 6.2 Plot a Sequence Logo ==============================================
# There are now several good options to plot sequence logos in R, these include # The Bioconductor seqLogo:: packager handles only DNA sequences, but there are
# dagLogo, DiffLogo, Logolas, and motifStack. For our example we will use # now several good options to plot sequence logos in R, these include dagLogo,
# ggseqlogo written by by Omar Waghi, a former UofT BCB student who is now at # DiffLogo, Logolas, and motifStack. For our example we will use ggseqlogo
# the EBI. # written by by Omar Waghi, a former UofT BCB student who is now at the EBI.
if (! requireNamspace("ggseqlogo", quietly=TRUE)) { if (! requireNamespace("ggseqlogo", quietly=TRUE)) {
install.packages(("ggseqlogo")) install.packages(("ggseqlogo"))
} }
# Package information: # Package information: