add fetchMSAmotif() function

This commit is contained in:
hyginn 2017-11-01 09:43:32 -04:00
parent d3c42da51b
commit 7cc2853d00
2 changed files with 57 additions and 6 deletions

View File

@ -133,6 +133,50 @@ waitTimer <- function(t, nIntervals = 50) {
}
fetchMSAmotif <- function(ali, mot) {
# retrieve a subset from ali that spans the sequence in mot.
# Parameters:
# ali MsaAAMultipleAlignment object
# mot chr substring within ali
# Value: AAStringset
if (class(ali) != "MsaAAMultipleAlignment" &&
class(ali) != "MsaDNAMultipleAlignment" &&
class(ali) != "MsaRNAMultipleAlignment") {
stop("ali has to be an msa multiple alignment object.")
}
if (class(mot) != "character") {
stop("mot has to be a character object.")
}
x <- gsub("-", "", as.character(ali)) # pure sequence, no hyphens
idx <- grep(mot, x)[1] # first sequence containing mot. If no match,
# idx becomes NA
if (is.na(idx)) {
stop("mot is not a subsequence in ali.")
}
# Find the match range
m <- regexpr(mot, x[idx])
motifStart <- as.numeric(m)
motifEnd <- attr(m, "match.length") + motifStart - 1
# Count characters, skip hyphens ...
x <- unlist(strsplit(as.character(ali)[idx], ""))
x <- x != "-"
x <- as.numeric(x)
x <- cumsum(x)
return(subseq(ali@unmasked,
start = which(x == motifStart)[1], # get the first position
end = which(x == motifEnd)[1]))
}
# ====== PDB ID selection ======================================================
selectPDBrep <- function(n) {

View File

@ -3,12 +3,13 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-MSA unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 23
# Date: 2017 10
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Added fetchMSAmotif()
# 1.0 Fully refactored and rewritten for 2017
# 0.1 First code copied from 2016 material.
#
@ -53,7 +54,7 @@
# You need to reload you protein database, including changes that might
# have been made to the reference files. If you have worked with the
# prerequiste units, you should have a script named "makeProteinDB.R"
# that will create the myDB object with aprotein and feature database.
# that will create the myDB object with a protein and feature database.
# Ask for advice if not.
source("makeProteinDB.R")
@ -587,6 +588,12 @@ x <- cumsum(x)
(motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd))
# Packaging this into a function is convenient to have, therefore I have added
# such a function to the .utilities.R script: fetchMSAmotif(). Try it:
wing <- "HEKVQGGFGKYQGTWV" # the MBP1_SACCE APSES "wing" sequence
writeALN(fetchMSAmotif(msaM, wing))
# == 6.2 Plot a Sequence Logo ==============================================