add fetchMSAmotif() function
This commit is contained in:
parent
d3c42da51b
commit
7cc2853d00
44
.utilities.R
44
.utilities.R
@ -133,6 +133,50 @@ waitTimer <- function(t, nIntervals = 50) {
|
||||
}
|
||||
|
||||
|
||||
fetchMSAmotif <- function(ali, mot) {
  # Retrieve the subset of alignment columns from ali that spans the
  # sequence in mot.
  # Parameters:
  #    ali   MsaAAMultipleAlignment, MsaDNAMultipleAlignment or
  #            MsaRNAMultipleAlignment object
  #    mot   chr  a single, non-empty substring of one of the sequences in
  #               ali (gaps ignored). It is matched literally, NOT as a
  #               regular expression.
  # Value:   the result of subseq() applied to ali@unmasked, restricted to
  #            the alignment columns from the first to the last character of
  #            the motif (an AAStringSet for an amino acid alignment).
  # Errors:  stop() is called if ali is not an msa alignment object, if mot
  #            is not a single non-empty string, or if mot does not occur in
  #            any of the de-gapped sequences.

  # inherits() is safe for objects whose class vector has more than one
  # element; comparing class() with != inside && is not.
  if (! inherits(ali, c("MsaAAMultipleAlignment",
                        "MsaDNAMultipleAlignment",
                        "MsaRNAMultipleAlignment"))) {
    stop("ali has to be an msa multiple alignment object.")
  }

  if (! is.character(mot)) {
    stop("mot has to be a character object.")
  }

  # An empty pattern would "match" every sequence with length zero and
  # produce a meaningless column range, so reject it up front.
  if (length(mot) != 1 || is.na(mot) || nchar(mot) == 0) {
    stop("mot has to be a single, non-empty string.")
  }

  aliChars <- as.character(ali)                       # one string per row
  ungapped <- gsub("-", "", aliChars, fixed = TRUE)   # pure sequence, no hyphens

  # First sequence containing mot. fixed = TRUE: mot is a literal sequence,
  # not a regular expression. If there is no match, idx becomes NA.
  idx <- grep(mot, ungapped, fixed = TRUE)[1]
  if (is.na(idx)) {
    stop("mot is not a subsequence in ali.")
  }

  # Find the match range in the de-gapped sequence
  m <- regexpr(mot, ungapped[idx], fixed = TRUE)
  motifStart <- as.integer(m)
  motifEnd <- motifStart + attr(m, "match.length") - 1L

  # Count characters, skip hyphens: resCount[i] is the number of non-gap
  # characters in alignment columns 1..i of the matching row.
  resCount <- cumsum(strsplit(aliChars[idx], "")[[1]] != "-")

  # The FIRST column at which the running count reaches a residue number is
  # the column that holds that residue (later columns with the same count
  # are trailing gaps).
  return(subseq(ali@unmasked,
                start = match(motifStart, resCount),
                end   = match(motifEnd, resCount)))
}
|
||||
|
||||
|
||||
|
||||
|
||||
# ====== PDB ID selection ======================================================
|
||||
|
||||
selectPDBrep <- function(n) {
|
||||
|
@ -3,12 +3,13 @@
|
||||
# Purpose: A Bioinformatics Course:
|
||||
# R code accompanying the BIN-ALI-MSA unit.
|
||||
#
|
||||
# Version: 1.0
|
||||
# Version: 1.1
|
||||
#
|
||||
# Date: 2017 10 23
|
||||
# Date: 2017 10
|
||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||
#
|
||||
# Versions:
|
||||
# 1.1 Added fetchMSAmotif()
|
||||
# 1.0 Fully refactored and rewritten for 2017
|
||||
# 0.1 First code copied from 2016 material.
|
||||
#
|
||||
@ -587,6 +588,12 @@ x <- cumsum(x)
|
||||
|
||||
(motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd))
|
||||
|
||||
# It is convenient to have this packaged as a function, so I have added
|
||||
# such a function to the .utilities.R script: fetchMSAmotif(). Try it:
|
||||
|
||||
# Define the motif to look up, then extract the alignment slice that spans it.
wing <- "HEKVQGGFGKYQGTWV" # the MBP1_SACCE APSES "wing" sequence
|
||||
# The value returned by fetchMSAmotif() is passed straight to writeALN()
# (defined elsewhere; presumably it prints the alignment -- confirm).
writeALN(fetchMSAmotif(msaM, wing))
|
||||
|
||||
|
||||
# == 6.2 Plot a Sequence Logo ==============================================
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user