add fetchMSAmotif() function
This commit is contained in:
parent
d3c42da51b
commit
7cc2853d00
44
.utilities.R
44
.utilities.R
@ -133,6 +133,50 @@ waitTimer <- function(t, nIntervals = 50) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fetchMSAmotif <- function(ali, mot) {
  # Retrieve the block of alignment columns from ali that spans the motif mot.
  #
  # The motif is searched for in the gap-free sequences of the alignment
  # (all "-" characters removed). The first row that contains the motif
  # defines the column range; that range is then cut from every row.
  #
  # Parameters:
  #    ali   an MsaAAMultipleAlignment, MsaDNAMultipleAlignment or
  #          MsaRNAMultipleAlignment object
  #    mot   chr  a single, non-empty string to be found within a sequence
  #               of ali. It is used as the pattern of regexpr(), i.e. it is
  #               interpreted as a regular expression.
  # Value:   the result of subseq() applied to ali@unmasked for the matching
  #          columns (e.g. an AAStringSet for a protein alignment)

  if (!inherits(ali, c("MsaAAMultipleAlignment",
                       "MsaDNAMultipleAlignment",
                       "MsaRNAMultipleAlignment"))) {
    stop("ali has to be an msa multiple alignment object.")
  }

  if (!is.character(mot)) {
    stop("mot has to be a character object.")
  }

  # regexpr() would silently use only the first element of a longer vector,
  # and an empty or NA pattern cannot define a column range.
  if (length(mot) != 1 || is.na(mot) || !nzchar(mot)) {
    stop("mot has to be a single, non-empty string.")
  }

  # Work on the same object that is subset below, so that row and column
  # indices computed here are guaranteed to refer to ali@unmasked.
  aliRows <- as.character(ali@unmasked)

  # Match the motif against the pure sequences (no hyphens) of all rows at
  # once. regexpr() returns -1 for rows without a match. Zero-length matches
  # are treated as "no match" since they do not span any residue.
  m <- regexpr(mot, gsub("-", "", aliRows))
  idx <- which(m > 0 & attr(m, "match.length") > 0)[1]  # NA if none matches

  if (is.na(idx)) {
    stop("mot is not a subsequence in ali.")
  }

  # Match range in residue coordinates of the gap-free sequence
  motifStart <- as.numeric(m[idx])
  motifEnd <- attr(m, "match.length")[idx] + motifStart - 1

  # Map residue coordinates to alignment columns: the cumulative count of
  # non-gap characters gives, for every column, the number of residues seen
  # so far in row idx.
  nRes <- cumsum(unlist(strsplit(aliRows[idx], "")) != "-")

  # The first column that reaches a given count is the residue itself; any
  # later columns with the same count are gaps following it.
  subseq(ali@unmasked,
         start = which(nRes == motifStart)[1],
         end   = which(nRes == motifEnd)[1])
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ====== PDB ID selection ======================================================
|
# ====== PDB ID selection ======================================================
|
||||||
|
|
||||||
selectPDBrep <- function(n) {
|
selectPDBrep <- function(n) {
|
||||||
|
@ -3,12 +3,13 @@
|
|||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-ALI-MSA unit.
|
# R code accompanying the BIN-ALI-MSA unit.
|
||||||
#
|
#
|
||||||
# Version: 1.0
|
# Version: 1.1
|
||||||
#
|
#
|
||||||
# Date: 2017 10 23
|
# Date: 2017 10
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.1 Added fetchMSAmotif()
|
||||||
# 1.0 Fully refactored and rewritten for 2017
|
# 1.0 Fully refactored and rewritten for 2017
|
||||||
# 0.1 First code copied from 2016 material.
|
# 0.1 First code copied from 2016 material.
|
||||||
#
|
#
|
||||||
@ -53,7 +54,7 @@
|
|||||||
# You need to reload your protein database, including changes that might
|
# You need to reload your protein database, including changes that might
|
||||||
# have been made to the reference files. If you have worked with the
|
# have been made to the reference files. If you have worked with the
|
||||||
# prerequisite units, you should have a script named "makeProteinDB.R"
|
# prerequisite units, you should have a script named "makeProteinDB.R"
|
||||||
# that will create the myDB object with aprotein and feature database.
|
# that will create the myDB object with a protein and feature database.
|
||||||
# Ask for advice if not.
|
# Ask for advice if not.
|
||||||
source("makeProteinDB.R")
|
source("makeProteinDB.R")
|
||||||
|
|
||||||
@ -587,6 +588,12 @@ x <- cumsum(x)
|
|||||||
|
|
||||||
(motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd))
|
(motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd))
|
||||||
|
|
||||||
|
# Packaging this into a function is convenient to have, therefore I have added
|
||||||
|
# such a function to the .utilities.R script: fetchMSAmotif(). Try it:
|
||||||
|
|
||||||
|
wing <- "HEKVQGGFGKYQGTWV" # the MBP1_SACCE APSES "wing" sequence
|
||||||
|
writeALN(fetchMSAmotif(msaM, wing))
|
||||||
|
|
||||||
|
|
||||||
# == 6.2 Plot a Sequence Logo ==============================================
|
# == 6.2 Plot a Sequence Logo ==============================================
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user