MSA unit, and supporting files
This commit is contained in:
135
scripts/ABC-writeALN.R
Normal file
135
scripts/ABC-writeALN.R
Normal file
@@ -0,0 +1,135 @@
|
||||
# ABC-writeALN.R
|
||||
#
|
||||
# ToDo: calculate consensus line
|
||||
# append sequence numbers
|
||||
# Notes:
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
writeALN <- function(ali,
|
||||
range,
|
||||
note = "",
|
||||
myCon = stdout(),
|
||||
blockWidth = 60) {
|
||||
# Purpose:
|
||||
# Write an MsaAAMultipleAlignment or AAStringSet object to stdout() or
|
||||
# a file in multi-FASTA format.
|
||||
# Version: 2.0
|
||||
# Date: 2017 10
|
||||
# Author: Boris Steipe
|
||||
#
|
||||
# Parameters:
|
||||
# ali MsaAAMultipleAlignment or AAStringSet or character
|
||||
# vector.
|
||||
# range num a two-integer vector of start and end positions if
|
||||
# only a range of the MSA should be written, e.g.
|
||||
# a domain. Defaults to the full alignment length.
|
||||
# note chr a vector of character that is appended to the name
|
||||
# of a sequence in the FASTA header. Recycling of
|
||||
# shorter vectors applies, thus a vector of length one
|
||||
# is added to all headers.
|
||||
# myCon a connection (cf. the con argument for writeLines).
|
||||
# Defaults to stdout()
|
||||
# blockWidth int width of sequence block. Default 80 characters.
|
||||
# Value:
|
||||
# NA the function is invoked for its side effect of printing an
|
||||
# alignment to stdout() or file.
|
||||
|
||||
blockWidth <- as.integer(blockWidth)
|
||||
if (is.na(blockWidth)) {
|
||||
stop("PANIC: parameter \"blockWidth\" must be numeric.")
|
||||
}
|
||||
if (blockWidth < 1) {
|
||||
stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
|
||||
}
|
||||
if (blockWidth > 60) {
|
||||
stop("PANIC: \"blockWidth\" for CLUSTAL format can't be greater than 60.")
|
||||
}
|
||||
|
||||
# Extract the raw data from the objects depending on their respective class
|
||||
# and put it into a named vector of strings.
|
||||
|
||||
# Extract XStringSet from MsaXMultipleAlignment ...
|
||||
if (class(ali) == "MsaAAMultipleAlignment" |
|
||||
class(ali) == "MsaDNAMultipleAlignment" |
|
||||
class(ali) == "MsaRNAMultipleAlignment") {
|
||||
ali <- ali@unmasked
|
||||
}
|
||||
|
||||
# Process XStringSet
|
||||
if (class(ali) == "AAStringSet" |
|
||||
class(ali) == "DNAStringSet" |
|
||||
class(ali) == "RNAStringSet") {
|
||||
sSet <- as.character(ali) # we use as.character(), not toString() thus
|
||||
# we don't _have_ to load Biostrings
|
||||
} else if (class(ali) == "character") {
|
||||
sSet <- ali
|
||||
} else {
|
||||
stop(paste("Input object of class",
|
||||
class(ali),
|
||||
"can't be handled by this function."))
|
||||
}
|
||||
|
||||
if (missing(range)) {
|
||||
range <- 1
|
||||
range[2] <- max(nchar(sSet))
|
||||
} else {
|
||||
range <- as.integer(range)
|
||||
if(length(range) != 2 ||
|
||||
any(is.na(range)) ||
|
||||
range[1] > range[2] ||
|
||||
range[1] < 1) {
|
||||
stop("PANIC: \"range\" parameter must contain valid start and end index.")
|
||||
}
|
||||
}
|
||||
|
||||
# Right-pad any sequence with "-" that is shorter than ranges[2]
|
||||
for (i in seq_along(sSet)) {
|
||||
if (nchar(sSet[i]) < range[2]) {
|
||||
sSet[i] <- paste0(sSet[i],
|
||||
paste0(rep("-", range[2] - nchar(sSet[i])),
|
||||
collapse = ""))
|
||||
}
|
||||
}
|
||||
|
||||
# Right-pad sequence names
|
||||
sNames <- names(sSet)
|
||||
len <- max(nchar(sNames)) + 2 # longest name plus two spaces
|
||||
for (i in seq_along(sNames)) {
|
||||
sNames[i] <- paste0(sNames[i],
|
||||
paste0(rep(" ", len - nchar(sNames[i])),
|
||||
collapse = ""))
|
||||
}
|
||||
|
||||
|
||||
# Process each sequence
|
||||
txt <- paste0("CLUSTAL W format. ", note)
|
||||
txt[2] <- ""
|
||||
|
||||
iStarts <- seq(range[1], range[2], by = blockWidth)
|
||||
iEnds <- c((iStarts[-1] - 1), range[2])
|
||||
|
||||
for (i in seq_along(iStarts)) {
|
||||
for (j in seq_along(sSet)) {
|
||||
txt <- c(txt,
|
||||
paste0(sNames[j], substring(sSet[j], iStarts[i], iEnds[i])))
|
||||
}
|
||||
txt <- c(txt, "") # append a blank consenus line
|
||||
txt <- c(txt, "") # append a separator line
|
||||
}
|
||||
|
||||
writeLines(txt, con= myCon)
|
||||
|
||||
}
|
||||
|
||||
# ==== TESTS =================================================================
|
||||
# Enter your function tests here...
|
||||
|
||||
if (FALSE) {
|
||||
# test ...
|
||||
}
|
||||
|
||||
|
||||
|
||||
# [END]
|
115
scripts/ABC-writeMFA.R
Normal file
115
scripts/ABC-writeMFA.R
Normal file
@@ -0,0 +1,115 @@
|
||||
# ABC-writeMFA.R
|
||||
#
|
||||
# ToDo:
|
||||
# Notes:
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
writeMFA <- function(ali,
|
||||
range,
|
||||
note = "",
|
||||
myCon = stdout(),
|
||||
blockWidth = 80) {
|
||||
# Purpose:
|
||||
# Write an MsaAAMultipleAlignment or AAStringSet object to stdout() or
|
||||
# a file in multi-FASTA format.
|
||||
# Version: 2.0
|
||||
# Date: 2017 10
|
||||
# Author: Boris Steipe
|
||||
#
|
||||
# Parameters:
|
||||
# ali MsaAAMultipleAlignment or AAStringSet or character
|
||||
# vector
|
||||
# range num a two-integer vector of start and end positions if
|
||||
# only a range of the MSA should be written, e.g.
|
||||
# a domain. Defaults to the full sequence length.
|
||||
# note chr a vector of character that is appended to the name
|
||||
# of a sequence in the FASTA header. Recycling of
|
||||
# shorter vectors applies, thus a vector of length one
|
||||
# is added to all headers.
|
||||
# myCon a connection (cf. the con argument for writeLines).
|
||||
# Defaults to stdout()
|
||||
# blockWidth int width of sequence block. Default 80 characters.
|
||||
# Value:
|
||||
# NA the function is invoked for its side effect of printing an
|
||||
# alignment to stdout() or file.
|
||||
|
||||
blockWidth <- as.integer(blockWidth)
|
||||
if (is.na(blockWidth)) {
|
||||
stop("PANIC: parameter \"blockWidth\" must be numeric.")
|
||||
}
|
||||
if (blockWidth < 1){
|
||||
stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
|
||||
}
|
||||
|
||||
# Extract the raw data from the objects depending on their respective class
|
||||
# and put it into a named vector of strings.
|
||||
|
||||
# Extract XStringSet from MsaXMultipleAlignment ...
|
||||
if (class(ali) == "MsaAAMultipleAlignment" |
|
||||
class(ali) == "MsaDNAMultipleAlignment" |
|
||||
class(ali) == "MsaRNAMultipleAlignment") {
|
||||
ali <- ali@unmasked
|
||||
}
|
||||
|
||||
# Process XStringSet
|
||||
if (class(ali) == "AAStringSet" |
|
||||
class(ali) == "DNAStringSet" |
|
||||
class(ali) == "RNAStringSet") {
|
||||
sSet <- as.character(ali) # we use as.character(), not toString() thus
|
||||
# we don't _have_ to load Biostrings
|
||||
} else if (class(ali) == "character") {
|
||||
sSet <- ali
|
||||
} else {
|
||||
stop(paste("Input object of class",
|
||||
class(ali),
|
||||
"can't be handled by this function."))
|
||||
}
|
||||
|
||||
if (missing(range)) {
|
||||
range <- 1
|
||||
range[2] <- max(nchar(sSet))
|
||||
} else {
|
||||
range <- as.integer(range)
|
||||
if(length(range) != 2 ||
|
||||
any(is.na(range)) ||
|
||||
range[1] > range[2] ||
|
||||
range[1] < 1) {
|
||||
stop("PANIC: \"range\" parameter must contain valid start and end index.")
|
||||
}
|
||||
}
|
||||
|
||||
# Process each sequence
|
||||
txt <- character()
|
||||
headers <- paste(names(sSet), note)
|
||||
for (i in seq_along(sSet)) {
|
||||
|
||||
# output FASTA header
|
||||
txt <- c(txt, sprintf(">%s", headers[i]))
|
||||
|
||||
# output the sequence in blocks of blockWidth per line ...
|
||||
iStarts <- seq(range[1], range[2], by = blockWidth)
|
||||
iEnds <- c((iStarts[-1] - 1), range[2])
|
||||
|
||||
thisSeq <- substring(sSet[i], iStarts, iEnds) # collect all blocks
|
||||
thisSeq <- thisSeq[! nchar(thisSeq) == 0] # drop empty blocks
|
||||
txt <- c(txt, thisSeq)
|
||||
|
||||
txt <- c(txt, "") # append an empty line for readability
|
||||
}
|
||||
|
||||
writeLines(txt, con= myCon)
|
||||
|
||||
}
|
||||
|
||||
# ==== TESTS =================================================================
|
||||
# Enter your function tests here...
|
||||
|
||||
if (FALSE) {
|
||||
# test ...
|
||||
}
|
||||
|
||||
|
||||
|
||||
# [END]
|
Reference in New Issue
Block a user