Update: FASTA files may contain gap and stop characters
This commit is contained in:
parent
d9e7813d27
commit
153e7dcf00
@ -33,7 +33,7 @@ if (!require(xml2, quietly = TRUE)) {
|
|||||||
dbSanitizeSequence <- function(s, unambiguous = TRUE) {
|
dbSanitizeSequence <- function(s, unambiguous = TRUE) {
|
||||||
# Remove FASTA header lines, if any,
|
# Remove FASTA header lines, if any,
|
||||||
# flatten any structure that s has,
|
# flatten any structure that s has,
|
||||||
# remove all non-letters,
|
# remove all non-letters except "-" (gap) and "*" (stop),
|
||||||
# convert to uppercase.
|
# convert to uppercase.
|
||||||
#
|
#
|
||||||
# Parameters:
|
# Parameters:
|
||||||
@ -51,7 +51,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
|
|||||||
s <- unlist(strsplit(s, "\n")) # split up at linebreaks, if any
|
s <- unlist(strsplit(s, "\n")) # split up at linebreaks, if any
|
||||||
s <- s[! grepl("^>", s)] # drop all lines beginning with ">" (FASTA header)
|
s <- s[! grepl("^>", s)] # drop all lines beginning with ">" (FASTA header)
|
||||||
s <- paste(s, collapse="") # combine into single string
|
s <- paste(s, collapse="") # combine into single string
|
||||||
s <- toupper(gsub("[^a-zA-Z]", "", s))
|
s <- toupper(gsub("[^a-zA-Z*-]", "", s))
|
||||||
if (unambiguous) {
|
if (unambiguous) {
|
||||||
amb <- "([bjouxzBJOUXZ])" # parentheses capture the match
|
amb <- "([bjouxzBJOUXZ])" # parentheses capture the match
|
||||||
ambChar <- unlist(regmatches(s, regexec(amb, s)))[1]
|
ambChar <- unlist(regmatches(s, regexec(amb, s)))[1]
|
||||||
|
Loading…
Reference in New Issue
Block a user