updates; vectorized dbFetchUniProtSeq()
This commit is contained in:
parent
2775c7c9a8
commit
36140bc984
@ -1,20 +1,16 @@
|
|||||||
# tocID <- "RPR-UniProt_GET.R"
|
# tocID <- "RPR-UniProt_GET.R"
|
||||||
#
|
#
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
# PATIENCE ... #
|
|
||||||
# Do not yet work wih this code. Updates in progress. Thank you. #
|
|
||||||
# boris.steipe@utoronto.ca #
|
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
#
|
|
||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the RPR-Scripting_data_downloads unit.
|
# R code accompanying the RPR-Scripting_data_downloads unit.
|
||||||
#
|
#
|
||||||
# Version: 1.1
|
# Version: 1.2
|
||||||
#
|
#
|
||||||
# Date: 2017 10 - 2019 01
|
# Date: 2017-10 - 2020-09
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.2 2020 Maintenance. Made dbFetchUniProtSeq() vector-safe and
|
||||||
|
# added FASTA headers as attribute
|
||||||
# 1.1 Change from require() to requireNamespace(),
|
# 1.1 Change from require() to requireNamespace(),
|
||||||
# use <package>::<function>() idiom throughout
|
# use <package>::<function>() idiom throughout
|
||||||
# 1.0 First ABC units version
|
# 1.0 First ABC units version
|
||||||
@ -34,13 +30,13 @@
|
|||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------------------
|
#TOC> ----------------------------------------------------------
|
||||||
#TOC> 1 UniProt files via GET 41
|
#TOC> 1 UniProt files via GET 43
|
||||||
#TOC> 1.1 Task - fetchUniProtSeq() function 103
|
#TOC> 1.1 Task - fetchUniProtSeq() function 105
|
||||||
#TOC> 2 Task solutions 110
|
#TOC> 2 Task solutions 118
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
@ -51,7 +47,7 @@
|
|||||||
# FASTA sequence from UniProt. All we need is to construct an URL with the
|
# FASTA sequence from UniProt. All we need is to construct an URL with the
|
||||||
# correct UniProt ID.
|
# correct UniProt ID.
|
||||||
|
|
||||||
# An interface between R scripts and We=b servers is provided by the httr
|
# An interface between R scripts and Web servers is provided by the httr::
|
||||||
# package. This sends and receives information via the http protocol, just like
|
# package. This sends and receives information via the http protocol, just like
|
||||||
# a Web browser. Since this is a short and simple request, the GET verb is the
|
# a Web browser. Since this is a short and simple request, the GET verb is the
|
||||||
# right tool:
|
# right tool:
|
||||||
@ -108,21 +104,31 @@ httr::status_code(response)
|
|||||||
|
|
||||||
# == 1.1 Task - fetchUniProtSeq() function =================================
|
# == 1.1 Task - fetchUniProtSeq() function =================================
|
||||||
|
|
||||||
# Task: write a function that takes as input a UniProt ID, fetches the
|
# Task: write a function that
|
||||||
# FASTA sequence, returns only the sequence if the operation is successful, or
|
# - takes as input a vector of UniProt IDs,
|
||||||
# a vector of length 0 if there is an error.
|
# - fetches the FASTA sequence for each
|
||||||
|
# - returns a vector of the same length as the input, where an element is:
|
||||||
|
# - ... the sequence, if the query was successful
|
||||||
|
# - ... NA if there was an error
|
||||||
|
# - each element has the UniProt ID as the name()
|
||||||
|
# - bonus: the output has an attribute "headers" that is a vector of the
|
||||||
|
# FASTA headers ( cf. ?attr )
|
||||||
|
|
||||||
|
|
||||||
# = 2 Task solutions ======================================================
|
# = 2 Task solutions ======================================================
|
||||||
|
|
||||||
|
|
||||||
# I have placed such a function into the dbUtilities script: look it up by
|
# I have placed such a function - dbFetchUniProtSeq() - into
|
||||||
# clicking on dbFetchUniProtSeq() in the Environment pane.
|
# "./scripts/ABC-dbUtilities.R": look it up by clicking on dbFetchUniProtSeq()
|
||||||
|
# in the Environment pane.
|
||||||
# Test:
|
|
||||||
dbFetchUniProtSeq("P39678")
|
|
||||||
|
|
||||||
|
|
||||||
|
# Test this:
|
||||||
|
( x <- dbFetchUniProtSeq("P39678") )
|
||||||
|
names(x)[1]
|
||||||
|
attr(x, "headers")[1]
|
||||||
|
x[1]
|
||||||
|
cat(writeFASTA(data.frame(head = attr(x, "headers")[1], seq =x[1]),
|
||||||
|
width = 40), sep = "\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,11 +20,11 @@
|
|||||||
#TOC> 2.07 dbAddTaxonomy() 199
|
#TOC> 2.07 dbAddTaxonomy() 199
|
||||||
#TOC> 2.08 dbAddAnnotation() 215
|
#TOC> 2.08 dbAddAnnotation() 215
|
||||||
#TOC> 2.09 dbFetchUniProtSeq() 243
|
#TOC> 2.09 dbFetchUniProtSeq() 243
|
||||||
#TOC> 2.10 dbFetchPrositeFeatures() 267
|
#TOC> 2.10 dbFetchPrositeFeatures() 289
|
||||||
#TOC> 2.11 node2text() 311
|
#TOC> 2.11 node2text() 333
|
||||||
#TOC> 2.12 dbFetchNCBItaxData() 323
|
#TOC> 2.12 dbFetchNCBItaxData() 345
|
||||||
#TOC> 2.13 UniProtIDmap() 362
|
#TOC> 2.13 UniProtIDmap() 384
|
||||||
#TOC> 3 TESTS 401
|
#TOC> 3 TESTS 423
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -241,28 +241,50 @@ dbAddAnnotation <- function(db, jsonDF) {
|
|||||||
|
|
||||||
|
|
||||||
# == 2.09 dbFetchUniProtSeq() ==============================================
|
# == 2.09 dbFetchUniProtSeq() ==============================================
|
||||||
dbFetchUniProtSeq <- function(ID) {
|
dbFetchUniProtSeq <- function(IDs) {
|
||||||
# Fetch a protein sequence from UniProt.
|
# Fetch a protein sequence from UniProt.
|
||||||
# Parameters:
|
# Parameters:
|
||||||
# ID char a UniProt ID (accession number)
|
# IDs char a vector of UniProt IDs (accession number)
|
||||||
# Value:
|
# Value:
|
||||||
# char the sequence
|
# char a vector of the same length as ID. It contains
|
||||||
# If the operation is not successful, a 0-length string is returned
|
# sequences where the retrieval was successful, NA where
|
||||||
|
# it was not successful. The elements are named with
|
||||||
|
# the ID, the header lines are set as attribute "header"
|
||||||
|
|
||||||
URL <- sprintf("http://www.uniprot.org/uniprot/%s.fasta", ID)
|
|
||||||
|
|
||||||
response <- httr::GET(URL)
|
BASE <- "http://www.uniprot.org/uniprot/"
|
||||||
|
|
||||||
mySeq <- character()
|
sq <- character()
|
||||||
if (httr::status_code(response) == 200) {
|
hd <- character()
|
||||||
x <- as.character(response)
|
for (i in seq_along(IDs)) {
|
||||||
x <- strsplit(x, "\n")
|
URL <- sprintf("%s%s.fasta", BASE, IDs[i])
|
||||||
mySeq <- dbSanitizeSequence(x)
|
response <- httr::GET(URL)
|
||||||
|
if (httr::status_code(response) == 200) {
|
||||||
|
s <- as.character(response)
|
||||||
|
s <- unlist(strsplit(s, "\n"))
|
||||||
|
x <- dbSanitizeSequence(s)
|
||||||
|
} else {
|
||||||
|
s <- ""
|
||||||
|
x <- NA
|
||||||
|
}
|
||||||
|
hd[i] <- s[1]
|
||||||
|
sq[i] <- x
|
||||||
}
|
}
|
||||||
|
names(sq) <- IDs
|
||||||
|
attr(sq, "headers") <- hd
|
||||||
|
|
||||||
return(mySeq)
|
return(sq)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (FALSE) {
|
||||||
|
inp <- c("P79073", "P0000000", "A0A1W2TKZ7")
|
||||||
|
s <- dbFetchUniProtSeq(inp)
|
||||||
|
s[1:3]
|
||||||
|
str(s)
|
||||||
|
attr(s, "headers")[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# == 2.10 dbFetchPrositeFeatures() =========================================
|
# == 2.10 dbFetchPrositeFeatures() =========================================
|
||||||
dbFetchPrositeFeatures <- function(ID) {
|
dbFetchPrositeFeatures <- function(ID) {
|
||||||
|
Loading…
Reference in New Issue
Block a user