Update UniProtIDmap and address httr/curl initialization bug
This commit is contained in:
parent
f3a436cb6e
commit
2775c7c9a8
@ -1,20 +1,15 @@
|
|||||||
# tocID <- "BIN-Data_integration.R"
|
# tocID <- "BIN-Data_integration.R"
|
||||||
#
|
#
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
# PATIENCE ... #
|
|
||||||
# Do not yet work with this code. Updates in progress. Thank you. #
|
|
||||||
# boris.steipe@utoronto.ca #
|
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
#
|
|
||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-Data_integration unit.
|
# R code accompanying the BIN-Data_integration unit.
|
||||||
#
|
#
|
||||||
# Version: 1.1
|
# Version: 1.2
|
||||||
#
|
#
|
||||||
# Date: 2018 10 - 2019 01
|
# Date: 2018-10 - 2020-09
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.2 2020 Maintenance and updates
|
||||||
# 1.1 Change from require() to requireNamespace(),
|
# 1.1 Change from require() to requireNamespace(),
|
||||||
# use <package>::<function>() idiom throughout
|
# use <package>::<function>() idiom throughout
|
||||||
# 1.0.1 Bugfix: UniProt ID Mapping service API change
|
# 1.0.1 Bugfix: UniProt ID Mapping service API change
|
||||||
@ -62,8 +57,8 @@
|
|||||||
|
|
||||||
# To begin, we load httr, which supports sending and receiving data via the
|
# To begin, we load httr, which supports sending and receiving data via the
|
||||||
# http protocol, just like a Web browser.
|
# http protocol, just like a Web browser.
|
||||||
if (! requireNamespace("httpr", quietly=TRUE)) {
|
if (! requireNamespace("httr", quietly=TRUE)) {
|
||||||
install.packages("httpr")
|
install.packages("httr")
|
||||||
}
|
}
|
||||||
# Package information:
|
# Package information:
|
||||||
# library(help = httr) # basic information
|
# library(help = httr) # basic information
|
||||||
@ -81,6 +76,12 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036"
|
|||||||
# the URL of the server and send a list of items labelled as "query" in the body
|
# the URL of the server and send a list of items labelled as "query" in the body
|
||||||
# of the request. GET() and POST() are functions from httr.
|
# of the request. GET() and POST() are functions from httr.
|
||||||
|
|
||||||
|
# Note. A recent bug in the interaction between the server expectations and the
|
||||||
|
# curl client libraries requires the following initialization:
|
||||||
|
httr::set_config(httr::config(http_version = 0))
|
||||||
|
# cf. https://stackoverflow.com/questions/44610845/stream-error-in-the-http-2-framing-layer-bigrquery-commands-error-in-r-studio-b
|
||||||
|
|
||||||
|
|
||||||
URL <- "https://www.uniprot.org/mapping/"
|
URL <- "https://www.uniprot.org/mapping/"
|
||||||
response <- httr::POST(URL,
|
response <- httr::POST(URL,
|
||||||
body = list(from = "P_REFSEQ_AC", # Refseq Protein
|
body = list(from = "P_REFSEQ_AC", # Refseq Protein
|
||||||
@ -102,14 +103,6 @@ myMappedIDs <- read.delim(file = textConnection(httr::content(response)),
|
|||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
myMappedIDs
|
myMappedIDs
|
||||||
|
|
||||||
# We actually only need columns 1 and 3, and we can also change the names
|
|
||||||
# to "From" and "To":
|
|
||||||
|
|
||||||
myMappedIDs <- myMappedIDs[ , c(1,3)]
|
|
||||||
colnames(myMappedIDs) <- c("From", "To")
|
|
||||||
|
|
||||||
myMappedIDs
|
|
||||||
|
|
||||||
# If this works as expected, you should see:
|
# If this works as expected, you should see:
|
||||||
# From To
|
# From To
|
||||||
# 1 NP_010227 P39678
|
# 1 NP_010227 P39678
|
||||||
@ -138,6 +131,9 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
# empty data frame if the mapping was unsuccessful. No rows are returned
|
# empty data frame if the mapping was unsuccessful. No rows are returned
|
||||||
# for IDs that are not mapped.
|
# for IDs that are not mapped.
|
||||||
|
|
||||||
|
# Initialize curl
|
||||||
|
httr::set_config(httr::config(http_version = 0))
|
||||||
|
|
||||||
URL <- "https://www.uniprot.org/uploadlists/"
|
URL <- "https://www.uniprot.org/uploadlists/"
|
||||||
response <- httr::POST(URL,
|
response <- httr::POST(URL,
|
||||||
body = list(from = mapFrom,
|
body = list(from = mapFrom,
|
||||||
@ -149,7 +145,6 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
myMap <- read.delim(file = textConnection(httr::content(response)),
|
myMap <- read.delim(file = textConnection(httr::content(response)),
|
||||||
sep = "\t",
|
sep = "\t",
|
||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
myMap <- myMap[ , c(1,3)]
|
|
||||||
colnames(myMap) <- c("From", "To")
|
colnames(myMap) <- c("From", "To")
|
||||||
} else {
|
} else {
|
||||||
myMap <- data.frame()
|
myMap <- data.frame()
|
||||||
@ -187,8 +182,7 @@ myIDs <- data.frame(uID = c("P38903", "P31383", "P47177", "P47096", "Q07747",
|
|||||||
"NP_012683", "NP_012559",
|
"NP_012683", "NP_012559",
|
||||||
"NP_010038", "NP_014882",
|
"NP_010038", "NP_014882",
|
||||||
"NP_012616", "NP_013254",
|
"NP_012616", "NP_013254",
|
||||||
"NP_014555", "NP_013629"),
|
"NP_014555", "NP_013629"))
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
|
|
||||||
myIDs
|
myIDs
|
||||||
|
|
||||||
@ -212,7 +206,7 @@ myIDs$name[match(myQuery, myIDs$refID)]
|
|||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: if you want to do very many queries in large tables, use the
|
# Note: if you want to do very many queries in very large tables, use the
|
||||||
# fmatch() function in the "fastmatch" package for a considerable
|
# fmatch() function in the "fastmatch" package for a considerable
|
||||||
# speedup.
|
# speedup.
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
#TOC> 2.11 node2text() 311
|
#TOC> 2.11 node2text() 311
|
||||||
#TOC> 2.12 dbFetchNCBItaxData() 323
|
#TOC> 2.12 dbFetchNCBItaxData() 323
|
||||||
#TOC> 2.13 UniProtIDmap() 362
|
#TOC> 2.13 UniProtIDmap() 362
|
||||||
#TOC> 3 TESTS 399
|
#TOC> 3 TESTS 401
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -373,6 +373,9 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
# empty data frame if the mapping was unsuccessful. No rows are returned
|
# empty data frame if the mapping was unsuccessful. No rows are returned
|
||||||
# for IDs that are not mapped.
|
# for IDs that are not mapped.
|
||||||
|
|
||||||
|
# Initialize curl
|
||||||
|
httr::set_config(httr::config(http_version = 0))
|
||||||
|
|
||||||
URL <- "https://www.uniprot.org/uploadlists/"
|
URL <- "https://www.uniprot.org/uploadlists/"
|
||||||
response <- httr::POST(URL,
|
response <- httr::POST(URL,
|
||||||
body = list(from = mapFrom,
|
body = list(from = mapFrom,
|
||||||
@ -383,7 +386,6 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
if (httr::status_code(response) == 200) { # 200: oK
|
if (httr::status_code(response) == 200) { # 200: oK
|
||||||
myMap <- read.delim(file = textConnection(httr::content(response)),
|
myMap <- read.delim(file = textConnection(httr::content(response)),
|
||||||
sep = "\t")
|
sep = "\t")
|
||||||
myMap <- myMap[ , c(1,3)]
|
|
||||||
colnames(myMap) <- c("From", "To")
|
colnames(myMap) <- c("From", "To")
|
||||||
} else {
|
} else {
|
||||||
myMap <- data.frame()
|
myMap <- data.frame()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user