add httr and xml2 package, add scripted data download functions
This commit is contained in:
parent
b33feed50a
commit
27dd73664b
@ -9,12 +9,24 @@
|
|||||||
# ====== PACKAGES ==============================================================
|
# ====== PACKAGES ==============================================================
|
||||||
|
|
||||||
|
|
||||||
if (! require("jsonlite", quietly = TRUE)) {
|
if (! require(jsonlite, quietly = TRUE)) {
|
||||||
install.packages("jsonlite")
|
install.packages("jsonlite")
|
||||||
library(jsonlite)
|
library(jsonlite)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (!require(httr, quietly = TRUE)) {
|
||||||
|
install.packages("httr")
|
||||||
|
library(httr)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (!require(xml2, quietly = TRUE)) {
|
||||||
|
install.packages("xml2")
|
||||||
|
library(xml2)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ====== FUNCTIONS =============================================================
|
# ====== FUNCTIONS =============================================================
|
||||||
|
|
||||||
|
|
||||||
@ -204,4 +216,118 @@ dbAddAnnotation <- function(db, jsonDF) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
dbFetchUniProtSeq <- function(ID) {
  # Download the FASTA record of a UniProt entry and pass it on for
  # sequence clean-up.
  # Parameters:
  #     ID   char   a UniProt ID (accession number)
  # Value:
  #     the value returned by dbSanitizeSequence() when the server answers
  #     with HTTP status 200 (presumably the bare sequence as char -
  #     dbSanitizeSequence() is defined elsewhere in this file);
  #     otherwise a 0-length character vector.

  response <- GET(sprintf("http://www.uniprot.org/uniprot/%s.fasta", ID))

  # Anything other than "200 OK" counts as failure: return the empty result.
  if (status_code(response) != 200) {
    return(character())
  }

  # Text of the response body, broken up at line ends. strsplit() returns
  # a list, which is handed to dbSanitizeSequence() unchanged.
  fastaLines <- strsplit(as.character(response), "\n")

  return(dbSanitizeSequence(fastaLines))
}
|
||||||
|
|
||||||
|
|
||||||
|
dbFetchPrositeFeatures <- function(ID) {
  # Fetch feature annotations from ScanProsite.
  # Parameters:
  #     ID   char   a UniProt ID (accession number)
  # Value:
  #     data frame   uID     char   UniProt ID
  #                  start   num    start of motif
  #                  end     num    end of motif
  #                  psID    char   PROSITE motif ID
  #                  psName  char   PROSITE motif name
  #     If the operation is not successful (HTTP status other than 200, or
  #     no line of the response mentions the ID), a 0-length data frame is
  #     returned.

  URL <- "http://prosite.expasy.org/cgi-bin/prosite/PSScan.cgi"

  response <- POST(URL,
                   body = list(meta = "opt1",
                               meta1_protein = "opt1",
                               seq = ID,
                               skip = "on",
                               output = "tabular"))

  myFeatures <- data.frame()
  if (status_code(response) != 200) {
    return(myFeatures)
  }

  lines <- unlist(strsplit(content(response, "text"), "\\n"))

  # Keep only the lines that contain "|<ID>|". The ID is matched as a
  # fixed string so that characters in it are never interpreted as regular
  # expression syntax.
  lines <- lines[grep(sprintf("|%s|", ID), lines, fixed = TRUE)]
  if (length(lines) == 0) {
    return(myFeatures)
  }

  # Split every line at tabs and pipe characters, then assemble the data
  # frame in one step instead of growing it with rbind() line by line.
  tokens <- strsplit(lines, "\\t|\\|")
  field <- function(i) {
    # i-th token of every line; NA where a line has fewer tokens
    vapply(tokens, function(x) x[i], character(1))
  }

  myFeatures <- data.frame(uID    = field(2),
                           start  = as.numeric(field(4)),
                           end    = as.numeric(field(5)),
                           psID   = field(6),
                           psName = field(7),
                           stringsAsFactors = FALSE)
  return(myFeatures)
}
|
||||||
|
|
||||||
|
|
||||||
|
node2text <- function(doc, tag) {
  # Extractor for element contents in an XML response.
  # Parameters:
  #     doc   the XML document (or node) to search
  #     tag   char   element name to look for
  # Value:
  #     char   vector with the text of every element matched by the
  #            XPath expression "//<tag>", i.e. at any depth of doc.
  return(xml_text(xml_find_all(doc, paste0("//", tag))))
}
|
||||||
|
|
||||||
|
|
||||||
|
dbFetchNCBItaxData <- function(ID) {
  # Fetch taxID and organism for a protein from the NCBI, using the
  # E-utilities esearch and esummary services.
  # Parameters:
  #     ID   char   a RefSeq ID (accession number)
  # Value:
  #     data frame   taxID     num    NCBI taxID
  #                  organism  char   organism for this taxID
  #     If the search finds no record, or the summary contains no TaxId or
  #     Organism element, a 0-length data frame is returned.

  eUtilsBase <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
  tID <- data.frame()

  # Step 1: esearch - translate the accession number into NCBI record ID(s)
  URL <- paste0(eUtilsBase,
                "esearch.fcgi?",
                "db=protein",
                "&term=", ID)
  GID <- node2text(read_xml(URL), "Id")

  # Nothing found: skip the esummary request, which would be sent with an
  # empty "id" parameter.
  if (length(GID) == 0) {
    return(tID)
  }

  # Step 2: esummary - retrieve the record summaries. Several IDs are sent
  # as one comma-separated list so that URL is always a single string.
  URL <- paste0(eUtilsBase,
                "esummary.fcgi?",
                "db=protein",
                "&id=", paste(GID, collapse = ","),
                "&version=2.0")
  myXML <- read_xml(URL)

  x <- as.integer(node2text(myXML, "TaxId"))
  y <- node2text(myXML, "Organism")

  if (length(x) > 0 && length(y) > 0) {
    # stringsAsFactors = FALSE keeps organism a character column on
    # R versions before 4.0 too.
    tID <- data.frame(taxID = x, organism = y, stringsAsFactors = FALSE)
  }
  return(tID)
}
|
||||||
|
|
||||||
# [END]
|
# [END]
|
||||||
|
Loading…
Reference in New Issue
Block a user