Add httr and xml2 packages; add scripted data download functions

This commit is contained in:
hyginn 2017-10-06 08:51:12 -04:00
parent b33feed50a
commit 27dd73664b

View File

@ -9,12 +9,24 @@
# ====== PACKAGES ============================================================== # ====== PACKAGES ==============================================================
if (! require("jsonlite", quietly = TRUE)) { if (! require(jsonlite, quietly = TRUE)) {
install.packages("jsonlite") install.packages("jsonlite")
library(jsonlite) library(jsonlite)
} }
if (!require(httr, quietly = TRUE)) {
if (!require(xml2, quietly = TRUE)) {
# ====== FUNCTIONS ============================================================= # ====== FUNCTIONS =============================================================
@ -204,4 +216,118 @@ dbAddAnnotation <- function(db, jsonDF) {
} }
dbFetchUniProtSeq <- function(ID) {
# Purpose:
#   Request the record for one UniProt ID over HTTP and pass the response
#   text on to dbSanitizeSequence().
# Parameters:
#   ID   char   a UniProt ID (accession number)
# Value:
#   char   the sequence, as produced by dbSanitizeSequence()
#   If the request does not come back with HTTP status 200, mySeq is left
#   as character(), i.e. a zero-length character vector (not the empty
#   string "").
# NOTE(review): the closing braces and the return statement of this function
#   are not visible in this excerpt; the "Value" section above assumes that
#   mySeq is what gets returned - confirm against the full file.

# NOTE(review): the format string is empty as shown, so ID is never
#   interpolated into URL. The UniProt URL template (with a %s placeholder)
#   appears to be missing here - restore it before use.
URL <- sprintf("", ID)
# GET() and status_code() are presumably the httr functions (the PACKAGES
# section loads httr) - confirm.
response <- GET(URL)
# Default result; only overwritten inside the status check below.
mySeq <- character()
if (status_code(response) == 200) {
# Convert the response to text ...
x <- as.character(response)
# ... and break it into lines. strsplit() returns a list with one element
# per input string, so x is a list here, not a plain character vector.
x <- strsplit(x, "\n")
# dbSanitizeSequence() is defined elsewhere; presumably it strips the header
# line and non-sequence characters - verify that it accepts a list.
mySeq <- dbSanitizeSequence(x)
dbFetchPrositeFeatures <- function(ID) {
# Purpose:
#   POST a query for one UniProt ID to ScanProsite, request tabular output,
#   and parse the matching result lines into a data frame of motif
#   annotations.
# Parameters:
#   ID   char   a UniProt ID (accession number)
# Value:
#   data frame   uID     char   UniProt ID
#                start   num    start of motif
#                end     num    end of motif
#                psID    char   PROSITE motif ID
#                psName  char   PROSITE motif name
#   If the request does not come back with HTTP status 200, myFeatures is
#   left as an empty data.frame() (no rows, no columns).
# NOTE(review): the closing braces and the return statement of this function
#   are not visible in this excerpt; the "Value" section assumes myFeatures
#   is what gets returned - confirm against the full file.

# NOTE(review): URL is an empty string as shown; the ScanProsite endpoint
#   appears to be missing here - restore it before use.
URL <- ""
# POST() is presumably httr::POST; the list is sent as the request body.
# The meaning of the individual form fields is defined by the ScanProsite
# service and cannot be verified from this file.
response <- POST(URL,
body = list(meta = "opt1",
meta1_protein = "opt1",
seq = ID,
skip = "on",
output = "tabular"))
# Default result; only extended inside the status check below.
myFeatures <- data.frame()
if (status_code(response) == 200) {
# Split the response text into individual lines.
lines <- unlist(strsplit(content(response, "text"), "\\n"))
# Keep only the lines that contain the ID delimited by "|" on both sides.
# NOTE(review): UniProtID is neither a parameter nor a local variable of
#   this function as shown - the parameter is named ID. Unless a global
#   UniProtID exists, this line fails with "object not found"; it most
#   likely should read sprintf("\\|%s\\|", ID).
patt <- sprintf("\\|%s\\|", UniProtID)
lines <- lines[grep(patt, lines)]
# One output row per matching line.
# NOTE(review): rbind() inside a loop copies the data frame on every
#   iteration; fine for a handful of motifs, slow for many.
for (line in lines) {
# Split on tab or "|"; the positional indices used below (2, 4, 5, 6, 7)
# depend on the field layout of the tabular ScanProsite output.
tokens <- unlist(strsplit(line, "\\t|\\|"))
myFeatures <- rbind(myFeatures,
data.frame(uID = tokens[2],
start = as.numeric(tokens[4]),
end = as.numeric(tokens[5]),
psID = tokens[6],
psName = tokens[7],
stringsAsFactors = FALSE))
node2text <- function(doc, tag) {
# Purpose:
#   Extractor for the contents of all elements with a given tag name in an
#   XML response.
# Parameters:
#   doc   the XML document to search (passed to xml_find_all(); presumably
#         an xml2 document as returned by read_xml())
#   tag   char   the element name to look for, without angle brackets
# Value:
#   According to the original description, a vector of strings with the
#   contents of all matching elements.
# NOTE(review): the statement that converts the nodes to text and the end of
#   the function are not visible in this excerpt.

# Build the XPath "//<tag>", which matches elements named <tag> at any depth.
path <- paste0("//", tag)
# xml_find_all() is presumably from xml2 (loaded in the PACKAGES section).
nodes <- xml_find_all(doc, path)
dbFetchNCBItaxData <- function(ID) {
# Purpose:
#   Fetch the taxonomy ID and organism name for a RefSeq ID from the NCBI,
#   using two successive XML queries: the first one yields the <Id> value(s)
#   for the RefSeq ID, the second one yields <TaxId> and <Organism>.
# Parameters:
#   ID   char   a RefSeq ID (accession number)
# Value:
#   data frame   taxID     int    NCBI taxID (converted with as.integer())
#                organism  char   organism for this taxID
#   If either <TaxId> or <Organism> is missing from the second response,
#   tID is left as an empty data.frame().
# NOTE(review): the closing braces and the return statement of this function
#   are not visible in this excerpt; the "Value" section assumes tID is what
#   gets returned - confirm against the full file.

# NOTE(review): eUtilsBase is an empty string as shown; the base URL of the
#   NCBI E-utilities appears to be missing here - restore it before use.
eUtilsBase <- ""
# First query: search for the RefSeq ID.
# NOTE(review): this paste() call is truncated in this excerpt (remaining
#   arguments and closing parenthesis are missing). Also note that paste()
#   separates its arguments with a space unless sep = "" is given, whereas
#   the second query below uses paste0() - check the full source.
URL <- paste(eUtilsBase,
"&term=", ID,
myXML <- read_xml(URL)
# Contents of all <Id> elements in the search result.
GID <- node2text(myXML, "Id")
# Second query: presumably built from GID, but the arguments of this paste0()
# call are not visible in this excerpt.
URL <- paste0(eUtilsBase,
myXML <- read_xml(URL)
# Extract taxonomy ID (as integer) and organism name from the second result.
x <- as.integer(node2text(myXML, "TaxId"))
y <- node2text(myXML, "Organism")
# Default result; only replaced if both values were found.
tID <- data.frame()
if (length(x) > 0 && length(y) > 0) {
tID <- data.frame(taxID = x, organism = y)
# [END] # [END]