bugfix in dbFetchPrositeFeatures(); add return of actual motif sequence; stylistic updates
This commit is contained in:
parent
36140bc984
commit
abb146f828
@ -1,20 +1,15 @@
|
||||
# tocID <- "RPR-PROSITE_POST.R"
|
||||
#
|
||||
# ---------------------------------------------------------------------------- #
|
||||
# PATIENCE ... #
|
||||
# Do not yet work with this code. Updates in progress. Thank you. #
|
||||
# boris.steipe@utoronto.ca #
|
||||
# ---------------------------------------------------------------------------- #
|
||||
#
|
||||
# Purpose: A Bioinformatics Course:
|
||||
# R code accompanying the RPR-Scripting_data_downloads unit.
|
||||
#
|
||||
# Version: 1.1
|
||||
# Version: 1.2
|
||||
#
|
||||
# Date: 2017 10 - 2019 01
|
||||
# Date: 2017-10 - 2020-09
|
||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||
#
|
||||
# Versions:
|
||||
# 1.2 2020 Maintenance
|
||||
# 1.1 Change from require() to requireNamespace(),
|
||||
# use <package>::<function>() idiom throughout,
|
||||
# 1.0.1 Updates for slightly changed interfaces
|
||||
@ -35,13 +30,13 @@
|
||||
|
||||
|
||||
#TOC> ==========================================================================
|
||||
#TOC>
|
||||
#TOC>
|
||||
#TOC> Section Title Line
|
||||
#TOC> ---------------------------------------------------------------------
|
||||
#TOC> 1 Constructing a POST command from a Web query 42
|
||||
#TOC> 1.1 Task - fetchPrositeFeatures() function 142
|
||||
#TOC> 2 Task solutions 150
|
||||
#TOC>
|
||||
#TOC> 1 Constructing a POST command from a Web query 43
|
||||
#TOC> 1.1 Task - fetchPrositeFeatures() function 148
|
||||
#TOC> 2 Task solutions 156
|
||||
#TOC>
|
||||
#TOC> ==========================================================================
|
||||
|
||||
|
||||
@ -59,9 +54,10 @@ if (! requireNamespace("httr", quietly = TRUE)) {
|
||||
|
||||
|
||||
|
||||
# We have reverse engineered the Web form for a ScanProsite request, and can now
|
||||
# construct a POST request. The command is similar to GET(), but we need an
|
||||
# explicit request body: a list of key/value pairs
|
||||
# We have reverse engineered the Web form for a ScanProsite request, and can
|
||||
# construct a valid POST request from knowing the required field names. The POST
|
||||
# command is similar to GET(), but we need an explicit request body that
|
||||
# contains a list of key/value pairs
|
||||
|
||||
UniProtID <- "P39678"
|
||||
|
||||
@ -79,19 +75,24 @@ response <- httr::POST(URL,
|
||||
|
||||
httr::status_code(response) # If this is not 200, something went wrong and it
|
||||
# makes no sense to continue. If this persists, ask
|
||||
# on the mailing list what to do.
|
||||
# on the Discussion Board what to do.
|
||||
|
||||
|
||||
# The text content of the response is available with the
|
||||
# content() function:
|
||||
httr::content(response, "text")
|
||||
|
||||
# ... should show you the same as the page contents that
|
||||
# you have seen in the browser. Now we need to extract
|
||||
# the data from the page: we need regular expressions, but
|
||||
# only simple ones. First, we strsplit() the response into
|
||||
# individual lines, since each of our data elements is on
|
||||
# its own line. We simply split on the "\\n" newline character.
|
||||
# ... should show you the same as the page contents that you have seen in the
|
||||
# browser. Now we need to extract the data from the page. For this simple
|
||||
# example we can get away with using regular expressions, but in general we need
|
||||
# a real XML parser to parse HTML. We'll cover that in a later unit. Here, we
|
||||
# strsplit() the response into individual lines, since each of our data elements
|
||||
# is on its own line, and then capture the contents. The way Prosite has
|
||||
# formatted their HTML we can simply split on the "\\n" newline character - but
|
||||
# they could write the same valid HTML without any newline-characters at all.
|
||||
# Understand that we are working with a bit of a "hack" here: exploiting
|
||||
# empirical assumptions rather than a formal specification. But sometimes quick
|
||||
# and dirty is fine, because quick.
|
||||
|
||||
lines <- unlist(strsplit(httr::content(response, "text"), "\\n"))
|
||||
head(lines)
|
||||
@ -105,10 +106,9 @@ patt <- sprintf("\\|%s\\|", UniProtID)
|
||||
# ... and select only the lines that match this
|
||||
# pattern:
|
||||
|
||||
lines <- lines[grep(patt, lines)]
|
||||
lines
|
||||
( lines <- lines[grep(patt, lines)] )
|
||||
|
||||
# ... captures the four lines of output.
|
||||
# ... captures the three lines of output.
|
||||
|
||||
# Now we break the lines apart into tokens: this is another application of
|
||||
# strsplit(), but this time we split either on "pipe" characters, "|" OR on tabs
|
||||
@ -137,7 +137,7 @@ for (line in lines) {
|
||||
end = as.numeric(tokens[5]),
|
||||
psID = tokens[6],
|
||||
psName = tokens[7],
|
||||
stringsAsFactors = FALSE))
|
||||
psSeq = tokens[11]))
|
||||
}
|
||||
features
|
||||
|
||||
@ -149,8 +149,8 @@ features
|
||||
|
||||
|
||||
# Task: write a function that takes as input a UniProt ID, fetches the
|
||||
# features it contains from ScanProsite and returns a list as given above, or
|
||||
# a list of length 0 if there is an error.
|
||||
# features it contains from ScanProsite and returns a data frame as given above, or
|
||||
# an empty data frame if there is an error.
|
||||
|
||||
|
||||
# = 2 Task solutions ======================================================
|
||||
@ -160,7 +160,7 @@ features
|
||||
# clicking on dbFetchPrositeFeatures() in the Environment pane.
|
||||
|
||||
# Test:
|
||||
dbFetchPrositeFeatures("P39678")
|
||||
dbFetchPrositeFeatures("Q5KMQ9")
|
||||
|
||||
|
||||
|
||||
|
@ -21,10 +21,10 @@
|
||||
#TOC> 2.08 dbAddAnnotation() 215
|
||||
#TOC> 2.09 dbFetchUniProtSeq() 243
|
||||
#TOC> 2.10 dbFetchPrositeFeatures() 289
|
||||
#TOC> 2.11 node2text() 333
|
||||
#TOC> 2.12 dbFetchNCBItaxData() 345
|
||||
#TOC> 2.13 UniProtIDmap() 384
|
||||
#TOC> 3 TESTS 423
|
||||
#TOC> 2.11 node2text() 339
|
||||
#TOC> 2.12 dbFetchNCBItaxData() 351
|
||||
#TOC> 2.13 UniProtIDmap() 390
|
||||
#TOC> 3 TESTS 429
|
||||
#TOC>
|
||||
#TOC> ==========================================================================
|
||||
|
||||
@ -297,6 +297,7 @@ dbFetchPrositeFeatures <- function(ID) {
|
||||
# end num end of motif
|
||||
# psID char PROSITE motif ID
|
||||
# psName char PROSITE motif name
|
||||
# psSeq char sequence annotated to the feature
|
||||
# If the operation is not successful, a 0-length data frame is returned.
|
||||
|
||||
URL <- "https://prosite.expasy.org/cgi-bin/prosite/PSScan.cgi"
|
||||
@ -313,7 +314,7 @@ dbFetchPrositeFeatures <- function(ID) {
|
||||
|
||||
lines <- unlist(strsplit(httr::content(response, "text"), "\\n"))
|
||||
|
||||
patt <- sprintf("\\|%s\\|", UniProtID)
|
||||
patt <- sprintf("\\|%s\\|", ID)
|
||||
lines <- lines[grep(patt, lines)]
|
||||
|
||||
for (line in lines) {
|
||||
@ -323,12 +324,17 @@ dbFetchPrositeFeatures <- function(ID) {
|
||||
start = as.numeric(tokens[4]),
|
||||
end = as.numeric(tokens[5]),
|
||||
psID = tokens[6],
|
||||
psName = tokens[7]))
|
||||
psName = tokens[7],
|
||||
psSeq = tokens[11]))
|
||||
}
|
||||
}
|
||||
return(myFeatures)
|
||||
}
|
||||
|
||||
if (FALSE) {
|
||||
dbFetchPrositeFeatures("P33520") # RES1_SCHPO
|
||||
|
||||
}
|
||||
|
||||
# == 2.11 node2text() ======================================================
|
||||
node2text <- function(doc, tag) {
|
||||
|
Loading…
Reference in New Issue
Block a user