Maintenance fixes and UniProt API change bugfix
This commit is contained in:
parent
ead20d5f18
commit
1dd87e7473
@ -93,7 +93,7 @@ pBar <- function(i, l, nCh = 50) {
|
|||||||
ticks <- round(seq(1, l-1, length.out = nCh))
|
ticks <- round(seq(1, l-1, length.out = nCh))
|
||||||
if (i < l) {
|
if (i < l) {
|
||||||
if (any(i == ticks)) {
|
if (any(i == ticks)) {
|
||||||
p <- which(i == ticks)
|
p <- which(i == ticks)[1] # use only first, in case there are ties
|
||||||
p1 <- paste(rep("#", p), collapse = "")
|
p1 <- paste(rep("#", p), collapse = "")
|
||||||
p2 <- paste(rep("-", nCh - p), collapse = "")
|
p2 <- paste(rep("-", nCh - p), collapse = "")
|
||||||
cat(sprintf("\r|%s%s|", p1, p2))
|
cat(sprintf("\r|%s%s|", p1, p2))
|
||||||
|
@ -3,12 +3,13 @@
|
|||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-Data_integration unit.
|
# R code accompanying the BIN-Data_integration unit.
|
||||||
#
|
#
|
||||||
# Version: 1.0
|
# Version: 1.0.1
|
||||||
#
|
#
|
||||||
# Date: 2017 10 08
|
# Date: 2018 10 30
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.0.1 Bugfix: UniProt ID Mapping service API change
|
||||||
# 1.0 First live version
|
# 1.0 First live version
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
@ -28,10 +29,10 @@
|
|||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -------------------------------------------
|
#TOC> -------------------------------------------------
|
||||||
#TOC> 1 Identifier mapping 45
|
#TOC> 1 Identifier mapping 40
|
||||||
#TOC> 2 Cross-referencing tables 151
|
#TOC> 2 Cross-referencing tables 164
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -73,14 +74,14 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036"
|
|||||||
# the URL of the server and send a list of items labelled as "query" in the body
|
# the URL of the server and send a list of items labelled as "query" in the body
|
||||||
# of the request. GET() and POST() are functions from httr.
|
# of the request. GET() and POST() are functions from httr.
|
||||||
|
|
||||||
URL <- "http://www.uniprot.org/mapping/"
|
URL <- "https://www.uniprot.org/mapping/"
|
||||||
response <- POST(URL,
|
response <- POST(URL,
|
||||||
body = list(from = "P_REFSEQ_AC", # Refseq Protein
|
body = list(from = "P_REFSEQ_AC", # Refseq Protein
|
||||||
to = "ACC", # UniProt ID
|
to = "ACC", # UniProt ID
|
||||||
format = "tab",
|
format = "tab",
|
||||||
query = myQueryIDs))
|
query = myQueryIDs))
|
||||||
|
|
||||||
response
|
cat(content(response))
|
||||||
|
|
||||||
# We need to check the status code - if it is not 200, an error occurred and we
|
# We need to check the status code - if it is not 200, an error occurred and we
|
||||||
# can't process the result:
|
# can't process the result:
|
||||||
@ -94,6 +95,22 @@ myMappedIDs <- read.delim(file = textConnection(content(response)),
|
|||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
myMappedIDs
|
myMappedIDs
|
||||||
|
|
||||||
|
# We actually only need columns 1 and 3, and we can also change the names
|
||||||
|
# to "From" and "To":
|
||||||
|
|
||||||
|
myMappedIDs <- myMappedIDs[ , c(1,3)]
|
||||||
|
colnames(myMappedIDs) <- c("From", "To")
|
||||||
|
|
||||||
|
myMappedIDs
|
||||||
|
|
||||||
|
# If this works as expected, you should see:
|
||||||
|
# From To
|
||||||
|
# 1 NP_010227 P39678
|
||||||
|
# 2 NP_011036 P25302
|
||||||
|
#
|
||||||
|
# ... and note that there are only two entries, because nothing was returned
|
||||||
|
# for the dummy "RefSeq ID" NP_00000
|
||||||
|
|
||||||
# If the query can't be fulfilled because of a problem with the server, a
|
# If the query can't be fulfilled because of a problem with the server, a
|
||||||
# WebPage is returned. But the server status is also returned and we can check
|
# WebPage is returned. But the server status is also returned and we can check
|
||||||
# the status code. I have lately gotten many "503" status codes: Server Not
|
# the status code. I have lately gotten many "503" status codes: Server Not
|
||||||
@ -114,7 +131,7 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
# empty data frame if the mapping was unsuccessful. No rows are returned
|
# empty data frame if the mapping was unsuccessful. No rows are returned
|
||||||
# for IDs that are not mapped.
|
# for IDs that are not mapped.
|
||||||
|
|
||||||
URL <- "http://www.uniprot.org/mapping/"
|
URL <- "https://www.uniprot.org/uploadlists/"
|
||||||
response <- POST(URL,
|
response <- POST(URL,
|
||||||
body = list(from = mapFrom,
|
body = list(from = mapFrom,
|
||||||
to = mapTo,
|
to = mapTo,
|
||||||
@ -125,6 +142,8 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
myMap <- read.delim(file = textConnection(content(response)),
|
myMap <- read.delim(file = textConnection(content(response)),
|
||||||
sep = "\t",
|
sep = "\t",
|
||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
|
myMap <- myMap[ , c(1,3)]
|
||||||
|
colnames(myMap) <- c("From", "To")
|
||||||
} else {
|
} else {
|
||||||
myMap <- data.frame()
|
myMap <- data.frame()
|
||||||
warning(paste("No uniProt ID mapping returned:",
|
warning(paste("No uniProt ID mapping returned:",
|
||||||
|
@ -584,8 +584,8 @@ KLdiv(pmfL1, pmfL2) # 0.1087
|
|||||||
# random samples according to the rL1 distribution, calculate the Kullback
|
# random samples according to the rL1 distribution, calculate the Kullback
|
||||||
# Leibler divergence with countsL1, and compare the distribution we get with the
|
# Leibler divergence with countsL1, and compare the distribution we get with the
|
||||||
# value we observed as the difference with discL2. Essentially, this tells us
|
# value we observed as the difference with discL2. Essentially, this tells us
|
||||||
# the probability that countsL2 is actually a sample from the L1 function. Here we
|
# the probability that countsL2 is actually a sample from the L1 function.
|
||||||
# go:
|
# Here we go:
|
||||||
|
|
||||||
N <- 1000
|
N <- 1000
|
||||||
divs <- numeric(N)
|
divs <- numeric(N)
|
||||||
|
@ -182,7 +182,7 @@ toString(myDNAseq[4:15])
|
|||||||
|
|
||||||
# == 5.1 Views =============================================================
|
# == 5.1 Views =============================================================
|
||||||
|
|
||||||
# Biostring "Views" are objects that store mutliple substrings of one
|
# Biostring "Views" are objects that store multiple substrings of one
|
||||||
# Biostring object.
|
# Biostring object.
|
||||||
|
|
||||||
(myView <- Views(myDNAseq, start = c(1, 19, 37), end = c(15, 30, 45)))
|
(myView <- Views(myDNAseq, start = c(1, 19, 37), end = c(15, 30, 45)))
|
||||||
|
@ -12,7 +12,8 @@
|
|||||||
# 1.0 New unit.
|
# 1.0 New unit.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# TODO:
|
# TODO: Make a simple solution first, then extend it to error checking, and
|
||||||
|
# to handle .mfa files.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
||||||
@ -231,7 +232,7 @@ refAPSES[grep("P39678", refAPSES) + 1] # grep() the string and add 1
|
|||||||
# when working with strings, we can use substr(<string>, <start>, <stop>) to
|
# when working with strings, we can use substr(<string>, <start>, <stop>) to
|
||||||
# extract substrings, but more often we expand the string into a vector of
|
# extract substrings, but more often we expand the string into a vector of
|
||||||
# single characters with strsplit(<string>, ""). strsplit() returns a list,
|
# single characters with strsplit(<string>, ""). strsplit() returns a list,
|
||||||
# to accommodate that <string> could be a vector of many elements, therafore
|
# to accommodate that <string> could be a vector of many elements, therefore
|
||||||
# we usually unlist() the result if we use it only on a single string.
|
# we usually unlist() the result if we use it only on a single string.
|
||||||
|
|
||||||
# Example: How many positively charged residues in "MBP1_SACCE"?
|
# Example: How many positively charged residues in "MBP1_SACCE"?
|
||||||
@ -297,8 +298,8 @@ writeFASTA <- function(s, OUT = stdout(), width = 60) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Let's try this. We don't define OUT, so the result is written to the console
|
# Let's try this. If we don't specify OUT, the result is written to the console
|
||||||
# by default. Defualt width for sequence is 60 characters
|
# by default. Default width for sequence is 60 characters
|
||||||
|
|
||||||
writeFASTA(refAPSES)
|
writeFASTA(refAPSES)
|
||||||
|
|
||||||
|
@ -63,8 +63,8 @@ IHKYRRIIREGTEMNIEEVDSSLDVILQTLIANNNKNKGAEQIITISNANSHA"
|
|||||||
nchar(s)
|
nchar(s)
|
||||||
# Must be 969
|
# Must be 969
|
||||||
|
|
||||||
# Fetch the Uniprot ID by retrieving the first string that appears between two
|
# Task: Fetch the Uniprot ID by retrieving the first string that appears between
|
||||||
# vertical bars in the header line.
|
# two vertical bars ("pipes") in the header record.
|
||||||
#
|
#
|
||||||
|
|
||||||
# Develop the regular expression:
|
# Develop the regular expression:
|
||||||
|
@ -107,8 +107,7 @@ expect_error(log(v[1,2])) # This appears oK, but ...
|
|||||||
expect_error(log(v[1,2]), "non-numeric") # ... it's actually a different error!
|
expect_error(log(v[1,2]), "non-numeric") # ... it's actually a different error!
|
||||||
|
|
||||||
# Producing unit tests simply means: we define a function, and then we check
|
# Producing unit tests simply means: we define a function, and then we check
|
||||||
# whether all tests pass. Consider a function that is loaded from your utilities
|
# whether all tests pass. Consider a function that is loaded on startup:
|
||||||
# file:
|
|
||||||
|
|
||||||
biCode
|
biCode
|
||||||
|
|
||||||
|
@ -345,7 +345,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
# empty data frame if the mapping was unsuccessful. No rows are returned
|
# empty data frame if the mapping was unsuccessful. No rows are returned
|
||||||
# for IDs that are not mapped.
|
# for IDs that are not mapped.
|
||||||
|
|
||||||
URL <- "http://www.uniprot.org/mapping/"
|
URL <- "https://www.uniprot.org/uploadlists/"
|
||||||
response <- POST(URL,
|
response <- POST(URL,
|
||||||
body = list(from = mapFrom,
|
body = list(from = mapFrom,
|
||||||
to = mapTo,
|
to = mapTo,
|
||||||
@ -356,6 +356,8 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
|||||||
myMap <- read.delim(file = textConnection(content(response)),
|
myMap <- read.delim(file = textConnection(content(response)),
|
||||||
sep = "\t",
|
sep = "\t",
|
||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
|
myMap <- myMap[ , c(1,3)]
|
||||||
|
colnames(myMap) <- c("From", "To")
|
||||||
} else {
|
} else {
|
||||||
myMap <- data.frame()
|
myMap <- data.frame()
|
||||||
warning(paste("No uniProt ID mapping returned:",
|
warning(paste("No uniProt ID mapping returned:",
|
||||||
|
Loading…
Reference in New Issue
Block a user