Maintenance fixes and UniProt API change bugfix
This commit is contained in:
		| @@ -93,7 +93,7 @@ pBar <- function(i, l, nCh = 50) { | ||||
|   ticks <- round(seq(1, l-1, length.out = nCh)) | ||||
|   if (i < l) { | ||||
|     if (any(i == ticks)) { | ||||
|       p <- which(i == ticks) | ||||
|       p <- which(i == ticks)[1]  # use only first, in case there are ties | ||||
|       p1 <- paste(rep("#", p), collapse = "") | ||||
|       p2 <- paste(rep("-", nCh - p), collapse = "") | ||||
|       cat(sprintf("\r|%s%s|", p1, p2)) | ||||
|   | ||||
| @@ -3,12 +3,13 @@ | ||||
| # Purpose:  A Bioinformatics Course: | ||||
| #              R code accompanying the BIN-Data_integration unit. | ||||
| # | ||||
| # Version:  1.0 | ||||
| # Version:  1.0.1 | ||||
| # | ||||
| # Date:     2017  10  08 | ||||
| # Date:     2018  10  30 | ||||
| # Author:   Boris Steipe (boris.steipe@utoronto.ca) | ||||
| # | ||||
| # Versions: | ||||
| #           1.0.1  Bugfix: UniProt ID Mapping service API change | ||||
| #           1.0    First live version | ||||
| # | ||||
| # | ||||
| @@ -29,9 +30,9 @@ | ||||
| #TOC> ========================================================================== | ||||
| #TOC>  | ||||
| #TOC>   Section  Title                             Line | ||||
| #TOC> ------------------------------------------- | ||||
| #TOC>   1        Identifier mapping            45 | ||||
| #TOC>   2        Cross-referencing tables     151 | ||||
| #TOC> ------------------------------------------------- | ||||
| #TOC>   1        Identifier mapping                  40 | ||||
| #TOC>   2        Cross-referencing tables           164 | ||||
| #TOC>  | ||||
| #TOC> ========================================================================== | ||||
|  | ||||
| @@ -73,14 +74,14 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036" | ||||
| # the URL of the server and send a list of items labelled as "query" in the body | ||||
| # of the request. GET() and POST() are functions from httr. | ||||
|  | ||||
| URL <- "http://www.uniprot.org/mapping/" | ||||
| URL <- "https://www.uniprot.org/mapping/" | ||||
| response <- POST(URL, | ||||
|                  body = list(from = "P_REFSEQ_AC",   # Refseq Protein | ||||
|                              to = "ACC",             # UniProt ID | ||||
|                              format = "tab", | ||||
|                              query = myQueryIDs)) | ||||
|  | ||||
| response | ||||
| cat(content(response)) | ||||
|  | ||||
| # We need to check the status code - if it is not 200, an error occurred and we | ||||
| # can't process the result: | ||||
| @@ -94,6 +95,22 @@ myMappedIDs <- read.delim(file = textConnection(content(response)), | ||||
|                           stringsAsFactors = FALSE) | ||||
| myMappedIDs | ||||
|  | ||||
| # We actually only need columns 1 and 3, and we can also change the names | ||||
| # to "From" and "To": | ||||
|  | ||||
| myMappedIDs <- myMappedIDs[ , c(1,3)] | ||||
| colnames(myMappedIDs) <- c("From", "To") | ||||
|  | ||||
| myMappedIDs | ||||
|  | ||||
| # If this works as expected, you should see: | ||||
| #        From     To | ||||
| # 1 NP_010227 P39678 | ||||
| # 2 NP_011036 P25302 | ||||
| # | ||||
| # ... and note that there are only two entries, because nothing was returned | ||||
| # for the dummy "RefSeq ID" NP_00000 | ||||
|  | ||||
| # If the query can't be fulfilled because of a problem with the server, a | ||||
| # WebPage is returned. But the server status is also returned and we can check | ||||
| # the status code. I have lately gotten many "503" status codes: Server Not | ||||
| @@ -114,7 +131,7 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") { | ||||
|   #    empty data frame if the mapping was unsuccessful. No rows are returned | ||||
|   #    for IDs that are not mapped. | ||||
|  | ||||
|   URL <- "http://www.uniprot.org/mapping/" | ||||
|   URL <- "https://www.uniprot.org/uploadlists/" | ||||
|   response <- POST(URL, | ||||
|                    body = list(from = mapFrom, | ||||
|                                to = mapTo, | ||||
| @@ -125,6 +142,8 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") { | ||||
|     myMap <- read.delim(file = textConnection(content(response)), | ||||
|                         sep = "\t", | ||||
|                         stringsAsFactors = FALSE) | ||||
|     myMap <- myMap[ , c(1,3)] | ||||
|     colnames(myMap) <- c("From", "To") | ||||
|   } else { | ||||
|     myMap <- data.frame() | ||||
|     warning(paste("No uniProt ID mapping returned:", | ||||
|   | ||||
| @@ -584,8 +584,8 @@ KLdiv(pmfL1, pmfL2)  # 0.1087 | ||||
| # random samples according to the rL1 distribution, calculate the Kullback | ||||
| # Leibler divergence with countsL1, and compare the distribution we get with the | ||||
| # value we observed as the difference with discL2. Essentially, this tells us | ||||
| # the probability that countsL2 is actually a sample from the L1 function. Here we | ||||
| # go: | ||||
| # the probability that countsL2 is actually a sample from the L1 function. | ||||
| # Here we go: | ||||
|  | ||||
| N <- 1000 | ||||
| divs <- numeric(N) | ||||
|   | ||||
| @@ -182,7 +182,7 @@ toString(myDNAseq[4:15]) | ||||
|  | ||||
| # ==   5.1  Views  ============================================================= | ||||
|  | ||||
| # Biostring "Views" are objects that store mutliple substrings of one | ||||
| # Biostring "Views" are objects that store multiple substrings of one | ||||
| # Biostring object. | ||||
|  | ||||
| (myView <- Views(myDNAseq, start = c(1, 19, 37), end = c(15, 30, 45))) | ||||
|   | ||||
| @@ -12,7 +12,8 @@ | ||||
| #           1.0    New unit. | ||||
| # | ||||
| # | ||||
| # TODO: | ||||
| # TODO: Make a simple solution first, then extend it to error checking, and | ||||
| #       to handle .mfa files. | ||||
| # | ||||
| # | ||||
| # == DO NOT SIMPLY  source()  THIS FILE! ======================================= | ||||
| @@ -231,7 +232,7 @@ refAPSES[grep("P39678", refAPSES) + 1]  # grep() the string and add 1 | ||||
| # when working with strings, we can use substr(<string>, <start>, <stop>) to | ||||
| # extract substrings, but more often we expand the string into a vector of | ||||
| # single characters with strsplit(<string>, ""). strsplit() returns a list, | ||||
| # to accommodate that <string> could be a vector of many elements, therafore | ||||
| # to accommodate that <string> could be a vector of many elements, therefore | ||||
| # we usually unlist() the result if we use it only on a single string. | ||||
|  | ||||
| # Example: How many positively charged residues in "MBP1_SACCE"? | ||||
| @@ -297,8 +298,8 @@ writeFASTA <- function(s, OUT = stdout(), width = 60) { | ||||
|  | ||||
| } | ||||
|  | ||||
| # Let's try this. We don't define OUT, so the result is written to the console | ||||
| # by default. Defualt width for sequence is 60 characters | ||||
| # Let's try this. If we don't specify OUT, the result is written to the console | ||||
| # by default. Default width for sequence is 60 characters | ||||
|  | ||||
| writeFASTA(refAPSES) | ||||
|  | ||||
|   | ||||
| @@ -63,8 +63,8 @@ IHKYRRIIREGTEMNIEEVDSSLDVILQTLIANNNKNKGAEQIITISNANSHA" | ||||
| nchar(s) | ||||
| # Must be 969 | ||||
|  | ||||
| # Fetch the Uniprot ID by retrieving the first string that appears between two | ||||
| # vertical bars in the header line. | ||||
| # Task: Fetch the UniProt ID by retrieving the first string that appears between | ||||
| # two vertical bars ("pipes") in the header record. | ||||
| # | ||||
|  | ||||
| # Develop the regular expression: | ||||
|   | ||||
| @@ -107,8 +107,7 @@ expect_error(log(v[1,2]))                # This appears oK, but ... | ||||
| expect_error(log(v[1,2]), "non-numeric") # ... it's actually a different error! | ||||
|  | ||||
| # Producing unit tests simply means: we define a function, and then we check | ||||
| # whether all test pass. Consider a function that is loaded from your utilities | ||||
| # file: | ||||
| # whether all tests pass. Consider a function that is loaded on startup: | ||||
|  | ||||
| biCode | ||||
|  | ||||
|   | ||||
| @@ -345,7 +345,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") { | ||||
|   #    empty data frame if the mapping was unsuccessful. No rows are returned | ||||
|   #    for IDs that are not mapped. | ||||
|  | ||||
|   URL <- "http://www.uniprot.org/mapping/" | ||||
|   URL <- "https://www.uniprot.org/uploadlists/" | ||||
|   response <- POST(URL, | ||||
|                    body = list(from = mapFrom, | ||||
|                                to = mapTo, | ||||
| @@ -356,6 +356,8 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") { | ||||
|     myMap <- read.delim(file = textConnection(content(response)), | ||||
|                         sep = "\t", | ||||
|                         stringsAsFactors = FALSE) | ||||
|     myMap <- myMap[ , c(1,3)] | ||||
|     colnames(myMap) <- c("From", "To") | ||||
|   } else { | ||||
|     myMap <- data.frame() | ||||
|     warning(paste("No uniProt ID mapping returned:", | ||||
|   | ||||
		Reference in New Issue
	
	Block a user