2020 updates - deactivate for maintenance

2020-09-18 21:56:30 +10:00
parent 89bdd14d1c
commit 37ef655d47
42 changed files with 447 additions and 243 deletions
--- a/scripts/ABC-createRefDB.R
+++ b/scripts/ABC-createRefDB.R
@@ -8,7 +8,7 @@
 # http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
 #
 # For the data model, see
-# https://docs.google.com/drawings/d/1uupNvz18_FYFwyyVPebTM0CUxcJCPDQuxuIJGpjWQWg
+# https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
 # For the schema, see dbInit() in ./scripts/ABC-dbUtilities.R
 #
 # ==============================================================================
--- a/scripts/ABC-dbUtilities.R
+++ b/scripts/ABC-dbUtilities.R
@@ -1,12 +1,35 @@
-# ABC-dbUtilities.R
-
+# tocID <- "scripts/ABC-dbUtilities.R"
+#
 # database utilities for ABC learning units
 #
 # ==============================================================================
-#


-# ====== PACKAGES ==============================================================
+#TOC> ==========================================================================
+#TOC> 
+#TOC>   Section  Title                             Line
+#TOC> -------------------------------------------------
+#TOC>   1        PACKAGES                            32
+#TOC>   2        FUNCTIONS                           50
+#TOC>   2.01       dbSanitizeSequence()              53
+#TOC>   2.02       dbConfirmUnique()                 88
+#TOC>   2.03       dbInit()                         106
+#TOC>   2.04       dbAutoincrement()                147
+#TOC>   2.05       dbAddProtein()                   160
+#TOC>   2.06       dbAddFeature()                   180
+#TOC>   2.07       dbAddTaxonomy()                  199
+#TOC>   2.08       dbAddAnnotation()                215
+#TOC>   2.09       dbFetchUniProtSeq()              243
+#TOC>   2.10       dbFetchPrositeFeatures()         267
+#TOC>   2.11       node2text()                      311
+#TOC>   2.12       dbFetchNCBItaxData()             323
+#TOC>   2.13       UniProtIDmap()                   362
+#TOC>   3        TESTS                              399
+#TOC> 
+#TOC> ==========================================================================
+
+
+# =    1  PACKAGES  ============================================================


 if (! requireNamespace("jsonlite", quietly = TRUE)) {
@@ -24,9 +47,10 @@ if (! requireNamespace("xml2", quietly = TRUE)) {
 }


-# ====== FUNCTIONS =============================================================
+# =    2  FUNCTIONS  ===========================================================


+# ==   2.01  dbSanitizeSequence()  =============================================
 dbSanitizeSequence <- function(s, unambiguous = TRUE) {
  # Remove FASTA header lines, if any,
  # flatten any structure that s has,
@@ -61,6 +85,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
 }


+# ==   2.02  dbConfirmUnique()  ================================================
 dbConfirmUnique <- function(x) {
  # x is a vector of logicals.
  # returns x if x has exactly one TRUE element.
@@ -78,24 +103,27 @@ dbConfirmUnique <- function(x) {
 }


+# ==   2.03  dbInit()  =========================================================
 dbInit <- function() {
  # Return an empty instance of the protein database
+  # Open the link and study the schema:
+  # https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0

  db <- list()

+  db$version <- "1.0"
+
  db$protein <- data.frame(
    ID = numeric(),
    name = character(),
    RefSeqID = character(),
    UniProtID = character(),
    taxonomyID = numeric(),
-    sequence = character(),
-    stringsAsFactors = FALSE)
+    sequence = character())

  db$taxonomy <- data.frame(
    ID = numeric(),
-    species = character(),
-    stringsAsFactors = FALSE)
+    species = character())


  db$annotation <- data.frame(
@@ -103,21 +131,20 @@ dbInit <- function() {
    proteinID = numeric(),
    featureID = numeric(),
    start = numeric(),
-    end = numeric(),
-    stringsAsFactors = FALSE)
+    end = numeric())

  db$feature <- data.frame(
    ID = numeric(),
    name = character(),
    description = character(),
    sourceDB = character(),
-    accession = character(),
-    stringsAsFactors = FALSE)
+    accession = character())

  return(db)
 }


+# ==   2.04  dbAutoincrement()  ================================================
 dbAutoincrement <- function(tb) {
  # Return a unique integer that can be used as a primary key
  # Value:
@@ -130,6 +157,7 @@ dbAutoincrement <- function(tb) {
 }


+# ==   2.05  dbAddProtein()  ===================================================
 dbAddProtein <- function(db, jsonDF) {
  # Add one or more protein entries to the database db.
  # Parameters:
@@ -142,14 +170,14 @@ dbAddProtein <- function(db, jsonDF) {
                    RefSeqID    = jsonDF$RefSeqID[i],
                    UniProtID   = jsonDF$UniProtID[i],
                    taxonomyID  = jsonDF$taxonomyID[i],
-                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]),
-                    stringsAsFactors = FALSE)
+                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]))
    db$protein <- rbind(db$protein, x)
  }
  return(db)
 }


+# ==   2.06  dbAddFeature()  ===================================================
 dbAddFeature <- function(db, jsonDF) {
  # Add one or more feature entries to the database db.
  # Parameters:
@@ -161,14 +189,14 @@ dbAddFeature <- function(db, jsonDF) {
                    name        = jsonDF$name[i],
                    description = jsonDF$description[i],
                    sourceDB    = jsonDF$sourceDB[i],
-                    accession   = jsonDF$accession[i],
-                    stringsAsFactors = FALSE)
+                    accession   = jsonDF$accession[i])
    db$feature <- rbind(db$feature, x)
  }
  return(db)
 }


+# ==   2.07  dbAddTaxonomy()  ==================================================
 dbAddTaxonomy <- function(db, jsonDF) {
  # Add one or more taxonomy entries to the database db.
  # Parameters:
@@ -178,13 +206,13 @@ dbAddTaxonomy <- function(db, jsonDF) {
  for (i in seq_len(nrow(jsonDF))) {
    x <- data.frame(
      ID =  jsonDF$ID[i],
-      species = jsonDF$species[i],
-      stringsAsFactors = FALSE)
+      species = jsonDF$species[i])
    db$taxonomy <- rbind(db$taxonomy, x)
  }
  return(db)
 }

+# ==   2.08  dbAddAnnotation()  ================================================
 dbAddAnnotation <- function(db, jsonDF) {
  # Add one or more annotation entries to the database db.
  # Parameters:
@@ -205,14 +233,14 @@ dbAddAnnotation <- function(db, jsonDF) {
                    proteinID = pID,
                    featureID = fID,
                    start     = as.integer(jsonDF$start[i]),
-                    end       = as.integer(jsonDF$end[i]),
-                    stringsAsFactors = FALSE)
+                    end       = as.integer(jsonDF$end[i]))
    db$annotation <- rbind(db$annotation, x)
  }
  return(db)
 }


+# ==   2.09  dbFetchUniProtSeq()  ==============================================
 dbFetchUniProtSeq <- function(ID) {
  # Fetch a protein sequence from UniProt.
  # Parameters:
@@ -236,6 +264,7 @@ dbFetchUniProtSeq <- function(ID) {
 }


+# ==   2.10  dbFetchPrositeFeatures()  =========================================
 dbFetchPrositeFeatures <- function(ID) {
  # Fetch feature annotations from ScanProsite.
  # Parameters:
@@ -272,14 +301,14 @@ dbFetchPrositeFeatures <- function(ID) {
                                     start =  as.numeric(tokens[4]),
                                     end   =  as.numeric(tokens[5]),
                                     psID  =  tokens[6],
-                                     psName = tokens[7],
-                                     stringsAsFactors = FALSE))
+                                     psName = tokens[7]))
    }
  }
  return(myFeatures)
 }


+# ==   2.11  node2text()  ======================================================
 node2text <- function(doc, tag) {
  # an extractor function for the contents of elements
  # between given tags in an XML response.
@@ -291,6 +320,7 @@ node2text <- function(doc, tag) {
 }


+# ==   2.12  dbFetchNCBItaxData()  =============================================
 dbFetchNCBItaxData <- function(ID) {
  # Fetch feature taxID and Organism from the NCBI.
  # Parameters:
@@ -329,6 +359,7 @@ dbFetchNCBItaxData <- function(ID) {



+# ==   2.13  UniProtIDmap()  ===================================================
 UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
  # Use UniProt ID mapping service to map one or more IDs
  # Parameters:
@@ -351,8 +382,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {

  if (httr::status_code(response) == 200) { # 200: oK
    myMap <- read.delim(file = textConnection(httr::content(response)),
-                        sep = "\t",
-                        stringsAsFactors = FALSE)
+                        sep = "\t")
    myMap <- myMap[ , c(1,3)]
    colnames(myMap) <- c("From", "To")
  } else {
@@ -366,7 +396,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
 }


-# ====== TESTS =================================================================
+# =    3  TESTS  ===============================================================

 if (FALSE) {
  if (! requireNamespace("testthat", quietly = TRUE)) {
--- a/scripts/ABC-makeScCCnet.R
+++ b/scripts/ABC-makeScCCnet.R
@@ -1,4 +1,4 @@
-# ABC-makeScCCnet.R
+# tocID <- "scripts/ABC-makeScCCnet.R"
 #
 # Create a subnetwork of high-confidence yeast genes with a "mitotic cell cycle"
 # GOSlim annotation.
--- a/scripts/ABC-writeALN.R
+++ b/scripts/ABC-writeALN.R
@@ -1,4 +1,4 @@
-# ABC-writeALN.R
+# tocID <- "scripts/ABC-writeALN.R"
 #
 # ToDo:    calculate consensus line
 #          append sequence numbers
--- a/scripts/ABC-writeMFA.R
+++ b/scripts/ABC-writeMFA.R
@@ -40,7 +40,7 @@ writeMFA <- function(ali,
  if (is.na(blockWidth)) {
    stop("PANIC: parameter \"blockWidth\" must be numeric.")
  }
-  if (blockWidth < 1){
+  if (! blockWidth > 0){
    stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
  }

@@ -105,7 +105,7 @@ writeMFA <- function(ali,
    txt <- c(txt, "")  # append an empty line for readability
  }

-  writeLines(txt, con= myCon)
+  writeLines(txt, con = myCon)

 }

--- a/scripts/BLAST.R
+++ b/scripts/BLAST.R
@@ -357,20 +357,23 @@ parseBLASTalignment <- function(hit) {

 # ==== TESTS ===================================================================

-# define query:
-# q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
-#              "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
-#              "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
-#              sep="")
-# or ...
-# q <- "NP_010227" # refseq ID
-#
-# test <- BLAST(q,
-#               nHits = 100,
-#               E = 0.001,
-#               rid = "",
-#               limits = "txid4751[ORGN]")
-# length(test$hits)
+if (FALSE) {
+  # define query:
+  q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
+               "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
+               "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
+               sep="")
+  # or ...
+  q <- "NP_010227" # refseq ID
+
+  test <- BLAST(q,
+                nHits = 100,
+                E = 0.001,
+                rid = "",
+                limits = "txid4751[ORGN]")
+  str(test)
+  length(test$hits)
+}

 # [END]