2020 updates - deactivate for maintenance

2020-09-18 21:56:30 +10:00
parent 89bdd14d1c
commit 37ef655d47
42 changed files with 447 additions and 243 deletions
--- a/.init.R
+++ b/.init.R
@@ -1,42 +0,0 @@
 # .init.R
 # Functions to initialize this collection of learning units
 # Boris Steipe
 # ====================================================================
 # Create a local copy of myScript.R if required, and not been done yet.
 if (! file.exists("myScript.R") && file.exists(".tmp.R")) {
    file.copy(".tmp.R", "myScript.R")
 }
 # If it doesn't exist yet, set up a profile:
 if (! file.exists(".myProfile.R")) {
  # setup profile data
  cat("\nPlease enter the requested values correctly, no spaces, and\n")
  cat("press <enter>.\n")
  e <- readline("Please enter your UofT eMail address: ")
  n <- readline("Please enter your Student Number: ")
  conn <- file(".myProfile.R")
  writeLines(c(sprintf("myEMail <- \"%s\"", e),
               sprintf("myStudentNumber <- %d", as.numeric(n))),
             conn)
  close(conn)
  rm(e, n, conn)
 }
 # Patch YFO -> MYSPE if necessary:
 tmp <- readLines(".myProfile.R")
 if (length(grep("^YFO", tmp)) > 0) {
  idx <- grep("^YFO", tmp)
  tmp[idx] <- gsub("^YFO", "MYSPE", tmp[idx])
  writeLines(tmp, ".myProfile.R")
 }
 rm(tmp)
 source(".myProfile.R")
 source(".utilities.R")
 file.edit("ABC-units.R")
 # [End]
--- a/.utilities.R
+++ b/.utilities.R
@@ -181,19 +181,29 @@ fetchMSAmotif <- function(ali, mot) {
 # ====== PDB ID selection ======================================================
selectPDBrep <- function(n, seed = as.numeric(Sys.time())) {
  # Select n PDB IDs from a list of high-resolution, non-homologous, single
  # domain, single chain structure files that represent a CATH topology
  # group.
  #
  # Parameters:
  #   n     num     number of IDs to return
  #   seed  num     a seed for the RNG
  #
  # Value:          char  PDB IDs
  #
  # Note: the list is loaded from an RData file in the "./data" directory.
  # If you use this function for a course submission, it MUST be invoked as:
  #
  #         selectPDBrep(n, seed = myStudentNumber)
  #
  # ... and myStudentNumber MUST be correctly initialized

  if (!is.numeric(n) || length(n) != 1 || is.na(n) || n < 0) {
    stop("n must be a single, non-negative number.")
  }

  # Load into a private environment: load() restores every object stored in
  # the file, and loading straight into this function's frame would let an
  # object that happens to be called "n" or "seed" overwrite our arguments.
  dataEnv <- new.env()
  load("./data/pdbRep.RData", envir = dataEnv)  # loads pdbRep
  pdbRep <- get("pdbRep", envir = dataEnv, inherits = FALSE)

  if (n > length(pdbRep)) {
    stop(sprintf("There are only %d PDB IDs in the table to choose from.",
                 length(pdbRep)))
  }

  set.seed(seed)   # makes the selection reproducible for a given seed
  return(sample(pdbRep, n))
}
--- a/ABC-units.R
+++ b/ABC-units.R
@@ -2,11 +2,16 @@
 #
 # Purpose: A Bioinformatics Course: R code for learning units
 #
-# Version: 0.1
+# Version: 4.0
 #
-# Date:    2017  08  18
+# Date:    2020  09  16
 # Author:  Boris Steipe (boris.steipe@utoronto.ca)
 #
 # Versions:
 # V 4.0    2020 version
 # V 3.0    2019 version
 # V 2.0    2018 version
 # V 1.0    2017 version
 # V 0.1    First code
 #
 # TODO:
@@ -14,23 +19,19 @@
 #
 # == HOW TO WORK WITH LEARNING UNIT FILES ======================================
 #
-# Expect that the learning unit files will be continuously updated.
+# The R-scripts and datasets in this project will be continuously updated,
-#
+# and updates will be posted on GitHub. To bring your version into the latest
 # state use the Git-pane (top left) and "pull" (blue downward arrow) from the
 # repository. However, this will overwrite locally edited versions of files.
-# If you wish to edit any of the code, for example to add your own comments and
+# To edit code and experiment with it, for example to add your own comments and
-# examples, save any edited version under a different name. Otherwise you will
+# examples, save your edited version into the "myScripts" folder. Otherwise you
-# have problems with git when you update the project to a new version.
+# may have problems with git when you update the project to a new version. It's
 # good practice to change the filename, for example by prepending your initials.
 # This helps distinguish the files you are working with e.g. in a list of
 # recent files. For example if your name is Honjo Tasuku, your edited
 # BIN-Sequence.R might be named HT-BIN-Sequence.R
 # DO NOT SIMPLY  source()  THESE FILES!
 # If there are portions you don't understand, use R's help system, Google for an
 # answer, or ask your instructor. Don't continue if you don't understand what's
 #  going on. That's not how it works ...
 #
 # While this file itself should not be edited by you, this is YOUR project
 # directory, and files that you create (notes etc.) will not be harmed when you
 # pull updated versions of the master, or other new files, from github.
 #
 # If you pull from github and get the following type of error ...
 #     ---------------
 #     error: Your local changes to the following files would be
@@ -41,8 +42,11 @@
 # ... then, you need to bring the offending file into its original state.
 # Open the Commit window, select the file, and click on the Revert button.
 #
-# Of course, you can save a local copy under a different name before you revert,
+# When working with these scripts, DO NOT SIMPLY  source()  THESE FILES!
-# in case you want to keep your changes.
+
 # If there are portions you don't understand, use R's help system, Google for an
 # answer, or ask your instructor. Don't continue if you don't understand what's
 #  going on. That's not how it works ...
 #
 #
 # ==============================================================================
--- a/BIN-ALI-BLAST.R
+++ b/BIN-ALI-BLAST.R
@@ -1,4 +1,10 @@
-# BIN-ALI-BLAST.R
+# tocID <- "BIN-ALI-BLAST.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-BLAST unit.
--- a/BIN-ALI-Dotplot.R
+++ b/BIN-ALI-Dotplot.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Dotplot.R
+# tocID <- "BIN-ALI-Dotplot.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Dotplot unit.
--- a/BIN-ALI-MSA.R
+++ b/BIN-ALI-MSA.R
@@ -1,4 +1,10 @@
-# BIN-ALI-MSA.R
+# tocID <- "BIN-ALI-MSA.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-MSA unit.
--- a/BIN-ALI-Optimal_sequence_alignment.R
+++ b/BIN-ALI-Optimal_sequence_alignment.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Optimal_sequence_alignment.R
+# tocID <- "BIN-ALI-Optimal_sequence_alignment.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.
--- a/BIN-ALI-Similarity.R
+++ b/BIN-ALI-Similarity.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Similarity.R
+# tocID <- "BIN-ALI-Similarity.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Similarity unit.
--- a/BIN-Data_integration.R
+++ b/BIN-Data_integration.R
@@ -1,4 +1,10 @@
-# BIN-Data_integration.R
+# tocID <- "BIN-Data_integration.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-Data_integration unit.
--- a/BIN-FUNC-Domain_annotation.R
+++ b/BIN-FUNC-Domain_annotation.R
@@ -1,4 +1,10 @@
-# BIN-FUNC-Domain_annotation.R
+# tocID <- "BIN-FUNC-Domain_annotation.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-FUNC-Domain_annotation unit.
--- a/BIN-FUNC-Semantic_similarity.R
+++ b/BIN-FUNC-Semantic_similarity.R
@@ -1,4 +1,10 @@
-# BIN-FUNC_Semantic_similarity.R
+# tocID <- "BIN-FUNC_Semantic_similarity.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-FUNC_Semantic_similarity unit.
@@ -158,27 +164,9 @@ myEnr <- GOenrichment(mySet, allGenes)
 sort(myEnr$p.values)  # Any significantly enriched terms? All of these are ...
-#Yes: most significantly enriched is GO:0071931. What is this?
+#Most significantly enriched is GO:0071931. What is this?
-getGOTerm("GO:0071931")  # ... makes sense.
+annotate::getGOTerm("GO:0071931")  # ... makes sense.
 (fullSet <- myEnr$genes$`GO:0071931`)  # What genes are annotated to this term?
 intersect(mySet, fullSet) # These are in both sets
 setdiff(mySet, fullSet)   # These mySet members are not annotated to that term
 setdiff(fullSet, mySet)   # These are annotated to that term but not in mySet.
                          # ... that's the most interesting set. From a set of
                          # genes we have identified a function that they
                          # share, and that shared function has allowed us
                          # to identify
 # What are these genes?
 # Select annotations from the annotation database:
 AnnotationDbi::select(org.Sc.sgd.db,
                      keys = setdiff(fullSet, mySet),
                      columns = c("COMMON", "DESCRIPTION"))
 # Note that these annotations are partially redundant to several different
 # aliases of the same three genes.
--- a/BIN-MYSPE.R
+++ b/BIN-MYSPE.R
@@ -1,4 +1,10 @@
-# BIN-MYSPE.R
+# tocID <- "BIN-MYSPE.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-MYSPE unit
--- a/BIN-PHYLO-Data_preparation.R
+++ b/BIN-PHYLO-Data_preparation.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Data_preparation.R
+# tocID <- "BIN-PHYLO-Data_preparation.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Data_preparation unit.
--- a/BIN-PHYLO-Tree_analysis.R
+++ b/BIN-PHYLO-Tree_analysis.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Tree_analysis.R
+# tocID <- "BIN-PHYLO-Tree_analysis.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Tree_analysis unit.
--- a/BIN-PHYLO-Tree_building.R
+++ b/BIN-PHYLO-Tree_building.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Tree_building.R
+# tocID <- "BIN-PHYLO-Tree_building.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Tree_building unit.
--- a/BIN-PPI-Analysis.R
+++ b/BIN-PPI-Analysis.R
@@ -1,4 +1,10 @@
-# BIN-PPI-Analysis.R
+# tocID <- "BIN-PPI-Analysis.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PPI-Analysis unit.
--- a/BIN-SEQA-Composition.R
+++ b/BIN-SEQA-Composition.R
@@ -1,4 +1,10 @@
-# BIN-SEQA-Composition.R
+# tocID <- "BIN-SEQA-Composition.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-SEQA-Comparison unit
--- a/BIN-Sequence.R
+++ b/BIN-Sequence.R
@@ -1,4 +1,10 @@
-# BIN-Sequence.R
+# tocID <- "BIN-Sequence.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-Sequence unit.
--- a/BIN-Storing_data.R
+++ b/BIN-Storing_data.R
@@ -1,4 +1,10 @@
-# BIN-Storing_data.R
+# tocID <- "BIN-Storing_data.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-Storing_data unit
--- a/FND-Genetic_code.R
+++ b/FND-Genetic_code.R
@@ -1,4 +1,10 @@
-# FND-Genetic_code.R
+# tocID <- "FND-Genetic_code.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-Genetic_code unit.
--- a/FND-MAT-Graphs_and_networks.R
+++ b/FND-MAT-Graphs_and_networks.R
@@ -1,4 +1,10 @@
-# FND-MAT-Graphs_and_networks.R
+# tocID <- "FND-MAT-Graphs_and_networks.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-MAT-Graphs_and_networks unit.
@@ -280,7 +286,7 @@ plot(GBA,
     vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
     vertex.size = 200 + (30 * igraph::degree(GBA)),
     vertex.label = NA)
-par(oPar)                              # restore grphics state
+par(oPar)                              # restore graphics state
 # This is a very obviously different graph! Some biological networks have
 # features that look like that - but in my experience the hub nodes are usually
--- a/FND-STA-Information_theory.R
+++ b/FND-STA-Information_theory.R
@@ -1,14 +1,21 @@
-# FND-STA-Information_theory.R
+# tocID <- "FND-STA-Information_theory.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Information_theory unit.
 #
-# Version:  0.2
+# Version:  0.2.1
 #
-# Date:     2017  MM  DD
+# Date:     2017 - 2019
 # Author:   Boris Steipe (boris.steipe@utoronto.ca)
 #
 # Versions:
 #           0.2.1  Maintenance
 #           0.2    Under development
 #           0.1    First code copied from 2016 material.
 #
@@ -58,11 +65,33 @@ AAref["Y"] <- 0.0294
 sum(AAref)
 # Function to calculate Shannon entropy
H <- function(pmf) {
  # Calculate Shannon entropy
  # Parameters:
  #   pmf (numeric) probability mass function: a vector of states and
  #                 associated probabilities. Each element of
  #                 pmf must be in (0, 1] and sum(pmf) must be 1.
  # Value:
  #   Shannon entropy in bits.
  # Examples:
  #   H(c(A=0.25, C=0.25, G=0.25, T=0.25))  # 2 bits entropy in a random
  #                                         # nucleotide sequence
  #   H(1)     # If all elements are the same, entropy is zero
  #

  # Reject anything that is not a vector of non-missing numbers before the
  # range test, so that the error message is the one below and not a
  # cryptic "missing value where TRUE/FALSE needed".
  if (!is.numeric(pmf) || anyNA(pmf)) {
    stop("Input is not a discrete probability distribution.")
  }

  # all.equal() returns TRUE if its arguments are nearly equal, but a
  # character string describing the difference if they are not. It never
  # returns FALSE, therefore the result must be tested with isTRUE().
  if (any(pmf <= 0 | pmf > 1) || !isTRUE(all.equal(1.0, sum(pmf)))) {
    stop("Input is not a discrete probability distribution.")
  }

  return(-sum(pmf * log2(pmf)))
}
 # Why use all.equal()? Exact comparisons with floating point numbers are
 # brittle. Consider for example:
 1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6 == 1
 print(1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6, digits = 22) # 0.9999999999999998889777
 # all.equal() tests for _near_ equality with tolerance of ~ 1.5e-8
 # Entropy of the database frequencies (in bits):
 (Href <- H(AAref))
--- a/FND-STA-Probability_distribution.R
+++ b/FND-STA-Probability_distribution.R
@@ -1,4 +1,10 @@
-# FND-STA-Probability_distribution.R
+# tocID <- "FND-STA-Probability_distribution.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Probability_distribution unit.
--- a/FND-STA-Significance.R
+++ b/FND-STA-Significance.R
@@ -1,4 +1,10 @@
-# FND-STA-Significance.R
+# tocID <- "FND-STA-Significance.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Significance unit.
--- a/README.md
+++ b/README.md
@@ -1,2 +1,4 @@
 # ABC-units
 A Bioinformatics Course: R modules for learning units
 Follow the instructions in the learning unit to install your local copy of this R-project.
--- a/RPR-Biostrings.R
+++ b/RPR-Biostrings.R
@@ -1,4 +1,10 @@
-# RPR-Biostrings.R
+# tocID <- "RPR-Biostrings.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Biostrings unit.
--- a/RPR-FASTA.R
+++ b/RPR-FASTA.R
@@ -1,4 +1,10 @@
-# RPR-FASTA.R
+# tocID <- "RPR-FASTA.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-FASTA unit.
--- a/RPR-GEO2R.R
+++ b/RPR-GEO2R.R
@@ -1,4 +1,10 @@
-# RPR_GEO2R.R
+# tocID <- "RPR_GEO2R.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR_GEO2R unit.
--- a/RPR-Genetic_code_optimality.R
+++ b/RPR-Genetic_code_optimality.R
@@ -1,4 +1,10 @@
-# RPR-Genetic_code_optimality.R
+# tocID <- "RPR-Genetic_code_optimality.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Genetic_code_optimality unit.
--- a/RPR-Introduction.R
+++ b/RPR-Introduction.R
@@ -1,4 +1,10 @@
-# RPR-Introduction.R
+# tocID <- "RPR-Introduction.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the RPR-Introduction unit
--- a/RPR-PROSITE_POST.R
+++ b/RPR-PROSITE_POST.R
@@ -1,4 +1,10 @@
-# RPR-PROSITE_POST.R
+# tocID <- "RPR-PROSITE_POST.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
--- a/RPR-RegEx.R
+++ b/RPR-RegEx.R
@@ -1,4 +1,10 @@
-# RPR-RegEx.R
+# tocID <- "RPR-RegEx.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the RPR-RegEx unit
--- a/RPR-SX-PDB.R
+++ b/RPR-SX-PDB.R
@@ -1,4 +1,10 @@
-# RPR-SX-PDB.R
+# tocID <- "RPR-SX-PDB.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-SX-PDB unit.
--- a/RPR-UniProt_GET.R
+++ b/RPR-UniProt_GET.R
@@ -1,4 +1,10 @@
-# RPR-UniProt_GET.R
+# tocID <- "RPR-UniProt_GET.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
--- a/RPR-Unit_testing.R
+++ b/RPR-Unit_testing.R
@@ -1,4 +1,10 @@
-# RPR-Unit_testing.R
+# tocID <- "RPR-Unit_testing.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Unit_testing unit.
@@ -29,10 +35,10 @@
 #TOC> 
 #TOC>   Section  Title                             Line
 #TOC> -------------------------------------------------
-#TOC>   1        Unit Tests with testthat            40
+#TOC>   1        Unit Tests with testthat            46
-#TOC>   2        Organizing your tests              159
+#TOC>   2        Organizing your tests              165
-#TOC>   2.1        Testing scripts                  183
+#TOC>   2.1        Testing scripts                  189
-#TOC>   3        Task solutions                     198
+#TOC>   3        Task solutions                     204
 #TOC> 
 #TOC> ==========================================================================
--- a/RPR-eUtils_XML.R
+++ b/RPR-eUtils_XML.R
@@ -1,4 +1,10 @@
-# RPR-eUtils_and_XML.R
+# tocID <- "RPR-eUtils_and_XML.R"
 #
 # ---------------------------------------------------------------------------- #
 #  PATIENCE  ...                                                               #
 #    Do not yet work with this code. Updates in progress. Thank you.           #
 #    boris.steipe@utoronto.ca                                                  #
 # ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
--- a/scripts/ABC-createRefDB.R
+++ b/scripts/ABC-createRefDB.R
@@ -8,7 +8,7 @@
 # http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
 #
 # For the data model, see
-# https://docs.google.com/drawings/d/1uupNvz18_FYFwyyVPebTM0CUxcJCPDQuxuIJGpjWQWg
+# https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
 # For the schema, see dbInit() in ./scripts/ABC-dbUtilities.R
 #
 # ==============================================================================
--- a/scripts/ABC-dbUtilities.R
+++ b/scripts/ABC-dbUtilities.R
@@ -1,12 +1,35 @@
-# ABC-dbUtilities.R
+# tocID <- "scripts/ABC-dbUtilities.R"
-
+#
 # database utilities for ABC learning units
 #
 # ==============================================================================
 #
-# ====== PACKAGES ==============================================================
+#TOC> ==========================================================================
 #TOC> 
 #TOC>   Section  Title                             Line
 #TOC> -------------------------------------------------
 #TOC>   1        PACKAGES                            32
 #TOC>   2        FUNCTIONS                           50
 #TOC>   2.01       dbSanitizeSequence()              53
 #TOC>   2.02       dbConfirmUnique()                 88
 #TOC>   2.03       dbInit()                         106
 #TOC>   2.04       dbAutoincrement()                147
 #TOC>   2.05       dbAddProtein()                   160
 #TOC>   2.06       dbAddFeature()                   180
 #TOC>   2.07       dbAddTaxonomy()                  199
 #TOC>   2.08       dbAddAnnotation()                215
 #TOC>   2.09       dbFetchUniProtSeq()              243
 #TOC>   2.10       dbFetchPrositeFeatures()         267
 #TOC>   2.11       node2text()                      311
 #TOC>   2.12       dbFetchNCBItaxData()             323
 #TOC>   2.13       UniProtIDmap()                   362
 #TOC>   3        TESTS                              399
 #TOC> 
 #TOC> ==========================================================================
 # =    1  PACKAGES  ============================================================
 if (! requireNamespace("jsonlite", quietly = TRUE)) {
@@ -24,9 +47,10 @@ if (! requireNamespace("xml2", quietly = TRUE)) {
 }
-# ====== FUNCTIONS =============================================================
+# =    2  FUNCTIONS  ===========================================================
 # ==   2.01  dbSanitizeSequence()  =============================================
 dbSanitizeSequence <- function(s, unambiguous = TRUE) {
  # Remove FASTA header lines, if any,
  # flatten any structure that s has,
@@ -61,6 +85,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
 }
 # ==   2.02  dbConfirmUnique()  ================================================
 dbConfirmUnique <- function(x) {
  # x is a vector of logicals.
  # returns x if x has exactly one TRUE element.
@@ -78,24 +103,27 @@ dbConfirmUnique <- function(x) {
 }
 # ==   2.03  dbInit()  =========================================================
 dbInit <- function() {
  # Return an empty instance of the protein database
  # Open the link and study the schema:
  # https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
  db <- list()
  db$version <- "1.0"
  db$protein <- data.frame(
    ID = numeric(),
    name = character(),
    RefSeqID = character(),
    UniProtID = character(),
    taxonomyID = numeric(),
-    sequence = character(),
+    sequence = character())
    stringsAsFactors = FALSE)
  db$taxonomy <- data.frame(
    ID = numeric(),
-    species = character(),
+    species = character())
    stringsAsFactors = FALSE)
  db$annotation <- data.frame(
@@ -103,21 +131,20 @@ dbInit <- function() {
    proteinID = numeric(),
    featureID = numeric(),
    start = numeric(),
-    end = numeric(),
+    end = numeric())
    stringsAsFactors = FALSE)
  db$feature <- data.frame(
    ID = numeric(),
    name = character(),
    description = character(),
    sourceDB = character(),
-    accession = character(),
+    accession = character())
    stringsAsFactors = FALSE)
  return(db)
 }
 # ==   2.04  dbAutoincrement()  ================================================
 dbAutoincrement <- function(tb) {
  # Return a unique integer that can be used as a primary key
  # Value:
@@ -130,6 +157,7 @@ dbAutoincrement <- function(tb) {
 }
 # ==   2.05  dbAddProtein()  ===================================================
 dbAddProtein <- function(db, jsonDF) {
  # Add one or more protein entries to the database db.
  # Parameters:
@@ -142,14 +170,14 @@ dbAddProtein <- function(db, jsonDF) {
                    RefSeqID    = jsonDF$RefSeqID[i],
                    UniProtID   = jsonDF$UniProtID[i],
                    taxonomyID  = jsonDF$taxonomyID[i],
-                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]),
+                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]))
                    stringsAsFactors = FALSE)
    db$protein <- rbind(db$protein, x)
  }
  return(db)
 }
 # ==   2.06  dbAddFeature()  ===================================================
 dbAddFeature <- function(db, jsonDF) {
  # Add one or more feature entries to the database db.
  # Parameters:
@@ -161,14 +189,14 @@ dbAddFeature <- function(db, jsonDF) {
                    name        = jsonDF$name[i],
                    description = jsonDF$description[i],
                    sourceDB    = jsonDF$sourceDB[i],
-                    accession   = jsonDF$accession[i],
+                    accession   = jsonDF$accession[i])
                    stringsAsFactors = FALSE)
    db$feature <- rbind(db$feature, x)
  }
  return(db)
 }
 # ==   2.07  dbAddTaxonomy()  ==================================================
 dbAddTaxonomy <- function(db, jsonDF) {
  # Add one or more taxonomy entries to the database db.
  # Parameters:
@@ -178,13 +206,13 @@ dbAddTaxonomy <- function(db, jsonDF) {
  for (i in seq_len(nrow(jsonDF))) {
    x <- data.frame(
      ID =  jsonDF$ID[i],
-      species = jsonDF$species[i],
+      species = jsonDF$species[i])
      stringsAsFactors = FALSE)
    db$taxonomy <- rbind(db$taxonomy, x)
  }
  return(db)
 }
 # ==   2.08  dbAddAnnotation()  ================================================
 dbAddAnnotation <- function(db, jsonDF) {
  # Add one or more annotation entries to the database db.
  # Parameters:
@@ -205,14 +233,14 @@ dbAddAnnotation <- function(db, jsonDF) {
                    proteinID = pID,
                    featureID = fID,
                    start     = as.integer(jsonDF$start[i]),
-                    end       = as.integer(jsonDF$end[i]),
+                    end       = as.integer(jsonDF$end[i]))
                    stringsAsFactors = FALSE)
    db$annotation <- rbind(db$annotation, x)
  }
  return(db)
 }
 # ==   2.09  dbFetchUniProtSeq()  ==============================================
 dbFetchUniProtSeq <- function(ID) {
  # Fetch a protein sequence from UniProt.
  # Parameters:
@@ -236,6 +264,7 @@ dbFetchUniProtSeq <- function(ID) {
 }
 # ==   2.10  dbFetchPrositeFeatures()  =========================================
 dbFetchPrositeFeatures <- function(ID) {
  # Fetch feature annotations from ScanProsite.
  # Parameters:
@@ -272,14 +301,14 @@ dbFetchPrositeFeatures <- function(ID) {
                                     start =  as.numeric(tokens[4]),
                                     end   =  as.numeric(tokens[5]),
                                     psID  =  tokens[6],
-                                     psName = tokens[7],
+                                     psName = tokens[7]))
                                     stringsAsFactors = FALSE))
    }
  }
  return(myFeatures)
 }
 # ==   2.11  node2text()  ======================================================
 node2text <- function(doc, tag) {
  # an extractor function for the contents of elements
  # between given tags in an XML response.
@@ -291,6 +320,7 @@ node2text <- function(doc, tag) {
 }
 # ==   2.12  dbFetchNCBItaxData()  =============================================
 dbFetchNCBItaxData <- function(ID) {
  # Fetch feature taxID and Organism from the NCBI.
  # Parameters:
@@ -329,6 +359,7 @@ dbFetchNCBItaxData <- function(ID) {
 # ==   2.13  UniProtIDmap()  ===================================================
 UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
  # Use UniProt ID mapping service to map one or more IDs
  # Parameters:
@@ -351,8 +382,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
  if (httr::status_code(response) == 200) { # 200: oK
    myMap <- read.delim(file = textConnection(httr::content(response)),
-                        sep = "\t",
+                        sep = "\t")
                        stringsAsFactors = FALSE)
    myMap <- myMap[ , c(1,3)]
    colnames(myMap) <- c("From", "To")
  } else {
@@ -366,7 +396,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
 }
-# ====== TESTS =================================================================
+# =    3  TESTS  ===============================================================
 if (FALSE) {
  if (! requireNamespace("testthat", quietly = TRUE)) {
--- a/scripts/ABC-makeScCCnet.R
+++ b/scripts/ABC-makeScCCnet.R
@@ -1,4 +1,4 @@
-# ABC-makeScCCnet.R
+# tocID <- "scripts/ABC-makeScCCnet.R"
 #
 # Create a subnetwork of high-confidence yeast genes with a "mitotic cell cycle"
 # GOSlim annotation.
--- a/scripts/ABC-writeALN.R
+++ b/scripts/ABC-writeALN.R
@@ -1,4 +1,4 @@
-# ABC-writeALN.R
+# tocID <- "scripts/ABC-writeALN.R"
 #
 # ToDo:    calculate consensus line
 #          append sequence numbers
--- a/scripts/ABC-writeMFA.R
+++ b/scripts/ABC-writeMFA.R
@@ -40,7 +40,7 @@ writeMFA <- function(ali,
  if (is.na(blockWidth)) {
    stop("PANIC: parameter \"blockWidth\" must be numeric.")
  }
-  if (blockWidth < 1){
+  if (! blockWidth > 0){
    stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
  }
@@ -105,7 +105,7 @@ writeMFA <- function(ali,
    txt <- c(txt, "")  # append an empty line for readability
  }
-  writeLines(txt, con= myCon)
+  writeLines(txt, con = myCon)
 }
--- a/scripts/BLAST.R
+++ b/scripts/BLAST.R
@@ -357,20 +357,23 @@ parseBLASTalignment <- function(hit) {
 # ==== TESTS ===================================================================
-# define query:
+if (FALSE) {
-# q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
+  # define query:
-#              "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
+  q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
-#              "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
+               "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
-#              sep="")
+               "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
-# or ...
+               sep="")
-# q <- "NP_010227" # refseq ID
+  # or ...
-#
+  q <- "NP_010227" # refseq ID
-# test <- BLAST(q,
+
-#               nHits = 100,
+  test <- BLAST(q,
-#               E = 0.001,
+                nHits = 100,
-#               rid = "",
+                E = 0.001,
-#               limits = "txid4751[ORGN]")
+                rid = "",
-# length(test$hits)
+                limits = "txid4751[ORGN]")
  str(test)
  length(test$hits)
 }
 # [END]