From 37ef655d47f6e016b4fa1fbb809eac1bcd905d38 Mon Sep 17 00:00:00 2001
From: hyginn <boris.steipe@utoronto.ca>
Date: Fri, 18 Sep 2020 21:56:30 +1000
Subject: [PATCH] 2020 updates - deactivate for maintenance

---
 .init.R                              | 42 --------------
 .utilities.R                         | 20 +++++--
 ABC-units.R                          | 42 +++++++-------
 BIN-ALI-BLAST.R                      | 12 +++-
 BIN-ALI-Dotplot.R                    | 12 +++-
 BIN-ALI-MSA.R                        | 14 +++--
 BIN-ALI-Optimal_sequence_alignment.R |  8 ++-
 BIN-ALI-Similarity.R                 | 12 +++-
 BIN-Data_integration.R               | 12 +++-
 BIN-FUNC-Domain_annotation.R         | 14 +++--
 BIN-FUNC-Semantic_similarity.R       | 34 ++++--------
 BIN-MYSPE.R                          |  8 ++-
 BIN-PHYLO-Data_preparation.R         | 12 +++-
 BIN-PHYLO-Tree_analysis.R            | 12 +++-
 BIN-PHYLO-Tree_building.R            | 20 ++++---
 BIN-PPI-Analysis.R                   | 12 +++-
 BIN-SEQA-Composition.R               | 12 +++-
 BIN-Sequence.R                       | 24 +++++---
 BIN-Storing_data.R                   | 22 +++++---
 FND-Genetic_code.R                   | 12 +++-
 FND-MAT-Graphs_and_networks.R        | 14 +++--
 FND-STA-Information_theory.R         | 41 ++++++++++++--
 FND-STA-Probability_distribution.R   | 16 ++++--
 FND-STA-Significance.R               | 14 +++--
 README.md                            |  2 +
 RPR-Biostrings.R                     |  8 ++-
 RPR-FASTA.R                          |  8 ++-
 RPR-GEO2R.R                          | 12 +++-
 RPR-Genetic_code_optimality.R        | 20 ++++---
 RPR-Introduction.R                   |  8 ++-
 RPR-PROSITE_POST.R                   | 12 +++-
 RPR-RegEx.R                          |  8 ++-
 RPR-SX-PDB.R                         |  8 ++-
 RPR-UniProt_GET.R                    | 12 +++-
 RPR-Unit_testing.R                   | 16 ++++--
 RPR-eUtils_XML.R                     | 12 +++-
 scripts/ABC-createRefDB.R            |  2 +-
 scripts/ABC-dbUtilities.R            | 82 +++++++++++++++++++---------
 scripts/ABC-makeScCCnet.R            |  2 +-
 scripts/ABC-writeALN.R               |  2 +-
 scripts/ABC-writeMFA.R               |  4 +-
 scripts/BLAST.R                      | 31 ++++++-----
 42 files changed, 447 insertions(+), 243 deletions(-)
 delete mode 100644 .init.R
diff --git a/.init.R b/.init.R
deleted file mode 100644
index 33551f1..0000000
--- a/.init.R
+++ /dev/null
@@ -1,42 +0,0 @@
-# .init.R
-# Functions to initialize this collection of learning units
-# Boris Steipe
-# ====================================================================
-
-# Create a local copy of myScript.R if required, and not been done yet.
-if (! file.exists("myScript.R") && file.exists(".tmp.R")) {
-    file.copy(".tmp.R", "myScript.R")
-}
-
-# If it doesn't exist yet, set up a profile:
-if (! file.exists(".myProfile.R")) {
-  # setup profile data
-  cat("\nPlease enter the requested values correctly, no spaces, and\n")
-  cat("press <enter>.\n")
-  e <- readline("Please enter your UofT eMail address: ")
-  n <- readline("Please enter your Student Number: ")
-
-  conn <- file(".myProfile.R")
-  writeLines(c(sprintf("myEMail <- \"%s\"", e),
-               sprintf("myStudentNumber <- %d", as.numeric(n))),
-             conn)
-  close(conn)
-  rm(e, n, conn)
-}
-
-# Patch YFO -> MYSPE if necessary:
-tmp <- readLines(".myProfile.R")
-if (length(grep("^YFO", tmp)) > 0) {
-  idx <- grep("^YFO", tmp)
-  tmp[idx] <- gsub("^YFO", "MYSPE", tmp[idx])
-  writeLines(tmp, ".myProfile.R")
-}
-rm(tmp)
-
-source(".myProfile.R")
-
-source(".utilities.R")
-
-file.edit("ABC-units.R")
-
-# [End]
diff --git a/.utilities.R b/.utilities.R
index 09f56d4..8d9e7f5 100644
--- a/.utilities.R
+++ b/.utilities.R
@@ -181,19 +181,29 @@ fetchMSAmotif <- function(ali, mot) {
 
 # ====== PDB ID selection ======================================================
 
-selectPDBrep <- function(n) {
+selectPDBrep <- function(n, seed = as.numeric(Sys.time())) {
   # Select n PDB IDs from a list of high-resolution, non-homologous, single
   # domain, single chain structure files that represent a CATH topology
   # group.
-  # Parameters   n  num   number of IDs to return.
+  # Parameters:
+  #   n     num     number of IDs to return
+  #   seed  num     a seed for the RNG
+  #
   # Value:          char  PDB IDs
-  # Note: the list is loaded from an RData file in the data directory
+  #
+  # Note: the list is loaded from an RData file in the "./data" directory.
+  # If you use this function for a course submissio, it MUST be invoked as:
+  #
+  #         selectPDBrep(n, seed = myStudentNumber)
+  #
+  # ... and myStudentNumber MUST be correctly initialized
 
   load("./data/pdbRep.RData")  # loads pdbRep
   if (n > length(pdbRep)) {
-    stop(sprintf("You can select no more than %d IDs.", length(pdbRep)))
+    stop(sprintf("There are only %d PDB IDs in the table to choose from.",
+                 length(pdbRep)))
   }
-  set.seed(as.numeric(Sys.time()))
+  set.seed(seed)
   return(sample(pdbRep, n))
 }
 
diff --git a/ABC-units.R b/ABC-units.R
index 5632b36..e5af3ad 100644
--- a/ABC-units.R
+++ b/ABC-units.R
@@ -2,11 +2,16 @@
 #
 # Purpose: A Bioinformatics Course: R code for learning units
 #
-# Version: 0.1
+# Version: 4.0
 #
-# Date:    2017  08  18
+# Date:    2020  09  16
 # Author:  Boris Steipe (boris.steipe@utoronto.ca)
 #
+# Versions:
+# V 4.0    2020 version
+# V 3.0    2019 version
+# V 2.0    2018 version
+# V 1.0    2017 version
 # V 0.1    First code
 #
 # TODO:
@@ -14,23 +19,19 @@
 #
 # == HOW TO WORK WITH LEARNING UNIT FILES ======================================
 #
-# Expect that the learning unit files will be continuously updated.
-#
+# The R-scripts and datasets in this project will be continuously updated,
+# and updates will be posted on GitHub. To bring your version into the latest
+# state use the Git-pane (top left) and "pull" (blue downward arrow) from the
+# repository. However, this will overwrite locally edited version of files.
 
-# If you wish to edit any of the code, for example to add your own comments and
-# examples, save any edited version under a different name. Otherwise you will
-# have problems with git when you update the project to a new version.
+# To edit code and experiment with it, for example to add your own comments and
+# examples, save your edited version into the "myScripts" folder. Otherwise you
+# may have problems with git when you update the project to a new version. It's
+# good practice to change the filename, for example by prepending your initials.
+# This helps distinguish the files you are working with e.g. in a list of
+# recent files. For example if your name is Honjo Tasuku, your edited
+# BIN-Sequence.R might be named HT-BIN-Sequence.R
 
-# DO NOT SIMPLY  source()  THESE FILES!
-
-# If there are portions you don't understand, use R's help system, Google for an
-# answer, or ask your instructor. Don't continue if you don't understand what's
-#  going on. That's not how it works ...
-#
-# While this file itself should not be edited by you this is YOUR project
-# directory, and files that you create (notes etc.) will not be harmed when you
-# pull updated version of the master, or other new files, from github.
-#
 # If you pull from github and get the following type of error ...
 #     ---------------
 #     error: Your local changes to the following files would be
@@ -41,8 +42,11 @@
 # ... then, you need to bring the offending file into its original state.
 # Open the Commit window, select the file, and click on the Revert button.
 #
-# Of course, you can save a local copy under a different name before you revert,
-# in case you want to keep your changes.
+# When working with these script DO NOT SIMPLY  source()  THESE FILES!
+
+# If there are portions you don't understand, use R's help system, Google for an
+# answer, or ask your instructor. Don't continue if you don't understand what's
+#  going on. That's not how it works ...
 #
 #
 # ==============================================================================
diff --git a/BIN-ALI-BLAST.R b/BIN-ALI-BLAST.R
index 39477f3..55ac089 100644
--- a/BIN-ALI-BLAST.R
+++ b/BIN-ALI-BLAST.R
@@ -1,4 +1,10 @@
-# BIN-ALI-BLAST.R
+# tocID <- "BIN-ALI-BLAST.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-BLAST unit.
@@ -29,13 +35,13 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                               Line
 #TOC> ---------------------------------------------------
 #TOC>   1        Defining the APSES domain             42
 #TOC>   2        Executing the BLAST search            64
 #TOC>   3        Analysing results                     86
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-ALI-Dotplot.R b/BIN-ALI-Dotplot.R
index 1982bd4..661cdde 100644
--- a/BIN-ALI-Dotplot.R
+++ b/BIN-ALI-Dotplot.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Dotplot.R
+# tocID <- "BIN-ALI-Dotplot.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Dotplot unit.
@@ -27,12 +33,12 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                  Line
 #TOC> --------------------------------------
 #TOC>   1        ___Section___            39
 #TOC>   2        Tasks                   187
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-ALI-MSA.R b/BIN-ALI-MSA.R
index ddb58a2..dea1855 100644
--- a/BIN-ALI-MSA.R
+++ b/BIN-ALI-MSA.R
@@ -1,4 +1,10 @@
-# BIN-ALI-MSA.R
+# tocID <- "BIN-ALI-MSA.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-MSA unit.
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                              Line
 #TOC> ------------------------------------------------------------------
 #TOC>   1        Preparations                                         54
@@ -47,7 +53,7 @@
 #TOC>   6        Sequence Logos                                      546
 #TOC>   6.1        Subsetting an alignment by motif                  555
 #TOC>   6.2        Plot a Sequence Logo                              604
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -239,7 +245,7 @@ for (i in seq_along(highScoringRanges$lengths)) {
 #   -  adjust the sequence names
 #   -  convert to msaAAMultipleAlignment object
 
-# ===   4.1.1  importing an .aln file                   
+# ===   4.1.1  importing an .aln file
 
 # The seqinr package has a function to read CLUSTAL W formatted .aln files ...
 if (! requireNamespace("seqinr", quietly=TRUE)) {
diff --git a/BIN-ALI-Optimal_sequence_alignment.R b/BIN-ALI-Optimal_sequence_alignment.R
index 4cc2854..cb7bef8 100644
--- a/BIN-ALI-Optimal_sequence_alignment.R
+++ b/BIN-ALI-Optimal_sequence_alignment.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Optimal_sequence_alignment.R
+# tocID <- "BIN-ALI-Optimal_sequence_alignment.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.
diff --git a/BIN-ALI-Similarity.R b/BIN-ALI-Similarity.R
index b7a1cb2..447d512 100644
--- a/BIN-ALI-Similarity.R
+++ b/BIN-ALI-Similarity.R
@@ -1,4 +1,10 @@
-# BIN-ALI-Similarity.R
+# tocID <- "BIN-ALI-Similarity.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-ALI-Similarity unit.
@@ -28,13 +34,13 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                          Line
 #TOC> ----------------------------------------------
 #TOC>   1        Amino Acid Properties            41
 #TOC>   2        Mutation Data matrix            158
 #TOC>   3        Background score                199
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-Data_integration.R b/BIN-Data_integration.R
index 38b8d01..4630918 100644
--- a/BIN-Data_integration.R
+++ b/BIN-Data_integration.R
@@ -1,4 +1,10 @@
-# BIN-Data_integration.R
+# tocID <- "BIN-Data_integration.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-Data_integration unit.
@@ -30,12 +36,12 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                             Line
 #TOC> -------------------------------------------------
 #TOC>   1        Identifier mapping                  42
 #TOC>   2        Cross-referencing tables           165
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-FUNC-Domain_annotation.R b/BIN-FUNC-Domain_annotation.R
index 3a7f106..e200381 100644
--- a/BIN-FUNC-Domain_annotation.R
+++ b/BIN-FUNC-Domain_annotation.R
@@ -1,4 +1,10 @@
-# BIN-FUNC-Domain_annotation.R
+# tocID <- "BIN-FUNC-Domain_annotation.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-FUNC-Domain_annotation unit.
@@ -25,7 +31,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                               Line
 #TOC> -----------------------------------------------------------------------------------
 #TOC>   1        Update your database script                                           41
@@ -34,7 +40,7 @@
 #TOC>   1.1.2    If you HAVE done the BIN-ALI-Optimal_sequence_alignment               93
 #TOC>   1.2      Execute and Validate                                                 119
 #TOC>   2        Plot Annotations                                                     144
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -90,7 +96,7 @@
 # Then SKIP the next section.
 #
 #
-# ===  1.1.2  If you HAVE done the BIN-ALI-Optimal_sequence_alignment         
+# ===  1.1.2  If you HAVE done the BIN-ALI-Optimal_sequence_alignment
 #
 #
 #   You DO already have a file called "<MYSPE>-Annotations.json" in the
diff --git a/BIN-FUNC-Semantic_similarity.R b/BIN-FUNC-Semantic_similarity.R
index 5295bcc..f739efe 100644
--- a/BIN-FUNC-Semantic_similarity.R
+++ b/BIN-FUNC-Semantic_similarity.R
@@ -1,4 +1,10 @@
-# BIN-FUNC_Semantic_similarity.R
+# tocID <- "BIN-FUNC_Semantic_similarity.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-FUNC_Semantic_similarity unit.
@@ -28,14 +34,14 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                Line
 #TOC> --------------------------------------------------------------------
 #TOC>   1        Preparations: Packages, AnnotationDB, Setup            42
 #TOC>   2        Fetch GO Annotations                                   98
 #TOC>   3        Semantic Similarities                                 107
 #TOC>   4        GO Term Enrichment in Gene Sets                       125
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -158,27 +164,9 @@ myEnr <- GOenrichment(mySet, allGenes)
 
 sort(myEnr$p.values)  # Any significantly enriched terms? All of these are ...
 
-#Yes: most significantly enriched is GO:0071931. What is this?
-getGOTerm("GO:0071931")  # ... makes sense.
+#Most significantly enriched is GO:0071931. What is this?
+annotate::getGOTerm("GO:0071931")  # ... makes sense.
 
-(fullSet <- myEnr$genes$`GO:0071931`)  # What genes are annotated to this term?
-
-intersect(mySet, fullSet) # These are in both sets
-setdiff(mySet, fullSet)   # These mySet members are not annotated to that term
-setdiff(fullSet, mySet)   # These are annotated to that term but not in mySet.
-                          # ... that's the most interesting set. From a set of
-                          # genes we have identified a function that they
-                          # share, and that shared function has allowed us
-                          # to identify
-
-# What are these genes?
-# Select annotations from the annotation database:
-AnnotationDbi::select(org.Sc.sgd.db,
-                      keys = setdiff(fullSet, mySet),
-                      columns = c("COMMON", "DESCRIPTION"))
-
-# Note that these annotations are partially redundant to several different
-# aliases of the same three genes.
 
 
 
diff --git a/BIN-MYSPE.R b/BIN-MYSPE.R
index cb481b9..f8b1e93 100644
--- a/BIN-MYSPE.R
+++ b/BIN-MYSPE.R
@@ -1,4 +1,10 @@
-# BIN-MYSPE.R
+# tocID <- "BIN-MYSPE.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-MYSPE unit
diff --git a/BIN-PHYLO-Data_preparation.R b/BIN-PHYLO-Data_preparation.R
index e0532a4..382817d 100644
--- a/BIN-PHYLO-Data_preparation.R
+++ b/BIN-PHYLO-Data_preparation.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Data_preparation.R
+# tocID <- "BIN-PHYLO-Data_preparation.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Data_preparation unit.
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                     Line
 #TOC> ---------------------------------------------------------
 #TOC>   1        Preparations                                44
@@ -37,7 +43,7 @@
 #TOC>   3        Multiple Sequence Alignment                117
 #TOC>   4        Reviewing and Editing Alignments           136
 #TOC>   4.1        Masking workflow                         152
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-PHYLO-Tree_analysis.R b/BIN-PHYLO-Tree_analysis.R
index 950a000..cf60bec 100644
--- a/BIN-PHYLO-Tree_analysis.R
+++ b/BIN-PHYLO-Tree_analysis.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Tree_analysis.R
+# tocID <- "BIN-PHYLO-Tree_analysis.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Tree_analysis unit.
@@ -31,7 +37,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                              Line
 #TOC> --------------------------------------------------
 #TOC>   1        Preparation and Tree Plot            46
@@ -39,7 +45,7 @@
 #TOC>   2.1        Rooting Trees                     145
 #TOC>   2.2        Rotating Clades                   190
 #TOC>   2.3        Computing tree distances          241
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-PHYLO-Tree_building.R b/BIN-PHYLO-Tree_building.R
index 544b9a2..2ba15f9 100644
--- a/BIN-PHYLO-Tree_building.R
+++ b/BIN-PHYLO-Tree_building.R
@@ -1,4 +1,10 @@
-# BIN-PHYLO-Tree_building.R
+# tocID <- "BIN-PHYLO-Tree_building.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PHYLO-Tree_building unit.
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                       Line
 #TOC> -----------------------------------------------------------
 #TOC>   1        Calculating Trees                             46
@@ -39,7 +45,7 @@
 #TOC>   1.1.3          ... on Linux                            96
 #TOC>   1.1.4          Confirming PROMLPATH                   101
 #TOC>   1.2        Building a maximum likelihood tree         110
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -68,7 +74,7 @@ if (! requireNamespace("Rphylip", quietly = TRUE)) {
 # on your computer Phylip has been installed and define the path
 # to the proml program that calculates a maximum-likelihood tree.
 
-# ===   1.1.1  ... on the Mac                    
+# ===   1.1.1  ... on the Mac
 # On the Mac, the standard installation places a phylip folder
 # in the /Applications directory. That folder contains all the
 # individual phylip programs as <name>.app files. These are not
@@ -79,7 +85,7 @@ if (! requireNamespace("Rphylip", quietly = TRUE)) {
 # directly to that subdirectory to find the program it needs:
 # PROMLPATH <- "/Applications/phylip-3.695/exe/proml.app/Contents/MacOS"
 
-# ===   1.1.2  ... on Windows                    
+# ===   1.1.2  ... on Windows
 # On Windows you need to know where the programs have been installed, and you
 # need to specify a path that is correct for the Windows OS. Find the folder
 # that is named "exe", and right-click to inspect its properties. The path
@@ -93,12 +99,12 @@ if (! requireNamespace("Rphylip", quietly = TRUE)) {
 # I have heard that your path must not contain spaces, and it is prudent to
 # avoid other special characters as well.
 
-# ===   1.1.3  ... on Linux                      
+# ===   1.1.3  ... on Linux
 # If you are running Linux I trust you know what to do. It's probably
 # something like
 # PROMLPATH <- "/usr/local/phylip-3.695/bin"
 
-# ===   1.1.4  Confirming PROMLPATH              
+# ===   1.1.4  Confirming PROMLPATH
 # Confirm that the settings are right.
 PROMLPATH                # returns the path
 list.dirs(PROMLPATH)     # returns the directories in that path
diff --git a/BIN-PPI-Analysis.R b/BIN-PPI-Analysis.R
index c75e0fa..becd7f8 100644
--- a/BIN-PPI-Analysis.R
+++ b/BIN-PPI-Analysis.R
@@ -1,4 +1,10 @@
-# BIN-PPI-Analysis.R
+# tocID <- "BIN-PPI-Analysis.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-PPI-Analysis unit.
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                           Line
 #TOC> ---------------------------------------------------------------
 #TOC>   1        Setup and data                                    46
@@ -39,7 +45,7 @@
 #TOC>   2.3        Betweenness Centrality                         180
 #TOC>   3        biomaRt                                          226
 #TOC>   4        Task for submission                              296
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-SEQA-Composition.R b/BIN-SEQA-Composition.R
index 4458ffd..59b64a4 100644
--- a/BIN-SEQA-Composition.R
+++ b/BIN-SEQA-Composition.R
@@ -1,4 +1,10 @@
-# BIN-SEQA-Composition.R
+# tocID <- "BIN-SEQA-Composition.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-SEQA-Comparison unit
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                      Line
 #TOC> ----------------------------------------------------------
 #TOC>   1        Preparation                                  47
@@ -40,7 +46,7 @@
 #TOC>   3.3        Plotting log ratios                       185
 #TOC>   3.4        Sort by frequency                         200
 #TOC>   3.5        Color by amino acid type                  215
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/BIN-Sequence.R b/BIN-Sequence.R
index 2861cb3..00315f3 100644
--- a/BIN-Sequence.R
+++ b/BIN-Sequence.R
@@ -1,4 +1,10 @@
-# BIN-Sequence.R
+# tocID <- "BIN-Sequence.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the BIN-Sequence unit.
@@ -30,7 +36,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                Line
 #TOC> ----------------------------------------------------
 #TOC>   1        Prepare                                63
@@ -50,7 +56,7 @@
 #TOC>   7.2        Sampling                            306
 #TOC>   7.2.1          Equiprobable characters         308
 #TOC>   7.2.2          Defined probability vector      350
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -171,16 +177,16 @@ cat(sprintf("\n%s fish", c("one", "two", "red", "blue")))
 
 # =    6  Changing strings  ====================================================
 
-# ===   6.1.1  Changing case              
+# ===   6.1.1  Changing case
 tolower(s)
 toupper(tolower(s))
 
 
-# ===   6.1.2  Reverse                    
+# ===   6.1.2  Reverse
 reverse(s)
 
 
-# ===   6.1.3  Change characters          
+# ===   6.1.3  Change characters
 # chartr(old, new, x) maps all characters in x that appear in "old" to the
 # correpsonding character in "new."
 
@@ -208,7 +214,7 @@ chartr(myCypher, lett, x)
 # (Nb. substitution cyphers are easy to crack!)
 
 
-# ===   6.1.4  Substitute characters      
+# ===   6.1.4  Substitute characters
 (s <- gsub("IV", "i-v", s))  # gsub can change length, first argument is
                              # a "regular expression"!
 
@@ -305,7 +311,7 @@ sum(d <= 2.5) # 276. 276 of our 10000 samples are just as bunched near the
 
 # ==   7.2  Sampling  ==========================================================
 
-# ===   7.2.1  Equiprobable characters    
+# ===   7.2.1  Equiprobable characters
 
 # Assume you need a large random-nucleotide string for some statistical model.
 # How to create such a string? sample() can easily create it:
@@ -347,7 +353,7 @@ length(unlist(x))
 # of the smaller number of Cs and Gs - before biology even comes into play. How
 # do we account for that?
 
-# ===   7.2.2  Defined probability vector 
+# ===   7.2.2  Defined probability vector
 
 # This is where we need to know how to create samples with specific probability
 # distributions. A crude hack would be to create a sampling source vector with
diff --git a/BIN-Storing_data.R b/BIN-Storing_data.R
index fb9891b..479c285 100644
--- a/BIN-Storing_data.R
+++ b/BIN-Storing_data.R
@@ -1,4 +1,10 @@
-# BIN-Storing_data.R
+# tocID <- "BIN-Storing_data.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-Storing_data unit
@@ -27,7 +33,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                   Line
 #TOC> -----------------------------------------------------------------------
 #TOC>   1        A Relational Datamodel in R: review                       57
@@ -50,7 +56,7 @@
 #TOC>   3.3        Create an R script to create your own database         535
 #TOC>   3.3.1          Check and validate                                 555
 #TOC>   3.4        Task: submit for credit (part 2/2)                     596
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -205,7 +211,7 @@ str(philDB)
 # go back, re-read, play with it, and ask for help. This is essential.
 
 
-# ===   1.1.1  completing the database                       
+# ===   1.1.1  completing the database
 
 
 # Next I'll add one more person, and create the other two tables:
@@ -369,7 +375,7 @@ dbSanitizeSequence(x)
 
 # ==   2.3  Create a protein table for our data model  =========================
 
-# ===   2.3.1  Initialize the database                       
+# ===   2.3.1  Initialize the database
 
 
 # The function dbInit contains all the code to return a list of empty
@@ -381,7 +387,7 @@ myDB <- dbInit()
 str(myDB)
 
 
-# ===   2.3.2  Add data                                      
+# ===   2.3.2  Add data
 
 
 # fromJSON() returns a dataframe that we can readily process to add data
@@ -428,7 +434,7 @@ source("./scripts/ABC-createRefDB.R")
 str(myDB)
 
 
-# ===   2.4.1  Examples of navigating the database           
+# ===   2.4.1  Examples of navigating the database
 
 
 # You can look at the contents of the tables in the usual way we access
@@ -552,7 +558,7 @@ myDB$taxonomy$species[sel]
 # in any of the JSON files. Later you will add more information ...
 
 
-# ===   3.3.1  Check and validate                            
+# ===   3.3.1  Check and validate
 
 
 # Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
diff --git a/FND-Genetic_code.R b/FND-Genetic_code.R
index 5220449..267b127 100644
--- a/FND-Genetic_code.R
+++ b/FND-Genetic_code.R
@@ -1,4 +1,10 @@
-# FND-Genetic_code.R
+# tocID <- "FND-Genetic_code.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-Genetic_code unit.
@@ -28,7 +34,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                            Line
 #TOC> ----------------------------------------------------------------
 #TOC>   1        Storing the genetic code                           45
@@ -38,7 +44,7 @@
 #TOC>   3        An alternative representation: 3D array           212
 #TOC>   3.1        Print a Genetic code table                      246
 #TOC>   4        Tasks                                             272
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/FND-MAT-Graphs_and_networks.R b/FND-MAT-Graphs_and_networks.R
index 2348f12..d560fb9 100644
--- a/FND-MAT-Graphs_and_networks.R
+++ b/FND-MAT-Graphs_and_networks.R
@@ -1,4 +1,10 @@
-# FND-MAT-Graphs_and_networks.R
+# tocID <- "FND-MAT-Graphs_and_networks.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-MAT-Graphs_and_networks unit.
@@ -29,7 +35,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                        Line
 #TOC> ------------------------------------------------------------
 #TOC>   1        Review                                         50
@@ -43,7 +49,7 @@
 #TOC>   4        RANDOM GRAPHS AND GRAPH METRICS               539
 #TOC>   4.1        Diameter                                    576
 #TOC>   5        GRAPH CLUSTERING                              645
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -280,7 +286,7 @@ plot(GBA,
      vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
      vertex.size = 200 + (30 * igraph::degree(GBA)),
      vertex.label = NA)
-par(oPar)                              # restore grphics state
+par(oPar)                              # restore graphics state
 
 # This is a very obviously different graph! Some biological networks have
 # features that look like that - but in my experience the hub nodes are usually
diff --git a/FND-STA-Information_theory.R b/FND-STA-Information_theory.R
index 49f2589..3161bad 100644
--- a/FND-STA-Information_theory.R
+++ b/FND-STA-Information_theory.R
@@ -1,14 +1,21 @@
-# FND-STA-Information_theory.R
+# tocID <- "FND-STA-Information_theory.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Information_theory unit.
 #
-# Version:  0.2
+# Version:  0.2.1
 #
-# Date:     2017  MM  DD
+# Date:     2017 - 2019
 # Author:   Boris Steipe (boris.steipe@utoronto.ca)
 #
 # Versions:
+#           0.2.1  Maintenance
 #           0.2    Under development
 #           0.1    First code copied from 2016 material.
 #
@@ -58,11 +65,33 @@ AAref["Y"] <- 0.0294
 sum(AAref)
 
 # Function to calculate Shannon entropy
-H <- function(v) {
-  # Shannon entropy (bits)
-  return(-sum(v * (log(v) / log(2))))
+H <- function(pmf) {
+  # Calculate Shannon entropy
+  # Parameters:
+  #   pmf (numeric) probability mass function: a vector of states and
+  #                 associated probabilities. Each element of
+  #                 pmf must be in (0, 1] and sum(pmf) must be 1.
+  # Value:
+  #   Shannon entropy in bits.
+  # Examples:
+  #   H(c(A=0.25, C=0.25, G=0.25, T=0.25))  # 2 bits entropy in a random
+  #                                         # nucleotide sequence
+  #   H(1)     # If all elements are the same, entropy is zero
+  #
+  if (any(pmf <= 0 | pmf > 1) || isFALSE(all.equal(1.0, sum(pmf)))) {
+    stop("Input is not a discrete probability distribution.")
+  }
+  H <- -sum(pmf * (log(pmf) / log(2)))
+  return(H)
 }
 
+# Why use all.equal()? Exact comparisons with floating point numbers are
+# brittle. Consider for example:
+1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6 == 1
+print(1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6, digits = 22) # 0.9999999999999998889777
+# all.equal() tests for _near_ equality with tolerance of ~ 1.5e-8
+
+
 
 # Entropy of the database frequencies (in bits):
 (Href <- H(AAref))
diff --git a/FND-STA-Probability_distribution.R b/FND-STA-Probability_distribution.R
index e77dc01..77397e5 100644
--- a/FND-STA-Probability_distribution.R
+++ b/FND-STA-Probability_distribution.R
@@ -1,4 +1,10 @@
-# FND-STA-Probability_distribution.R
+# tocID <- "FND-STA-Probability_distribution.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Probability_distribution unit.
@@ -28,7 +34,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                         Line
 #TOC> -----------------------------------------------------------------------------
 #TOC>   1        Introduction                                                    52
@@ -45,7 +51,7 @@
 #TOC>   4.2.1          An example from tossing dice                             463
 #TOC>   4.2.2          An example from lognormal distributions                  586
 #TOC>   4.3        Kolmogorov-Smirnov test for continuous distributions         629
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -460,7 +466,7 @@ chisq.test(countsL1, countsG1.9, simulate.p.value = TRUE, B = 10000)
 # be applied to discrete distributions. But we need to talk a bit about
 # converting counts to p.m.f.'s.
 
-# ===   4.2.1  An example from tossing dice                        
+# ===   4.2.1  An example from tossing dice
 
 #  The p.m.f of an honest die is (1:1/6, 2:1/6, 3:1/6, 4:1/6, 5:1/6, 6:1/6). But
 #  there is an issue when we convert sampled counts to frequencies, and estimate
@@ -583,7 +589,7 @@ abline(v = KLdiv(rep(1/6, 6), pmfPC(counts, 1:6)), col="firebrick")
 # somewhat but not drastically atypical.
 
 
-# ===   4.2.2  An example from lognormal distributions             
+# ===   4.2.2  An example from lognormal distributions
 
 # We had compared a set of lognormal and gamma distributions above, now we
 # can use KL-divergence to quantify their similarity:
diff --git a/FND-STA-Significance.R b/FND-STA-Significance.R
index 4748c7b..08abcdb 100644
--- a/FND-STA-Significance.R
+++ b/FND-STA-Significance.R
@@ -1,4 +1,10 @@
-# FND-STA-Significance.R
+# tocID <- "FND-STA-Significance.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-STA-Significance unit.
@@ -25,7 +31,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                              Line
 #TOC> ------------------------------------------------------------------
 #TOC>   1        Significance and p-value                             43
@@ -36,7 +42,7 @@
 #TOC>   3        Significance by integration                         198
 #TOC>   4        Significance by simulation or permutation           204
 #TOC>   5        Final tasks                                         312
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -100,7 +106,7 @@ print(x, digits = 22)
 # curve, as a fraction of the whole.
 
 
-# ===   1.2.1  p-value illustrated                      
+# ===   1.2.1  p-value illustrated
 
 # Let's illustrate. First we draw a million random values from our
 # standard, normal distribution:
diff --git a/README.md b/README.md
index 9c792a2..edf67e4 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,4 @@
 # ABC-units
 A Bioinformatics Course: R modules for learning units
+
+Follow the instructions in the learning unit to install your local copy of this R-project.
diff --git a/RPR-Biostrings.R b/RPR-Biostrings.R
index b24c08e..5e8a730 100644
--- a/RPR-Biostrings.R
+++ b/RPR-Biostrings.R
@@ -1,4 +1,10 @@
-# RPR-Biostrings.R
+# tocID <- "RPR-Biostrings.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Biostrings unit.
diff --git a/RPR-FASTA.R b/RPR-FASTA.R
index 56f9ff8..d7f9706 100644
--- a/RPR-FASTA.R
+++ b/RPR-FASTA.R
@@ -1,4 +1,10 @@
-# RPR-FASTA.R
+# tocID <- "RPR-FASTA.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-FASTA unit.
diff --git a/RPR-GEO2R.R b/RPR-GEO2R.R
index 7338ce9..3bea234 100644
--- a/RPR-GEO2R.R
+++ b/RPR-GEO2R.R
@@ -1,4 +1,10 @@
-# RPR_GEO2R.R
+# tocID <- "RPR_GEO2R.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR_GEO2R unit.
@@ -34,7 +40,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                      Line
 #TOC> --------------------------------------------------------------------------
 #TOC>   1        Preparations                                                 56
@@ -49,7 +55,7 @@
 #TOC>   5.1        Final task: Gene descriptions                             504
 #TOC>   6        Improving on Discovery by Differential Expression           510
 #TOC>   7        Annotation data                                             594
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/RPR-Genetic_code_optimality.R b/RPR-Genetic_code_optimality.R
index bdf68dd..d3a5cd1 100644
--- a/RPR-Genetic_code_optimality.R
+++ b/RPR-Genetic_code_optimality.R
@@ -1,4 +1,10 @@
-# RPR-Genetic_code_optimality.R
+# tocID <- "RPR-Genetic_code_optimality.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Genetic_code_optimality unit.
@@ -30,7 +36,7 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                          Line
 #TOC> --------------------------------------------------------------
 #TOC>   1        Designing a computational experiment             57
@@ -43,7 +49,7 @@
 #TOC>   2.2.4          measure effect                            213
 #TOC>   3        Run the experiment                              260
 #TOC>   4        Task solutions                                  356
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
@@ -142,7 +148,7 @@ swappedGC <- function(GC) {
 #   - we count the number of mutations and evaluate their severity.
 
 
-# ===   2.2.1  reverse-translate                    
+# ===   2.2.1  reverse-translate
 
 # To reverse-translate an amino acid vector, we randomly pick one of its
 # codons from a genetic code, and assemble all codons to a sequence.
@@ -167,7 +173,7 @@ traRev <- function(s, GC) {
 }
 
 
-# ===   2.2.2  Randomly mutate                      
+# ===   2.2.2  Randomly mutate
 
 # To mutate, we split a codon into it's three nucleotides, then randomly replace
 # one of the three with another nucleotide.
@@ -192,7 +198,7 @@ randMut <- function(vC) {
 
 
 
-# ===   2.2.3  Forward- translate                   
+# ===   2.2.3  Forward- translate
 
 traFor <- function(vC, GC) {
   # Parameters:
@@ -210,7 +216,7 @@ traFor <- function(vC, GC) {
   }
 
 
-# ===   2.2.4  measure effect                       
+# ===   2.2.4  measure effect
 
 # How do we evaluate the effect of the mutation? We'll take a simple ad hoc
 # approach: we divide amino acids into hydrophobic, hydrophilic, and neutral
diff --git a/RPR-Introduction.R b/RPR-Introduction.R
index 2598d93..45be703 100644
--- a/RPR-Introduction.R
+++ b/RPR-Introduction.R
@@ -1,4 +1,10 @@
-# RPR-Introduction.R
+# tocID <- "RPR-Introduction.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the RPR-Introduction unit
diff --git a/RPR-PROSITE_POST.R b/RPR-PROSITE_POST.R
index 4e85457..392dcee 100644
--- a/RPR-PROSITE_POST.R
+++ b/RPR-PROSITE_POST.R
@@ -1,4 +1,10 @@
-# RPR-PROSITE_POST.R
+# tocID <- "RPR-PROSITE_POST.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
@@ -29,13 +35,13 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                                 Line
 #TOC> ---------------------------------------------------------------------
 #TOC>   1        Constructing a POST command from a Web query            42
 #TOC>   1.1        Task - fetchPrositeFeatures() function               142
 #TOC>   2        Task solutions                                         150
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/RPR-RegEx.R b/RPR-RegEx.R
index aa96cfe..a9173c0 100644
--- a/RPR-RegEx.R
+++ b/RPR-RegEx.R
@@ -1,4 +1,10 @@
-# RPR-RegEx.R
+# tocID <- "RPR-RegEx.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the RPR-RegEx unit
diff --git a/RPR-SX-PDB.R b/RPR-SX-PDB.R
index d566b8e..a97c591 100644
--- a/RPR-SX-PDB.R
+++ b/RPR-SX-PDB.R
@@ -1,4 +1,10 @@
-# RPR-SX-PDB.R
+# tocID <- "RPR-SX-PDB.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-SX-PDB unit.
diff --git a/RPR-UniProt_GET.R b/RPR-UniProt_GET.R
index 74578ee..b8e9c30 100644
--- a/RPR-UniProt_GET.R
+++ b/RPR-UniProt_GET.R
@@ -1,4 +1,10 @@
-# RPR-UniProt_GET.R
+# tocID <- "RPR-UniProt_GET.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
@@ -28,13 +34,13 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                      Line
 #TOC> ----------------------------------------------------------
 #TOC>   1        UniProt files via GET                        41
 #TOC>   1.1        Task - fetchUniProtSeq() function         103
 #TOC>   2        Task solutions                              110
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/RPR-Unit_testing.R b/RPR-Unit_testing.R
index 3fa2e0f..34b6b68 100644
--- a/RPR-Unit_testing.R
+++ b/RPR-Unit_testing.R
@@ -1,4 +1,10 @@
-# RPR-Unit_testing.R
+# tocID <- "RPR-Unit_testing.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Unit_testing unit.
@@ -29,10 +35,10 @@
 #TOC> 
 #TOC>   Section  Title                             Line
 #TOC> -------------------------------------------------
-#TOC>   1        Unit Tests with testthat            40
-#TOC>   2        Organizing your tests              159
-#TOC>   2.1        Testing scripts                  183
-#TOC>   3        Task solutions                     198
+#TOC>   1        Unit Tests with testthat            46
+#TOC>   2        Organizing your tests              165
+#TOC>   2.1        Testing scripts                  189
+#TOC>   3        Task solutions                     204
 #TOC> 
 #TOC> ==========================================================================
 
diff --git a/RPR-eUtils_XML.R b/RPR-eUtils_XML.R
index 1f8c23b..6dec655 100644
--- a/RPR-eUtils_XML.R
+++ b/RPR-eUtils_XML.R
@@ -1,4 +1,10 @@
-# RPR-eUtils_and_XML.R
+# tocID <- "RPR-eUtils_and_XML.R"
+#
+# ---------------------------------------------------------------------------- #
+#  PATIENCE  ...                                                               #
+#    Do not yet work wih this code. Updates in progress. Thank you.            #
+#    boris.steipe@utoronto.ca                                                  #
+# ---------------------------------------------------------------------------- #
 #
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the RPR-Scripting_data_downloads unit.
@@ -28,13 +34,13 @@
 
 
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                       Line
 #TOC> -----------------------------------------------------------
 #TOC>   1        Working with NCBI eUtils                      41
 #TOC>   1.1        Task - fetchNCBItaxData() function         144
 #TOC>   2        Task solutions                               151
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
 
 
diff --git a/scripts/ABC-createRefDB.R b/scripts/ABC-createRefDB.R
index c4a45b9..3df8d08 100644
--- a/scripts/ABC-createRefDB.R
+++ b/scripts/ABC-createRefDB.R
@@ -8,7 +8,7 @@
 # http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
 #
 # For the data model, see
-# https://docs.google.com/drawings/d/1uupNvz18_FYFwyyVPebTM0CUxcJCPDQuxuIJGpjWQWg
+# https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
 # For the schema, see dbInit() in ./scripts/ABC-dbUtilities.R
 #
 # ==============================================================================
diff --git a/scripts/ABC-dbUtilities.R b/scripts/ABC-dbUtilities.R
index 3137611..e4b21d3 100644
--- a/scripts/ABC-dbUtilities.R
+++ b/scripts/ABC-dbUtilities.R
@@ -1,12 +1,35 @@
-# ABC-dbUtilities.R
-
+# tocID <- "scripts/ABC-dbUtilities.R"
+#
 # database utilities for ABC learning units
 #
 # ==============================================================================
-#
 
 
-# ====== PACKAGES ==============================================================
+#TOC> ==========================================================================
+#TOC> 
+#TOC>   Section  Title                             Line
+#TOC> -------------------------------------------------
+#TOC>   1        PACKAGES                            32
+#TOC>   2        FUNCTIONS                           50
+#TOC>   2.01       dbSanitizeSequence()              53
+#TOC>   2.02       dbConfirmUnique()                 88
+#TOC>   2.03       dbInit()                         106
+#TOC>   2.04       dbAutoincrement()                147
+#TOC>   2.05       dbAddProtein()                   160
+#TOC>   2.06       dbAddFeature()                   180
+#TOC>   2.07       dbAddTaxonomy()                  199
+#TOC>   2.08       dbAddAnnotation()                215
+#TOC>   2.09       dbFetchUniProtSeq()              243
+#TOC>   2.10       dbFetchPrositeFeatures()         267
+#TOC>   2.11       node2text()                      311
+#TOC>   2.12       dbFetchNCBItaxData()             323
+#TOC>   2.13       UniProtIDmap()                   362
+#TOC>   3        TESTS                              399
+#TOC> 
+#TOC> ==========================================================================
+
+
+# =    1  PACKAGES  ============================================================
 
 
 if (! requireNamespace("jsonlite", quietly = TRUE)) {
@@ -24,9 +47,10 @@ if (! requireNamespace("xml2", quietly = TRUE)) {
 }
 
 
-# ====== FUNCTIONS =============================================================
+# =    2  FUNCTIONS  ===========================================================
 
 
+# ==   2.01  dbSanitizeSequence()  =============================================
 dbSanitizeSequence <- function(s, unambiguous = TRUE) {
   # Remove FASTA header lines, if any,
   # flatten any structure that s has,
@@ -61,6 +85,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
 }
 
 
+# ==   2.02  dbConfirmUnique()  ================================================
 dbConfirmUnique <- function(x) {
   # x is a vector of logicals.
   # returns x if x has exactly one TRUE element.
@@ -78,24 +103,27 @@ dbConfirmUnique <- function(x) {
 }
 
 
+# ==   2.03  dbInit()  =========================================================
 dbInit <- function() {
   # Return an empty instance of the protein database
+  # Open the link and study the schema:
+  # https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
 
   db <- list()
 
+  db$version <- "1.0"
+
   db$protein <- data.frame(
     ID = numeric(),
     name = character(),
     RefSeqID = character(),
     UniProtID = character(),
     taxonomyID = numeric(),
-    sequence = character(),
-    stringsAsFactors = FALSE)
+    sequence = character())
 
   db$taxonomy <- data.frame(
     ID = numeric(),
-    species = character(),
-    stringsAsFactors = FALSE)
+    species = character())
 
 
   db$annotation <- data.frame(
@@ -103,21 +131,20 @@ dbInit <- function() {
     proteinID = numeric(),
     featureID = numeric(),
     start = numeric(),
-    end = numeric(),
-    stringsAsFactors = FALSE)
+    end = numeric())
 
   db$feature <- data.frame(
     ID = numeric(),
     name = character(),
     description = character(),
     sourceDB = character(),
-    accession = character(),
-    stringsAsFactors = FALSE)
+    accession = character())
 
   return(db)
 }
 
 
+# ==   2.04  dbAutoincrement()  ================================================
 dbAutoincrement <- function(tb) {
   # Return a unique integer that can be used as a primary key
   # Value:
@@ -130,6 +157,7 @@ dbAutoincrement <- function(tb) {
 }
 
 
+# ==   2.05  dbAddProtein()  ===================================================
 dbAddProtein <- function(db, jsonDF) {
   # Add one or more protein entries to the database db.
   # Parameters:
@@ -142,14 +170,14 @@ dbAddProtein <- function(db, jsonDF) {
                     RefSeqID    = jsonDF$RefSeqID[i],
                     UniProtID   = jsonDF$UniProtID[i],
                     taxonomyID  = jsonDF$taxonomyID[i],
-                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]),
-                    stringsAsFactors = FALSE)
+                    sequence    = dbSanitizeSequence(jsonDF$sequence[i]))
     db$protein <- rbind(db$protein, x)
   }
   return(db)
 }
 
 
+# ==   2.06  dbAddFeature()  ===================================================
 dbAddFeature <- function(db, jsonDF) {
   # Add one or more feature entries to the database db.
   # Parameters:
@@ -161,14 +189,14 @@ dbAddFeature <- function(db, jsonDF) {
                     name        = jsonDF$name[i],
                     description = jsonDF$description[i],
                     sourceDB    = jsonDF$sourceDB[i],
-                    accession   = jsonDF$accession[i],
-                    stringsAsFactors = FALSE)
+                    accession   = jsonDF$accession[i])
     db$feature <- rbind(db$feature, x)
   }
   return(db)
 }
 
 
+# ==   2.07  dbAddTaxonomy()  ==================================================
 dbAddTaxonomy <- function(db, jsonDF) {
   # Add one or more taxonomy entries to the database db.
   # Parameters:
@@ -178,13 +206,13 @@ dbAddTaxonomy <- function(db, jsonDF) {
   for (i in seq_len(nrow(jsonDF))) {
     x <- data.frame(
       ID =  jsonDF$ID[i],
-      species = jsonDF$species[i],
-      stringsAsFactors = FALSE)
+      species = jsonDF$species[i])
     db$taxonomy <- rbind(db$taxonomy, x)
   }
   return(db)
 }
 
+# ==   2.08  dbAddAnnotation()  ================================================
 dbAddAnnotation <- function(db, jsonDF) {
   # Add one or more annotation entries to the database db.
   # Parameters:
@@ -205,14 +233,14 @@ dbAddAnnotation <- function(db, jsonDF) {
                     proteinID = pID,
                     featureID = fID,
                     start     = as.integer(jsonDF$start[i]),
-                    end       = as.integer(jsonDF$end[i]),
-                    stringsAsFactors = FALSE)
+                    end       = as.integer(jsonDF$end[i]))
     db$annotation <- rbind(db$annotation, x)
   }
   return(db)
 }
 
 
+# ==   2.09  dbFetchUniProtSeq()  ==============================================
 dbFetchUniProtSeq <- function(ID) {
   # Fetch a protein sequence from UniProt.
   # Parameters:
@@ -236,6 +264,7 @@ dbFetchUniProtSeq <- function(ID) {
 }
 
 
+# ==   2.10  dbFetchPrositeFeatures()  =========================================
 dbFetchPrositeFeatures <- function(ID) {
   # Fetch feature annotations from ScanProsite.
   # Parameters:
@@ -272,14 +301,14 @@ dbFetchPrositeFeatures <- function(ID) {
                                      start =  as.numeric(tokens[4]),
                                      end   =  as.numeric(tokens[5]),
                                      psID  =  tokens[6],
-                                     psName = tokens[7],
-                                     stringsAsFactors = FALSE))
+                                     psName = tokens[7]))
     }
   }
   return(myFeatures)
 }
 
 
+# ==   2.11  node2text()  ======================================================
 node2text <- function(doc, tag) {
   # an extractor function for the contents of elements
   # between given tags in an XML response.
@@ -291,6 +320,7 @@ node2text <- function(doc, tag) {
 }
 
 
+# ==   2.12  dbFetchNCBItaxData()  =============================================
 dbFetchNCBItaxData <- function(ID) {
   # Fetch feature taxID and Organism from the NCBI.
   # Parameters:
@@ -329,6 +359,7 @@ dbFetchNCBItaxData <- function(ID) {
 
 
 
+# ==   2.13  UniProtIDmap()  ===================================================
 UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
   # Use UniProt ID mapping service to map one or more IDs
   # Parameters:
@@ -351,8 +382,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
 
   if (httr::status_code(response) == 200) { # 200: oK
     myMap <- read.delim(file = textConnection(httr::content(response)),
-                        sep = "\t",
-                        stringsAsFactors = FALSE)
+                        sep = "\t")
     myMap <- myMap[ , c(1,3)]
     colnames(myMap) <- c("From", "To")
   } else {
@@ -366,7 +396,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
 }
 
 
-# ====== TESTS =================================================================
+# =    3  TESTS  ===============================================================
 
 if (FALSE) {
   if (! requireNamespace("testthat", quietly = TRUE)) {
diff --git a/scripts/ABC-makeScCCnet.R b/scripts/ABC-makeScCCnet.R
index 5138590..54185e3 100644
--- a/scripts/ABC-makeScCCnet.R
+++ b/scripts/ABC-makeScCCnet.R
@@ -1,4 +1,4 @@
-# ABC-makeScCCnet.R
+# tocID <- "scripts/ABC-makeScCCnet.R"
 #
 # Create a subnetwork of high-confidence yeast genes with a "mitotic cell cycle"
 # GOSlim annotation.
diff --git a/scripts/ABC-writeALN.R b/scripts/ABC-writeALN.R
index 0f52e7f..a8b7d80 100644
--- a/scripts/ABC-writeALN.R
+++ b/scripts/ABC-writeALN.R
@@ -1,4 +1,4 @@
-# ABC-writeALN.R
+# tocID <- "scripts/ABC-writeALN.R"
 #
 # ToDo:    calculate consensus line
 #          append sequence numbers
diff --git a/scripts/ABC-writeMFA.R b/scripts/ABC-writeMFA.R
index 3753156..90471d3 100644
--- a/scripts/ABC-writeMFA.R
+++ b/scripts/ABC-writeMFA.R
@@ -40,7 +40,7 @@ writeMFA <- function(ali,
   if (is.na(blockWidth)) {
     stop("PANIC: parameter \"blockWidth\" must be numeric.")
   }
-  if (blockWidth < 1){
+  if (! blockWidth > 0){
     stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
   }
 
@@ -105,7 +105,7 @@ writeMFA <- function(ali,
     txt <- c(txt, "")  # append an empty line for readability
   }
 
-  writeLines(txt, con= myCon)
+  writeLines(txt, con = myCon)
 
 }
 
diff --git a/scripts/BLAST.R b/scripts/BLAST.R
index 32c7e2f..80fd7bd 100644
--- a/scripts/BLAST.R
+++ b/scripts/BLAST.R
@@ -357,20 +357,23 @@ parseBLASTalignment <- function(hit) {
 
 # ==== TESTS ===================================================================
 
-# define query:
-# q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
-#              "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
-#              "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
-#              sep="")
-# or ...
-# q <- "NP_010227" # refseq ID
-#
-# test <- BLAST(q,
-#               nHits = 100,
-#               E = 0.001,
-#               rid = "",
-#               limits = "txid4751[ORGN]")
-# length(test$hits)
+if (FALSE) {
+  # define query:
+  q   <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
+               "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
+               "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
+               sep="")
+  # or ...
+  q <- "NP_010227" # refseq ID
+
+  test <- BLAST(q,
+                nHits = 100,
+                E = 0.001,
+                rid = "",
+                limits = "txid4751[ORGN]")
+  str(test)
+  length(test$hits)
+}
 
 # [END]