Use requireNamespace() and the <package>::<function>() idiom,

BiocManager:: - not biocLite()
This commit is contained in:
hyginn 2019-01-08 17:11:25 +10:00
parent 8cdbb9f4cc
commit cffd803c23
30 changed files with 1185 additions and 1069 deletions
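In practice, the new idiom looks like the following minimal sketch (the package name "Biostrings" is used only as an example; the pattern is what matters):

if (! requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
  BiocManager::install("Biostrings")
}
# ... functions are then called with an explicit prefix, e.g.
# Biostrings::AAString("MSNQIY"), instead of attaching the package.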

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-BLAST unit.
#
# Version: 1.1
# Version: 1.2
#
# Date: 2017 10 23
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.1 Fixed parsing logic.
# 1.0 First live version 2017.
# 0.1 First code copied from 2016 material.
@ -27,31 +29,17 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------
#TOC> 1 Preparations 41
#TOC> 2 Defining the APSES domain 54
#TOC> 3 Executing the BLAST search 76
#TOC> 4 Analysing results 98
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------
#TOC> 1 Defining the APSES domain 42
#TOC> 2 Executing the BLAST search 64
#TOC> 3 Analysing results 86
#TOC>
#TOC> ==========================================================================
# = 1 Preparations ========================================================
if (!require(Biostrings, quietly=TRUE)) {
source("https://bioconductor.org/biocLite.R")
biocLite("Biostrings")
library(Biostrings)
}
# Package information:
# library(help = Biostrings) # basic information
# browseVignettes("Biostrings") # available vignettes
# data(package = "Biostrings") # available datasets
# = 2 Defining the APSES domain ===========================================
# = 1 Defining the APSES domain ===========================================
# Load your protein database
source("makeProteinDB.R")
@ -73,7 +61,7 @@ source("makeProteinDB.R")
# BLAST search.
# = 3 Executing the BLAST search ==========================================
# = 2 Executing the BLAST search ==========================================
# The ./scripts/BLAST.R code defines two functions to access the BLAST interface
# through its Web API, and to parse results. Have a look at the script, then
@ -91,11 +79,11 @@ BLASTresults <- BLAST(apses, # MYSPE APSES domain sequence
limits = "txid559292[ORGN]") # S. cerevisiae S288c
length(BLASTresults$hits) # There should be at least one hit there. Ask for advice
# in case this step fails.
length(BLASTresults$hits) # There should be at least one hit there. Ask for
# advice in case this step fails.
# = 4 Analysing results ===================================================
# = 3 Analysing results ===================================================
(topHit <- BLASTresults$hits[[1]]) # Get the top hit

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Dotplot unit.
#
# Version: 0.1
# Version: 0.2
#
# Date: 2017 08 28
# Date: 2019 01 07
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 0.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 0.1 First code copied from 2016 material.
#
#
@ -23,24 +25,37 @@
#
# ==============================================================================
# = 1 ___Section___
# First, we install and load the Biostrings package.
if (!require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------
#TOC> 1 ___Section___ 39
#TOC> 2 Tasks 187
#TOC>
#TOC> ==========================================================================
# = 1 ___Section___ =======================================================
if (!requireNamespace("BiocManager", quietly=TRUE)) {
install.packages("BiocManager")
}
if (!requireNamespace("Biostrings", quietly=TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
# browseVignettes("Biostrings") # available vignettes
# data(package = "Biostrings") # available datasets
if (!requireNamespace("seqinr", quietly=TRUE)) {
install.packages("seqinr")
}
# Let's load BLOSUM62
data(BLOSUM62)
data(BLOSUM62, package = "Biostrings")
# Now let's craft code for a dotplot. That's surprisingly simple. We build a
# matrix that has as many rows as one sequence, as many columns as another. Then
@ -51,10 +66,10 @@ data(BLOSUM62)
# First we fetch our sequences and split them into single characters.
sel <- myDB$protein$name == "MBP1_SACCE"
MBP1_SACCE <- s2c(myDB$protein$sequence[sel])
MBP1_SACCE <- seqinr::s2c(myDB$protein$sequence[sel])
sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
MBP1_MYSPE <- s2c(myDB$protein$sequence[sel])
MBP1_MYSPE <- seqinr::s2c(myDB$protein$sequence[sel])
# Check that we have two character vectors of the expected length.
str(MBP1_SACCE)
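# Before we get to the actual construction below, here is a minimal sketch of
# the dot-matrix idea described above (an illustration only, with two made-up
# toy sequences - not the unit's own implementation):
seqA <- unlist(strsplit("MSNQIYSARYSG", ""))
seqB <- unlist(strsplit("MSNQLFSARYAG", ""))
dotMat <- matrix(0, nrow = length(seqA), ncol = length(seqB))
for (i in seq_along(seqA)) {
  dotMat[i, ] <- as.numeric(seqA[i] == seqB)  # 1 wherever the residues match
}
image(x = seq_along(seqB), y = seq_along(seqA), z = t(dotMat),
      col = c("#FFFFFF", "#000000"), xlab = "seqB", ylab = "seqA")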
@ -136,7 +151,7 @@ axis(4, at = c(1, seq(10, len, by=10)))
# utilities file and called it dotPlot2(). Why not dotPlot() ... that's because
# there already is a dotplot function in the seqinr package:
dotPlot(MBP1_SACCE, MBP1_MYSPE) # seqinr
seqinr::dotPlot(MBP1_SACCE, MBP1_MYSPE) # seqinr
dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE") # Ours
# Which one do you prefer? You can probably see the block patterns that arise
@ -169,7 +184,7 @@ dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE", f = myFilter)
# = 1 Tasks
# = 2 Tasks ===============================================================

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-MSA unit.
#
# Version: 1.1
# Version: 1.2
#
# Date: 2017 10
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.1 Added fetchMSAmotif()
# 1.0 Fully refactored and rewritten for 2017
# 0.1 First code copied from 2016 material.
@ -27,25 +29,25 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ------------------------------------------------------------
#TOC> 1 Preparations 51
#TOC> 2 Aligning full length MBP1 proteins 99
#TOC> 2.1 Preparing Sequences 110
#TOC> 2.2 Compute the MSA 135
#TOC> 3 Analyzing an MSA 156
#TOC> 4 Comparing MSAs 227
#TOC> 4.1 Importing an alignment to msa 236
#TOC> 4.1.1 importing an .aln file 245
#TOC> 4.1.2 Creating an MsaAAMultipleAlignment object 276
#TOC> 4.2 More alignments 313
#TOC> 4.3 Computing comparison metrics 325
#TOC> 5 Profile-Profile alignments 462
#TOC> 6 Sequence Logos 539
#TOC> 6.1 Subsetting an alignment by motif 548
#TOC> 6.2 Plot a Sequence Logo 591
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ------------------------------------------------------------------
#TOC> 1 Preparations 54
#TOC> 2 Aligning full length MBP1 proteins 96
#TOC> 2.1 Preparing Sequences 107
#TOC> 2.2 Compute the MSA 132
#TOC> 3 Analyzing an MSA 153
#TOC> 4 Comparing MSAs 224
#TOC> 4.1 Importing an alignment to msa 233
#TOC> 4.1.1 importing an .aln file 242
#TOC> 4.1.2 Creating an MsaAAMultipleAlignment object 273
#TOC> 4.2 More alignments 324
#TOC> 4.3 Computing comparison metrics 336
#TOC> 5 Profile-Profile alignments 473
#TOC> 6 Sequence Logos 546
#TOC> 6.1 Subsetting an alignment by motif 555
#TOC> 6.2 Plot a Sequence Logo 604
#TOC>
#TOC> ==========================================================================
@ -59,28 +61,22 @@
source("makeProteinDB.R")
# Multiple sequence alignment algorithms are provided in
# the Bioconductor msa package.
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly=TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly=TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
# browseVignettes("Biostrings") # available vignettes
# data(package = "Biostrings") # available datasets
# Multiple sequence alignment algorithms are provided in
# the Bioconductor msa package.
if (! require(msa, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("msa")
library(msa)
if (! requireNamespace("msa", quietly=TRUE)) {
BiocManager::install("msa")
}
# Package information:
# library(help=msa) # basic information
@ -115,7 +111,7 @@ help(package = "msa")
# of sequence.
sel <- grep("MBP1", myDB$protein$name)
MBP1set <- AAStringSet(myDB$protein$sequence[sel])
MBP1set <- Biostrings::AAStringSet(myDB$protein$sequence[sel])
# To help us make sense of the alignment we need to add the names for
# the sequences. Names for a seqSet object are held in the ranges slot...
@ -142,10 +138,10 @@ MBP1set
# Let's run an alignment with "Muscle"
(msaM <- msaMuscle( MBP1set, order = "aligned"))
(msaM <- msa::msaMuscle( MBP1set, order = "aligned"))
# ... or to see the whole thing (cf. ?MsaAAMultipleAlignment ... print method):
print(msaM, show=c("alignment", "complete"), showConsensus=FALSE)
msa::print(msaM, show=c("alignment", "complete"), showConsensus=FALSE)
# You see that the alignment object has sequence strings with hyphens as
@ -173,7 +169,7 @@ print(msaM, show=c("alignment", "complete"), showConsensus=FALSE)
data("BLOSUM62") # fetch the BLOSUM62 package from the Biostrings package
msaMScores <- msaConservationScore(msaM, substitutionMatrix = BLOSUM62)
msaMScores <- msa::msaConservationScore(msaM, substitutionMatrix = BLOSUM62)
plot(msaMScores, type = "l", col = "#205C5E", xlab = "Alignment Position")
# That plot shows the well-aligned regions (domains ?) of the sequence, but it
@ -243,20 +239,20 @@ for (i in seq_along(highScoringRanges$lengths)) {
# - adjust the sequence names
# - convert to msaAAMultipleAlignment object
# === 4.1.1 importing an .aln file
# === 4.1.1 importing an .aln file
# The seqinr package has a function to read CLUSTAL W formatted .aln files ...
if (! require(seqinr, quietly=TRUE)) {
install.packages(seqinr)
library(seqinr)
if (! requireNamespace("seqinr", quietly=TRUE)) {
install.packages("seqinr")
}
# Package information:
# library(help=seqinr) # basic information
# browseVignettes("seqinr") # available vignettes
# data(package = "seqinr") # available datasets
# read the downloaded file
tmp <- read.alignment("msaT.aln", format = "clustal")
# read the T-coffee aligned file that you downloaded from the EBI MSA tools
# (cf. http://steipe.biochemistry.utoronto.ca/abc/index.php/BIN-ALI-MSA)
tmp <- seqinr::read.alignment("msaT.aln", format = "clustal")
# read.alignment() returns a list. $seq is a list of strings, one for each
# sequence of the complete alignment. However, they are converted to lower case.
@ -274,16 +270,16 @@ for (i in seq_along(x)) {
names(x)[i] <- myDB$protein$name[sel] # get the name
}
# === 4.1.2 Creating an MsaAAMultipleAlignment object
# === 4.1.2 Creating an MsaAAMultipleAlignment object
# MsaAAMultipleAlignment objects are S4 objects that contain AAStringSet objects
# in their @unmasked slot, and a few additional items. Rather than build the
# object from scratch, we copy an axisting object, and overwrite the dta in its
# object from scratch, we copy an existing object, and overwrite the data in its
# slots with what we need. Our goal is pragmatic, we want an object that msa's
# functions will accept as input.
# First: convert our named char vector into an AAStringSet
x <- AAStringSet(x)
x <- Biostrings::AAStringSet(x)
# Then: create a new MsaAAMultipleAlignment S4 object. The msa package has
# defined what such an object should look like, with the SetClass() function. To
@ -294,8 +290,22 @@ x <- AAStringSet(x)
str(msaM)
# There is a catch, however, in the way R ties such operations to the
# packages that define them: the function that creates the class is
# defined as a "generic", and when it is called, R looks in the package
# namespace for a more specific method with precise instructions on what
# to do. However, we have not loaded the package namespace - we access all
# of the functions directly with the msa:: prefix. That approach breaks down
# when generic functions are involved: we could make it work, but the
# amount of code we would need is unreasonable. The straightforward way is
# to load the package. We can still use the prefix notation for its
# functions, just to emphasize where a function comes from; but since the
# namespace is then loaded, generics are properly dispatched.
library(msa) # load the msa package namespace
msaT <- new("MsaAAMultipleAlignment", # create new MsaAAMultipleAlignment object
unmasked = x, # "unmasked" slot takes an AASringSet
unmasked = x, # "unmasked" slot takes an AAStringSet
version = "T-Coffee", # "version" slot takes a string
params = list(), # "params" takes a list(), we leave the
# list empty, but we could add the
@ -309,18 +319,18 @@ str(msaT)
msaT # Now we have fabricated an msaAAMultipleAlignment object, and we can
# use the msa package functions on it
msaTScores <- msaConservationScore(msaT, substitutionMatrix = BLOSUM62)
msaTScores <- msa::msaConservationScore(msaT, substitutionMatrix = BLOSUM62)
# == 4.2 More alignments ===================================================
# Next, we calculate alignments with msa's two other alignment options:
# CLUSTAL Omega
(msaO <- msaClustalOmega( MBP1set, order = "aligned"))
msaOScores <- msaConservationScore(msaO, substitutionMatrix = BLOSUM62)
(msaO <- msa::msaClustalOmega( MBP1set, order = "aligned"))
msaOScores <- msa::msaConservationScore(msaO, substitutionMatrix = BLOSUM62)
# CLUSTAL W
(msaW <- msaClustalW( MBP1set, order = "aligned"))
msaWScores <- msaConservationScore(msaW, substitutionMatrix = BLOSUM62)
(msaW <- msa::msaClustalW( MBP1set, order = "aligned"))
msaWScores <- msa::msaConservationScore(msaW, substitutionMatrix = BLOSUM62)
# == 4.3 Computing comparison metrics ======================================
@ -454,7 +464,7 @@ legend("bottomright",
# Your alignment is going to be different from mine, due to the inclusion of
# MYSPE - but what I see is that MUSCLE gives the highest score overall, and
# achieves this with fewer indels then most, and the lowest number of gaps of
# achieves this with fewer indels than most, and the lowest number of gaps of
# all algorithms.
# To actually compare regions of alignments, we need to align alignments.
@ -470,12 +480,8 @@ legend("bottomright",
# to compare two MSAs with each other, by aligning them. The algorithm is
# provided by the DECIPHER package.
if (! require(DECIPHER, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("DECIPHER")
library(DECIPHER)
if (! requireNamespace("DECIPHER", quietly=TRUE)) {
BiocManager::install("DECIPHER")
}
# Package information:
# library(help = DECIPHER) # basic information
@ -484,14 +490,14 @@ if (! require(DECIPHER, quietly=TRUE)) {
# AlignProfiles() takes two AAStringSets as input. Let's compare the MUSCLE and
# CLUSTAL W alignments: we could do this directly ...
AlignProfiles(msaW@unmasked, msaM@unmasked)
DECIPHER::AlignProfiles(msaW@unmasked, msaM@unmasked)
# But for ease of comparison, we'll reorder the sequences of the CLUSTAL W
# alignment into the same order as the MUSCLE alignment:
m <- as.character(msaM)
w <- as.character(msaW)[names(m)]
(ppa <- AlignProfiles(AAStringSet(w), AAStringSet(m)))
(ppa <- DECIPHER::AlignProfiles(Biostrings::AAStringSet(w), Biostrings::AAStringSet(m)))
# Conveniently, AlignProfiles() returns an AAStringSet, so we can use our
# writeALN function to show it. Here is an arbitrary block, from somewhere in
@ -533,8 +539,8 @@ writeALN(ppa2, range = c(800, 960))
# Again, go explore, and get a sense of what's going on. You may find that
# CLUSTAL W has a tendency to insert short gaps all over the alignment, whereas
# MUSCLE keeps indels in blocks. CLUSTAL's behaviour is exactly what I would
# expect from an algorithm that builds alignments from pairwise local
# alignments, without global refinement.
# expect from an algorithm that builds alignments incrementally from pairwise
# local alignments, without global refinement.
# = 6 Sequence Logos ======================================================
@ -602,16 +608,15 @@ writeALN(fetchMSAmotif(msaM, wing))
# ggseqlogo written by Omar Waghi, a former UofT BCB student who is now at
# the EBI.
if (! require(ggseqlogo, quietly=TRUE)) {
if (! requireNamespace("ggseqlogo", quietly=TRUE)) {
install.packages("ggseqlogo")
library(ggseqlogo)
}
# Package information:
# library(help=ggseqlogo) # basic information
# browseVignettes("ggseqlogo") # available vignettes
# data(package = "ggseqlogo") # available datasets
ggseqlogo(as.character(motifAli))
ggseqlogo::ggseqlogo(as.character(motifAli))

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.
#
# Version: 1.4
# Version: 1.5
#
# Date: 2017 09 - 2017 11
# Date: 2017 09 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.5 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.4 Pull s2c() from seqinr package, rather than loading the
# entire library.
# 1.3 Updated confirmation task with correct logic
@ -32,29 +34,32 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------
#TOC> 1 Prepare 52
#TOC> 2 Biostrings Pairwise Alignment 66
#TOC> 2.1 Optimal global alignment 84
#TOC> 2.2 Optimal local alignment 147
#TOC> 3 APSES Domain annotation by alignment 171
#TOC> 4 Update your database script 252
#TOC> 4.1 Preparing an annotation file ... 258
#TOC> 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit 260
#TOC> 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit 303
#TOC> 4.2 Execute and Validate 327
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------------
#TOC> 1 Prepare 54
#TOC> 2 Biostrings Pairwise Alignment 71
#TOC> 2.1 Optimal global alignment 89
#TOC> 2.2 Optimal local alignment 152
#TOC> 3 APSES Domain annotation by alignment 176
#TOC> 4 Update your database script 257
#TOC> 4.1 Preparing an annotation file ... 263
#TOC> 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit 265
#TOC> 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit 308
#TOC> 4.2 Execute and Validate 332
#TOC>
#TOC> ==========================================================================
# = 1 Prepare =============================================================
# To simplify code, we pull the function s2c(x) from the seqinr package,
# rather than using the lengthier idiom unlist(strsplit(x, "")).
# This assumes that the seqinr package has been installed previously.
s2c <- seqinr::s2c
if (! requireNamespace("seqinr", quietly=TRUE)) {
install.packages("seqinr")
}
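# A quick check of the equivalence mentioned above (an added illustration,
# not part of the original unit):
seqinr::s2c("MSNQIY")                                            # "M" "S" "N" "Q" "I" "Y"
unlist(strsplit("MSNQIY", ""))                                   # same result
identical(seqinr::s2c("MSNQIY"), unlist(strsplit("MSNQIY", ""))) # TRUE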
# You can get package information with the following commands:
# library(help = seqinr) # basic information
# browseVignettes("seqinr") # available vignettes
# data(package = "seqinr") # available datasets
# You need to recreate the protein database that you have constructed in the
@ -66,13 +71,13 @@ source("makeProteinDB.R")
# = 2 Biostrings Pairwise Alignment =======================================
if (!require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (!requireNamespace("BiocManager", quietly=TRUE)) {
install.packages("BiocManager")
}
if (!requireNamespace("Biostrings", quietly=TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
# browseVignettes("Biostrings") # available vignettes
# data(package = "Biostrings") # available datasets
@ -88,15 +93,15 @@ if (!require(Biostrings, quietly=TRUE)) {
# First: make AAString objects ...
sel <- myDB$protein$name == "MBP1_SACCE"
aaMBP1_SACCE <- AAString(myDB$protein$sequence[sel])
aaMBP1_SACCE <- Biostrings::AAString(myDB$protein$sequence[sel])
sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
aaMBP1_MYSPE <- Biostrings::AAString(myDB$protein$sequence[sel])
?pairwiseAlignment
# ... and align.
# Global optimal alignment with end-gap penalties is default.
ali1 <- pairwiseAlignment(
ali1 <- Biostrings::pairwiseAlignment(
aaMBP1_SACCE,
aaMBP1_MYSPE,
substitutionMatrix = "BLOSUM62",
@ -108,7 +113,7 @@ str(ali1) # ... it's complicated
# This is a Biostrings alignment object. But we can use Biostrings functions to
# tame it:
ali1
writePairwiseAlignments(ali1) # That should look familiar
Biostrings::writePairwiseAlignments(ali1) # That should look familiar
# And we can make the internal structure work for us (@ is for classes as
# $ is for lists ...)
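# For example (a short sketch - these slots are also used in the code that
# follows):
as.character(ali1@pattern)   # the aligned version of aaMBP1_SACCE
as.character(ali1@subject)   # the aligned version of aaMBP1_MYSPE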
@ -137,9 +142,9 @@ sum(s2c(as.character(ali1@pattern)) ==
percentID <- function(al) {
# returns the percent-identity of a Biostrings alignment object
return(100 *
sum(seqinr::s2c(as.character(al@pattern)) ==
seqinr::s2c(as.character(al@subject))) /
nchar(al@pattern))
sum(seqinr::s2c(as.character(al@pattern)) ==
seqinr::s2c(as.character(al@subject))) /
nchar(al@pattern))
}
percentID(ali1)
@ -147,7 +152,7 @@ percentID(ali1)
# == 2.2 Optimal local alignment ===========================================
# Compare with local optimal alignment (like EMBOSS Water)
ali2 <- pairwiseAlignment(
ali2 <- Biostrings::pairwiseAlignment(
aaMBP1_SACCE,
aaMBP1_MYSPE,
type = "local",
@ -155,9 +160,9 @@ ali2 <- pairwiseAlignment(
gapOpening = 50,
gapExtension = 10)
writePairwiseAlignments(ali2) # This has probably only aligned the N-terminal
# DNA binding domain - but that one has quite
# high sequence identity:
Biostrings::writePairwiseAlignments(ali2)
# This has probably only aligned the N-terminal DNA binding domain - but that
# one has quite high sequence identity:
percentID(ali2)
# == TASK: ==
@ -209,14 +214,14 @@ myDB$annotation[myDB$annotation$ID == proID &
# the sequence, and used the start and end coordinates to extract a substring.
# Let's convert this to an AAstring and assign it:
aaMB1_SACCE_APSES <- AAString(apses)
aaMB1_SACCE_APSES <- Biostrings::AAString(apses)
# Now let's align these two sequences of very different length without end-gap
# penalties using the "overlap" type. "overlap" turns the
# end-gap penalties off and that is crucially important since
# the sequences have very different length.
aliApses <- pairwiseAlignment(
aliApses <- Biostrings::pairwiseAlignment(
aaMB1_SACCE_APSES,
aaMBP1_MYSPE,
type = "overlap",
@ -228,7 +233,7 @@ aliApses <- pairwiseAlignment(
# homologous, and have (almost) no indels. The entire "pattern"
# sequence from QIYSAR ... to ... KPLFDF should be matched
# with the "query". Is this correct?
writePairwiseAlignments(aliApses)
Biostrings::writePairwiseAlignments(aliApses)
# If this is correct, you can extract the matched sequence from
# the alignment object. The syntax is a bit different from what
@ -257,7 +262,7 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1
# == 4.1 Preparing an annotation file ... ==================================
#
# === 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit
# === 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit
#
#
# You DON'T already have a file called "<MYSPE>-Annotations.json" in the
@ -300,7 +305,7 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1
# Then SKIP the next section.
#
#
# === 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit
# === 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit
#
#
# You DO already have a file called "<MYSPE>-Annotations.json" in the

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Similarity unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 20
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0 Refactored for 2017; add aaindex, ternary plot.
# 0.1 First code copied from 2016 material.
#
@ -27,11 +29,11 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------
#TOC> 1 Amino Acid Properties 43
#TOC> 2 Mutation Data matrix 163
#TOC> 3 Background score 205
#TOC> Section Title Line
#TOC> ----------------------------------------------
#TOC> 1 Amino Acid Properties 41
#TOC> 2 Mutation Data matrix 158
#TOC> 3 Background score 199
#TOC>
#TOC> ==========================================================================
@ -41,9 +43,8 @@
# A large collection of amino acid property tables is available via the seqinr
# package:
if (!require(seqinr)) {
if (! requireNamespace("seqinr", quietly=TRUE)) {
install.packages("seqinr")
library(seqinr)
}
# Package information:
# library(help = seqinr) # basic information
@ -127,9 +128,8 @@ text(Y$I, K$I, names(Y$I))
# plots are in general unintuitive and hard to interpret. One alternative is a
# so-called "ternary plot":
if (!require(ggtern)) {
if (! requireNamespace("ggtern", quietly=TRUE)) {
install.packages("ggtern")
library(ggtern)
}
# Package information:
# library(help = ggtern) # basic information
@ -145,12 +145,11 @@ myDat <- data.frame("phi" = 0.9*(((Y$I-min(Y$I))/(max(Y$I)-min(Y$I))))+0.05,
stringsAsFactors = FALSE)
rownames(myDat) <- names(Y$I)
ggtern(data = myDat,
aes(x = vol,
y = phi,
z = pK,
label = rownames(myDat))) +
geom_text()
ggtern::ggtern(data = myDat,
ggplot2::aes(x = vol,
y = phi,
z = pK,
label = rownames(myDat))) + ggplot2::geom_text()
# This results in a mapping of amino acids relative to each other that is
# similar to the Venn diagram you have seen in the notes.
@ -162,12 +161,11 @@ ggtern(data = myDat,
# The Biostrings package contains the most common mutation data matrices.
if (!require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly=TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly=TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help=Biostrings) # basic information
@ -200,7 +198,9 @@ BLOSUM62["W", "R"]
# = 3 Background score ====================================================
# The mutation data matrix is designed to give high scores to homologous sequences, low scores to non-homologous sequences. What score on average should we expect for a random sequence?
# The mutation data matrix is designed to give high scores to homologous
# sequences, low scores to non-homologous sequences. What score on average
# should we expect for a random sequence?
# If we sample amino acid pairs at random, we will get a score that is the
# average of the individual pairscores in the matrix. Omitting the ambiguity
@ -219,12 +219,12 @@ sum(BLOSUM62[1:20, 1:20])/400
# PDB ID 3FG7 - a villin headpiece structure with a large amount of
# low-complexity amino acid sequence ...
aa3FG7 <- readAAStringSet("./data/3FG7.fa")[[1]]
aa3FG7 <- Biostrings::readAAStringSet("./data/3FG7.fa")[[1]]
# ... and the FASTA file for the E. coli OmpG outer membrane porin (PDB: 2F1C)
# with an exceptionally high percentage of hydrophobic residues.
aa2F1C <- readAAStringSet("./data/2F1C.fa")[[1]]
aa2F1C <- Biostrings::readAAStringSet("./data/2F1C.fa")[[1]]
# Here is a function that takes two sequences and
# returns their average pairscore.
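# (The function itself lies outside this excerpt. As an illustration only,
# here is one possible sketch: it samples N random residue pairs, one residue
# from each sequence, and averages their pairscores. The name, the sampling
# approach and the defaults are assumptions, not the unit's own code.)
randomPairScore <- function(A, B, MDM = BLOSUM62, N = 10000) {
  a <- seqinr::s2c(as.character(A))
  b <- seqinr::s2c(as.character(B))
  a <- a[a %in% rownames(MDM)[1:20]]   # keep only the 20 standard amino acids
  b <- b[b %in% rownames(MDM)[1:20]]
  iA <- sample(seq_along(a), N, replace = TRUE)
  iB <- sample(seq_along(b), N, replace = TRUE)
  return(mean(MDM[cbind(a[iA], b[iB])]))
}
# randomPairScore(aa3FG7, aa2F1C)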

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-Data_integration unit.
#
# Version: 1.0.1
# Version: 1.1
#
# Date: 2018 10 30
# Date: 2018 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0.1 Bugfix: UniProt ID Mapping service API change
# 1.0 First live version
#
@ -31,8 +33,8 @@
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------------
#TOC> 1 Identifier mapping 40
#TOC> 2 Cross-referencing tables 164
#TOC> 1 Identifier mapping 42
#TOC> 2 Cross-referencing tables 165
#TOC>
#TOC> ==========================================================================
@ -54,9 +56,8 @@
# To begin, we load httr, which supports sending and receiving data via the
# http protocol, just like a Web browser.
if (!require(httr, quietly=TRUE)) {
install.packages("httr")
library(httr)
if (! requireNamespace("httpr", quietly=TRUE)) {
install.packages("httpr")
}
# Package information:
# library(help = httr) # basic information
@ -75,22 +76,22 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036"
# of the request. GET() and POST() are functions from httr.
URL <- "https://www.uniprot.org/mapping/"
response <- POST(URL,
body = list(from = "P_REFSEQ_AC", # Refseq Protein
to = "ACC", # UniProt ID
format = "tab",
query = myQueryIDs))
response <- httr::POST(URL,
body = list(from = "P_REFSEQ_AC", # Refseq Protein
to = "ACC", # UniProt ID
format = "tab",
query = myQueryIDs))
cat(content(response))
cat(httr::content(response))
# We need to check the status code - if it is not 200, an error occurred and we
# can't process the result:
status_code(response)
httr::status_code(response)
# If the query is successful, tabbed text is returned. We can assign that to a
# data frame. Note that we use textConnection() to read data directly from a
# char object, which can go in the spot where read.delim() expects a file-name
# argument.
myMappedIDs <- read.delim(file = textConnection(content(response)),
myMappedIDs <- read.delim(file = textConnection(httr::content(response)),
sep = "\t",
stringsAsFactors = FALSE)
myMappedIDs
@ -132,14 +133,14 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
# for IDs that are not mapped.
URL <- "https://www.uniprot.org/uploadlists/"
response <- POST(URL,
body = list(from = mapFrom,
to = mapTo,
format = "tab",
query = s))
response <- httr::POST(URL,
body = list(from = mapFrom,
to = mapTo,
format = "tab",
query = s))
if (status_code(response) == 200) { # 200: oK
myMap <- read.delim(file = textConnection(content(response)),
if (httr::status_code(response) == 200) { # 200: OK
myMap <- read.delim(file = textConnection(httr::content(response)),
sep = "\t",
stringsAsFactors = FALSE)
myMap <- myMap[ , c(1,3)]
@ -148,7 +149,7 @@ myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
myMap <- data.frame()
warning(paste("No uniProt ID mapping returned:",
"server sent status",
status_code(response)))
httr::status_code(response)))
}
return(myMap)
@ -168,7 +169,8 @@ myIDmap("NP_010227 NP_011036 NP_012881 NP_013729 NP_012165")
# Nomenclature commission. How do we map one set of identifiers to another one?
# The function to use is match().
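# Before we turn to real identifiers, a toy sketch of what match() does
# (made-up IDs, added for illustration only):
idsA <- c("idA", "idB", "idC")
idsB <- c("idC", "idA", "idB")
match(idsA, idsB)          # 2 3 1 : the position of each element of idsA in idsB
idsB[match(idsA, idsB)]    # reorders idsB into the order of idsA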
# Here is a tiny set of identifiers taken from a much larger table to illustrate the principle:
# Here is a tiny set of identifiers taken from a much larger table to
# illustrate the principle:
#
myIDs <- data.frame(uID = c("P38903", "P31383", "P47177", "P47096", "Q07747",

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-FUNC_Semantic_similarity unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 11 12
# Date: 2017 11 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0 New code.
#
#
@ -25,64 +28,70 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------
#TOC> 1 Preparations: Packages, AnnotationDB, Setup 39
#TOC> 2 Fetch GO Annotations 89
#TOC> 3 Semantic Similarities 98
#TOC> 4 GO Term Enrichment in Gene Sets 116
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------
#TOC> 1 Preparations: Packages, AnnotationDB, Setup 42
#TOC> 2 Fetch GO Annotations 98
#TOC> 3 Semantic Similarities 107
#TOC> 4 GO Term Enrichment in Gene Sets 125
#TOC>
#TOC> ==========================================================================
# = 1 Preparations: Packages, AnnotationDB, Setup =========================
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
# GOSim is an R-package in the Bioconductor project.
if (! require(GOSim, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("GOSim")
library(GOSim)
if (! requireNamespace("GOSim", quietly = TRUE)) {
BiocManager::install("GOSim")
}
# Package information:
# library(help = GOSim) # basic information
# browseVignettes("GOSim") # available vignettes
# data(package = "GOSim") # available datasets
# GOSim makes extensive assumptions about loaded packages, and many base
# methods are masked. We will thus use library(GOSim) to load it
# in its entirety and with all packages it depends on. We will still use
# the <package>::<function>() syntax in the code below, but this now serves
# a didactic purpose rather than an actual syntactic requirement.
library(GOSim)
# GOSim loads human annotations by default. We load yeast annotations instead...
if (!require(org.Sc.sgd.db, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("org.Sc.sgd.db")
library(org.Sc.sgd.db)
if (! requireNamespace("org.Sc.sgd.db", quietly = TRUE)) {
BiocManager::install("org.Sc.sgd.db")
}
# Bioconductor annotation packages won't work stably unless we actually load
# them:
library(org.Sc.sgd.db)
# org.Sc.sgd.db is a Bioconductor annotation database curated by SGD. Such
# databases exist for all model organisms. It's a kind of a fancy data frame
# from which we can get annotations by rows (genes) with the keys() function ...
keys(org.Sc.sgd.db)[1500:1510]
AnnotationDbi::keys(org.Sc.sgd.db)[1500:1510]
# ... and the types of available annotations with the columns() function
columns(org.Sc.sgd.db)
AnnotationDbi::columns(org.Sc.sgd.db)
# Note that one of the columns is "GO" ... and we load that into the
# datastructures used by GOSim:
# Choose GOterms to use
setEvidenceLevel(evidences="all",
organism=org.Sc.sgdORGANISM,
gomap=org.Sc.sgdGO)
GOSim::setEvidenceLevel(evidences = "all",
organism = org.Sc.sgdORGANISM,
gomap = org.Sc.sgdGO)
# Use Biological Process ontology
setOntology("BP", loadIC=FALSE)
GOSim::setOntology("BP", loadIC = FALSE)
# confirm that we loaded the correct ontology
head(get("gomap", envir=GOSimEnv))
head(get("gomap", envir = GOSimEnv))
@ -92,7 +101,7 @@ head(get("gomap", envir=GOSimEnv))
# All keys being used here are yeast systematic names.
# Get one set of annotations
getGOInfo(c("YDL056W")) # Mbp1
GOSim::getGOInfo(c("YDL056W")) # Mbp1
# = 3 Semantic Similarities ===============================================
@ -104,31 +113,31 @@ getGOInfo(c("YDL056W")) # Mbp1
# There are _many_ different metrics of term similarity implemented
# in this package.
# Mbp1 and...
getGeneSim("YDL056W", "YLR182W", similarity = "OA") # Swi6 - MCB complex
getGeneSim("YDL056W", "YER111C", similarity = "OA") # Swi4 - collaborators
getGeneSim("YDL056W", "YBR160W", similarity = "OA") # Cdc28 - mediator
getGeneSim("YDL056W", "YGR108W", similarity = "OA") # Clb1 - antagonist
getGeneSim("YDL056W", "YLR079W", similarity = "OA") # Sic1 - antagonist
getGeneSim("YDL056W", "YJL130C", similarity = "OA") # Pgk1 - Gluconeogenesis
# Mbp1 and...
GOSim::getGeneSim("YDL056W","YLR182W",similarity = "OA") # Swi6 - MCB complex
GOSim::getGeneSim("YDL056W","YER111C",similarity = "OA") # Swi4 - collaborators
GOSim::getGeneSim("YDL056W","YBR160W",similarity = "OA") # Cdc28 - mediator
GOSim::getGeneSim("YDL056W","YGR108W",similarity = "OA") # Clb1 - antagonist
GOSim::getGeneSim("YDL056W","YLR079W",similarity = "OA") # Sic1 - antagonist
GOSim::getGeneSim("YDL056W","YJL130C",similarity = "OA") # Pgk1 - Gluconeogenesis
# = 4 GO Term Enrichment in Gene Sets =====================================
# Calculating GO term enrichment in gene sets is done with the topGO package.
if (! require(topGO, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("topGO")
library(topGO)
# Calculating GO term enrichment in gene sets is done with the Bioconductor
# topGO package.
if (! requireNamespace("topGO", quietly = TRUE)) {
BiocManager::install("topGO")
}
# Package information:
# library(help = topGO) # basic information
# browseVignettes("topGO") # available vignettes
# data(package = "topGO") # available datasets
# Once again - assumptions are made by GOSim that require us to load the
# topGO package wholesale:
library(topGO)
# Let's define a gene set: GOterm enrichment for G1/S switch activators:
mySet <- c("YFR028C", # Cdc14
@ -141,7 +150,7 @@ mySet <- c("YFR028C", # Cdc14
"YPL256C", # Cln2
"YAL040C") # Cln3
allGenes <- keys(org.Sc.sgd.db)
allGenes <- AnnotationDbi::keys(org.Sc.sgd.db)
allGenes <- allGenes[grep("^Y", allGenes)] # This is the context against which
# we define enrichment
@ -164,9 +173,9 @@ setdiff(fullSet, mySet) # These are annotated to that term but not in mySet.
# What are these genes?
# Select annotations from the annotation database:
select(org.Sc.sgd.db,
keys = setdiff(fullSet, mySet),
columns = c("COMMON", "DESCRIPTION"))
AnnotationDbi::select(org.Sc.sgd.db,
keys = setdiff(fullSet, mySet),
columns = c("COMMON", "DESCRIPTION"))
# Note that these annotations are partially redundant to several different
# aliases of the same three genes.

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Data_preparation unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 31
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0 First 2017 version
# 0.1 First code copied from 2016 material.
#
@ -26,15 +29,15 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------
#TOC> 1 Preparations 41
#TOC> 2 Fetching sequences 78
#TOC> 3 Multiple Sequence Alignment 119
#TOC> 4 Reviewing and Editing Alignments 138
#TOC> 4.1 Masking workflow 154
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------
#TOC> 1 Preparations 44
#TOC> 2 Fetching sequences 76
#TOC> 3 Multiple Sequence Alignment 117
#TOC> 4 Reviewing and Editing Alignments 136
#TOC> 4.1 Masking workflow 152
#TOC>
#TOC> ==========================================================================
@ -49,12 +52,11 @@ source("makeProteinDB.R")
# Load packages we need
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
@ -62,12 +64,8 @@ if (! require(Biostrings, quietly=TRUE)) {
# data(package = "Biostrings") # available datasets
if (! require(msa, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("msa")
library(msa)
if (! requireNamespace("msa", quietly = TRUE)) {
BiocManager::install("msa")
}
# Package information:
# library(help = msa) # basic information
@ -123,8 +121,8 @@ tail(APSI)
# the MSA algorithms in Biostrings.
#
APSESSet <- AAStringSet(APSI)
APSESMsa <- msaMuscle(APSESSet, order = "aligned")
APSESSet <- Biostrings::AAStringSet(APSI)
APSESMsa <- msa::msaMuscle(APSESSet, order = "aligned")
# Nb. msaMuscle() sometimes fails - reproducibly, but I am not sure why. If
# that happens in your case, just use msaClustalOmega() instead.
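# One possible way to implement that advice in code (a sketch added for
# illustration, not part of the original unit; it assumes the objects defined
# above):
# APSESMsa <- tryCatch(msa::msaMuscle(APSESSet, order = "aligned"),
#                      error = function(e) msa::msaClustalOmega(APSESSet,
#                                                               order = "aligned"))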

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Tree_analysis unit.
#
# Version: 1.0.2
# Version: 1.1
#
# Date: 2017 10 31
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0.2 Typo in variable name, style changes
# 1.0.1 Wrong section heading
# 1.0 First 2017 version
@ -31,11 +34,11 @@
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------
#TOC> 1 Preparation and Tree Plot 43
#TOC> 2 Tree Analysis 82
#TOC> 2.1 Rooting Trees 141
#TOC> 2.2 Rotating Clades 187
#TOC> 2.3 Computing tree distances 234
#TOC> 1 Preparation and Tree Plot 46
#TOC> 2 Tree Analysis 86
#TOC> 2.1 Rooting Trees 145
#TOC> 2.2 Rotating Clades 190
#TOC> 2.3 Computing tree distances 241
#TOC>
#TOC> ==========================================================================
@ -43,19 +46,17 @@
# = 1 Preparation and Tree Plot ===========================================
if (!require(Rphylip, quietly=TRUE)) {
install.packages("Rphylip")
library(Rphylip)
if (! requireNamespace("ape", quietly = TRUE)) {
install.packages("ape")
}
# Package information:
# library(help = Rphylip) # basic information
# browseVignettes("Rphylip") # available vignettes
# data(package = "Rphylip") # available datasets
# library(help = ape) # basic information
# browseVignettes("ape") # available vignettes
# data(package = "ape") # available datasets
# Read the species tree that you have created at the phyloT Website:
fungiTree <- read.tree("fungiTree.txt")
fungiTree <- ape::read.tree("fungiTree.txt")
plot(fungiTree)
@ -73,7 +74,10 @@ for (i in seq_along(fungiTree$tip.label)) {
# Plot the tree
plot(fungiTree, cex = 1.0, root.edge = TRUE, no.margin = TRUE)
nodelabels(text = fungiTree$node.label, cex = 0.6, adj = 0.2, bg = "#D4F2DA")
ape::nodelabels(text = fungiTree$node.label,
cex = 0.6,
adj = 0.2,
bg = "#D4F2DA")
# Note that you can use the arrow buttons in the menu above the plot to scroll
# back to plots you have created earlier - so you can reference back to the
# species tree.
@ -91,10 +95,10 @@ nodelabels(text = fungiTree$node.label, cex = 0.6, adj = 0.2, bg = "#D4F2DA")
# trees in Newick format and visualize them elsewhere.
# The "phylo" class object is one of R's "S3" objects and methods to plot and
# print it have been defined with the Rphylip package, and the package ape that
# Rphylip has loaded. You can simply call plot(<your-tree>) and R knows what to
# do with <your-tree> and how to plot it. The underlying function is
# plot.phylo(), and documentation for its many options can by found by typing:
# print it have been defined with the Rphylip package, and in ape. You can
# simply call plot(<your-tree>) and R knows what to do with <your-tree> and how
# to plot it. The underlying function is plot.phylo(), and documentation for its
# many options can be found by typing:
?plot.phylo
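# A quick way to see that dispatch in action (a small added illustration):
class(fungiTree)               # "phylo"
getS3method("plot", "phylo")   # the method that plot(fungiTree) dispatches to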
@ -125,40 +129,39 @@ apsTree$edge.length
# show the node / edge and tip labels on a plot
plot(apsTree)
nodelabels()
edgelabels()
tiplabels()
ape::nodelabels()
ape::edgelabels()
ape::tiplabels()
# show the number of nodes, edges and tips
Nnode(apsTree)
Nedge(apsTree)
Ntip(apsTree)
ape::Nnode(apsTree)
ape::Nedge(apsTree)
ape::Ntip(apsTree)
# Finally, write the tree to console in Newick format
write.tree(apsTree)
ape::write.tree(apsTree)
# == 2.1 Rooting Trees =====================================================
# In order to analyse the tree, it is helpful to root it first and reorder its
# clades. Contrary to documentation, Rproml() returns an unrooted tree.
is.rooted(apsTree)
ape::is.rooted(apsTree)
# You can root the tree with the command root() from the "ape" package. ape is
# automatically installed and loaded with Rphylip.
# You can root the tree with the command root() from the "ape" package.
plot(apsTree)
# add labels for internal nodes and tips
nodelabels(cex = 0.5, frame = "circle")
tiplabels(cex = 0.5, frame = "rect")
ape::nodelabels(cex = 0.5, frame = "circle")
ape::tiplabels(cex = 0.5, frame = "rect")
# The outgroup of the tree is tip "11" in my sample tree, it may be a different
# number in yours. Substitute the correct node number below for "outgroup".
apsTree <- root(apsTree, outgroup = 11, resolve.root = TRUE)
apsTree <- ape::root(apsTree, outgroup = 11, resolve.root = TRUE)
plot(apsTree)
is.rooted(apsTree)
ape::is.rooted(apsTree)
# This tree _looks_ unchanged, because when the root trifurcation was resolved,
# an edge of length zero was added to connect the MRCA (Most Recent Common
@ -172,7 +175,7 @@ apsTree$edge.length
# overlap.
apsTree$edge.length[1] <- 0.1
plot(apsTree, cex = 0.7)
nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.1, bg = "#ff8866")
ape::nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.1, bg = "#ff8866")
# This procedure does however not assign an actual length to a root edge, and
@ -181,7 +184,7 @@ nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.1, bg = "#ff8866")
apsTree$root.edge <- mean(apsTree$edge.length) * 1.5
plot(apsTree, cex = 0.7, root.edge = TRUE)
nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.8, bg = "#ff8866")
ape::nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.8, bg = "#ff8866")
# == 2.2 Rotating Clades ===================================================
@ -192,9 +195,9 @@ nodelabels(text = "MRCA", node = 12, cex = 0.5, adj = 0.8, bg = "#ff8866")
# We can either rotate around individual internal nodes ...
layout(matrix(1:2, 1, 2))
plot(apsTree, no.margin = TRUE, root.edge = TRUE)
nodelabels(node = 17, cex = 0.7, bg = "#ff8866")
plot(rotate(apsTree, node = 17), no.margin = TRUE, root.edge = TRUE)
nodelabels(node = 17, cex = 0.7, bg = "#88ff66")
ape::nodelabels(node = 13, cex = 0.7, bg = "#ff8866")
plot(ape::rotate(apsTree, node = 13), no.margin = TRUE, root.edge = TRUE)
ape::nodelabels(node = 13, cex = 0.7, bg = "#88ff66")
# Note that the species at the bottom of the clade descending from node
# 13 is now plotted at the top.
layout(matrix(1), widths = 1.0, heights = 1.0)
@ -211,11 +214,15 @@ nOrg <- length(apsTree$tip.label)
layout(matrix(1:2, 1, 2))
plot(fungiTree,
no.margin = TRUE, root.edge = TRUE)
nodelabels(text = fungiTree$node.label, cex = 0.5, adj = 0.2, bg = "#D4F2DA")
ape::nodelabels(text = fungiTree$node.label,
cex = 0.5,
adj = 0.2,
bg = "#D4F2DA")
plot(rotateConstr(apsTree, apsTree$tip.label[nOrg:1]),
no.margin = TRUE, root.edge = TRUE)
add.scale.bar(length = 0.5)
plot(ape::rotateConstr(apsTree, apsTree$tip.label[nOrg:1]),
no.margin = TRUE,
root.edge = TRUE)
ape::add.scale.bar(length = 0.5)
layout(matrix(1), widths = 1.0, heights = 1.0)
# Task: Study the two trees and consider their similarities and differences.
@ -236,9 +243,8 @@ layout(matrix(1), widths = 1.0, heights = 1.0)
# Many superb phylogeny tools are contributed by the phangorn package.
if (!require(phangorn, quietly=TRUE)) {
if (! requireNamespace("phangorn", quietly = TRUE)) {
install.packages("phangorn")
library(phangorn)
}
# Package information:
# library(help = phangorn) # basic information
@ -253,20 +259,20 @@ apsTree2$tip.label <- gsub("(MBP1_)|(KILA_)", "", apsTree2$tip.label)
# phangorn provides several functions to compute tree-differences (and there
# is a _whole_ lot of theory on how to compare trees). treedist() returns the
# "symmetric difference"
treedist(fungiTree, apsTree2, check.labels = TRUE)
phangorn::treedist(fungiTree, apsTree2, check.labels = TRUE)
# Numbers. What do they mean? How much more similar is our apsTree to the
# (presumably) ground truth of fungiTree than a random tree would be?
# The ape package (which was loaded with RPhylip) provides the function rtree()
# The ape package provides the function rtree()
# to compute random trees.
rtree(n = length(apsTree2$tip.label), # number of tips
rooted = TRUE, # we rooted the tree above,
# and fungiTree is rooted anyway
tip.label = apsTree2$tip.label, # use the apsTree2 labels
br = NULL) # don't generate branch lengths since
# fungiTree has none, so we can't
# compare them anyway.
ape::rtree(n = length(apsTree2$tip.label), # number of tips
rooted = TRUE, # we rooted the tree above,
# and fungiTree is rooted anyway
tip.label = apsTree2$tip.label, # use the apsTree2 labels
br = NULL) # don't generate branch lengths since
# fungiTree has none, so we can't
# compare them anyway.
# Let's compute some random trees this way, calculate the distances to
# fungiTree, and then compare the values we get for apsTree2. The random
@ -278,17 +284,17 @@ colnames(myTreeDistances) <- c("symm", "path")
set.seed(112358)
for (i in 1:N) {
xTree <- rtree(n = length(apsTree2$tip.label),
rooted = TRUE,
tip.label = apsTree2$tip.label,
br = NULL)
myTreeDistances[i, ] <- treedist(fungiTree, xTree)
xTree <- ape::rtree(n = length(apsTree2$tip.label),
rooted = TRUE,
tip.label = apsTree2$tip.label,
br = NULL)
myTreeDistances[i, ] <- phangorn::treedist(fungiTree, xTree)
}
set.seed(NULL) # reset the random number generator
table(myTreeDistances[, "symm"])
(symmObs <- treedist(fungiTree, apsTree2)[1])
(symmObs <- phangorn::treedist(fungiTree, apsTree2)[1])
# Random events less-or-equal to observation, divided by total number of
# events gives us the empirical p-value.
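# In code, that is simply (a one-line sketch using the objects defined above):
sum(myTreeDistances[, "symm"] <= symmObs) / nrow(myTreeDistances)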
@ -298,7 +304,7 @@ cat(sprintf("\nEmpirical p-value for symmetric diff. of observed tree is %1.4f\n
hist(myTreeDistances[, "path"],
col = "aliceblue",
main = "Distances of random Trees to fungiTree")
(pathObs <- treedist(fungiTree, apsTree2)[2])
(pathObs <- phangorn::treedist(fungiTree, apsTree2)[2])
abline(v = pathObs, col = "chartreuse")
# Random events less-or-equal to observation, divided by total number of

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Tree_building unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10. 31
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# 1.0 First 2017 version
# 0.1 First code copied from 2016 material.
#
@ -27,17 +29,17 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------------------
#TOC> 1 Calculating Trees 43
#TOC> 1.1 PROMLPATH ... 64
#TOC> 1.1.1 ... on the Mac 69
#TOC> 1.1.2 ... on Windows 80
#TOC> 1.1.3 ... on Linux 94
#TOC> 1.1.4 Confirming PROMLPATH 99
#TOC> 1.2 Building a maximum likelihood tree 108
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------------
#TOC> 1 Calculating Trees 46
#TOC> 1.1 PROMLPATH ... 66
#TOC> 1.1.1 ... on the Mac 71
#TOC> 1.1.2 ... on Windows 82
#TOC> 1.1.3 ... on Linux 96
#TOC> 1.1.4 Confirming PROMLPATH 101
#TOC> 1.2 Building a maximum likelihood tree 110
#TOC>
#TOC> ==========================================================================
@ -50,9 +52,8 @@
# After you have installed Phylip on your computer, install the R package that
# provides an interface to the Phylip functions.
if (!require(Rphylip, quietly=TRUE)) {
if (! requireNamespace("Rphylip", quietly = TRUE)) {
install.packages("Rphylip")
library(Rphylip)
}
# Package information:
# library(help = Rphylip) # basic information
@ -67,7 +68,7 @@ if (!require(Rphylip, quietly=TRUE)) {
# on your computer Phylip has been installed and define the path
# to the proml program that calculates a maximum-likelihood tree.
# === 1.1.1 ... on the Mac
# === 1.1.1 ... on the Mac
# On the Mac, the standard installation places a phylip folder
# in the /Applications directory. That folder contains all the
# individual phylip programs as <name>.app files. These are not
@ -78,7 +79,7 @@ if (!require(Rphylip, quietly=TRUE)) {
# directly to that subdirectory to find the program it needs:
# PROMLPATH <- "/Applications/phylip-3.695/exe/proml.app/Contents/MacOS"
# === 1.1.2 ... on Windows
# === 1.1.2 ... on Windows
# On Windows you need to know where the programs have been installed, and you
# need to specify a path that is correct for the Windows OS. Find the folder
# that is named "exe", and right-click to inspect its properties. The path
@ -92,12 +93,12 @@ if (!require(Rphylip, quietly=TRUE)) {
# I have heard that your path must not contain spaces, and it is prudent to
# avoid other special characters as well.
# === 1.1.3 ... on Linux
# === 1.1.3 ... on Linux
# If you are running Linux I trust you know what to do. It's probably
# something like
# PROMLPATH <- "/usr/local/phylip-3.695/bin"
# === 1.1.4 Confirming PROMLPATH
# === 1.1.4 Confirming PROMLPATH
# Confirm that the settings are right.
PROMLPATH # returns the path
list.dirs(PROMLPATH) # returns the directories in that path
@ -110,7 +111,7 @@ list.files(PROMLPATH) # lists the files [1] "proml" "proml.command"
# Now read the mfa file you have saved in the BIN-PHYLO-Data_preparation unit,
# as a "proseq" object with the read.protein() function of the Rphylip package:
apsIn <- read.protein("APSESphyloSet.mfa")
apsIn <- Rphylip::read.protein("APSESphyloSet.mfa")
# ... and you are ready to build a tree.
@ -125,7 +126,7 @@ apsIn <- read.protein("APSESphyloSet.mfa")
# process will take us about 5 to 10 minutes. Run this, and enjoy a good cup
# of coffee while you are waiting.
apsTree <- Rproml(apsIn, path=PROMLPATH)
apsTree <- Rphylip::Rproml(apsIn, path=PROMLPATH)
# A quick first look:

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PPI-Analysis unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 08 - 2017 11
# Date: 2017 08 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0 First live version
# 0.1 First code copied from 2016 material.
#
@ -29,13 +32,13 @@
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------------
#TOC> 1 Setup and data 43
#TOC> 2 Functional Edges in the Human Proteome 80
#TOC> 2.1 Cliques 123
#TOC> 2.2 Communities 164
#TOC> 2.3 Betweenness Centrality 178
#TOC> 3 biomaRt 224
#TOC> 4 Task for submission 295
#TOC> 1 Setup and data 46
#TOC> 2 Functional Edges in the Human Proteome 82
#TOC> 2.1 Cliques 125
#TOC> 2.2 Communities 166
#TOC> 2.3 Betweenness Centrality 180
#TOC> 3 biomaRt 226
#TOC> 4 Task for submission 296
#TOC>
#TOC> ==========================================================================
@ -45,9 +48,8 @@
# Not surprisingly, the analysis of PPI networks needs iGraph:
if (!require(igraph, quietly=TRUE)) {
if (! requireNamespace("igraph", quietly = TRUE)) {
install.packages("igraph")
library(igraph)
}
# Package information:
# library(help = igraph) # basic information
@ -88,9 +90,9 @@ head(STRINGedges)
# Make a graph from this dataframe
?graph_from_data_frame
?igraph::graph_from_data_frame
gSTR <- graph_from_data_frame(STRINGedges, directed = FALSE)
gSTR <- igraph::graph_from_data_frame(STRINGedges, directed = FALSE)
# CAUTION you DON'T want to plot a graph with 6,500 nodes and 50,000 edges -
# layout of such large graphs is possible, but requires specialized code. Google
@ -99,13 +101,13 @@ gSTR <- graph_from_data_frame(STRINGedges, directed = FALSE)
# Of course simple computations on this graph are reasonably fast:
compSTR <- components(gSTR)
compSTR <- igraph::components(gSTR)
summary(compSTR) # our graph is fully connected!
hist(log(degree(gSTR)), col="#FEE0AF")
hist(log(igraph::degree(gSTR)), col="#FEE0AF")
# this actually does look rather scale-free
(freqRank <- table(degree(gSTR)))
(freqRank <- table(igraph::degree(gSTR)))
plot(log10(as.numeric(names(freqRank)) + 1),
log10(as.numeric(freqRank)), type = "b",
pch = 21, bg = "#FEE0AF",
@ -126,29 +128,29 @@ abline(regressionLine, col = "firebrick")
# subgraph, i.e. a subgraph in which every node is connected to every other.
# Biological complexes often appear as cliques in interaction graphs.
clique_num(gSTR)
igraph::clique_num(gSTR)
# The largest clique has 63 members.
(C <- largest_cliques(gSTR)[[1]])
(C <- igraph::largest_cliques(gSTR)[[1]])
# Pick one of the proteins and find out what this fully connected cluster of 63
# proteins is (you can simply Google for any of the IDs). Is this expected?
# Plot this ...
R <- induced_subgraph(gSTR, C) # makes a graph from a selected set of vertices
R <- igraph::induced_subgraph(gSTR, C) # a graph from a selected set of vertices
# color the vertices along a color spectrum
vCol <- rainbow(gorder(R)) # gorder(): order of a graph = number of nodes
vCol <- rainbow(igraph::gorder(R)) # "order" of a graph == number of nodes
# color the edges to have the same color as the originating node
eCol <- character()
for (i in seq_along(vCol)) {
eCol <- c(eCol, rep(vCol[i], gorder(R)))
eCol <- c(eCol, rep(vCol[i], igraph::gorder(R)))
}
oPar <- par(mar= rep(0,4)) # Turn margins off
plot(R,
layout = layout_in_circle(R),
layout = igraph::layout_in_circle(R),
vertex.size = 3,
vertex.color = vCol,
edge.color = eCol,
@ -164,14 +166,14 @@ par(oPar)
# == 2.2 Communities =======================================================
set.seed(112358) # set RNG seed for repeatable randomness
gSTRclusters <- cluster_infomap(gSTR)
gSTRclusters <- igraph::cluster_infomap(gSTR)
set.seed(NULL) # reset the RNG
modularity(gSTRclusters) # ... measures how separated the different membership
# types are from each other
tMem <- table(membership(gSTRclusters))
igraph::modularity(gSTRclusters) # ... measures how separated the different
# membership types are from each other
tMem <- table(igraph::membership(gSTRclusters))
length(tMem) # More than 2000 communities identified
hist(tMem, breaks = 50) # most clusters are small ...
hist(tMem, breaks = 50, col = "skyblue") # most clusters are small ...
range(tMem) # ... but one has > 100 members
@ -179,14 +181,14 @@ range(tMem) # ... but one has > 100 members
# Let's find the nodes with the ten highest betweenness centralities.
#
BC <- centr_betw(gSTR)
BC <- igraph::centr_betw(gSTR)
# remember: BC$res contains the results
head(BC$res)
BC$res[1] # betweenness centrality of node 1 in the graph ...
# ... which one is node 1?
V(gSTR)[1]
igraph::V(gSTR)[1]
# to get the ten-highest nodes, we simply label the elements of BC with their
# index ...
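# One way this could look (a sketch only, with intermediate names chosen for
# illustration - not necessarily the exact code of the script):
sBC <- BC$res                                  # betweenness centrality scores
names(sBC) <- as.character(seq_along(sBC))     # label each score with its node index
sBC <- sort(sBC, decreasing = TRUE)            # sort, keeping the labels
BCsel <- as.numeric(names(sBC)[1:10])          # indices of the ten most central nodes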
@ -203,7 +205,7 @@ head(sBC)
# We can use the first ten labels to subset the nodes in gSTR and fetch the
# IDs...
(ENSPsel <- names(V(gSTR)[BCsel]))
(ENSPsel <- names(igraph::V(gSTR)[BCsel]))
# We are going to use these IDs to produce some output for a submitted task:
# so I need you to personalize ENSPsel with the following
@ -231,12 +233,11 @@ set.seed(NULL) # reset the random number generator
# day), simply a few lines of sample code to get you started on the specific use
# case of retrieving descriptions for ensembl protein IDs.
if (!require(biomaRt, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("biomaRt")
library(biomaRt)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("biomaRt", quietly = TRUE)) {
BiocManager::install("biomaRt")
}
# Package information:
# library(help = biomaRt) # basic information
@ -244,14 +245,14 @@ if (!require(biomaRt, quietly=TRUE)) {
# data(package = "biomaRt") # available datasets
# define which dataset to use ...
myMart <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
myMart <- biomaRt::useMart("ensembl", dataset="hsapiens_gene_ensembl")
# what filters are defined?
(filters <- listFilters(myMart))
(filters <- biomaRt::listFilters(myMart))
# and what attributes can we filter for?
(attributes <- listAttributes(myMart))
(attributes <- biomaRt::listAttributes(myMart))
# Soooo many options - let's look for the correct name of filters that are
@ -259,18 +260,18 @@ myMart <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
filters[grep("ENSP", filters$description), ]
# ... and the correct attribute names for gene symbols and descriptions ...
attributes[grep("symbol", attributes$description, ignore.case=TRUE), ]
attributes[grep("description", attributes$description, ignore.case=TRUE), ]
attributes[grep("symbol", attributes$description, ignore.case = TRUE), ]
attributes[grep("description", attributes$description, ignore.case = TRUE), ]
# ... so we can put this together: here is a syntax example:
getBM(filters = "ensembl_peptide_id",
attributes = c("hgnc_symbol",
"wikigene_description",
"interpro_description",
"phenotype_description"),
values = "ENSP00000000442",
mart = myMart)
biomaRt::getBM(filters = "ensembl_peptide_id",
attributes = c("hgnc_symbol",
"wikigene_description",
"interpro_description",
"phenotype_description"),
values = "ENSP00000000442",
mart = myMart)
# A simple loop will now get us the information for our 10 most central genes
# from the human subset of STRING.
@ -279,13 +280,13 @@ CPdefs <- list() # Since we don't know how many matches one of our queries
# will return, we'll put the result dataframes into a list.
for (ID in ENSPsel) {
CPdefs[[ID]] <- getBM(filters = "ensembl_peptide_id",
attributes = c("hgnc_symbol",
"wikigene_description",
"interpro_description",
"phenotype_description"),
values = ID,
mart = myMart)
CPdefs[[ID]] <- biomaRt::getBM(filters = "ensembl_peptide_id",
attributes = c("hgnc_symbol",
"wikigene_description",
"interpro_description",
"phenotype_description"),
values = ID,
mart = myMart)
}
# So what are the proteins with the ten highest betweenness centralities?

View File

@ -3,13 +3,17 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-SEQA-Comparison unit
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 11 17
# Date: 2017 11 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# V 1.0 First live version 2017
# V 0.1 First code copied from BCH441_A03_makeYFOlist.R
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# Versions:
# 1.0 First live version 2017
# 0.1 First code copied from BCH441_A03_makeYFOlist.R
#
# TODO:
#
@ -25,26 +29,25 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------
#TOC> 1 Preparation 41
#TOC> 2 Aggregate properties 63
#TOC> 3 Sequence Composition Enrichment 106
#TOC> 3.1 Barplot, and side-by-side barplot 129
#TOC> 3.2 Plotting ratios 164
#TOC> 3.3 Plotting log ratios 180
#TOC> 3.4 Sort by frequency 195
#TOC> 3.5 Color by amino acid type 210
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------------
#TOC> 1 Preparation 47
#TOC> 2 Aggregate properties 68
#TOC> 3 Sequence Composition Enrichment 111
#TOC> 3.1 Barplot, and side-by-side barplot 134
#TOC> 3.2 Plotting ratios 169
#TOC> 3.3 Plotting log ratios 185
#TOC> 3.4 Sort by frequency 200
#TOC> 3.5 Color by amino acid type 215
#TOC>
#TOC> ==========================================================================
# = 1 Preparation =========================================================
if (!require(seqinr, quietly=TRUE)) {
if (! requireNamespace("seqinr", quietly = TRUE)) {
install.packages("seqinr")
library(seqinr)
}
# Package information:
# library(help = seqinr) # basic information
@ -66,7 +69,7 @@ if (!require(seqinr, quietly=TRUE)) {
# Let's try a simple function from seqinr: computing the pI of the sequence
?computePI
?seqinr::computePI
# This takes as input a vector of upper-case AA codes
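# For instance (an arbitrary example peptide, just to illustrate the call):
seqinr::computePI(seqinr::s2c("MKKTAIAIAVALAGFATVAQA"))  # returns a single pI value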
@ -82,13 +85,13 @@ s <- unlist(s) # strsplit() returns a list! Why?
# the function s2c() to convert strings into
# character vectors (and c2s to convert them back).
s2c(mySeq)
seqinr::s2c(mySeq)
computePI(s2c(mySeq)) # isoelectric point
pmw(s2c(mySeq)) # molecular weight
AAstat(s2c(mySeq)) # This also plots the distribution of
# values along the sequence
seqinr::computePI(seqinr::s2c(mySeq)) # isoelectric point
seqinr::pmw(seqinr::s2c(mySeq)) # molecular weight
seqinr::AAstat(seqinr::s2c(mySeq)) # This also plots the distribution of
# values along the sequence
# A true Labor of Love has gone into the
# compilation of the "aaindex" data:
@ -116,9 +119,9 @@ aaindex[[459]]$D
# Let's construct an enrichment plot to compare average frequencies
# with the amino acid counts in our sequence.
(refData <- aaindex[[459]]$I) # reference frequencies in %
names(refData) <- a(names(refData)) # change names to single-letter
# code using seqinr's "a()" function
(refData <- aaindex[[459]]$I) # reference frequencies in %
names(refData) <- seqinr::a(names(refData)) # change names to single-letter
# code using seqinr's "a()" function
sum(refData)
refData # ... in %
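# A minimal sketch of the comparison we are about to build (assuming "s" still
# holds the sequence as a character vector, as prepared above): convert observed
# counts to percentages and inspect log2 enrichment ratios. Residues that are
# absent from the sequence would give -Inf here.
obs <- table(factor(s, levels = names(refData)))  # counts, in refData order
obs <- 100 * (obs / sum(obs))                     # ... as percentages
round(log2(as.numeric(obs) / refData), 2)         # log2 (observed / expected)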

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-Sequence unit.
#
# Version: 1.3
# Version: 1.4
#
# Date: 2017 09 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.4 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.3 Update set.seed() usage
# 1.2 Removed irrelevant task. How did that even get in there? smh
# 1.1 Add chartr()
@ -30,23 +33,23 @@
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------
#TOC> 1 Prepare 60
#TOC> 2 Storing Sequence 78
#TOC> 3 String properties 107
#TOC> 4 Substrings 114
#TOC> 5 Creating strings: sprintf() 135
#TOC> 6 Changing strings 170
#TOC> 6.1.1 Changing case 172
#TOC> 6.1.2 Reverse 177
#TOC> 6.1.3 Change characters 181
#TOC> 6.1.4 Substitute characters 209
#TOC> 6.2 stringi and stringr 229
#TOC> 6.3 dbSanitizeSequence() 239
#TOC> 7 Permuting and sampling 251
#TOC> 7.1 Permutations 258
#TOC> 7.2 Sampling 304
#TOC> 7.2.1 Equiprobable characters 306
#TOC> 7.2.2 Defined probability vector 348
#TOC> 1 Prepare 63
#TOC> 2 Storing Sequence 80
#TOC> 3 String properties 109
#TOC> 4 Substrings 116
#TOC> 5 Creating strings: sprintf() 137
#TOC> 6 Changing strings 172
#TOC> 6.1.1 Changing case 174
#TOC> 6.1.2 Reverse 179
#TOC> 6.1.3 Change characters 183
#TOC> 6.1.4 Substitute characters 211
#TOC> 6.2 stringi and stringr 231
#TOC> 6.3 dbSanitizeSequence() 241
#TOC> 7 Permuting and sampling 253
#TOC> 7.1 Permutations 260
#TOC> 7.2 Sampling 306
#TOC> 7.2.1 Equiprobable characters 308
#TOC> 7.2.2 Defined probability vector 350
#TOC>
#TOC> ==========================================================================
@ -62,12 +65,11 @@
# Much basic sequence handling is supported by the Bioconductor package
# Biostrings.
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
@ -86,7 +88,7 @@ if (! require(Biostrings, quietly=TRUE)) {
# ... or as more complex objects with rich metadata e.g. as a Biostrings
# DNAstring, RNAstring, AAString, etc.
(a <- AAString("DIVMTQ"))
(a <- Biostrings::AAString("DIVMTQ"))
# ... and all of these representations can be interconverted:
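# For example (a minimal illustration): back to a plain character vector ...
as.character(a)   # "DIVMTQ"
# ... or from a DNA object to its RNA counterpart (T becomes U):
Biostrings::RNAString(Biostrings::DNAString("GACATTGTAATGACTCAA"))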
@ -314,6 +316,7 @@ N <- 100
set.seed(16818) # set RNG seed for repeatable randomness
v <- sample(nuc, N, replace = TRUE)
set.seed(NULL) # reset the RNG
(mySeq <- paste(v, collapse = ""))
# What's the GC content?
@ -323,9 +326,8 @@ sum(table(v)[c("G", "C")]) # 51 is close to expected
# What's the number of CpG motifs? Easy to check with the stringi
# stri_match_all() function
if (! require(stringi, quietly=TRUE)) {
if (! requireNamespace("stringi", quietly = TRUE)) {
install.packages("stringi")
library(stringi)
}
# Package information:
# library(help = stringi) # basic information
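# With the package in place, counting the "CG" occurrences in the sampled
# sequence could look like this (a sketch; mySeq is the string built above):
stringi::stri_count_fixed(mySeq, "CG")             # number of CpG dinucleotides
# ... or, to see the actual matches:
stringi::stri_match_all_regex(mySeq, "CG")[[1]]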

View File

@ -27,30 +27,30 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------------------
#TOC> 1 A Relational Datamodel in R: review 62
#TOC> 1.1 Building a sample database structure 102
#TOC> 1.1.1 completing the database 213
#TOC> 1.2 Querying the database 248
#TOC> 1.3 Task: submit for credit (part 1/2) 277
#TOC> 2 Implementing the protein datamodel 289
#TOC> 2.1 JSON formatted source data 315
#TOC> 2.2 "Sanitizing" sequence data 355
#TOC> 2.3 Create a protein table for our data model 375
#TOC> 2.3.1 Initialize the database 377
#TOC> 2.3.2 Add data 389
#TOC> 2.4 Complete the database 409
#TOC> 2.4.1 Examples of navigating the database 436
#TOC> 2.5 Updating the database 468
#TOC> 3 Add your own data 480
#TOC> 3.1 Find a protein 488
#TOC> 3.2 Put the information into JSON files 517
#TOC> 3.3 Create an R script to create your own database 540
#TOC> 3.3.1 Check and validate 560
#TOC> 3.4 Task: submit for credit (part 2/2) 601
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------------------------
#TOC> 1 A Relational Datamodel in R: review 57
#TOC> 1.1 Building a sample database structure 97
#TOC> 1.1.1 completing the database 208
#TOC> 1.2 Querying the database 243
#TOC> 1.3 Task: submit for credit (part 1/2) 272
#TOC> 2 Implementing the protein datamodel 284
#TOC> 2.1 JSON formatted source data 310
#TOC> 2.2 "Sanitizing" sequence data 350
#TOC> 2.3 Create a protein table for our data model 370
#TOC> 2.3.1 Initialize the database 372
#TOC> 2.3.2 Add data 384
#TOC> 2.4 Complete the database 404
#TOC> 2.4.1 Examples of navigating the database 431
#TOC> 2.5 Updating the database 463
#TOC> 3 Add your own data 475
#TOC> 3.1 Find a protein 483
#TOC> 3.2 Put the information into JSON files 512
#TOC> 3.3 Create an R script to create your own database 535
#TOC> 3.3.1 Check and validate 555
#TOC> 3.4 Task: submit for credit (part 2/2) 596
#TOC>
#TOC> ==========================================================================
@ -205,7 +205,7 @@ str(philDB)
# go back, re-read, play with it, and ask for help. This is essential.
# === 1.1.1 completing the database
# === 1.1.1 completing the database
# Next I'll add one more person, and create the other two tables:
@ -328,11 +328,11 @@ file.show("./data/MBP1_SACCE.json")
# sanitize the sequence at some point. But since we need to do that
# anyway, it is easier to see the whole sequence if we store it in chunks.
# Let's load the "jsonlite" package and have a look at how it reads this data.
# Let's make sure the "jsonlite" package exists on your computer, then we'll
# explore how it reads this data.
if (! require(jsonlite, quietly=TRUE)) {
if (! requireNamespace("jsonlite", quietly = TRUE)) {
install.packages("jsonlite")
library(jsonlite)
}
# Package information:
# library(help = jsonlite) # basic information
@ -340,7 +340,7 @@ if (! require(jsonlite, quietly=TRUE)) {
# data(package = "jsonlite") # available datasets
x <- fromJSON("./data/MBP1_SACCE.json")
x <- jsonlite::fromJSON("./data/MBP1_SACCE.json")
str(x)
x$name
@ -369,7 +369,7 @@ dbSanitizeSequence(x)
# == 2.3 Create a protein table for our data model =========================
# === 2.3.1 Initialize the database
# === 2.3.1 Initialize the database
# The function dbInit contains all the code to return a list of empty
@ -381,7 +381,7 @@ myDB <- dbInit()
str(myDB)
# === 2.3.2 Add data
# === 2.3.2 Add data
# fromJSON() returns a dataframe that we can readily process to add data
@ -389,7 +389,7 @@ str(myDB)
dbAddProtein
myDB <- dbAddProtein(myDB, fromJSON("./data/MBP1_SACCE.json"))
myDB <- dbAddProtein(myDB, jsonlite::fromJSON("./data/MBP1_SACCE.json"))
str(myDB)
# Let's check that the 833 amino acids of the yeast MBP1 sequence have
@ -428,7 +428,7 @@ source("./scripts/ABC-createRefDB.R")
str(myDB)
# === 2.4.1 Examples of navigating the database
# === 2.4.1 Examples of navigating the database
# You can look at the contents of the tables in the usual way we access
@ -552,7 +552,7 @@ myDB$taxonomy$species[sel]
# in any of the JSON files. Later you will add more information ...
# === 3.3.1 Check and validate
# === 3.3.1 Check and validate
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
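# A quick check one might run at this point (a sketch only - it assumes your
# database has a "name" column in the protein table, as in the data model
# sketched above, and that species codes are five uppercase letters):
grepl("^MBP1_[A-Z]{5}$", myDB$protein$name)   # should be TRUE for your entry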

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the FND-Genetic_code unit.
#
# Version: 1.0.1
# Version: 1.1
#
# Date: 2017 10 12
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0.1 Comment on "incomplete final line" warning in FASTA
# 1.0 First live version
#
@ -25,17 +28,17 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------------
#TOC> 1 Storing the genetic code 47
#TOC> 1.1 Genetic code in Biostrings 65
#TOC> 2 Working with the genetic code 97
#TOC> 2.1 Translate a sequence. 126
#TOC> 3 An alternative representation: 3D array 208
#TOC> 3.1 Print a Genetic code table 241
#TOC> 4 Tasks 267
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------------------
#TOC> 1 Storing the genetic code 45
#TOC> 1.1 Genetic code in Biostrings 63
#TOC> 2 Working with the genetic code 94
#TOC> 2.1 Translate a sequence. 129
#TOC> 3 An alternative representation: 3D array 212
#TOC> 3.1 Print a Genetic code table 246
#TOC> 4 Tasks 272
#TOC>
#TOC> ==========================================================================
@ -63,12 +66,11 @@ x["TAA"]
# available in the Bioconductor "Biostrings" package:
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
@ -77,45 +79,51 @@ if (! require(Biostrings, quietly=TRUE)) {
# The standard genetic code vector
GENETIC_CODE
Biostrings::GENETIC_CODE
# The table of genetic codes. This information corresponds to this page
# at the NCBI:
# https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=tgencodes
GENETIC_CODE_TABLE
Biostrings::GENETIC_CODE_TABLE
# Most of the alternative codes are mitochondrial codes. The id of the
# Alternative Yeast Nuclear code is "12"
getGeneticCode("12") # Alternative Yeast Nuclear
Biostrings::getGeneticCode("12") # Alternative Yeast Nuclear
# = 2 Working with the genetic code =======================================
# GENETIC_CODE is a "named vector"
# We'll use Biostrings::GENETIC_CODE a lot in this script, so we'll assign it
# to a "local" variable, rather than retrieving it from the package all the
# time.
str(GENETIC_CODE)
genCode <- Biostrings::GENETIC_CODE
# This is a named vector of characters ...
str(genCode)
# ... which also stores the alternative initiation codons TTG and CTG in
# an attribute of the vector. (Alternative initiation codons sometimes are
# used instead of ATG to initiate translation; even when they are not ATG,
# they are translated with fMet.)
attr(GENETIC_CODE, "alt_init_codons")
attr(genCode, "alt_init_codons")
# But the key to use this vector is in the "names" which we use for subsetting
# the list of amino acids in whatever way we need.
names(GENETIC_CODE)
names(genCode)
# The translation of "TGG" ...
GENETIC_CODE["TGG"]
genCode["TGG"]
# All stop codons
names(GENETIC_CODE)[GENETIC_CODE == "*"]
names(genCode)[genCode == "*"]
# All start codons
names(GENETIC_CODE)[GENETIC_CODE == "M"] # ... or
c(names(GENETIC_CODE)[GENETIC_CODE == "M"],
attr(GENETIC_CODE, "alt_init_codons"))
names(genCode)[genCode == "M"] # ... or
c(names(genCode)[genCode == "M"],
attr(genCode, "alt_init_codons"))
# == 2.1 Translate a sequence. =============================================
@ -165,7 +173,7 @@ nchar(mbp1)/3
# attributes that are useful for Biostrings. Thus we convert the sequence first
# with DNAstring(), then split it up, then convert it into a plain
# character vector.
mbp1Codons <- as.character(codons(DNAString(mbp1)))
mbp1Codons <- as.character(Biostrings::codons(Biostrings::DNAString(mbp1)))
head(mbp1Codons)
@ -173,7 +181,7 @@ head(mbp1Codons)
mbp1AA <- character(834)
for (i in seq_along(mbp1Codons)) {
mbp1AA[i] <- GENETIC_CODE[mbp1Codons[i]]
mbp1AA[i] <- genCode[mbp1Codons[i]]
}
head(mbp1Codons)
@ -196,7 +204,8 @@ sort(table(mbp1AA), decreasing = TRUE)
mbp1AA <- mbp1AA[-(length(mbp1AA))]
tail(mbp1AA) # Note the stop is gone!
# paste it together, collapsing the elements without separation-character
# paste it together, collapsing the elements using an empty string as the
# separation-character (i.e.: nothing)
(Mbp1 <- paste(mbp1AA, sep = "", collapse = ""))
@ -204,14 +213,15 @@ tail(mbp1AA) # Note the stop is gone!
# We don't use 3D arrays often - usually just 2D tables and data frames, so
# here is a good opportunity to review the syntax with a genetic code cube:
# here is a good opportunity to review the syntax of 3D arrays with a
# genetic code cube:
# Initialize, using A C G T as the names of the elements in each dimension
# Initialize, using A G C T as the names of the elements in each dimension
cCube <- array(data = character(64),
dim = c(4, 4, 4),
dimnames = list(c("A", "C", "G", "T"),
c("A", "C", "G", "T"),
c("A", "C", "G", "T")))
dimnames = list(c("A", "G", "C", "T"),
c("A", "G", "C", "T"),
c("A", "G", "C", "T")))
# fill it with amino acid codes using three nested loops
for (i in 1:4) {
@ -222,7 +232,7 @@ for (i in 1:4) {
dimnames(cCube)[[3]][k],
sep = "",
collapse = "")
cCube[i, j, k] <- GENETIC_CODE[myCodon]
cCube[i, j, k] <- genCode[myCodon]
}
}
}
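# A quick check: because the array is indexed by the dimension names, the
# order of the dimnames does not matter.
cCube["A", "T", "G"]   # "M" - methionine / start
cCube["T", "A", "A"]   # "*" - stop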
@ -290,25 +300,25 @@ for (i in nuc) {
# Solution:
# Fetch the code
GENETIC_CODE_TABLE
GENETIC_CODE_TABLE$name[GENETIC_CODE_TABLE$id == "12"]
altYcode <- getGeneticCode("12")
# Fetch the code
Biostrings::GENETIC_CODE_TABLE
Biostrings::GENETIC_CODE_TABLE$name[Biostrings::GENETIC_CODE_TABLE$id=="12"]
altYcode <- Biostrings::getGeneticCode("12")
# what's the difference?
(delta <- which(GENETIC_CODE != altYcode))
# what's the difference?
(delta <- which(Biostrings::GENETIC_CODE != altYcode))
GENETIC_CODE[delta]
altYcode[delta]
Biostrings::GENETIC_CODE[delta]
altYcode[delta]
# translate
altYAA <- character(834)
for (i in seq_along(mbp1Codons)) {
altYAA[i] <- altYcode[mbp1Codons[i]]
}
# translate
altYAA <- character(834)
for (i in seq_along(mbp1Codons)) {
altYAA[i] <- altYcode[mbp1Codons[i]]
}
table(mbp1AA)
table(altYAA)
table(mbp1AA)
table(altYAA)
# Task: The genetic code has significant redundancy, i.e. there are up to six
# codons that code for the same amino acid. Write code that lists how
@ -319,7 +329,7 @@ for (i in nuc) {
#
#
# Solution:
table(table(GENETIC_CODE))
table(table(Biostrings::GENETIC_CODE))
# [END]

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the FND-MAT-Graphs_and_networks unit.
#
# Version: 1.1
# Version: 1.2
#
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.1 Update set.seed() usage
# 1.0 First final version for learning units.
# 0.1 First code copied from 2016 material.
@ -30,17 +32,17 @@
#TOC>
#TOC> Section Title Line
#TOC> ------------------------------------------------------------
#TOC> 1 Review 48
#TOC> 2 DEGREE DISTRIBUTIONS 201
#TOC> 2.1 Random graph 207
#TOC> 2.2 scale-free graph (Barabasi-Albert) 255
#TOC> 2.3 Random geometric graph 320
#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 442
#TOC> 3.1 Basics 445
#TOC> 3.2 Components 517
#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 536
#TOC> 4.1 Diameter 573
#TOC> 5 GRAPH CLUSTERING 641
#TOC> 1 Review 50
#TOC> 2 DEGREE DISTRIBUTIONS 204
#TOC> 2.1 Random graph 210
#TOC> 2.2 scale-free graph (Barabasi-Albert) 258
#TOC> 2.3 Random geometric graph 323
#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 445
#TOC> 3.1 Basics 448
#TOC> 3.2 Components 520
#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 539
#TOC> 4.1 Diameter 576
#TOC> 5 GRAPH CLUSTERING 645
#TOC>
#TOC> ==========================================================================
@ -123,9 +125,8 @@ set.seed(NULL) # reset the RNG
# standard package for work with graphs in r is "igraph". We'll go into more
# details of the igraph package a bit later, for now we just use it to plot:
if (! require(igraph, quietly=TRUE)) {
if (! requireNamespace("igraph", quietly = TRUE)) {
install.packages("igraph")
library(igraph)
}
# Package information:
# library(help = igraph) # basic information
@ -133,10 +134,12 @@ if (! require(igraph, quietly=TRUE)) {
# data(package = "igraph") # available datasets
myG <- graph_from_adjacency_matrix(myRandAM, mode = "undirected")
myG <- igraph::graph_from_adjacency_matrix(myRandAM, mode = "undirected")
set.seed(112358) # set RNG seed for repeatable randomness
myGxy <- layout_with_graphopt(myG, charge=0.0012) # calculate layout coordinates
# calculate layout coordinates
myGxy <- igraph::layout_with_graphopt(myG,
charge=0.0012)
set.seed(NULL) # reset the RNG
@ -157,9 +160,9 @@ plot(myG,
rescale = FALSE,
xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
vertex.color=heat.colors(max(degree(myG)+1))[degree(myG)+1],
vertex.size = 1600 + (300 * degree(myG)),
vertex.label = sprintf("%s(%i)", names(V(myG)), degree(myG)),
vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
vertex.size = 1600 + (300 * igraph::degree(myG)),
vertex.label = sprintf("%s(%i)", names(igraph::V(myG)), igraph::degree(myG)),
vertex.label.family = "sans",
vertex.label.cex = 0.7)
par(oPar) # reset plot window
@ -179,10 +182,10 @@ sum(myRandAM)
rowSums(myRandAM) + colSums(myRandAM) # check this against the plot!
# The function degree() gives the same values
degree(myG)
igraph::degree(myG)
# Let's plot the degree distribution in a histogram:
degG <- degree(myG)
degG <- igraph::degree(myG)
brk <- seq(min(degG)-0.5, max(degG)+0.5, by=1) # define histogram breaks
hist(degG, breaks=brk, col="#A5CCF5",
xlim = c(-1,8), xaxt = "n",
@ -212,9 +215,9 @@ set.seed(31415927) # set RNG seed for repeatable randomness
my200AM <- makeRandomAM(as.character(1:N), p = 0.015)
set.seed(NULL) # reset the RNG
myG200 <- graph_from_adjacency_matrix(my200AM, mode = "undirected")
myGxy <- layout_with_graphopt(myG200, charge=0.0001) # calculate layout
# coordinates
myG200 <- igraph::graph_from_adjacency_matrix(my200AM, mode = "undirected")
myGxy <- igraph::layout_with_graphopt(myG200, charge=0.0001) # calculate layout
# coordinates
oPar <- par(mar= rep(0,4)) # Turn margins off, save graphics state
plot(myG200,
@ -222,8 +225,8 @@ plot(myG200,
rescale = FALSE,
xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
vertex.color=heat.colors(max(degree(myG200)+1))[degree(myG200)+1],
vertex.size = 150 + (60 * degree(myG200)),
vertex.color=heat.colors(max(igraph::degree(myG200)+1))[igraph::degree(myG200)+1],
vertex.size = 150 + (60 * igraph::degree(myG200)),
vertex.label = NA)
par(oPar) # restore graphics state
@ -231,7 +234,7 @@ par(oPar) # restore graphics state
# biological graphs look approximately like this.
# Calculate degree distributions
dg <- degree(myG200)
dg <- igraph::degree(myG200)
brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
hist(dg, breaks=brk, col="#A5F5CC",
xlim = c(-1,11), xaxt = "n",
@ -263,10 +266,10 @@ plot(log10(as.numeric(names(freqRank)) + 1),
N <- 200
set.seed(31415927) # set RNG seed for repeatable randomness
GBA <- sample_pa(N, power = 0.8, directed = FALSE)
GBA <- igraph::sample_pa(N, power = 0.8, directed = FALSE)
set.seed(NULL) # reset the RNG
GBAxy <- layout_with_graphopt(GBA, charge=0.0001) # calculate layout coordinates
GBAxy <- igraph::layout_with_graphopt(GBA, charge=0.0001)
oPar <- par(mar= rep(0,4)) # Turn margins off, save graphics state
plot(GBA,
@ -274,8 +277,8 @@ plot(GBA,
rescale = FALSE,
xlim = c(min(GBAxy[,1]) * 0.99, max(GBAxy[,1]) * 1.01),
ylim = c(min(GBAxy[,2]) * 0.99, max(GBAxy[,2]) * 1.01),
vertex.color=heat.colors(max(degree(GBA)+1))[degree(GBA)+1],
vertex.size = 200 + (30 * degree(GBA)),
vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
vertex.size = 200 + (30 * igraph::degree(GBA)),
vertex.label = NA)
par(oPar) # restore graphics state
@ -287,7 +290,7 @@ par(oPar) # restore grphics state
# singletons.
# What's the degree distribution of this graph?
(dg <- degree(GBA))
(dg <- igraph::degree(GBA))
brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
hist(dg, breaks=brk, col="#DCF5B5",
xlim = c(0,max(dg)+1), xaxt = "n",
@ -307,8 +310,8 @@ plot(log10(as.numeric(names(freqRank)) + 1),
# Sort-of linear, but many of the higher ranked nodes have a frequency of only
# one. That behaviour smooths out in larger graphs:
#
X <- sample_pa(100000, power = 0.8, directed = FALSE) # 100,000 nodes
freqRank <- table(degree(X))
X <- igraph::sample_pa(1e5, power = 0.8, directed = FALSE) # 100,000 nodes
freqRank <- table(igraph::degree(X))
plot(log10(as.numeric(names(freqRank)) + 1),
log10(as.numeric(freqRank)), type = "b",
xlab = "log(Rank)", ylab = "log(frequency)",
@ -367,7 +370,7 @@ makeRandomGeometricAM <- function(nam, B = 25, Q = 0.001, t = 0.6) {
for (iCol in (iRow+1):N) {
# geometric distance ...
d <- sqrt((AM$x[iRow] - AM$x[iCol])^2 +
(AM$y[iRow] - AM$y[iCol])^2) # Pythagoras
(AM$y[iRow] - AM$y[iCol])^2) # Pythagoras
# distance dependent probability
p <- 1 - 1/((1 + (Q * (exp(-B * (d-t)))))^(1 / nu))
if (runif(1) < p) {
@ -404,7 +407,7 @@ rGAM <- makeRandomGeometricAM(as.character(1:N), t = 0.4)
set.seed(NULL) # reset the RNG
myGRG <- graph_from_adjacency_matrix(rGAM$mat, mode = "undirected")
myGRG <- igraph::graph_from_adjacency_matrix(rGAM$mat, mode = "undirected")
oPar <- par(mar= rep(0,4)) # Turn margins off
plot(myGRG,
@ -412,13 +415,13 @@ plot(myGRG,
rescale = FALSE,
xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
vertex.color=heat.colors(max(degree(myGRG)+1))[degree(myGRG)+1],
vertex.size = 0.1 + (0.2 * degree(myGRG)),
vertex.color=heat.colors(max(igraph::degree(myGRG)+1))[igraph::degree(myGRG)+1],
vertex.size = 0.1 + (0.2 * igraph::degree(myGRG)),
vertex.label = NA)
par(oPar)
# degree distribution:
(dg <- degree(myGRG))
(dg <- igraph::degree(myGRG))
brk <- seq(min(dg) - 0.5, max(dg) + 0.5, by = 1)
hist(dg, breaks = brk, col = "#FCC6D2",
xlim = c(0, 25), xaxt = "n",
@ -450,7 +453,7 @@ summary(myG)
# This output means: this is an IGRAPH graph, with U = UN-directed edges
# and N = named nodes, that has 20 nodes and 20 edges. For details, see
?print.igraph
?igraph::print.igraph
mode(myG)
class(myG)
@ -463,11 +466,11 @@ class(myG)
# recipes, called _games_ in this package.
# Two basic functions retrieve nodes "Vertices", and "Edges":
V(myG)
E(myG)
igraph::V(myG)
igraph::E(myG)
# additional properties can be retrieved from the Vertices ...
V(myG)$name
igraph::V(myG)$name
# As with many R objects, loading the package provides special functions that
@ -487,12 +490,12 @@ plot(myG) # this is the result of default plot parameters
# Plot with some customizing parameters
oPar <- par(mar= rep(0,4)) # Turn margins off
plot(myG,
layout = layout_with_fr(myG),
vertex.color=heat.colors(max(degree(myG)+1))[degree(myG)+1],
vertex.size = 9 + (2 * degree(myG)),
vertex.label.cex = 0.5 + (0.05 * degree(myG)),
layout = igraph::layout_with_fr(myG),
vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
vertex.size = 9 + (2 * igraph::degree(myG)),
vertex.label.cex = 0.5 + (0.05 * igraph::degree(myG)),
edge.width = 2,
vertex.label = V(myG)$name,
vertex.label = igraph::V(myG)$name,
vertex.label.family = "sans",
vertex.label.cex = 0.9)
par(oPar)
@ -500,12 +503,12 @@ par(oPar)
# ... or with a different layout:
oPar <- par(mar= rep(0,4)) # Turn margins off
plot(myG,
layout = layout_in_circle(myG),
vertex.color=heat.colors(max(degree(myG)+1))[degree(myG)+1],
vertex.size = 9 + (2 * degree(myG)),
vertex.label.cex = 0.5 + (0.05 * degree(myG)),
layout = igraph::layout_in_circle(myG),
vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
vertex.size = 9 + (2 * igraph::degree(myG)),
vertex.label.cex = 0.5 + (0.05 * igraph::degree(myG)),
edge.width = 2,
vertex.label = V(myG)$name,
vertex.label = igraph::V(myG)$name,
vertex.label.family = "sans",
vertex.label.cex = 0.9)
par(oPar)
@ -518,18 +521,18 @@ par(oPar)
# The igraph function components() tells us whether there are components of the
# graph in which there is no path to other components.
components(myG)
igraph::components(myG)
# In the _membership_ vector, nodes are annotated with the index of the
# component they are part of. Sui7 is the only node of component 2, Cyj1 is in
# the third component etc. This is perhaps more clear if we sort by component
# index
sort(components(myG)$membership, decreasing = TRUE)
sort(igraph::components(myG)$membership, decreasing = TRUE)
# Retrieving e.g. the members of the first component from the list can be done by subsetting:
(sel <- components(myG)$membership == 1) # boolean vector ..
(c1 <- components(myG)$membership[sel])
(sel <- igraph::components(myG)$membership == 1) # boolean vector ..
(c1 <- igraph::components(myG)$membership[sel])
names(c1)
@ -542,9 +545,9 @@ names(c1)
# preferential-attachment ... but igraph has ways to simulate the basic ones
# (and we could easily simulate our own). Look at the following help pages:
?sample_gnm # see also sample_gnp for the Erdös-Rényi models
?sample_smallworld # for the Watts & Strogatz model
?sample_pa # for the Barabasi-Albert model
?igraph::sample_gnm # see also sample_gnp for the Erdös-Rényi models
?igraph::sample_smallworld # for the Watts & Strogatz model
?igraph::sample_pa # for the Barabasi-Albert model
# But note that there are many more sample_ functions. Check out the docs!
@ -554,7 +557,7 @@ names(c1)
# layout draws them, obviously.
set.seed(112358) # set RNG seed for repeatable randomness
myGxy <- layout_with_fr(myG) # calculate layout coordinates
myGxy <- igraph::layout_with_fr(myG) # calculate layout coordinates
set.seed(NULL) # reset the RNG
oPar <- par(mar = rep(0, 4)) # turn margins off, save graphics state
@ -563,30 +566,31 @@ plot(myG,
rescale = FALSE,
xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
vertex.color=heat.colors(max(degree(myG) + 1))[degree(myG) + 1],
vertex.size = 20 + (10 * degree(myG)),
vertex.label = V(myG)$name,
vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
vertex.size = 20 + (10 * igraph::degree(myG)),
vertex.label = igraph::V(myG)$name,
vertex.label.family = "sans",
vertex.label.cex = 0.8)
par(oPar) # restore graphics state
# == 4.1 Diameter ==========================================================
diameter(myG) # The diameter of a graph is its maximum length shortest path.
igraph::diameter(myG) # The diameter of a graph is its maximum length
# shortest path.
# let's plot this path: here are the nodes ...
get_diameter(myG)
igraph::get_diameter(myG)
# ... and we can get the x, y coordinates from iGxy by subsetting with the node
# names. The we draw the diameter-path with a transparent, thick pink line:
lines(myGxy[get_diameter(myG),], lwd=10, col="#ff63a788")
lines(myGxy[igraph::get_diameter(myG),], lwd=10, col="#ff63a788")
# == Centralization scores
?centralize
?igraph::centralize
# replot our graph, and color by log_betweenness:
bC <- centr_betw(myG) # calculate betweenness centrality
bC <- igraph::centr_betw(myG) # calculate betweenness centrality
nodeBetw <- bC$res
nodeBetw <- round(log(nodeBetw +1)) + 1
@ -597,8 +601,8 @@ plot(myG,
xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
vertex.color=heat.colors(max(nodeBetw))[nodeBetw],
vertex.size = 20 + (10 * degree(myG)),
vertex.label = V(myG)$name,
vertex.size = 20 + (10 * igraph::degree(myG)),
vertex.label = igraph::V(myG)$name,
vertex.label.family = "sans",
vertex.label.cex = 0.7)
par(oPar)
@ -613,7 +617,7 @@ par(oPar)
#
# Let's plot betweenness centrality for our random geometric graph:
bCmyGRG <- centr_betw(myGRG) # calculate betweenness centrality
bCmyGRG <- igraph::centr_betw(myGRG) # calculate betweenness centrality
nodeBetw <- bCmyGRG$res
nodeBetw <- round((log(nodeBetw +1))^2.5) + 1
@ -630,9 +634,9 @@ plot(myGRG,
vertex.label = NA)
par(oPar)
diameter(myGRG)
lines(rGAM$x[get_diameter(myGRG)],
rGAM$y[get_diameter(myGRG)],
igraph::diameter(myGRG)
lines(rGAM$x[igraph::get_diameter(myGRG)],
rGAM$y[igraph::get_diameter(myGRG)],
lwd = 10,
col = "#ff335533")
@ -648,11 +652,11 @@ lines(rGAM$x[get_diameter(myGRG)],
# http://www.ncbi.nlm.nih.gov/pubmed/18216267 and http://www.mapequation.org
myGRGclusters <- cluster_infomap(myGRG)
modularity(myGRGclusters) # ... measures how separated the different membership
# types are from each other
membership(myGRGclusters) # which nodes are in what cluster?
table(membership(myGRGclusters)) # how large are the clusters?
myGRGclusters <- igraph::cluster_infomap(myGRG)
igraph::modularity(myGRGclusters) # ... measures how separated the different
# membership types are from each other
igraph::membership(myGRGclusters) # which nodes are in what cluster?
table(igraph::membership(myGRGclusters)) # how large are the clusters?
# The largest cluster has 48 members, the second largest has 25, etc.
@ -661,7 +665,7 @@ table(membership(myGRGclusters)) # how large are the clusters?
# their cluster membership:
# first, make a vector with as many grey colors as we have communities ...
commColors <- rep("#f1eef6", max(membership(myGRGclusters)))
commColors <- rep("#f1eef6", max(igraph::membership(myGRGclusters)))
# ... then overwrite the first five with "real colors" - something like rust,
# lilac, pink, and mauve or so.
commColors[1:5] <- c("#980043", "#dd1c77", "#df65b0", "#c994c7", "#d4b9da")
@ -673,8 +677,8 @@ plot(myGRG,
rescale = FALSE,
xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
vertex.color=commColors[membership(myGRGclusters)],
vertex.size = 0.1 + (0.1 * degree(myGRG)),
vertex.color=commColors[igraph::membership(myGRGclusters)],
vertex.size = 0.1 + (0.1 * igraph::degree(myGRG)),
vertex.label = NA)
par(oPar)

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the FND-STA-Probability_distribution unit.
#
# Version: 1.2
# Version: 1.3
#
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.3 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# 1.2 Update set.seed() usage
# 1.1 Corrected empirical p-value
# 1.0 First code live version
@ -26,24 +28,24 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------------------------------------
#TOC> 1 Introduction 50
#TOC> 2 Three fundamental distributions 113
#TOC> 2.1 The Poisson Distribution 116
#TOC> 2.2 The uniform distribution 170
#TOC> 2.3 The Normal Distribution 190
#TOC> 3 quantile-quantile comparison 231
#TOC> 3.1 qqnorm() 241
#TOC> 3.2 qqplot() 307
#TOC> 4 Quantifying the difference 324
#TOC> 4.1 Chi2 test for discrete distributions 359
#TOC> 4.2 Kullback-Leibler divergence 451
#TOC> 4.2.1 An example from tossing dice 462
#TOC> 4.2.2 An example from lognormal distributions 585
#TOC> 4.3 Kolmogorov-Smirnov test for continuous distributions 628
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------------------------------
#TOC> 1 Introduction 52
#TOC> 2 Three fundamental distributions 115
#TOC> 2.1 The Poisson Distribution 118
#TOC> 2.2 The uniform distribution 172
#TOC> 2.3 The Normal Distribution 192
#TOC> 3 quantile-quantile comparison 233
#TOC> 3.1 qqnorm() 243
#TOC> 3.2 qqplot() 309
#TOC> 4 Quantifying the difference 326
#TOC> 4.1 Chi2 test for discrete distributions 361
#TOC> 4.2 Kullback-Leibler divergence 452
#TOC> 4.2.1 An example from tossing dice 463
#TOC> 4.2.2 An example from lognormal distributions 586
#TOC> 4.3 Kolmogorov-Smirnov test for continuous distributions 629
#TOC>
#TOC> ==========================================================================
@ -385,9 +387,8 @@ hist(rG1.5, breaks = myBreaks, col = myCols[4])
# package information - plotrix has _many_ useful utilities to enhance
# plots or produce informative visualizations.
if (! require(plotrix, quietly=TRUE)) {
if (! requireNamespace("plotrix", quietly = TRUE)) {
install.packages("plotrix")
library(plotrix)
}
# Package information:
# library(help = plotrix) # basic information
@ -395,9 +396,9 @@ if (! require(plotrix, quietly=TRUE)) {
# data(package = "plotrix") # available datasets
h <- multhist(list(rL1, rL2, rG1.2, rG1.5, rG1.9 ),
breaks = myBreaks,
col = myCols)
h <- plotrix::multhist(list(rL1, rL2, rG1.2, rG1.5, rG1.9 ),
breaks = myBreaks,
col = myCols)
legend("topright",
legend = c("rL1", "rL2", "rG1.2", "rG1.5", "rG1.9"),
pch=15,
@ -459,7 +460,7 @@ chisq.test(countsL1, countsG1.9, simulate.p.value = TRUE, B = 10000)
# be applied to discrete distributions. But we need to talk a bit about
# converting counts to p.m.f.'s.
# === 4.2.1 An example from tossing dice
# === 4.2.1 An example from tossing dice
# The p.m.f. of an honest die is (1:1/6, 2:1/6, 3:1/6, 4:1/6, 5:1/6, 6:1/6). But
# there is an issue when we convert sampled counts to frequencies, and estimate
@ -582,7 +583,7 @@ abline(v = KLdiv(rep(1/6, 6), pmfPC(counts, 1:6)), col="firebrick")
# somewhat but not drastically atypical.
# === 4.2.2 An example from lognormal distributions
# === 4.2.2 An example from lognormal distributions
# We had compared a set of lognormal and gamma distributions above, now we
# can use KL-divergence to quantify their similarity:
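# For orientation, a minimal sketch of what such a divergence computation looks
# like for discrete p.m.f.s (illustration only - not necessarily identical to
# the KLdiv() used elsewhere in this script); p and q are assumed to be
# same-length probability vectors without zeroes:
myKL <- function(p, q) { sum(p * log(p / q)) }
myKL(rep(1/6, 6), c(0.15, 0.17, 0.16, 0.17, 0.18, 0.17))  # two similar dice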

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Biostrings unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 20
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.0 2017 Revisions
# 0.1 First code copied from 2016 material.
#
@ -27,20 +30,20 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------
#TOC> 1 The Biostrings Package 52
#TOC> 2 Getting Data into Biostrings Objects 85
#TOC> 3 Working with Biostrings Objects 106
#TOC> 3.1 Properties 109
#TOC> 3.2 Subsetting 146
#TOC> 3.3 Operators 158
#TOC> 3.4 Transformations 165
#TOC> 4 Getting Data out of Biostrings Objects 172
#TOC> 5 More 181
#TOC> 5.1 Views 183
#TOC> 5.2 Iranges 195
#TOC> 5.3 StringSets 201
#TOC> Section Title Line
#TOC> ---------------------------------------------------------------
#TOC> 1 The Biostrings Package 55
#TOC> 2 Getting Data into Biostrings Objects 86
#TOC> 3 Working with Biostrings Objects 108
#TOC> 3.1 Properties 125
#TOC> 3.2 Subsetting 163
#TOC> 3.3 Operators 175
#TOC> 3.4 Transformations 182
#TOC> 4 Getting Data out of Biostrings Objects 189
#TOC> 5 More 198
#TOC> 5.1 Views 200
#TOC> 5.2 Iranges 214
#TOC> 5.3 StringSets 220
#TOC>
#TOC> ==========================================================================
@ -54,14 +57,12 @@
# First, we install and load the Biostrings package from bioconductor
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
BiocManager::install("Biostrings")
}
# Examine the package information:
library(help = Biostrings) # basic information
browseVignettes("Biostrings") # available vignettes
@ -72,72 +73,88 @@ data(package = "Biostrings") # available datasets
# of a "class" in R as a special kind of list), that can take on particular
# flavours for RNA, DNA or amino acid sequence information.
class(RNAString("AUG"))
class(DNAString("ATG"))
class(AAString("M"))
class(Biostrings::RNAString("AUG"))
class(Biostrings::DNAString("ATG"))
class(Biostrings::AAString("M"))
# An essential property of Biostrings objects is that they only allow letters
# from the applicable IUPAC alphabet:
RNAString("AUG")
DNAString("AUG") # Error! No "U" in IUPAC DNA codes
Biostrings::RNAString("AUG")
Biostrings::DNAString("AUG") # Error! No "U" in IUPAC DNA codes
# = 2 Getting Data into Biostrings Objects ================================
# Example: read FASTA. Extract sequence. Convert to DNAString object.
x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
x <- dbSanitizeSequence(x)
myDNAseq <- DNAString(x) # takes the nucleotide sequence and converts into a
# object of class DNAstring
rawSeq <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
rawSeq <- dbSanitizeSequence(rawSeq)
biosDNAseq <- Biostrings::DNAString(rawSeq) # converts the nucleotide sequence
# into an object of class DNAstring
# Multi-FASTA files can be read directly as an "XStringSet" ...
(myDNASet <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa"))
rawMFAfile <- "./data/S288C_YDL056W_MBP1_coding.fsa"
(biosDNASet <- Biostrings::readDNAStringSet(rawMFAfile))
# ... and if you subset one sequence from the set, you get an XString object
# back again.
(Xseq <- myDNASet[[1]])
(Xseq <- biosDNASet[[1]])
myDNAseq == Xseq # the comparison evaluates to TRUE ...
identical(myDNAseq, Xseq) # ... and indeed the objects are deemed identical.
biosDNAseq == Xseq # the comparison evaluates to TRUE ...
identical(biosDNAseq, Xseq) # ... and indeed the objects are deemed identical.
# = 3 Working with Biostrings Objects =====================================
# Biostrings is a highly engineered package that is tightly integrated into
# the Bioconductor world - unfortunately that brings with it a somewhat
# undesirable level of computational overhead and dependencies. Using the
# package as we normally do - i.e. calling required functions with their
# explicit package prefix is therefore not advisable. There are generics
# that won't be properly dispatched. If you only need a small number of
# functions for a very specific context, you will probably get away with
# Biostrings::<function>() - but even in the demonstration code of this script
# not everything works out of the box. We'll therefore load the library,
# but we'll (redundantly) use the prefix anyway so as to emphasize where
# the functions come from.
library(Biostrings)
# == 3.1 Properties ========================================================
str(myDNAseq)
length(myDNAseq) # This gives you the _number of nucleotides_!
# By comparison ...
length(x) # ... is 1: one string only. To get the number of
# characters in a string, you need nchar().
nchar(x) # However ...
nchar(myDNAseq) # ... also works.
str(rawSeq)
str(biosDNAseq)
uniqueLetters(myDNAseq)
length(rawSeq) # ... is 1: one string only. To get the number of
# characters in a string, you need nchar().
length(biosDNAseq) # ... but the length of a Biostrings object is the number of letters
nchar(rawSeq)
nchar(biosDNAseq) # ... but nchar() works too.
(uL <- Biostrings::uniqueLetters(biosDNAseq))
# Count frequencies - with strings, you would strsplit() into a character
# vector and then use table(). Biostrings has dedicated functions for this:
alphabetFrequency(myDNAseq)
Biostrings::alphabetFrequency(biosDNAseq)
# letterFrequency() works with a defined alphabet - such as what uniqueLetters()
# returns.
letterFrequency(myDNAseq, uniqueLetters(myDNAseq))
Biostrings::letterFrequency(biosDNAseq, uL)
sum(Biostrings::letterFrequency(biosDNAseq, c("G", "C"))) /
length(biosDNAseq) # GC contents
sum(letterFrequency(myDNAseq, c("G", "C"))) / length(myDNAseq) # GC contents
Biostrings::dinucleotideFrequency(biosDNAseq)
barplot(sort(Biostrings::dinucleotideFrequency(biosDNAseq)), cex.names = 0.5)
dinucleotideFrequency(myDNAseq)
barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
(triNuc <- trinucleotideFrequency(myDNAseq))
(triNuc <- Biostrings::trinucleotideFrequency(biosDNAseq))
barplot(sort(triNuc), col="#4499EE33")
triNuc[triNuc == max(triNuc)]
triNuc[triNuc == min(triNuc)]
max(triNuc) / min(triNuc) # AAA is more than 13 times as frequent as CGT
# compare to a shuffled sequence:
(triNuc <- trinucleotideFrequency(sample(myDNAseq)))
(triNuc <- Biostrings::trinucleotideFrequency(sample(biosDNAseq)))
barplot(sort(triNuc), col="#EEEE4433", add = TRUE)
# Interpret this plot.
@ -146,34 +163,34 @@ barplot(sort(triNuc), col="#EEEE4433", add = TRUE)
# == 3.2 Subsetting ========================================================
# Subsetting any XString object works as expected:
myDNAseq[4:15]
biosDNAseq[4:15]
# ... well - maybe not expected, because x[4:15] would not work.
# ... well - maybe not expected, because rawSeq[4:15] would not work.
# Alternatively to the "[" operator, use the subseq() function - especially for
# long sequences. This is far more efficient.
subseq(myDNAseq, start = 1, end = 30)
Biostrings::subseq(biosDNAseq, start = 1, end = 30)
# == 3.3 Operators =========================================================
# RNAstring() and DNAstring() objects compare U and T as equals!
RNAString("AUGUCUAACCAAAUAUACUCAGCGAGAUAU") ==
DNAString("ATGTCTAACCAAATATACTCAGCGAGATAT")
Biostrings::RNAString("AUGUCUAACCAAAUAUACUCAGCGAGAUAU") ==
Biostrings::DNAString("ATGTCTAACCAAATATACTCAGCGAGATAT")
# == 3.4 Transformations ===================================================
myDNAseq[4:15]
reverseComplement(myDNAseq[4:15])
translate(myDNAseq[4:15])
biosDNAseq[4:15]
Biostrings::reverseComplement(biosDNAseq[4:15])
Biostrings::translate(biosDNAseq[4:15])
# = 4 Getting Data out of Biostrings Objects ==============================
# If you need a character object, use toString():
toString(myDNAseq[4:15])
Biostrings::toString(biosDNAseq[4:15])
# save() and load() works like on all other R objects.
@ -185,7 +202,9 @@ toString(myDNAseq[4:15])
# Biostring "Views" are objects that store multiple substrings of one
# Biostring object.
(myView <- Views(myDNAseq, start = c(1, 19, 37), end = c(15, 30, 45)))
(myView <- Biostrings::Views(biosDNAseq,
start = c(1, 19, 37),
end = c(15, 30, 45)))
# Views are convenient to store feature annotations
names(myView) <- c("Feature-A", "Feature-B", "Feature-C")
@ -202,20 +221,20 @@ cat(sprintf("\n%s\t(%d)\t%s", names(myView), width(myView), myView ))
# Biostring "StringSets" store multiple sequences.
#
ompA <- AAString("MKKTAIAIAVALAGFATVAQA")
ompA <- Biostrings::AAString("MKKTAIAIAVALAGFATVAQA")
sample(ompA) # sample can work directly on a Biostring object to shuffle it
x[1] <- toString(ompA)
x <- Biostrings::toString(ompA)
for (i in 2:10) {
x[i] <- toString(sample(ompA))
x[i] <- Biostrings::toString(sample(ompA))
}
shuffledPeptideSet <- AAStringSet(x)
shuffledPeptideSet <- Biostrings::AAStringSet(x)
names(shuffledPeptideSet) <- c("ompA", paste("shuffle.", 1:9, sep=""))
shuffledPeptideSet
length(shuffledPeptideSet)
width(shuffledPeptideSet)
alphabetFrequency(shuffledPeptideSet)
Biostrings::width(shuffledPeptideSet)
Biostrings::alphabetFrequency(shuffledPeptideSet)
# [END]

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR_GEO2R unit.
#
# Version: 1.1
# Version: 1.2
#
# Date: 2017 09 - 2018 01
# Date: 2017 09 - 2019 01
# Author: Boris Steipe <boris.steipe@utoronto.ca>
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.1 Add section on GPL annotations
# 1.0 Updates for BCH441 2017
# 0.1 First code copied from 2016 material.
@ -31,22 +34,22 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------
#TOC> 1 Preparations 53
#TOC> 2 Loading a GEO Dataset 84
#TOC> 3 Column wise analysis - time points 154
#TOC> 3.1 Task - Comparison of experiments 160
#TOC> 3.2 Grouped Samples 207
#TOC> 4 Row-wise Analysis: Expression Profiles 242
#TOC> 4.1 Task - Read a table of features 277
#TOC> 4.2 Selected Expression profiles 325
#TOC> 5 Differential Expression 366
#TOC> 5.1 Final task: Gene descriptions 490
#TOC> 6 Improving on Discovery by Differential Expression 495
#TOC> 7 Annotation data 577
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------------
#TOC> 1 Preparations 56
#TOC> 2 Loading a GEO Dataset 82
#TOC> 3 Column wise analysis - time points 152
#TOC> 3.1 Task - Comparison of experiments 158
#TOC> 3.2 Grouped Samples 205
#TOC> 4 Row-wise Analysis: Expression Profiles 240
#TOC> 4.1 Task - Read a table of features 275
#TOC> 4.2 Selected Expression profiles 323
#TOC> 5 Differential Expression 364
#TOC> 5.1 Final task: Gene descriptions 504
#TOC> 6 Improving on Discovery by Differential Expression 510
#TOC> 7 Annotation data 594
#TOC>
#TOC> ==========================================================================
@ -55,12 +58,11 @@
# To load and analyze GEO datasets we use a number of Bioconductor packages:
if (! require(Biobase, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biobase")
library(Biobase)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biobase", quietly = TRUE)) {
BiocManager::install("Biobase")
}
# Package information:
# library(help = Biobase) # basic information
@ -68,12 +70,8 @@ if (! require(Biobase, quietly=TRUE)) {
# data(package = "Biobase") # available datasets
if (! require(GEOquery, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("GEOquery")
library(GEOquery)
if (! requireNamespace("GEOquery", quietly = TRUE)) {
BiocManager::install("GEOquery")
}
# Package information:
# library(help = GEOquery) # basic information
@ -94,7 +92,7 @@ if (! require(GEOquery, quietly=TRUE)) {
# I have experienced outages over several hours. If the command below does
# not work for you, skip ahead to the fallback procedure.
GSE3635 <- getGEO("GSE3635", GSEMatrix =TRUE, getGPL=FALSE)
GSE3635 <- GEOquery::getGEO("GSE3635", GSEMatrix =TRUE, getGPL=FALSE)
# Note: GEO2R scripts call the expression data set
# "gset" throughout ... in this script I give
# it the name "GSE3635" for clarity.
@ -136,14 +134,14 @@ help("ExpressionSet-class")
GSE3635
# Access contents via methods:
featureNames(GSE3635)[1:20] # Rows. What are these features?
sampleNames(GSE3635)[1:10] # Columns. What are these columns?
Biobase::featureNames(GSE3635)[1:20] # Rows. What are these features?
Biobase::sampleNames(GSE3635)[1:10] # Columns. What are these columns?
# Access contents by subsetting:
( tmp <- GSE3635[12:17, 1:6] )
# Access data
exprs(tmp) # exprs() gives us the actual expression values.
Biobase::exprs(tmp) # exprs() gives us the actual expression values.
#TASK> What are the data:
@ -160,9 +158,9 @@ exprs(tmp) # exprs() gives us the actual expression values.
# == 3.1 Task - Comparison of experiments ==================================
# Get an overview of the distribution of data values in individual columns
summary(exprs(GSE3635)[ , 1])
summary(exprs(GSE3635)[ , 4])
summary(exprs(GSE3635)[ , 7])
summary(Biobase::exprs(GSE3635)[ , 1])
summary(Biobase::exprs(GSE3635)[ , 4])
summary(Biobase::exprs(GSE3635)[ , 7])
# as a boxplot
cyclicPalette <- colorRampPalette(c("#00AAFF",
@ -173,7 +171,7 @@ cyclicPalette <- colorRampPalette(c("#00AAFF",
"#FFAA00",
"#00AAFF"))
tCols <- cyclicPalette(13)
boxplot(exprs(GSE3635), col = tCols)
boxplot(Biobase::exprs(GSE3635), col = tCols)
#TASK> Study this boxplot. What's going on? Are these expression values?
@ -181,11 +179,11 @@ boxplot(exprs(GSE3635), col = tCols)
# Let's plot the distributions of values in a more fine-grained manner:
hT0 <- hist(exprs(GSE3635)[ , 1], breaks = 100)
hT3 <- hist(exprs(GSE3635)[ , 4], breaks = 100)
hT6 <- hist(exprs(GSE3635)[ , 7], breaks = 100)
hT9 <- hist(exprs(GSE3635)[ , 10], breaks = 100)
hT12 <- hist(exprs(GSE3635)[ , 13], breaks = 100)
hT0 <- hist(Biobase::exprs(GSE3635)[ , 1], breaks = 100)
hT3 <- hist(Biobase::exprs(GSE3635)[ , 4], breaks = 100)
hT6 <- hist(Biobase::exprs(GSE3635)[ , 7], breaks = 100)
hT9 <- hist(Biobase::exprs(GSE3635)[ , 10], breaks = 100)
hT12 <- hist(Biobase::exprs(GSE3635)[ , 13], breaks = 100)
plot( hT0$mids, hT0$counts, type = "l", col = tCols[1], xlim = c(-0.5, 0.5))
@ -218,7 +216,7 @@ for (i in 1:nchar(gsms)) {
sml <- paste("G", sml, sep="") # set group names
# order samples by group
ex <- exprs(GSE3635)[ , order(sml)]
ex <- Biobase::exprs(GSE3635)[ , order(sml)]
sml <- sml[order(sml)]
fl <- as.factor(sml)
labels <- c("t0","t10","t20","t30","t40","t50") # these are the labels we
@ -231,8 +229,8 @@ labels <- c("t0","t10","t20","t30","t40","t50") # these are the labels we
GEOcols <- c("#dfeaf4", "#f4dfdf", "#f2cb98", "#dcdaa5",
"#dff4e4", "#f4dff4", "#AABBCC")
dev.new(width = 4 + dim(GSE3635)[[2]] / 5, height = 6) # plot into a new window
par(mar = c(2 + round(max(nchar(sampleNames(GSE3635))) / 2), 4, 2, 1))
title <- paste ("GSE3635", '/', annotation(GSE3635),
par(mar = c(2 + round(max(nchar(Biobase::sampleNames(GSE3635))) / 2), 4, 2, 1))
title <- paste ("GSE3635", '/', Biobase::annotation(GSE3635),
" grouped samples", sep ='')
boxplot(ex, boxwex = 0.6, notch = TRUE, main = title, outline=FALSE,
las = 2, col = GEOcols[fl])
@ -331,7 +329,7 @@ gName <- "MBP1"
(iFeature <- which(SGD_features$name == gName))
(iExprs <- which(featureNames(GSE3635) == SGD_features$sysName[iFeature]))
plot(seq(0, 120, by = 10),
exprs(GSE3635)[iExprs, ],
Biobase::exprs(GSE3635)[iExprs, ],
main = paste("Expression profile for", gName),
xlab = "time (min)",
ylab = "expression",
@ -368,12 +366,8 @@ SGD_features$description[iFeature]
# GEO2R discovers the top differentially expressed genes by
# using functions in the Bioconductor limma package.
if (! require(limma, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("limma")
library(limma)
if (! requireNamespace("limma", quietly = TRUE)) {
BiocManager::install("limma")
}
# Package information:
# library(help = limma) # basic information
@ -392,6 +386,20 @@ if (! require(limma, quietly=TRUE)) {
# the groups
# 4. Format results.
# Biobase is a highly engineered package that is tightly integrated into
# the Bioconductor world - unfortunately that brings with it a somewhat
# undesirable level of computational overhead and dependencies. Using the
# package as we normally do - i.e. calling required functions with their
# explicit package prefix - is therefore not advisable. There are generics
# that won't be properly dispatched. If you only need a small number of
# functions for a very specific context, you will probably get away with
# Biobase::<function>() - but even in the demonstration code of this script
# not everything works out of the box. We'll therefore load the library,
# but we'll (redundantly) use the prefix anyway so as to emphasize where
# the functions come from.
library(Biobase)
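# As an aside - a quick way to see the difference between a merely loaded
# namespace and an attached package (a sketch; works for any package name):
"Biobase" %in% loadedNamespaces()    # TRUE once the namespace is loaded
"package:Biobase" %in% search()      # TRUE only after library(Biobase)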
# We are recapitulating the experiment in which we assigned the 0, 10, 60 and
# 70 minute samples to one group, the 30, 40, 90 and 100 minute samples to
# another group, and calculated differential expression values between these
@ -415,15 +423,15 @@ myDesign
# Now we can calculate the fit of all rows to a linear model that depends
# on the two groups as specified in the design:
myFit <- lmFit(mySet, myDesign)
myFit <- limma::lmFit(mySet, myDesign)
# Next we calculate the contrasts, given the fit ...
myCont.matrix <- makeContrasts(A - B, levels = myDesign)
myFit2 <- contrasts.fit(myFit, myCont.matrix)
myCont.matrix <- limma::makeContrasts(A - B, levels = myDesign)
myFit2 <- limma::contrasts.fit(myFit, myCont.matrix)
# ... compute appropriate probabilites from a modified t-test
# (empirical Bayes) ...
myFit2 <- eBayes(myFit2, 0.01)
myFit2 <- limma::eBayes(myFit2, 0.01)
# ... add the gene names to the fit - object ...
myFit2$genes <- featureNames(mySet)
@ -433,7 +441,10 @@ myFit2$genes <- featureNames(mySet)
# gave us only the top 250 genes, but we might as well do 1000, just so we
# can be reasonably sure that our genes of interest are included.
N <- 1000
myTable <- topTable(myFit2, adjust.method = "fdr", sort.by = "B", number = N)
myTable <- limma::topTable(myFit2,
adjust.method = "fdr",
sort.by = "B",
number = N)
str(myTable)
# The gene names are now in the $ID column
@ -461,7 +472,7 @@ abline(h = 0, col = "#00000055")
for (i in 1:10) {
thisID <- myTable$ID[i]
points(seq(0, 120, by = 10), exprs(GSE3635)[thisID, ], type = "b")
points(seq(0, 120, by = 10), Biobase::exprs(GSE3635)[thisID, ], type = "b")
}
# Our guess that we might discover interesting genes by selecting groups A and B
@ -480,7 +491,10 @@ for (i in 1:10) {
myControls <- c("Cdc14", "Mbp1", "Swi6", "Swi4", "Whi5", "Cln1", "Cln2", "Cln3")
for (name in toupper(myControls)) {
thisID <- SGD_features$sysName[which(SGD_features$name == name)]
points(seq(0, 120, by=10), exprs(GSE3635)[thisID, ], type="b", col="#AA0000")
points(seq(0, 120, by=10),
Biobase::exprs(GSE3635)[thisID, ],
type="b",
col="#AA0000")
}
# Indeed, the discovered gene profiles look much "cleaner" than the real cycle
@ -504,7 +518,7 @@ for (name in toupper(myControls)) {
gName <- "CLN2"
(iFeature <- which(SGD_features$name == gName))
(iExprs <- which(featureNames(GSE3635) == SGD_features$sysName[iFeature]))
Cln2Profile <- exprs(GSE3635)[iExprs, ]
Cln2Profile <- Biobase::exprs(GSE3635)[iExprs, ]
plot(seq(0, 120, by = 10),
Cln2Profile,
ylim = c(-1, 1),
@ -519,16 +533,16 @@ abline(v = 60, col = "#00000055")
# Set up a vector of correlation values
myCorrelations <- numeric(nrow(exprs(GSE3635)))
names(myCorrelations) <- featureNames(GSE3635)
myCorrelations <- numeric(nrow(Biobase::exprs(GSE3635)))
names(myCorrelations) <- Biobase::featureNames(GSE3635)
for (i in 1:length(myCorrelations)) {
myCorrelations[i] <- cor(Cln2Profile, exprs(GSE3635)[i, ])
myCorrelations[i] <- cor(Cln2Profile, Biobase::exprs(GSE3635)[i, ])
}
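# The same correlations can also be computed without an explicit loop - a
# sketch, assuming GSE3635 and Cln2Profile as defined above (the name
# myCorrApply is ours, for illustration only):
myCorrApply <- apply(Biobase::exprs(GSE3635), 1, cor, y = Cln2Profile)
all.equal(unname(myCorrApply), unname(myCorrelations))   # expected: TRUE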
myTopC <- order(myCorrelations, decreasing = TRUE)[1:10] # top ten
# Number 1
(ID <- featureNames(GSE3635)[myTopC[1]])
(ID <- Biobase::featureNames(GSE3635)[myTopC[1]])
# Get information
SGD_features[which(SGD_features$sysName == ID), ]
@ -537,12 +551,13 @@ SGD_features[which(SGD_features$sysName == ID), ]
# Let's plot the rest
for (i in 2:length(myTopC)) {
ID <- featureNames(GSE3635)[myTopC[i]]
ID <- Biobase::featureNames(GSE3635)[myTopC[i]]
points(seq(0, 120, by = 10),
exprs(GSE3635)[ID, ],
Biobase::exprs(GSE3635)[ID, ],
type = "b",
col= "chartreuse")
print(SGD_features[which(SGD_features$sysName == ID), c("name", "description")])
print(SGD_features[which(SGD_features$sysName == ID),
c("name", "description")])
}
# Note that all of these genes are highly correlated with a known cell cycle
@ -554,12 +569,13 @@ for (i in 2:length(myTopC)) {
# And we haven't even looked at the anticorrelated genes yet...
myBottomC <- order(myCorrelations, decreasing = FALSE)[1:10] # bottom ten
for (i in 1:length(myBottomC)) {
ID <- featureNames(GSE3635)[myBottomC[i]]
ID <- Biobase::featureNames(GSE3635)[myBottomC[i]]
points(seq(0, 120, by = 10),
exprs(GSE3635)[ID, ],
Biobase::exprs(GSE3635)[ID, ],
type = "b",
col= "coral")
print(SGD_features[which(SGD_features$sysName == ID), c("name", "description")])
print(SGD_features[which(SGD_features$sysName == ID),
c("name", "description")])
}
# ... which are very interesting in their own right.
@ -583,7 +599,7 @@ for (i in 1:length(myBottomC)) {
# we used getGEO("GSE3635", GSEMatrix = TRUE, getGPL = FALSE), and the GPL
# annotations were not loaded. We could use getGPL = TRUE instead ...
GSE3635annot <- getGEO("GSE3635", GSEMatrix = TRUE, getGPL = TRUE)
GSE3635annot <- GEOquery::getGEO("GSE3635", GSEMatrix = TRUE, getGPL = TRUE)
GSE3635annot <- GSE3635annot[[1]]
# ... and the feature data is then available in the GSE3635@featureData@data
@ -597,13 +613,8 @@ GSE3635annot@featureData@data[ 1:20 , ]
myAnnot <- GSE3635annot@featureData@data[ , c("SPOT_ID", "Gene")]
str(myAnnot)
# ... Note that this is a data frame, but - oy veh - the gene symbols are
# factors. Really, we need to fix this! To convert a factor into a string,
# we need to cast it to character.
myAnnot$Gene <- as.character(myAnnot$Gene)
# ... whereupon we can find things we might be looking for ...
# ... Note that this is a data frame and it is easy to find things we
# might be looking for ...
myAnnot[which(myAnnot$Gene == "MBP1"), ]
# ... or identify rows that might give us trouble, such as probes that
@ -614,13 +625,11 @@ myAnnot[which(myAnnot$Gene == "MBP1"), ]
GSE3635@annotation # "GPL1914"
# ... and downloaded it directly from NCBI:
GPL1914 <- getGEO("GPL1914")
GPL1914 <- GEOquery::getGEO("GPL1914")
str(GPL1914)
# ... from which we can get the data - which is however NOT necessarily
# matched to the rows of our expression dataset. Note that here too: the
# majority of data elements are factors and will likely have to be converted
# before use.
# matched to the rows of our expression dataset.
# [END]

View File

@ -3,12 +3,15 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Genetic_code_optimality unit.
#
# Version: 1.1
# Version: 1.2
#
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite()
# 1.1 Update set.seed() usage
# 1.0.1 Fixed two bugs discovered by Suan Chin Yeo.
# 1.0 New material.
@ -30,16 +33,16 @@
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------
#TOC> 1 Designing a computational experiment 54
#TOC> 2 Setting up the tools 70
#TOC> 2.1 Natural and alternative genetic codes 73
#TOC> 2.2 Effect of mutations 132
#TOC> 2.2.1 reverse-translate 143
#TOC> 2.2.2 Randomly mutate 168
#TOC> 2.2.3 Forward- translate 193
#TOC> 2.2.4 measure effect 211
#TOC> 3 Run the experiment 258
#TOC> 4 Task solutions 351
#TOC> 1 Designing a computational experiment 57
#TOC> 2 Setting up the tools 73
#TOC> 2.1 Natural and alternative genetic codes 76
#TOC> 2.2 Effect of mutations 134
#TOC> 2.2.1 reverse-translate 145
#TOC> 2.2.2 Randomly mutate 170
#TOC> 2.2.3 Forward- translate 195
#TOC> 2.2.4 measure effect 213
#TOC> 3 Run the experiment 260
#TOC> 4 Task solutions 356
#TOC>
#TOC> ==========================================================================
@ -73,12 +76,11 @@
# == 2.1 Natural and alternative genetic codes =============================
# Load genetic code tables from the Biostrings package
if (! require(Biostrings, quietly=TRUE)) {
if (! exists("biocLite")) {
source("https://bioconductor.org/biocLite.R")
}
biocLite("Biostrings")
library(Biostrings)
if (! requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
if (! requireNamespace("Biostrings", quietly = TRUE)) {
BiocManager::install("Biostrings")
}
# Package information:
# library(help = Biostrings) # basic information
@ -257,52 +259,55 @@ evalMut <- function(nat, mut) {
# = 3 Run the experiment ==================================================
# Fetch the standard Genetic code from Biostrings::
stdCode <- Biostrings::GENETIC_CODE
# Fetch the nucleotide sequence for MBP1:
myDNA <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")[-1]
myDNA <- paste0(myDNA, collapse = "")
myDNA <- as.character(codons(DNAString(myDNA)))
myDNA <- as.character(Biostrings::codons(Biostrings::DNAString(myDNA)))
myDNA <- myDNA[-length(myDNA)] # drop the stop codon
myAA <- traFor(myDNA, GENETIC_CODE)
myAA <- traFor(myDNA, stdCode)
# Mutate and evaluate
set.seed(112358)
x <- randMut(myDNA)
set.seed(NULL)
x <- traFor(x, GENETIC_CODE)
x <- traFor(x, stdCode)
evalMut(myAA, x) # 166.4
# Try this 200 times, and see how the values are distributed.
N <- 200
valUGC <- numeric(N)
valSTDC <- numeric(N)
set.seed(112358) # set RNG seed for repeatable randomness
for (i in 1:N) {
x <- randMut(myDNA) # mutate
x <- traFor(x, GENETIC_CODE) # translate
valUGC[i] <- evalMut(myAA, x) # evaluate
x <- traFor(x, stdCode) # translate
valSTDC[i] <- evalMut(myAA, x) # evaluate
}
set.seed(NULL) # reset the RNG
hist(valUGC,
hist(valSTDC,
breaks = 15,
col = "palegoldenrod",
xlim = c(0, 400),
ylim = c(0, N/4),
main = "Universal vs. Synthetic Genetic Code",
main = "Standard vs. Synthetic Genetic Code",
xlab = "Mutation penalty")
# This looks like a normal distribution. Let's assume the effect of mutations
# under the universal genetic code is the mean of this distribution:
effectUGC <- mean(valUGC) # 178.1
# under the standard genetic code is the mean of this distribution:
effectSTDC <- mean(valSTDC) # 178.1
# Now we can look at the effects of alternate genetic codes:
set.seed(112358)
# choose a new code
GC <- randomGC(GENETIC_CODE)
GC <- randomGC(stdCode)
set.seed(NULL)
# reverse translate hypothetical sequence according to the new code
@ -321,7 +326,7 @@ valXGC <- numeric(N)
set.seed(1414214) # set RNG seed for repeatable randomness
for (i in 1:N) {
GC <- randomGC(GENETIC_CODE) # Choose code
GC <- randomGC(stdCode) # Choose code
x <- traRev(myAA, GC) # reverse translate
x <- randMut(x) # mutate
x <- traFor(x, GC) # translate
@ -355,7 +360,7 @@ valSGC <- numeric(N)
set.seed(2718282) # set RNG seed for repeatable randomness
for (i in 1:N) {
GC <- swappedGC(GENETIC_CODE) # Choose code
GC <- swappedGC(stdCode) # Choose code
x <- traRev(myAA, GC) # reverse translate
x <- randMut(x) # mutate
x <- traFor(x, GC) # translate

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit.
#
# Version: 1.0.1
# Version: 1.1
#
# Date: 2017 10 - 2018 12
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout,
# 1.0.1 Updates for slightly changed interfaces
# 1.0 First ABC units version
# 0.1 First code copied from 2016 material.
@ -27,22 +29,21 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------------
#TOC> 1 Constructing a POST command from a Web query 44
#TOC> 1.1 Task - fetchPrositeFeatures() function 145
#TOC> 2 Task solutions 153
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------------------
#TOC> 1 Constructing a POST command from a Web query 42
#TOC> 1.1 Task - fetchPrositeFeatures() function 142
#TOC> 2 Task solutions 150
#TOC>
#TOC> ==========================================================================
# = 1 Constructing a POST command from a Web query ========================
if (! require(httr, quietly=TRUE)) {
if (! requireNamespace("httr", quietly = TRUE)) {
install.packages("httr")
library(httr)
}
# Package information:
# library(help = httr) # basic information
@ -60,24 +61,24 @@ UniProtID <- "P39678"
URL <- "https://prosite.expasy.org/cgi-bin/prosite/PSScan.cgi"
response <- POST(URL,
body = list(meta = "opt1",
meta1_protein = "opt1",
seq = UniProtID,
skip = "on",
output = "tabular"))
response <- httr::POST(URL,
body = list(meta = "opt1",
meta1_protein = "opt1",
seq = UniProtID,
skip = "on",
output = "tabular"))
# Send off this request, and you should have a response in a few
# seconds. Let's check the status first:
status_code(response) # If this is not 200, something went wrong and it
# makes no sense to continue. If this persists, ask
# on the mailing list what to do.
httr::status_code(response) # If this is not 200, something went wrong and it
# makes no sense to continue. If this persists, ask
# on the mailing list what to do.
# The text contents of the response is available with the
# content() function:
content(response, "text")
httr::content(response, "text")
# ... should show you the same as the page contents that
# you have seen in the browser. Now we need to extract
@ -86,7 +87,7 @@ content(response, "text")
# individual lines, since each of our data elements is on
# its own line. We simply split on the "\\n" newline character.
lines <- unlist(strsplit(content(response, "text"), "\\n"))
lines <- unlist(strsplit(httr::content(response, "text"), "\\n"))
head(lines)
# Now we define a query pattern for the lines we want:
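# One possibility is to keep only those lines that contain the UniProt ID
# flanked by "|" characters (a sketch - check the actual format of the
# tabular output you received):
# patt <- sprintf("\\|%s\\|", UniProtID)
# lines <- lines[grep(patt, lines)]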

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-SX-PDB unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 19
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0 First live version, completely refactores 2016 code
# with remarkable speed gains. Added section on x, y, z
# (density) plots.
@ -28,22 +30,22 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------
#TOC> 1 Introduction to the bio3D package 63
#TOC> 2 A Ramachandran plot 151
#TOC> 3 Density plots 227
#TOC> 3.1 Density-based colours 241
#TOC> 3.2 Plotting with smoothScatter() 260
#TOC> 3.3 Plotting hexbins 275
#TOC> 3.4 Plotting density contours 299
#TOC> 3.4.1 ... as overlay on a colored grid 333
#TOC> 3.4.2 ... as filled contour 350
#TOC> 3.4.3 ... as a perspective plot 381
#TOC> 4 cis-peptide bonds 399
#TOC> 5 H-bond lengths 414
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------------
#TOC> 1 Introduction to the bio3D package 61
#TOC> 2 A Ramachandran plot 152
#TOC> 3 Density plots 228
#TOC> 3.1 Density-based colours 242
#TOC> 3.2 Plotting with smoothScatter() 261
#TOC> 3.3 Plotting hexbins 276
#TOC> 3.4 Plotting density contours 304
#TOC> 3.4.1 ... as overlay on a colored grid 337
#TOC> 3.4.2 ... as filled contour 354
#TOC> 3.4.3 ... as a perspective plot 385
#TOC> 4 cis-peptide bonds 403
#TOC> 5 H-bond lengths 418
#TOC>
#TOC> ==========================================================================
@ -59,9 +61,8 @@
# = 1 Introduction to the bio3D package ===================================
if (! require(bio3d, quietly=TRUE)) {
if (! requireNamespace("bio3d", quietly = TRUE)) {
install.packages("bio3d")
library(bio3d)
}
# Package information:
# library(help = bio3d) # basic information
@ -89,8 +90,8 @@ file.show("./data/1BM8.pdb")
# Are all atoms of the N-terminal residue present?
# Are all atoms of the C-terminal residue present?
apses <- read.pdb("1bm8") # load a molecule directly from the PDB via the
# Internet. (This is not your local version.)
apses <- bio3d::read.pdb("1bm8") # load a molecule directly from the PDB via
# the Internet.
# check what we have:
apses
@ -121,10 +122,11 @@ apses$atom[apses$atom[,"resno"] == i, ]
apses$seqres[1:10] # the "A"s here identify chain "A"
# Can we convert this to one letter code?
aa321(apses$seqres[1:10])
bio3d::aa321(apses$seqres[1:10])
# Lets get the implicit sequence:
aa321((apses$atom$resid[apses$calpha])[1:10]) # Do you understand this code?
bio3d::aa321((apses$atom$resid[apses$calpha])[1:10])
# Do you understand this code?
# Do explicit and implicit sequence have the same length?
length(apses$seqres)
@ -140,7 +142,10 @@ apses$atom[sel, c("eleno", "elety", "resid", "chain", "resno", "insert")]
# The introduction to bio3d tutorial at
# http://thegrantlab.org/bio3d/tutorials/structure-analysis
# has the following example:
plot.bio3d(apses$atom$b[apses$calpha], sse=apses, typ="l", ylab="B-factor")
bio3d::plot.bio3d(apses$atom$b[apses$calpha],
sse=apses,
typ="l",
ylab="B-factor")
# Good for now. Let's do some real work.
@ -149,7 +154,7 @@ plot.bio3d(apses$atom$b[apses$calpha], sse=apses, typ="l", ylab="B-factor")
# Calculate a Ramachandran plot for the structure. The torsion.pdb() function
# calculates all dihedral angles for backbone and sidechain bonds, NA where
# the bond does not exist in an amino acid.
tor <- torsion.pdb(apses)
tor <- bio3d::torsion.pdb(apses)
plot(tor$phi, tor$psi,
xlim = c(-180, 180), ylim = c(-180, 180),
main = "Ramachandran plot for 1BM8",
@ -164,7 +169,7 @@ abline(v = 0, lwd = 0.5, col = "#00000044")
# color the points for glycine residues differently. First, we
# get a vector of glycine residue indices in the structure:
mySeq <- pdbseq(apses)
mySeq <- bio3d::pdbseq(apses)
# Explore the result object and extract the indices of GLY residues.
mySeq == "G"
@ -210,7 +215,7 @@ for (i in 1:nrow(dat)) {
points(dat$phi[i], dat$psi[i], pch=21, cex=0.9, bg="#CC0000")
text(dat$phi[i],
dat$psi[i],
labels = sprintf("%s%d", aa321(dat$resid[i]), dat$resno[i]),
labels = sprintf("%s%d", bio3d::aa321(dat$resid[i]), dat$resno[i]),
pos = 4,
offset = 0.4,
cex = 0.7)
@ -272,9 +277,8 @@ abline(v = 0, lwd = 0.5, col = "#00000044")
# If we wish to approximate values in a histogram-like fashion, we can use
# hexbin()
if (! require(hexbin, quietly=TRUE)) {
if (! requireNamespace("hexbin", quietly = TRUE)) {
install.packages("hexbin")
library(hexbin)
}
# Package information:
# library(help = hexbin) # basic information
@ -285,11 +289,16 @@ if (! require(hexbin, quietly=TRUE)) {
myColorRamp <- colorRampPalette(c("#EEEEEE",
"#3399CC",
"#2266DD"))
plot(hexbin(phi, psi, xbins = 10),
colramp = myColorRamp,
main = "phi-psi Density Bins for 1BM8",
xlab = expression(phi),
ylab = expression(psi))
hexbin::gplot.hexbin(hexbin::hexbin(phi, psi, xbins = 10),
colramp = myColorRamp,
main = "phi-psi Density Bins for 1BM8",
xlab = expression(phi),
ylab = expression(psi))
# Note:
# Had we loaded hexbin with library(hexbin), the call would have been
# dispatched to hexbin's plot method by the plot() generic, and we could
# simply have written:
# plot(hexbin(phi, psi, xbins = 10), ...
# == 3.4 Plotting density contours =========================================
@ -305,19 +314,18 @@ plot(hexbin(phi, psi, xbins = 10),
# distributions. But for 2D data like our phi-psi plots, we need a function from
# the MASS package: kde2d()
if (! require(MASS, quietly=TRUE)) {
if (! requireNamespace("MASS", quietly = TRUE)) {
install.packages("MASS")
library(MASS)
}
# Package information:
# library(help = MASS) # basic information
# browseVignettes("MASS") # available vignettes
# data(package = "MASS") # available datasets
?kde2d
dPhiPsi <-kde2d(phi, psi,
n = 60,
lims = c(-180, 180, -180, 180))
?MASS::kde2d
dPhiPsi <-MASS::kde2d(phi, psi,
n = 60,
lims = c(-180, 180, -180, 180))
str(dPhiPsi)
# This is a list, with gridpoints in x and y, and the estimated densities in z.
@ -326,7 +334,7 @@ str(dPhiPsi)
contour(dPhiPsi)
# === 3.4.1 ... as overlay on a colored grid
# === 3.4.1 ... as overlay on a colored grid
image(dPhiPsi,
col = myColorRamp(100),
@ -343,7 +351,7 @@ abline(h = 0, lwd = 0.5, col = "#00000044")
abline(v = 0, lwd = 0.5, col = "#00000044")
# === 3.4.2 ... as filled contour
# === 3.4.2 ... as filled contour
filled.contour(dPhiPsi,
xlim = c(-180, 180), ylim = c(-180, 180),
@ -374,7 +382,7 @@ filled.contour(dPhiPsi,
abline(v = 0, lwd = 0.5, col = "#00000044")
})
# === 3.4.3 ... as a perspective plot
# === 3.4.3 ... as a perspective plot
persp(dPhiPsi,
xlab = "phi",
@ -469,12 +477,12 @@ ssSelect <- function(PDB, myChain = "A", ssType, myElety) {
# get id's from PDB
x <- atom.select(PDB,
string = "protein",
type = "ATOM",
chain = myChain,
resno = myResno,
elety = myElety)
x <- bio3d::atom.select(PDB,
string = "protein",
type = "ATOM",
chain = myChain,
resno = myResno,
elety = myElety)
return(x$atom)
}
@ -506,7 +514,7 @@ pairDist <- function(PDB, a, b) {
A <- PDB$atom[a, c("x", "y", "z")]
B <- PDB$atom[b, c("x", "y", "z")]
dMat <- dist.xyz(A, B)
dMat <- bio3d::dist.xyz(A, B)
}
return(dMat)

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 05
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0 First ABC units version
# 0.1 First code copied from 2016 material.
#
@ -27,11 +29,11 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ----------------------------------------------------
#TOC> 1 UniProt files via GET 44
#TOC> 1.1 Task - fetchUniProtSeq() function 107
#TOC> 2 Task solutions 114
#TOC> Section Title Line
#TOC> ----------------------------------------------------------
#TOC> 1 UniProt files via GET 41
#TOC> 1.1 Task - fetchUniProtSeq() function 103
#TOC> 2 Task solutions 110
#TOC>
#TOC> ==========================================================================
@ -48,9 +50,8 @@
# a Web browser. Since this is a short and simple request, the GET verb is the
# right tool:
if (! require(httr, quietly=TRUE)) {
if (! requireNamespace("httr", quietly = TRUE)) {
install.packages("httr")
library(httr)
}
# Package information:
# library(help = httr) # basic information
@ -69,7 +70,7 @@ UniProtID <- "P39678"
(URL <- sprintf("http://www.uniprot.org/uniprot/%s.fasta", UniProtID))
# the GET() function from httr will get the data.
response <- GET(URL)
response <- httr::GET(URL)
str(response) # the response object is a bit complex ...
as.character(response) # ... but it is easy to pull out the data.
@ -82,21 +83,21 @@ dbSanitizeSequence(x)
# Simple.
# But what happens if there is an error, e.g. the uniprot ID does not exist?
response <- GET("http://www.uniprot.org/uniprot/X000000.fasta")
response <- httr::GET("http://www.uniprot.org/uniprot/X000000.fasta")
as.character(response)
# this is a large HTML page that tells us the URL was not found. So we need to
# check for errors. The Right way to do this is to evaluate the staus code that
# check for errors. The Right Way to do this is to evaluate the status code that
# every Web server returns for every transaction.
#
status_code(response) # 404 == Page Not Found
httr::status_code(response) # 404 == Page Not Found
# There are many possible codes, but the only code we will be happy with
# is 200 - oK.
# (cf. https://en.wikipedia.org/wiki/List_of_HTTP_status_codes )
URL <- sprintf("http://www.uniprot.org/uniprot/%s.fasta", UniProtID)
response <- GET(URL)
status_code(response)
response <- httr::GET(URL)
httr::status_code(response)
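# In a script, such a check typically gates everything that follows -
# a minimal sketch:
# if (httr::status_code(response) == 200) {
#   x <- strsplit(as.character(response), "\n")
# } else {
#   warning("UniProt request was not successful.")
# }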
# == 1.1 Task - fetchUniProtSeq() function =================================

View File

@ -3,12 +3,13 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Unit_testing unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 15
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace()
# 1.0 New code
#
#
@ -25,13 +26,14 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------
#TOC> 1 Unit Tests with testthat 43
#TOC> 2 Organizing your tests 156
#TOC> 3 Task solutions 181
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------------
#TOC> 1 Unit Tests with testthat 40
#TOC> 2 Organizing your tests 159
#TOC> 2.1 Testing scripts 183
#TOC> 3 Task solutions 198
#TOC>
#TOC> ==========================================================================
@ -39,15 +41,22 @@
# The testthat package supports writing and executing unit tests in many ways.
if (! require(testthat, quietly=TRUE)) {
if (! requireNamespace("testthat", quietly = TRUE)) {
install.packages("testthat")
library(testthat)
}
# Package information:
# library(help = testthat) # basic information
# browseVignettes("testthat") # available vignettes
# data(package = "testthat") # available datasets
# testthat is one of those packages that we either use A LOT in a script,
# or not at all. Therefore it's more reasonable to depart from our usual
# <package>::<function>() idiom, and load the entire library. In fact, if
# we author packages, it is common practice to load testthat in the part
# of the package that automates testing.
library(testthat)
# An atomic test consists of an expectation about the behaviour of a function or
# the existence of an object. testthat provides a number of useful expectations:
@ -171,6 +180,20 @@ test_file("./tests/test_biCode.R")
# .. or execute all the test files in the directory:
test_dir("./tests")
# == 2.1 Testing scripts ===================================================
# Scripts need special consideration since we do not necessarily source() them
# entirely, so fully automated testing is not practical. What you can do
# instead is to place a conditional block at the end of your script that
# never gets executed - then you can manually execute the code in the block
# whenever you wish to test your functions. For example:
if (FALSE) {
# ... your tests go here
}
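# Such a block might contain, for example (a sketch - biCode() is the function
# tested in ./tests/test_biCode.R; the expected value is only illustrative):
#
# if (FALSE) {
#   library(testthat)
#   expect_equal(biCode("Homo sapiens"), "HOMSA")
# }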
# = 3 Task solutions ======================================================

View File

@ -3,12 +3,14 @@
# Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit.
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 05
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0 First ABC units version
# 0.1 First code copied from 2016 material.
#
@ -27,11 +29,11 @@
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------
#TOC> 1 Working with NCBI eUtils 44
#TOC> 1.1 Task - fetchNCBItaxData() function 162
#TOC> 2 Task solutions 169
#TOC> Section Title Line
#TOC> -----------------------------------------------------------
#TOC> 1 Working with NCBI eUtils 41
#TOC> 1.1 Task - fetchNCBItaxData() function 144
#TOC> 2 Task solutions 151
#TOC>
#TOC> ==========================================================================
@ -40,26 +42,11 @@
# To begin, we load some libraries with functions
# we need...
# ... the package httr, which sends and receives information via the http
# protocol, just like a Web browser.
if (! require(httr, quietly=TRUE)) {
install.packages("httr")
library(httr)
}
# Package information:
# library(help = httr) # basic information
# browseVignettes("httr") # available vignettes
# data(package = "httr") # available datasets
# ...plus the package xml2: NCBI's eUtils send information in XML format so we
# need to be able to parse XML.
if (! require(xml2, quietly=TRUE)) {
# To begin, we load the xml2 package that contains functions
# we need to receive and parse the data. NCBI's eUtils send information in
# XML format so we need to be able to parse XML.
if (! requireNamespace("xml2", quietly=TRUE)) {
install.packages("xml2")
library(xml2)
}
# Package information:
# library(help = xml2) # basic information
@ -91,24 +78,23 @@ URL <- paste(eUtilsBase,
# what the response should look like.
URL
# To fetch a response in R, we use the function GET() from the httr package
# To fetch a response in R, we use the function read_xml()
# with our URL as its argument.
myXML <- read_xml(URL)
myXML
(myXML <- xml2::read_xml(URL))
# This is XML. We can take the response apart into
# its individual components with the as_list() function.
as_list(myXML)
xml2::as_list(myXML)
# Note how the XML "tree" is represented as a list of
# lists of lists ...
# If we know exactly what element we are looking for,
# we can extract it from this structure:
as_list(myXML)[["IdList"]][["Id"]][[1]]
xml2::as_list(myXML)[["eSearchResult"]][["IdList"]][["Id"]][[1]]
# But this is not very robust; it would break with the
# slightest change that the NCBI makes to their response
# slightest change that the NCBI makes to their data format -
# and the NCBI changes things A LOT!
# Somewhat more robust is to specify the type of element
@ -116,11 +102,12 @@ as_list(myXML)[["IdList"]][["Id"]][[1]]
# element, and use the XPath XML parsing language to
# retrieve it.
xml_find_all(myXML, "//Id") # returns a "node set"
xml2::xml_find_all(myXML, "//Id") # returns a "node set"
xml_text(xml_find_all(myXML, "//Id")) # returns the contents of the node set
xml2::xml_text(xml2::xml_find_all(myXML, "//Id")) # returns the contents
# of the node set
# We will need doing this a lot, so we write a function
# We will need to do this more than once, so we write a function
# for it...
node2text <- function(doc, tag) {
# an extractor function for the contents of elements
@ -128,8 +115,8 @@ node2text <- function(doc, tag) {
# The contents of all matching elements are returned in
# a vector of strings.
path <- paste0("//", tag)
nodes <- xml_find_all(doc, path)
return(xml_text(nodes))
nodes <- xml2::xml_find_all(doc, path)
return(xml2::xml_text(nodes))
}
# using node2text() ...
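# ... for example (a sketch, using the search result retrieved above):
# node2text(myXML, "Id")   # contents of all <Id> elements, as a character vector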
@ -145,7 +132,7 @@ URL <- paste0(eUtilsBase,
"&id=",
GID,
"&version=2.0")
(myXML <- read_xml(URL))
(myXML <- xml2::read_xml(URL))
(taxID <- node2text(myXML, "TaxId"))
(organism <- node2text(myXML, "Organism"))

View File

@ -22,17 +22,18 @@ setwd("<your/project/directory>")
# ==== PACKAGES ==============================================================
# Load all required packages.
# Check that required packages have been installed. Install if needed.
if (! require(seqinr, quietly=TRUE)) {
if (! requireNamespace("seqinr", quietly=TRUE)) {
install.packages("seqinr")
library(seqinr)
}
# Package information:
# library(help = seqinr) # basic information
# browseVignettes("seqinr") # available vignettes
# data(package = "seqinr") # available datasets
# Note: use package functions with the :: operator - eg.
# seqinr::aaa("K")

View File

@ -9,21 +9,18 @@
# ====== PACKAGES ==============================================================
if (! require(jsonlite, quietly = TRUE)) {
if (! requireNamespace("jsonlite", quietly = TRUE)) {
install.packages("jsonlite")
library(jsonlite)
}
if (! require(httr, quietly = TRUE)) {
if (! requireNamespace("httr", quietly = TRUE)) {
install.packages("httr")
library(httr)
}
if (! require(xml2, quietly = TRUE)) {
if (! requireNamespace("xml2", quietly = TRUE)) {
install.packages("xml2")
library(xml2)
}
@ -226,10 +223,10 @@ dbFetchUniProtSeq <- function(ID) {
URL <- sprintf("http://www.uniprot.org/uniprot/%s.fasta", ID)
response <- GET(URL)
response <- httr::GET(URL)
mySeq <- character()
if (status_code(response) == 200) {
if (httr::status_code(response) == 200) {
x <- as.character(response)
x <- strsplit(x, "\n")
mySeq <- dbSanitizeSequence(x)
@ -253,17 +250,17 @@ dbFetchPrositeFeatures <- function(ID) {
URL <- "https://prosite.expasy.org/cgi-bin/prosite/PSScan.cgi"
response <- POST(URL,
body = list(meta = "opt1",
meta1_protein = "opt1",
seq = ID,
skip = "on",
output = "tabular"))
response <- httr::POST(URL,
body = list(meta = "opt1",
meta1_protein = "opt1",
seq = ID,
skip = "on",
output = "tabular"))
myFeatures <- data.frame()
if (status_code(response) == 200) {
if (httr::status_code(response) == 200) {
lines <- unlist(strsplit(content(response, "text"), "\\n"))
lines <- unlist(strsplit(httr::content(response, "text"), "\\n"))
patt <- sprintf("\\|%s\\|", ID)
lines <- lines[grep(patt, lines)]
@ -289,8 +286,8 @@ node2text <- function(doc, tag) {
# The contents of all matching elements are returned in
# a vector of strings.
path <- paste0("//", tag)
nodes <- xml_find_all(doc, path)
return(xml_text(nodes))
nodes <- xml2::xml_find_all(doc, path)
return(xml2::xml_text(nodes))
}
@ -309,7 +306,7 @@ dbFetchNCBItaxData <- function(ID) {
"db=protein",
"&term=", ID,
sep="")
myXML <- read_xml(URL)
myXML <- xml2::read_xml(URL)
GID <- node2text(myXML, "Id")
URL <- paste0(eUtilsBase,
@ -318,7 +315,7 @@ dbFetchNCBItaxData <- function(ID) {
"&id=",
GID,
"&version=2.0")
myXML <- read_xml(URL)
myXML <- xml2::read_xml(URL)
x <- as.integer(node2text(myXML, "TaxId"))
y <- node2text(myXML, "Organism")
@ -346,14 +343,14 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
# for IDs that are not mapped.
URL <- "https://www.uniprot.org/uploadlists/"
response <- POST(URL,
body = list(from = mapFrom,
to = mapTo,
format = "tab",
query = s))
response <- httr::POST(URL,
body = list(from = mapFrom,
to = mapTo,
format = "tab",
query = s))
if (status_code(response) == 200) { # 200: oK
myMap <- read.delim(file = textConnection(content(response)),
if (httr::status_code(response) == 200) { # 200: oK
myMap <- read.delim(file = textConnection(httr::content(response)),
sep = "\t",
stringsAsFactors = FALSE)
myMap <- myMap[ , c(1,3)]
@ -362,12 +359,23 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
myMap <- data.frame()
warning(paste("No uniProt ID mapping returned:",
"server sent status",
status_code(response)))
httr::status_code(response)))
}
return(myMap)
}
# ====== TESTS =================================================================
if (FALSE) {
if (! requireNamespace("testthat", quietly = TRUE)) {
install.packages("testthat")
}
# ToDo: test everything here
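# A sketch of what one such test could look like (node2text() is defined
# above; the toy XML string is made up for illustration):
doc <- xml2::read_xml("<result><Id>42</Id><Id>43</Id></result>")
testthat::expect_equal(node2text(doc, "Id"), c("42", "43"))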
}
# [END]

View File

@ -3,14 +3,16 @@
# Purpose: Create a list of genome sequenced fungi with protein annotations and
# Mbp1 homologues.
#
# Version: 1.1.2
# Version: 1.2
#
# Date: 2016 09 - 2017 09
# Date: 2016 09 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# V 1.1.2 Moved BLAST.R to ./scripts directory
# V 1.1 Update 2017
# V 1.0 First code 2016
# Versions
# 1.2 Change from require() to requireNamespace()
# 1.1.2 Moved BLAST.R to ./scripts directory
# 1.1 Update 2017
# 1.0 First code 2016
#
# TODO:
#
@ -31,27 +33,25 @@
# the respective intermediate results.
#
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------
#TOC> 1 The strategy 54
#TOC> 2 GOLD species 66
#TOC> 2.1 Initialize 71
#TOC> 2.2 Import 77
#TOC> 2.3 Unique species 129
#TOC> 3 BLAST species 171
#TOC> 3.1 find homologous proteins 178
#TOC> 3.2 Identify species in "hits" 202
#TOC> 4 Intersect GOLD and BLAST species 247
#TOC> 5 Cleanup and finish 265
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------------
#TOC> 1 The strategy 55
#TOC> 2 GOLD species 67
#TOC> 2.1 Initialize 72
#TOC> 2.2 Import 79
#TOC> 2.3 Unique species 131
#TOC> 3 BLAST species 173
#TOC> 3.1 find homologous proteins 180
#TOC> 3.2 Identify species in "hits" 204
#TOC> 4 Intersect GOLD and BLAST species 249
#TOC> 5 Cleanup and finish 267
#TOC>
#TOC> ==========================================================================
#TOC>
#TOC>
# = 1 The strategy ========================================================
# This script will create a list of "MYSPE" species and save it in an R object
@ -70,9 +70,10 @@
# (https://gold.jgi.doe.gov/). Use the data that is hosted at the NCBI.
# == 2.1 Initialize ========================================================
if (! require(httr)) { # httr provides interfaces to Webservers on the Internet
install.packages("httr")
library(httr)
# httr provides interfaces to Webservers on the Internet
if (! requireNamespace("httr", quietly = TRUE)) {
install.packages("httr")
}
# == 2.2 Import ============================================================

View File

@ -15,12 +15,14 @@
# Data: (3 mb) https://downloads.yeastgenome.org/curation/literature/go_slim_mapping.tab
#
#
# Version: 1.0
# Version: 1.1
#
# Date: 2017 10 06
# Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 1.0 First code copied from 2016 material.
#
# TODO:
@ -28,16 +30,16 @@
#
# ==============================================================================
if (! require(readr, quietly = TRUE)) {
if (! requireNamespace("readr", quietly = TRUE)) {
install.packages("readr")
library(readr)
}
# STRING functional interaction data
# Read STRING Data (needs to be downloaded from database, see URL in Notes)
STR <- read_delim("./data/4932.protein.links.full.v10.5.txt", delim = " ")
STR <- readr::read_delim("./data/4932.protein.links.full.v10.5.txt",
delim = " ")
# Subset only IDs and combined_score column
STR <- STR[ , c("protein1", "protein2", "combined_score")]
@ -61,14 +63,14 @@ myIntxGenes <- unique(c(STR$protein1, STR$protein2)) # yeast systematic gene
#
# Read GOSlim data (needs to be downloaded from database, see URL in Notes)
Gsl <- read_tsv("./data/go_slim_mapping.tab",
col_names = c("ID",
"name",
"SGDId",
"Ontology",
"termName",
"termID",
"status"))
Gsl <- readr::read_tsv("./data/go_slim_mapping.tab",
col_names = c("ID",
"name",
"SGDId",
"Ontology",
"termName",
"termID",
"status"))
# head(Gsl)
#

View File

@ -7,11 +7,13 @@
# https://ncbi.github.io/blast-cloud/dev/api.html
#
#
# Version: 3
# Date: 2016 09 - 2017 11
# Version: 3.1
# Date: 2016 09 - 2019 01
# Author: Boris Steipe
#
# Versions:
# 3.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
# 3 parsing logic had not been fully implemented; Fixed.
# 2.1 bugfix in BLAST(), bug was blanking non-split deflines;
# refactored parseBLASTalignment() to handle lists with multiple hits.
@ -29,9 +31,8 @@
# ==============================================================================
if (! require(httr, quietly = TRUE)) {
if (! requireNamespace("httr", quietly = TRUE)) {
install.packages("httr")
library(httr)
}
@ -92,13 +93,13 @@ BLAST <- function(q,
}
# send it off ...
response <- GET(results$query)
if (http_status(response)$category != "Success" ) {
response <- httr::GET(results$query)
if (httr::http_status(response)$category != "Success" ) {
stop(sprintf("PANIC: Can't send query. BLAST server status error: %s",
http_status(response)$message))
httr::http_status(response)$message))
}
txt <- content(response, "text", encoding = "UTF-8")
txt <- httr::content(response, "text", encoding = "UTF-8")
patt <- "RID = (\\w+)" # match the request id
results$rid <- regmatches(txt, regexec(patt, txt))[[1]][2]
@ -127,13 +128,13 @@ BLAST <- function(q,
while (TRUE) {
# Check whether the result is ready
response <- GET(checkStatus)
if (http_status(response)$category != "Success" ) {
response <- httr::GET(checkStatus)
if (httr::http_status(response)$category != "Success" ) {
stop(sprintf("PANIC: Can't check status. BLAST server status error: %s",
http_status(response)$message))
httr::http_status(response)$message))
}
txt <- content(response, "text", encoding = "UTF-8")
txt <- httr::content(response, "text", encoding = "UTF-8")
if (length(grep("Status=WAITING", txt)) > 0) {
myTimeout <- myTimeout - EXTRAWAIT
@ -184,13 +185,13 @@ BLAST <- function(q,
"&FORMAT_TYPE=Text",
sep = "")
response <- GET(retrieve)
if (http_status(response)$category != "Success" ) {
response <- httr::GET(retrieve)
if (httr::http_status(response)$category != "Success" ) {
stop(sprintf("PANIC: Can't retrieve. BLAST server status error: %s",
http_status(response)$message))
httr::http_status(response)$message))
}
txt <- content(response, "text", encoding = "UTF-8")
txt <- httr::content(response, "text", encoding = "UTF-8")
# txt contains the whole set of results. Process:
@ -357,7 +358,7 @@ parseBLASTalignment <- function(hit) {
# ==== TESTS ===================================================================
# define query:
# q <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain sequence
# q <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
# "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
# "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
# sep="")