From 9ac45565f44b1bf4b439a6f494368e350c3e5e28 Mon Sep 17 00:00:00 2001
From: hyginn <boris.steipe@utoronto.ca>
Date: Tue, 3 Oct 2017 23:38:48 -0400
Subject: [PATCH] Changing "YFO" to "MYSPE"

---
 .init.R                                  |   9 ++++
 ABC-makeYFOlist.R => ABC-makeMYSPElist.R |  22 ++++-----
 BIN-ALI-Dotplot.R                        |  32 ++++++-------
 BIN-ALI-Optimal_sequence_alignment.R     |  22 ++++-----
 BIN-FUNC-Domain_annotation.R             |   2 +-
 BIN-YFO.R => BIN-MYSPE.R                 |  44 ++++++++---------
 BIN-PHYLO-Data_preparation.R             |   2 +-
 BIN-PHYLO-Tree_analysis.R                |   2 +-
 BIN-SEQA-Comparison.R                    |   2 +-
 BIN-Storing_data.R                       |  58 +++++++++++------------
 data/MYSPEspecies.RData                  | Bin 0 -> 2565 bytes
 data/YFOspecies.RData                    | Bin 2563 -> 0 bytes
 12 files changed, 102 insertions(+), 93 deletions(-)
 rename ABC-makeYFOlist.R => ABC-makeMYSPElist.R (96%)
 rename BIN-YFO.R => BIN-MYSPE.R (67%)
 create mode 100644 data/MYSPEspecies.RData
 delete mode 100644 data/YFOspecies.RData

diff --git a/.init.R b/.init.R
index 60c20cb..33551f1 100644
--- a/.init.R
+++ b/.init.R
@@ -24,6 +24,15 @@ if (! file.exists(".myProfile.R")) {
   rm(e, n, conn)
 }
 
+# Patch YFO -> MYSPE in an existing .myProfile.R if necessary:
+tmp <- readLines(".myProfile.R")
+idx <- grep("^YFO", tmp)             # lines that start with the old name
+if (length(idx) > 0) {
+  tmp[idx] <- sub("^YFO", "MYSPE", tmp[idx])
+  writeLines(tmp, ".myProfile.R")    # rewrite only if something changed
+}
+rm(tmp, idx)                         # leave no helper objects behind
+
 source(".myProfile.R")
 
 source(".utilities.R")
diff --git a/ABC-makeYFOlist.R b/ABC-makeMYSPElist.R
similarity index 96%
rename from ABC-makeYFOlist.R
rename to ABC-makeMYSPElist.R
index 505d7b0..4b7cc2d 100644
--- a/ABC-makeYFOlist.R
+++ b/ABC-makeMYSPElist.R
@@ -1,9 +1,9 @@
-# ABC_makeYFOlist.R
+# ABC_makeMYSPElist.R
 #
 # Purpose:  Create a list of genome sequenced fungi with protein annotations and
 #               Mbp1 homologues.
 #
-# Version: 1.1
+# Version: 1.1.1
 #
 # Date:    2016 09 - 2017 09
 # Author:  Boris Steipe (boris.steipe@utoronto.ca)
@@ -29,9 +29,9 @@
 # those parts. If you only want to study the general workflow, just load()
 # the respective intermediate results.
 #
- 
+
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                               Line
 #TOC> ---------------------------------------------------
 #TOC>   1        The strategy                          54
@@ -44,17 +44,17 @@
 #TOC>   3.2      Identify species in "hits"           202
 #TOC>   4        Intersect GOLD and BLAST species     247
 #TOC>   5        Cleanup and finish                   265
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
- 
+
 
 #TOC>
 #TOC>
 
 # =    1  The strategy  ========================================================
 
-# This script will create a list of "YFO" species and save it in an R object
-# YFOspecies that is stored in the data subdirectory of this project from where
+# This script will create a list of "MYSPE" species and save it in an R object
+# MYSPEspecies that is stored in the data subdirectory of this project from where
 # it can be loaded. The strategy is as follows: we download a list of all
 # genome projects and then select species for which protein annotations are
 # available - i.e. these are all genome-sequenced species that have been
@@ -251,7 +251,7 @@ length(BLASTspecies)
 # etc. See here:
 ?union
 
-YFOspecies <- intersect(GOLDspecies, BLASTspecies)
+MYSPEspecies <- intersect(GOLDspecies, BLASTspecies)
 
 # Again: interpret this:
 #  - what is the number of GOLDspecies?
@@ -272,9 +272,9 @@ YFOspecies <- intersect(GOLDspecies, BLASTspecies)
 
 REFspecies
 
-YFOspecies <- sort(setdiff(YFOspecies, REFspecies))
+MYSPEspecies <- sort(setdiff(MYSPEspecies, REFspecies))
 
-# save(YFOspecies, file = "data/YFOspecies.RData")
+# save(MYSPEspecies, file = "data/MYSPEspecies.RData")
 
 
 
diff --git a/BIN-ALI-Dotplot.R b/BIN-ALI-Dotplot.R
index dd2fffd..d17361f 100644
--- a/BIN-ALI-Dotplot.R
+++ b/BIN-ALI-Dotplot.R
@@ -46,31 +46,31 @@ data(BLOSUM62)
 sel <- myDB$protein$name == "MBP1_SACCE"
 MBP1_SACCE <- s2c(myDB$protein$sequence[sel])
 
-sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "")
-MBP1_YFO <- s2c(myDB$protein$sequence[sel])
+sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
+MBP1_MYSPE <- s2c(myDB$protein$sequence[sel])
 
 # Check that we have two character vectors of the expected length.
 str(MBP1_SACCE)
-str(MBP1_YFO)
+str(MBP1_MYSPE)
 
 # How do we get the pairscore values? Consider: a single pair of amino acids can
-# be obtained from sequence SACCE and YFO eg. from position 13 and 21 ...
+# be obtained from sequence SACCE and MYSPE eg. from position 13 and 21 ...
 MBP1_SACCE[13]
-MBP1_YFO[21]
+MBP1_MYSPE[21]
 
 # ... using these as subsetting expressions, we can pull the pairscore
 # from the MDM
-BLOSUM62[MBP1_SACCE[13], MBP1_YFO[21]]
+BLOSUM62[MBP1_SACCE[13], MBP1_MYSPE[21]]
 
 # First we build an empty matrix that will hold all pairscores ...
-dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_YFO)),
-                 nrow = length(MBP1_SACCE), ncol = length(MBP1_YFO))
+dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_MYSPE)),
+                 nrow = length(MBP1_SACCE), ncol = length(MBP1_MYSPE))
 
 # ... then we loop over the sequences and store the scores in the matrix.
 #
 for (i in 1:length(MBP1_SACCE)) {
-  for (j in 1:length(MBP1_YFO)) {
-    dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_YFO[j]]
+  for (j in 1:length(MBP1_MYSPE)) {
+    dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_MYSPE[j]]
   }
 }
 
@@ -80,7 +80,7 @@ for (i in 1:length(MBP1_SACCE)) {
 dotMat[1:10, 1:10]
 
 # Rows in this matrix correspond to an amino acid from MBP1_SACCE, columns in
-# the matrix correspond to an amino acid from MBP1_YFO.
+# the matrix correspond to an amino acid from MBP1_MYSPE.
 
 # To plot this, we use the image() function. Here, with default parameters.
 
@@ -110,13 +110,13 @@ image(x = 1:200, y = 1:200,  dotMat[1:200, 1:200], ylim=c(200,1))
 
 # ... and labels! Axis labels would be nice ...
 image(x = 1:200, y = 1:200,  dotMat[1:200, 1:200], ylim=c(200,1),
-      xlab = "MBP1_YFO", ylab = "MBP1_SACCE" )
+      xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE" )
 
 # ... and why don't we have axis-numbers on all four sides? Go, make that right
 # too ...
 len <- 200
 image(x = 1:len, y = 1:len,  dotMat[1:len, 1:len], ylim=c(len,1),
-      xlab = "MBP1_YFO", ylab = "MBP1_SACCE", axes = FALSE)
+      xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE", axes = FALSE)
 box()
 axis(1, at = c(1, seq(10, len, by=10)))
 axis(2, at = c(1, seq(10, len, by=10)))
@@ -129,8 +129,8 @@ axis(4, at = c(1, seq(10, len, by=10)))
 # utilities file and called it dotPlot2(). Why not dotPlot() ... that's because
 # there already is a dotplot function in the seqinr package:
 
-dotPlot(MBP1_SACCE, MBP1_YFO)                                 # seqinr
-dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO")  # Our's
+dotPlot(MBP1_SACCE, MBP1_MYSPE)                                 # seqinr
+dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE")  # Our's
 
 # Which one do you prefer? You can probably see the block patterns that arise
 # from segments of repetitive, low complexity sequence. But you probably have to
@@ -153,7 +153,7 @@ myFilter[5, ] <- c( 0, 0, 0, 0, 1)
 
 # I have added the option to read such filters (or others that you could define on your own) as a parameter of the function.
 
-dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO", f = myFilter)
+dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE", f = myFilter)
 
 # I think the result shows quite nicely how the two sequences are globally
 # related and where the regions of sequence similarity are. Play with this a bit
diff --git a/BIN-ALI-Optimal_sequence_alignment.R b/BIN-ALI-Optimal_sequence_alignment.R
index ac173b1..8f0f6a4 100644
--- a/BIN-ALI-Optimal_sequence_alignment.R
+++ b/BIN-ALI-Optimal_sequence_alignment.R
@@ -52,8 +52,8 @@ toString(s)      # using the Biostrings function toString()
 sel <- myDB$protein$name == "MBP1_SACCE"
 aaMBP1_SACCE <- AAString(myDB$protein$sequence[sel])
 
-sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "")
-aaMBP1_YFO <-   AAString(myDB$protein$sequence[sel])
+sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
+aaMBP1_MYSPE <-   AAString(myDB$protein$sequence[sel])
 
 ?pairwiseAlignment
 
@@ -61,7 +61,7 @@ aaMBP1_YFO <-   AAString(myDB$protein$sequence[sel])
 # Global optimal alignment with end-gap penalties is default. (like EMBOSS needle)
 ali1 <-  pairwiseAlignment(
   aaMBP1_SACCE,
-  aaMBP1_YFO,
+  aaMBP1_MYSPE,
   substitutionMatrix = "BLOSUM62",
   gapOpening = 10,
   gapExtension = 0.5)
@@ -110,7 +110,7 @@ percentID(ali1)
 # Compare with local optimal alignment (like EMBOSS Water)
 ali2 <-  pairwiseAlignment(
   aaMBP1_SACCE,
-  aaMBP1_YFO,
+  aaMBP1_MYSPE,
   type = "local",
   substitutionMatrix = "BLOSUM62",
   gapOpening = 50,
@@ -135,7 +135,7 @@ percentID(ali2)
 #        PART FOUR: APSES Domain annotation by alignment
 # ==============================================================================
 
-# In this section we define the YFO APSES sequence by performing a global,
+# In this section we define the MYSPE APSES sequence by performing a global,
 # optimal sequence alignment of the yeast domain with the full length protein
 # sequence of the protein that was the most similar to the yeast APSES domain.
 #
@@ -190,11 +190,11 @@ aaMB1_SACCE_APSES <- AAString(dbGetFeatureSequence(myDB,
                                                    "MBP1_SACCE",
                                                    "APSES fold"))
 
-# To align, we need the YFO sequence. Here is it's definition again, just
+# To align, we need the MYSPE sequence. Here is its definition again, just
 # in case ...
 
-sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "")
-aaMBP1_YFO <- AAString(myDB$protein$sequence[sel])
+sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
+aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
 
 # Now let's align these two sequences of very different length without end-gap
 # penalties using the "overlap" type. "overlap" turns the
@@ -203,7 +203,7 @@ aaMBP1_YFO <- AAString(myDB$protein$sequence[sel])
 
 aliApses <-  pairwiseAlignment(
   aaMB1_SACCE_APSES,
-  aaMBP1_YFO,
+  aaMBP1_MYSPE,
   type = "overlap",
   substitutionMatrix = "BLOSUM62",
   gapOpening = 10,
@@ -237,7 +237,7 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1
 # right away and store it in myDB.  Copy the code-template below to your
 # myCode.R file, edit it to replace the placeholder items with your data:
 #
-#  - The <PROTEIN ID> is to be replaced with the ID of MBP1_YFO
+#  - The <PROTEIN ID> is to be replaced with the ID of MBP1_MYSPE
 #  - The <FEATURE ID> is to be replaced with the ID of "APSES fold"
 #  - <START> and <END> are to be replaced with the coordinates you got above
 #
@@ -277,7 +277,7 @@ myDB$proteinAnnotation[nrow(myDB$proteinAnnotation), ]
 # If this is correct, save it
 save(myDB, file = "myDB.02.RData")  # Note that it gets a new version number!
 
-# Done with this part. Copy the sequence of the APSES domain of MBP1_<YFO> - you
+# Done with this part. Copy the sequence of the APSES domain of MBP1_MYSPE - you
 # need it for the reverse BLAST search, and return to the course Wiki.
 
 
diff --git a/BIN-FUNC-Domain_annotation.R b/BIN-FUNC-Domain_annotation.R
index 9384510..250e0c8 100644
--- a/BIN-FUNC-Domain_annotation.R
+++ b/BIN-FUNC-Domain_annotation.R
@@ -43,7 +43,7 @@ save(myDB, file = "myDB.04.RData") # save the new version
 # from your myCode.R script. Here is again the table of feature IDs:
 myDB$feature[ , c("ID", "name", "description")]
 
-# Add every SMART annotated feaure for MBP1_YFO to the database. If you make
+# Add every SMART annotated feature for MBP1_MYSPE to the database. If you make
 # mistakes, just reload the latest version (probably "myDB.04.RData"), then run
 # your corrected annotation script again. Execute ...
 myDB$proteinAnnotation
diff --git a/BIN-YFO.R b/BIN-MYSPE.R
similarity index 67%
rename from BIN-YFO.R
rename to BIN-MYSPE.R
index 7cc06d7..7963185 100644
--- a/BIN-YFO.R
+++ b/BIN-MYSPE.R
@@ -1,15 +1,15 @@
-# BIN-YFO.R
+# BIN-MYSPE.R
 #
 # Purpose: A Bioinformatics Course:
-#              R code accompanying the BIN-YFO unit
+#              R code accompanying the BIN-MYSPE unit
 #
 # Version: 1.0
 #
 # Date:    2017  09  21
 # Author:  Boris Steipe (boris.steipe@utoronto.ca)
 #
-# V 1.0    Final code, after rewriting BLAST parser and creating current YFOlist
-# V 0.1    First code copied from BCH441_A03_makeYFOlist.R
+# V 1.0    Final code, after rewriting BLAST parser and creating current MYSPElist
+# V 0.1    First code copied from BCH441_A03_makeMYSPElist.R
 #
 # TODO:
 #
@@ -23,17 +23,17 @@
 #  going on. That's not how it works ...
 #
 # ==============================================================================
- 
+
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                   Line
 #TOC> ---------------------------------------
 #TOC>   1        Preparations              38
-#TOC>   2        Suitable YFO Species      50
-#TOC>   3        Adopt "YFO"               64
-#TOC> 
+#TOC>   2        Suitable MYSPE Species    50
+#TOC>   3        Adopt "MYSPE"             64
+#TOC>
 #TOC> ==========================================================================
- 
+
 
 # =    1  Preparations  ========================================================
 #
@@ -47,39 +47,39 @@ if (! exists("myStudentNumber")) {
 }
 
 
-# =    2  Suitable YFO Species  ================================================
+# =    2  Suitable MYSPE Species  ==============================================
 
 
 # In this unit we will select one species from a list of genome sequenced fungi
 # and write it into your personalized profile file. This species will be called
-# "YFO" (Your Favourite Organism) for other learning units and exercises.
+# "MYSPE" (My Species) for other learning units and exercises.
 
 # A detailed description of the process of compiling the list of genome
 # sequenced fungi with protein annotations and Mbp1 homologues is in the file
-# ABC-makeYFOlist.R
+# ABC-makeMYSPElist.R
 
-# Task: Study ABC-makeYFOlist.R, it implements a rather typical workflow of
+# Task: Study ABC-makeMYSPElist.R, it implements a rather typical workflow of
 # selecting and combining data from various public-domain data resources.
 
-# =    3  Adopt "YFO"  =========================================================
+# =    3  Adopt "MYSPE"  =======================================================
 
 
 # In the code below, we load the resulting vector of species name, then pick one
 # of them in a random but reproducible way, determined by your student number.
 
-load("data/YFOspecies.RData")  # load the species names
-set.seed(myStudentNumber)      # seed the random number generator
-YFO <- sample(YFOspecies, 1)   # pick a species at random
+load("data/MYSPEspecies.RData")     # load the species names
+set.seed(myStudentNumber)           # seed the random number generator
+MYSPE <- sample(MYSPEspecies, 1)    # pick a species at random
 # write the result to your personalized profile data so we can use the result in
 # other functions
-cat(sprintf("YFO <- \"%s\"\n", YFO), file = ".myProfile.R", append = TRUE)
+cat(sprintf("MYSPE <- \"%s\"\n", MYSPE), file = ".myProfile.R", append = TRUE)
 
-YFO         # so, which species is it ... ?
-biCode(YFO) # and what is it's "BiCode" ... ?
+MYSPE         # so, which species is it ... ?
+biCode(MYSPE) # and what is it's "BiCode" ... ?
 
 # Task: Note down the species name and its five letter label on your Student
 # Wiki user page. Use this species whenever this or future assignments refer
-# to YFO. In code, we will automatically load it from your.myProfile.R file.
+# to MYSPE. In code, we will automatically load it from your .myProfile.R file.
 
 
 # [END]
diff --git a/BIN-PHYLO-Data_preparation.R b/BIN-PHYLO-Data_preparation.R
index cd712a5..c376ae5 100644
--- a/BIN-PHYLO-Data_preparation.R
+++ b/BIN-PHYLO-Data_preparation.R
@@ -41,7 +41,7 @@ list.files(pattern = "myDB.*")
 load("myDB.05.RData")
 
 # The database contains the ten Mbp1 orthologues from the reference species
-# and the Mbp1 RBM for YFO.
+# and the Mbp1 RBM for MYSPE.
 #
 # We will construct a phylogenetic tree from the proteins' APSES domains.
 # You have annotated their ranges as a feature.
diff --git a/BIN-PHYLO-Tree_analysis.R b/BIN-PHYLO-Tree_analysis.R
index 3eb0073..918e62a 100644
--- a/BIN-PHYLO-Tree_analysis.R
+++ b/BIN-PHYLO-Tree_analysis.R
@@ -156,7 +156,7 @@ layout(matrix(1), widths=1.0, heights=1.0)
 # ... or we can plot the tree so it corresponds as well as possible to a
 # predefined tip ordering. Here we use the ordering that NCBI Global Tree
 # returns for the reference species - we have used it above to make the vector
-# apsMbp1Names. You inserted your YFO name into that vector - but you should
+# apsMbp1Names. You inserted your MYSPE name into that vector - but you should
 # move it to its correct position in the cladogram.
 
 # (Nb. we need to reverse the ordering for the plot. This is why we use the
diff --git a/BIN-SEQA-Comparison.R b/BIN-SEQA-Comparison.R
index 175183b..b057e08 100644
--- a/BIN-SEQA-Comparison.R
+++ b/BIN-SEQA-Comparison.R
@@ -39,7 +39,7 @@ help(package = seqinr) # shows the available functions
 ?computePI
 
 # This takes as input a vector of upper-case AA codes
-# Let's retrieve the YFO sequence from our datamodel
+# Let's retrieve the MYSPE sequence from our datamodel
 # (assuming it is the last one that was added):
 
 db$protein[nrow(db$protein), "sequence"]
diff --git a/BIN-Storing_data.R b/BIN-Storing_data.R
index fc1f0f1..55a5d0e 100644
--- a/BIN-Storing_data.R
+++ b/BIN-Storing_data.R
@@ -23,9 +23,9 @@
 #  going on. That's not how it works ...
 #
 # ==============================================================================
- 
+
 #TOC> ==========================================================================
-#TOC> 
+#TOC>
 #TOC>   Section  Title                                        Line
 #TOC> ------------------------------------------------------------
 #TOC>   1        A Relational Datamodel in R: review            55
@@ -48,9 +48,9 @@
 #TOC>   3.3      Create an R script to create the database     522
 #TOC>   3.3.1    Check and validate                            542
 #TOC>   3.4      Task: submit for credit (part 2/2)            583
-#TOC> 
+#TOC>
 #TOC> ==========================================================================
- 
+
 
 # =    1  A Relational Datamodel in R: review  =================================
 
@@ -203,7 +203,7 @@ str(philDB)
 # go back, re-read, play with it, and ask for help. This is essential.
 
 
-# ===  1.1.1  completing the database                  
+# ===  1.1.1  completing the database
 
 
 # Next I'll add one more person, and create the other two tables:
@@ -362,7 +362,7 @@ dbSanitizeSequence(x)
 
 # ==   2.3  Create a protein table for our data model  =========================
 
-# ===  2.3.1  Initialize the database                  
+# ===  2.3.1  Initialize the database
 
 
 # The function dbInit contains all the code to return a list of empty
@@ -374,7 +374,7 @@ myDB <- dbInit()
 str(myDB)
 
 
-# ===  2.3.2  Add data                                 
+# ===  2.3.2  Add data
 
 
 # fromJSON() returns a dataframe that we can readily process to add data
@@ -421,7 +421,7 @@ source("./scripts/ABC-createRefDB.R")
 str(myDB)
 
 
-# ===  2.4.1  Examples of navigating the database      
+# ===  2.4.1  Examples of navigating the database
 
 
 # You can look at the contents of the tables in the usual way we access
@@ -468,8 +468,8 @@ myDB$taxonomy$species[sel]
 # =    3  Add your own data  ===================================================
 
 
-# You have chosen an organism as "YFO", and you final task will be to find the
-# protein in YFO that is most similar to yeast Mbp1 and enter its information
+# You have chosen an organism as "MYSPE", and your final task will be to find the
+# protein in MYSPE that is most similar to yeast Mbp1 and enter its information
 # into the database.
 
 
@@ -483,7 +483,7 @@ myDB$taxonomy$species[sel]
 #   Protein BLAST.
 # - Enter NP_010227 into the "Query Sequence" field.
 # - Choose "Reference proteins (refseq_protein)" as the "Database".
-# - Paste the YFO species name into the "Organism" field.
+# - Paste the MYSPE species name into the "Organism" field.
 #
 # - Click "BLAST".
 
@@ -493,28 +493,28 @@ myDB$taxonomy$species[sel]
 
 # Otherwise, look for the top-hit in the "Alignments" section. In some cases
 # there will be more than one hit with nearly similar E-values. If this is the
-# case for YFO, choose the one with the higher degree of similarity (more
+# case for MYSPE, choose the one with the higher degree of similarity (more
 # identities) with the N-terminus of the query - i.e. the Query sequence of
 # the first ~ 100 amino acids.
 
 # -  Follow the link to the protein data page, linked from "Sequence ID".
 # -  From there, in a separate tab, open the link to the taxonomy database page
-#      for YFO which is linked from the "ORGANISM" record.
+#      for MYSPE which is linked from the "ORGANISM" record.
 
 
 # ==   3.2  Put the information into JSON files  ===============================
 
 
 # - Next make a copy of the file "./data/MBP1_SACCE.json" in your project
-#     directory and give it a new name that corresponds to YFO - e.g. if
-#     YFO is called "Crptycoccus neoformans", your file should be called
+#     directory and give it a new name that corresponds to MYSPE - e.g. if
+#     MYSPE is called "Cryptococcus neoformans", your file should be called
 #     "MBP1_CRYNE.json"; in that case "MBP1_CRYNE" would also be the
 #     "name" of your protein. Open the file in the RStudio editor and replace
 #     all of the MBP1_SACCE data with the corresponding data of your protein.
 #
-# - Do a similar thing for the YFO taxonomy entry. Copy
-#     "./data/refTaxonomy.json" and make a new file named "YFOtaxonomy.json".
-#     Create a valid JSON file with only one single entry - that of YFO.
+# - Do a similar thing for the MYSPE taxonomy entry. Copy
+#     "./data/refTaxonomy.json" and make a new file named "MYSPEtaxonomy.json".
+#     Create a valid JSON file with only one single entry - that of MYSPE.
 #
 # - Validate your two files online at https://jsonlint.com/
 
@@ -529,7 +529,7 @@ myDB$taxonomy$species[sel]
 # - than add the two commands that add your protein and taxonomy data,
 #     they should look like:
 #     myDB <- dbAddProtein(    myDB, fromJSON("MBP1_<code>.json"))
-#     myDB <- dbAddTaxonomy(   myDB, fromJSON("YFOtaxonomy.json"))
+#     myDB <- dbAddTaxonomy(   myDB, fromJSON("MYSPEtaxonomy.json"))
 #
 # - save the file and source() it:
 #     source("makeProteinDB.R")
@@ -539,12 +539,12 @@ myDB$taxonomy$species[sel]
 # in any of the JSON files. Later you will add more information ...
 
 
-# ===  3.3.1  Check and validate                       
+# ===  3.3.1  Check and validate
 
 
-# Is your protein named according to the pattern "MBP1_<YFO>"? It should be.
+# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
 # And does the taxonomy table contain the systematic name? It should be the same
-# that you get when you type YFO into the console.
+# that you get when you type MYSPE into the console.
 
 # Let's compute sequence lengths on the fly (with the function nchar() ), and
 # open this with the table viewer function View()
@@ -562,18 +562,18 @@ View(cbind(myDB$protein[ , c("ID", "name", "RefSeqID")],
 myDB$protein$sequence[nrow(myDB$protein)]
 
 # If not, don't continue! Fix the problem first.
-# Let me repeat: If this does not give you the right sequence of the YFO
+# Let me repeat: If this does not give you the right sequence of the MYSPE
 #                Mbp1 homologue, DO NOT CONTINUE. Fix the problem.
 
-# Is that the right taxonomy ID and binomial name for YFO?
-sel <- myDB$taxonomy$species == YFO
+# Is that the right taxonomy ID and binomial name for MYSPE?
+sel <- myDB$taxonomy$species == MYSPE
 myDB$taxonomy[sel, ]
 
 # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE.
 # Fix the problem first.
 
-# Does this give you the right refseq ID for MBP1_<YFO>?
-sel <- myDB$protein$name == paste0("MBP1_", biCode(YFO))
+# Does this give you the right refseq ID for MBP1_MYSPE?
+sel <- myDB$protein$name == paste0("MBP1_", biCode(MYSPE))
 myDB$protein$RefSeqID[sel]
 
 # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE.
@@ -589,8 +589,8 @@ myDB$protein$RefSeqID[sel]
 #     page on the Student Wiki
 # - Execute the two commands below and show the result on your submission page
 
-biCode(myDB$taxonomy$species) %in% biCode(YFO)
-myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == YFO)]
+biCode(myDB$taxonomy$species) %in% biCode(MYSPE)
+myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == MYSPE)]
 
 # That is all.
 
diff --git a/data/MYSPEspecies.RData b/data/MYSPEspecies.RData
new file mode 100644
index 0000000000000000000000000000000000000000..0fbbccbe24c2d0fad75d54831289ea205b52df82
GIT binary patch
literal 2565
zcmV+g3i|aQiwFP!0000019e#KcHFiRRUH3m(k7O6wUTTn-51E8zJVlv_1JRvcvZ(q
zPk&%REC~q&SO8p$`<{KBp56gT0F;z-Y)j_F$6zpX=MFx-`trB$zI=CZaPZ{d+o#_g
zJoy&CK7E3JzBzb$@F)EI^VM(b)oa%XE``JYpW*L+2kggxT{2zBg8h5RS}Wb~`C0nx
zg>K{b!}F3^$y>MFNY@zKj==m}d0{m`wq*UUo(<U9x%QQ1-0;qM>6Tk%IB%74<A9TM
ziveb1yzSi5C|Md>e1xC%a3O5Xmc}zNd_OvO+{i+(rENFDaiK?Rj?ZygTS}#1DZ^fZ
z`OMS|<cjV72-`}>D`92YF2gXdeRR8RTgx(A@-PRrEQNh+J#A_kr%VuIyL~+JEJxsF
zE8?WZd25B)FeeMy)=SN5xsh?=*%Wo{rc{herbIkLOBbuc4fgOR-oVKP+uVqq6iTtB
z)W%6~cIkqCzF;2rTNz$;-qQMo&;>4(d2AOAykKo%q{jLUgL&{Q$ItQ17lO4rLA%3B
zClm7QLN<o%a?6@^X}#n0-5G0cY2`R+mcX6l-q_@d!vyVx<oGq7QZeUE-ErZT@G-A2
z#!DIPp!lQ!=28nQqJ2ZEAnrGGq8Ll#_oCe>7>DpowvU)WXq(1KWP^M#(KEegUs*o!
zC}P$$*iEI>ykw0v<XsMLWa*ax(a3PP(MOjR6P^OEUX}{!=h}L7MFx4~Ky8{zstBK5
zDpt@ZrtmKU0e7Z-=Kkjo25@GrgcYMVPalksb=~S>T;b&dym74V?O=asMkh)uT-@Xs
z19pL^*b*5d;m{NZ^GnbW;*%)E(eBcxSe;-EsH$Ktv)w2K_aN4J1rKc{;wxME=X*Gu
zD*~hS>qQ^k8)cEQIuFPf_eNB*ETyL0h&OR`37?J*YQ0ETun0)l6;l~Po`Nvly|D#i
zp+Qnf1;-@rKfSbF;|(X027A_>GD_$~XtN0ps!J*I*UJ_z7QMk?Z==EIf|W!X(dgsL
zZX+zrDUe5OdBX~s0%LL6H5G_vIFx7+=qx7N;uYh*Q-Th|mKB2`_;1YmS7O6#H>B_u
z4A#X^m_QZMRHQv;ugG2m01o8-(}ERa79YKmjWR`_RzAO-*=t=Ggmr^6HV7`2^5gWi
z6}3U)aPmFx>J1CT8p(EoquRNyiN>5vKnJejAkMh<_pe*uHHhbw;q%vbeHMU&V113P
zQWzvG6i_d7PM#&S7H*PU#|auLGEILt&o#Kbt+0>GyA#@Mq=DBW_(pDUpG3z?i))|A
zfYUd01+qtLQAjFq2&YK2ZwzjYBN&#TUmlNtlD^li^0GD-u@jd)N^m1km7EzpJ%7_W
z7Sf~7J;7{BtcaPrE=Zjbc^;9*2~@O)b5G+46f~S3p)tw)#akxYnrS^mBhrONjePwU
zu>xb2*_$rs{`p(tk*4Y>(lCi_MBH-z76+pxDNu>@%5)=V?*u$K6n_M(gls6O(vRV|
zc!xS?8dSfUh`MnAeOme*5|WBdMYXxLK4bBtzq=q9AbJcHVHYp!^gUCs$Lu?-2_=F2
zm{!2|Y!Auc0wpimb|MDm?^WBucY}YRRD3qk@LRKEy%wnvl`KiZ`Fm5dl0FdHRBXFV
zN&fyTEDr$GqL;|zn2!l6#0^4gA`23T1C`e7CI|T;!ITMOtbSme2iI(9Ei;O~Q+x;c
zaNKWSS3T&_Ceg6?AV6sjmIo<RlXy+&iw{(#shsIj_A#Ix{N$33JNsaYB6?0JgOL-Z
zessl3jdB3tA*SLI2tvS#JvQ$3K)lQhX>fpUpps~HhP$qAh%b{e!MfHtI7w3^v^r(<
z^DCiP-MCPnsj-m1G^S})qOaMNKy9kzJ`^!-Wg^0P4)yCs>@de_c9W!f4j?CxpQ)~&
z0fgk&^45IqZ>|Nq6<H^9B|7*v#B|wGTGJImkc}(+a3$|B6RAj2Mnv}+;r4R46DTES
zDa10P;2cF|+=rTHjZJOGvyYV3`*0~;lM?LsBSX^ddSf_*E7inULvpZS(h5Ccl8=aS
ziZ6s2?lIJImN2tLISch29LMY~x^?l9%5kr!yg>9#io)U_A@e9iEa{Cd5OJj8h~7tj
zTCva_Cc*UxUz|J`q*}j^2Yx{?WlhSR5cPR^p&{U#(&*s`+7ubDAFrSY0#{Q5BTI;)
zj0AH~+@XO<a^@(<s1=R*I$MEgAWlIx(D>9*MwUNWk+Fzq)OsDF6y#B7#(*UdYMgYq
zAoZ-%NzPV5@<P2;p#pQT^VZlp^#ZF*6GNb-NL*DNAL4MM*sX9%Kr<$7x9VyBwk_bo
z-5xz()MW!yiyGI1l8^Q=opipU>>E3LV!lk#O|TmY28FhXv^+uUHq$OAE1bV?X)5qU
zSM+i=M%2m_!%dQ$%O?J~GFT0vhROly8tG;g3N{y(5A(M2W)fvD`w?YVYr`DrTSS_-
zEyPA5;Rj>Dz!(C78xy)860)yCKuq>{zG^l1W2cQYgtkJd&mT-yt!=1ZAA?FsYSxqL
zKU{Ul=;3}*+eNZJ2YOlYixmx=kdprsLnr0>ju2hA%j)^rrwU3P0-_iT9={cOFGZA0
zpDH-rQ)aQTV~JHD$HI?X^qj0w5i1reQ@deZ7J2hEliq|)5ulBN9EkonU2{=nt&Hr&
zXwqA2UWx6Nj?yoV%Wtx7?ije-N9Ba|LC>*S>p?fMj-yiu9q+irPmhhd0n6!(#~)qF
zaBn6B7}lcXgA?ci-0*wGLx*dD+gDT(VMd)jD6B;$4SZ{P64tXd`Z+d820M8^J6$)-
zSF8{C24xzhxyY67IxHEgF7nUGUCh6ABlpyb^(Sd4Zu7^oH8ps?vhr?8FJ+qFrpJ;s
zg=Oejt!opHK3}(`?9ifwn?_%uO~p1cORvQ>^-5!{^fsu<lvz27Mn}`V%g3hnF_4+N
zxTcELNH;tPfd4XPypK+`<IC$_tf4U-HG6y&iG}mJLefBbv;Jj{QakeMwWLm7_V1(6
z{Vd7L>yR-0u7yxgG9q*2>>5di5)fF%O|<0v8pWeeQ-N;QhX=KIoX~>zZ>HqMorHFZ
zN-sxCkU<jkIbsFYeH7V|qHxo_kX~pFpsjTUT1Zcp6BOk!Mc&B-iY!`BXAaK@AgwBu
zlxx=_*E-S@U!^X$@O+US0-S5%6-Z|@>D^}k)SL(oT|0Q2&X=g~Gt)TSSEhVV+@w?T
zJeq&*4FuuAXY@e9Vx}B_j*T8Ml-klCkxfis!77!#1pJDX1l+a6QGG?MgZ^dA++QW=
z*TS7qI#T2$Cb^GuYQ$|k(qF!G(ElhwVkgpl{Kl*e`c~09XqR>8r@wdQJ@&Guy^HJ3
be}`~$)Sv_c8oJhlga7>>?EI@qgctw-<!$iY

literal 0
HcmV?d00001

diff --git a/data/YFOspecies.RData b/data/YFOspecies.RData
deleted file mode 100644
index 11cba92c36fc82b826d18b165c2f3fdb33bbdbc6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2563
zcmV+e3jFmSiwFP!0000019e#2a@;l&RUE&pQ%Nk%Xe8NAc77l)`v;PI+wvt-9<AeK
zYacKmhJ*wHTmT%4^Pl;htvwBr04OO{wk7l6(r9#_K8;T=zx?CLmnR1Y2agWEef-VA
zqi^x+<45@Cn}f#(f5XowzrX&~HG)gw@V}?{`{#iC`0q=m3t6!LELm%%8$LfxpS{p+
z{C;>|GAnuOmK*6BW7`p!pDQn{2FRAI|JAbrJ3H6DvWy$vIWOIEs|@F@GHx7ja&9re
zY>c;^TN))xLyM2_(;hB_t=ZCeCWh}v=Z+g$2)4BCMmR3?XwC6CPHRi46f9-fOE90A
znt@!g-Cto_>3AiqOxtA`=Cu!Qw{2@#W=kICpq8bu53Q$7E#s64Vr;jMN1o*fylh3B
zv^a0AFdOD%A=`SXSuHm*PCT2UuHBT1amkd3XK3kSRk*<(-ozU?xnP@Xv6Dh6wv^g9
z>CG-((9ajl<9;i{tIk_mzYw~>g))!rqJbBzEsWGyzhN*Bp5^#Cp7}zsb|+|eSm|U!
zo?ghtkX>$Bvo5W7oW46_%?+&_C(RPLliVAdTydD7y^tKg##1Whys0}b+!8+K6~=fe
zqa75V6u?|+VMVlWNEO8WhE5b?Y5ZQa8wKMKp2_wRGYD<dIEieK_a=I#_v|anCmu!2
zng+Y6l$w{Uv4*_M;f*Z)5+E8G4mbMfvSPwh;ML1gA^luikFLldj~u8?Q%M!!vrEMa
z`ot9eMIhkLw9nlC?A`#*td+20^ycZk5wfmZU5qQdxQ926)x90;FU{ygX@!fM9Am&P
z5EWY@V<a4!;$VIW8bW*$g*e(>`V^}ZtN~RO%w@J4rQjaKI<MfNtwelfEB|~4hjT?>
zw0^zlqdTK4QdZ{y`TWj^N|vS6lpFCTjxOQT(Lt>j=?WGB3A<t{L&#GQrn@(`KrA#!
zDyiU@#Qmq2wrjlMMABf-x>H68od|6<!9jH?MgD%-!o{LDIP7gS_*}4(NFy43eA#V;
zg*gTCh%K*KAyZ&1F1w}z(F}(YEdrgzWLvys+;>XQVc4=_Fa-aNS^rXOnC*rX-h#op
z7zz`pLYj)S=j<ifivYlZ+<#lJV$9;Bm$Ff&=+nyQw=;XC3xlw3aK;9~rBZ&JzOte=
zNE}YS=Uu&FfmkEiPH<E^*EP|YlL_d+H5|kl*Z%HR>$?W=oHBg=>bB1Ua1gAou~iC#
zgoOg?WzNa7gx11MlIu7@Lq(?P_vg6=m$w!6fqAz=n~gN^S_EIq4epcZcxiF%6B%&&
znyx_hXe|m!1rFg9iT1U@t#Jgy67<XC@lVqCx>a7*#v*p&vPTJS1gerVqo?PuTgO6r
z^tmUPO^FpTbK3=}Ga}C-(l~*Nc7N_^9D#y{(<3w{nZJ0$L|ZehhiF8)(5R8G-yl|C
ztTKDk<=j7iLp;({9Yq=@v5kmZ&fnl*v?K*8kzSc@<m|10Cx_yXV3m*!B~|({92aj<
z=S+j@R})b;4xmp<zePe)v8kvwx7KGYe)LZl1Or5mp(5<!Wu3lb3ig<Nhc%%jkRQ_u
z_>S!%8C;;`CEHHK!2F$RJNRz!50r|}CK`TYcC6PTHKLLwNjQIJYF5$*LYs<hw<*cr
zeTC%#fLintnH=*mL4~+RXia260&$?yn%(3eKO~qkVT{%HjPu}{4XtHH(RYgPARmtV
z?dz%sJ=!E17ViZp&B5{@g=!M734Q*asx*}|UCKTNw1b~q(s5_+O;JS831u*HqSTK*
zuu`KOKzN9$xCDX_aAJ>*dp!^@Gea62pc|+pTAksps~h6Wq)f1`bq-F_6bY?P8U5^o
zP^@lTsL#|`$X^=Mv?|fp?1MmUs^mTtF>Yld!g&t$>qhJ_$7*(+q<Rh@Cy<}1uAc#f
z<k#}XeC=<p1-lhlC-Xsc@NJ0cvZb`9D}*2$SNP$Byv0nUB1st$-DiZ`%i&I-l$fOu
z%Z!3^6qRuwYMwPVwH;4CQdaN7rF2b7u;Y&mNw@2b;SjD=6Jrg@!GcLE^n^)1BE~7c
z5N5c?P|I1u%ogP=)OT<kv%Bcl#YZZ~y`J&{(Kjgyi(f<LQHWU58(kpcNW&4mi~O`=
zp*u{1>jAzvxi?6)ejg9~oM6hDlsh5n^YTJNz&EAQ!x6M8GG0GkK@kM5rUpiq5JedY
z=AgJm1C!*;QI1h78uN9w0?|O6f^4AisiTZ6f3hND5z(miIz%bRqt1*0OCZ!Z>2N{n
zS*Ment%Br*daFVO=3wWov32SNR+%P-KueLhsyaTz;YP6=;go=8OxkYM)BH_az=gX#
zdcLU32B;P_t_LL_?PEIWd_~zecKF16nWCFu*Afg0Z4+sEg4S)OT~1awf8Wwn;EAs2
z<!p?ol_`dsBsrH&{BdQl8bS?~1JX6p%_<aZE-WABZRPbO%3ky%%C6RiInuX?G;dpo
zjYPut#(;q_1OhiEbU!3yUxk2}?D1^XYVOBQ8)*n_g;JkCn5<gcP`^F~m6Ft~C)Izr
z>X6aH{i3#uWPc9yvf>vj8aN>(|0jk{%Jm%~x^S1(^RrJClsW`NF%~?2BlKR1D49N0
zaJr|=Vq?b=t3Zy0AGzo`S)(FWELNs=!@4Z;=4&Rs37aB78wEKK{c*bHqR3hq*^AMn
zx7NH8+btcXUmTa;WZm2`aJi4l3G0KNW3$$SZekrrrw}^cafzQE8g&hp(;1IHxRl}E
zObRfpMac&z&;_{R_l$=Q*8;b%s3O9QI(tx9i%c5$*778*XKVCxY>*6g@_cr>ZkVrF
zAMg#zG)i-kE8TTiGE`mUpOd?of9po>sTJ!_(oo#yk7a9W@O)+E?UG)~G`~rYC2I=H
z(6d_CCLVpZZcEvrMF}^JzCxRdZDf{Si!17t##-rZP?agOaukh@rhAtUP3>bKGk0-C
z6|0eMcn|>pWz2XNoodGySG`z6V>)W~_$m?$=T(KIf%Inm%N(V4<kc%loxJSdN1^*!
zl9yK@VftMQp`c_$=E&I<k_;svu#B5%$@vwEN1vtw-K-A}YVkOs1@GTX$%{J)?G%+>
zj+P*UB<OR*3atAevLi*|rh6g1&>BEn>k71xo-8LQ%43SWlL-`Aw4Tl!o)JJ=RVpc0
zu0^hOq$j>gU2fs|B0B^)*TO51&SuiP&Hkl15gfX9@HU+<QQv2#ak#Hc`JT8*r{sAw
z|J)l0!h_G~fr7<MIsP0QJz^-er9UE@n81QnDtig|4J!$_Yl)-!idYBz%b2;pNzku_
zJEL@@$Vp6c7w6Q7+jyk!zjV<5C_!Q;(tZ5StPT2B(K~3Db?2vlbmblPvZlR_>&^dw
ZaC6k41Oghm)`Nrp{U0Nr0o;NZ004#0?&tsj