Update to student-edited file locations

2020-09-28 17:49:45 +10:00
parent 744e762dc2
commit 12213b73a4
1 changed files with 47 additions and 26 deletions
--- a/BIN-Storing_data.R
+++ b/BIN-Storing_data.R
@@ -3,11 +3,13 @@
 # Purpose: A Bioinformatics Course:
 #              R code accompanying the BIN-Storing_data unit
 #
-# Version: 1.2
+# Version: 1.3
 #
 # Date:    2017-10  -  2020-09
 # Author:  Boris Steipe (boris.steipe@utoronto.ca)
 #
 # V 1.3    Made file locations more consistent. All student-edited files
 #          go into the myScripts directory
 # V 1.2    2020 updates. Finally removed stringsAsFactors  :-)
 # V 1.1    Add instructions to retrieve UniProt ID from ID mapping service.
 # V 1.0    First live version, completely rebuilt. Now using JSON data sources.
@@ -32,26 +34,26 @@
 #TOC> 
 #TOC>   Section  Title                                                   Line
 #TOC> -----------------------------------------------------------------------
-#TOC>   1        A Relational Datamodel in R: review                       59
+#TOC>   1        A Relational Datamodel in R: review                       61
-#TOC>   1.1        Building a sample database structure                    99
+#TOC>   1.1        Building a sample database structure                   101
-#TOC>   1.1.1          completing the database                            205
+#TOC>   1.1.1          completing the database                            207
-#TOC>   1.2        Querying the database                                  238
+#TOC>   1.2        Querying the database                                  240
-#TOC>   1.3        Task: submit for credit (part 1/2)                     269
+#TOC>   1.3        Task: submit for credit (part 1/2)                     271
-#TOC>   2        Implementing the protein datamodel                       291
+#TOC>   2        Implementing the protein datamodel                       293
-#TOC>   2.1        JSON formatted source data                             317
+#TOC>   2.1        JSON formatted source data                             319
-#TOC>   2.2        "Sanitizing" sequence data                             358
+#TOC>   2.2        "Sanitizing" sequence data                             360
-#TOC>   2.3        Create a protein table for our data model              380
+#TOC>   2.3        Create a protein table for our data model              382
-#TOC>   2.3.1          Initialize the database                            382
+#TOC>   2.3.1          Initialize the database                            384
-#TOC>   2.3.2          Add data                                           394
+#TOC>   2.3.2          Add data                                           396
-#TOC>   2.4        Complete the database                                  414
+#TOC>   2.4        Complete the database                                  416
-#TOC>   2.4.1          Examples of navigating the database                441
+#TOC>   2.4.1          Examples of navigating the database                443
-#TOC>   2.5        Updating the database                                  473
+#TOC>   2.5        Updating the database                                  475
-#TOC>   3        Add your own data                                        485
+#TOC>   3        Add your own data                                        487
-#TOC>   3.1        Find a protein                                         493
+#TOC>   3.1        Find a protein                                         495
-#TOC>   3.2        Put the information into JSON files                    523
+#TOC>   3.2        Put the information into JSON files                    525
-#TOC>   3.3        Create an R script to create your own database         546
+#TOC>   3.3        Create an R script to create your own database         567
-#TOC>   3.3.1          Check and validate                                 569
+#TOC>   3.3.1          Check and validate                                 590
-#TOC>   3.4        Task: submit for credit (part 2/2)                     614
+#TOC>   3.4        Task: submit for credit (part 2/2)                     635
 #TOC> 
 #TOC> ==========================================================================
@@ -523,23 +525,42 @@ myDB$taxonomy$species[sel]
 # ==   3.2  Put the information into JSON files  ===============================
-# - Next make a copy of the file "./data/MBP1_SACCE.json" in your project
+# - Next make a copy of the file "./data/MBP1_SACCE.json" in the "data"
 #     directory and give it a new name that corresponds to MYSPE - e.g. if
 #     MYSPE is called "Cryptococcus neoformans", your file should be called
 #     "MBP1_CRYNE.json"; in that case "MBP1_CRYNE" would also be the
 #     "name" of your protein. Open the file in the RStudio editor and replace
 #     all of the MBP1_SACCE data with the corresponding data of your protein.
 #
-#     The UniProt ID may not be discoverable from the NCBI page. To retrieve
+#     Note: The UniProt ID may not be listed on the NCBI page. To retrieve
 #     it, navigate to http://www.uniprot.org/mapping/ , paste your RefSeq ID
 #     into the query field, make sure "RefSeqProtein" is selected for "From"
 #     and "UniProtKB" is selected for "To", and click "Go". In case this does
 #     not retrieve a single UniProt ID, contact me.
 #
 #     Save your .json file into your myScripts directory.
 #
 #     Confirm this step:
 if (file.exists(sprintf("./myScripts/MBP1_%s.json", biCode(MYSPE)))) {
  cat("Excellent - all good to continue.\n")
 } else {
  stop(sprintf(" The file \"./myScripts/MBP1_%s.json\" does not exist",
       biCode(MYSPE)))
 }
 #
 #
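 # - Do a similar thing for the MYSPE taxonomy entry. Copy
 #     "./data/refTaxonomy.json" into your myScripts directory and name the
 #     copy "<code>taxonomy.json", where <code> is the value of biCode(MYSPE)
 #     (e.g. "CRYNEtaxonomy.json"). Edit it to create a valid JSON file with
 #     only one single entry - that of MYSPE.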
 #
 #     Confirm this step:
 if (file.exists(sprintf("./myScripts/%staxonomy.json", biCode(MYSPE)))) {
  cat("Excellent - all good to continue.\n")
 } else {
  stop(sprintf(" The file \"./myScripts/%staxonomy.json\" does not exist",
               biCode(MYSPE)))
 }
 # - Validate your two files online at https://jsonlint.com/
@@ -552,14 +573,14 @@ myDB$taxonomy$species[sel]
 #     source("./scripts/ABC-createRefDB.R")
 # - then add the two commands that add your protein and taxonomy data,
 #     they should look like:
-#     myDB <- dbAddProtein(    myDB, fromJSON("MBP1_<code>.json"))
+#     myDB <- dbAddProtein(    myDB, fromJSON("./myScripts/MBP1_<MYSPE>.json"))
-#     myDB <- dbAddTaxonomy(   myDB, fromJSON("MYSPEtaxonomy.json"))
+#     myDB <- dbAddTaxonomy(   myDB, fromJSON("./myScripts/MYSPEtaxonomy.json"))
 #
 # - save the file in the ./myScripts/ folder and source() it:
 #     source("./myScripts/makeProteinDB.R")
 # This command needs to be executed whenever you recreate
-# the database. In particular, whenver you have added or modified data
+# the database. In particular, whenever you have added or modified data
 # in any of the JSON files. Later you will add more information ...
 # Remember this principle. Don't rely on objects in memory - you might