From 9ac45565f44b1bf4b439a6f494368e350c3e5e28 Mon Sep 17 00:00:00 2001 From: hyginn Date: Tue, 3 Oct 2017 23:38:48 -0400 Subject: [PATCH] Changing "YFO" to "MYSPE" --- .init.R | 9 ++++ ABC-makeYFOlist.R => ABC-makeMYSPElist.R | 22 ++++----- BIN-ALI-Dotplot.R | 32 ++++++------- BIN-ALI-Optimal_sequence_alignment.R | 22 ++++----- BIN-FUNC-Domain_annotation.R | 2 +- BIN-YFO.R => BIN-MYSPE.R | 44 ++++++++--------- BIN-PHYLO-Data_preparation.R | 2 +- BIN-PHYLO-Tree_analysis.R | 2 +- BIN-SEQA-Comparison.R | 2 +- BIN-Storing_data.R | 58 +++++++++++------------ data/MYSPEspecies.RData | Bin 0 -> 2565 bytes data/YFOspecies.RData | Bin 2563 -> 0 bytes 12 files changed, 102 insertions(+), 93 deletions(-) rename ABC-makeYFOlist.R => ABC-makeMYSPElist.R (96%) rename BIN-YFO.R => BIN-MYSPE.R (67%) create mode 100644 data/MYSPEspecies.RData delete mode 100644 data/YFOspecies.RData diff --git a/.init.R b/.init.R index 60c20cb..33551f1 100644 --- a/.init.R +++ b/.init.R @@ -24,6 +24,15 @@ if (! file.exists(".myProfile.R")) { rm(e, n, conn) } +# Patch YFO -> MYSPE if necessary: +tmp <- readLines(".myProfile.R") +if (length(grep("^YFO", tmp)) > 0) { + idx <- grep("^YFO", tmp) + tmp[idx] <- gsub("^YFO", "MYSPE", tmp[idx]) + writeLines(tmp, ".myProfile.R") +} +rm(tmp) + source(".myProfile.R") source(".utilities.R") diff --git a/ABC-makeYFOlist.R b/ABC-makeMYSPElist.R similarity index 96% rename from ABC-makeYFOlist.R rename to ABC-makeMYSPElist.R index 505d7b0..4b7cc2d 100644 --- a/ABC-makeYFOlist.R +++ b/ABC-makeMYSPElist.R @@ -1,9 +1,9 @@ -# ABC_makeYFOlist.R +# ABC_makeMYSPElist.R # # Purpose: Create a list of genome sequenced fungi with protein annotations and # Mbp1 homologues. # -# Version: 1.1 +# Version: 1.1.1 # # Date: 2016 09 - 2017 09 # Author: Boris Steipe (boris.steipe@utoronto.ca) @@ -29,9 +29,9 @@ # those parts. If you only want to study the general workflow, just load() # the respective intermediate results. # - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> --------------------------------------------------- #TOC> 1 The strategy 54 @@ -44,17 +44,17 @@ #TOC> 3.2 Identify species in "hits" 202 #TOC> 4 Intersect GOLD and BLAST species 247 #TOC> 5 Cleanup and finish 265 -#TOC> +#TOC> #TOC> ========================================================================== - + #TOC> #TOC> # = 1 The strategy ======================================================== -# This script will create a list of "YFO" species and save it in an R object -# YFOspecies that is stored in the data subdirectory of this project from where +# This script will create a list of "MYSPE" species and save it in an R object +# MYSPEspecies that is stored in the data subdirectory of this project from where # it can be loaded. The strategy is as follows: we download a list of all # genome projects and then select species for which protein annotations are # available - i.e. these are all genome-sequenced species that have been @@ -251,7 +251,7 @@ length(BLASTspecies) # etc. See here: ?union -YFOspecies <- intersect(GOLDspecies, BLASTspecies) +MYSPEspecies <- intersect(GOLDspecies, BLASTspecies) # Again: interpret this: # - what is the number of GOLDspecies? @@ -272,9 +272,9 @@ YFOspecies <- intersect(GOLDspecies, BLASTspecies) REFspecies -YFOspecies <- sort(setdiff(YFOspecies, REFspecies)) +MYSPEspecies <- sort(setdiff(MYSPEspecies, REFspecies)) -# save(YFOspecies, file = "data/YFOspecies.RData") +# save(MYSPEspecies, file = "data/MYSPEspecies.RData") diff --git a/BIN-ALI-Dotplot.R b/BIN-ALI-Dotplot.R index dd2fffd..d17361f 100644 --- a/BIN-ALI-Dotplot.R +++ b/BIN-ALI-Dotplot.R @@ -46,31 +46,31 @@ data(BLOSUM62) sel <- myDB$protein$name == "MBP1_SACCE" MBP1_SACCE <- s2c(myDB$protein$sequence[sel]) -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -MBP1_YFO <- s2c(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +MBP1_MYSPE <- s2c(myDB$protein$sequence[sel]) # Check that we have two character vectors of the expected length. str(MBP1_SACCE) -str(MBP1_YFO) +str(MBP1_MYSPE) # How do we get the pairscore values? Consider: a single pair of amino acids can -# be obtained from sequence SACCE and YFO eg. from position 13 and 21 ... +# be obtained from sequence SACCE and MYSPE eg. from position 13 and 21 ... MBP1_SACCE[13] -MBP1_YFO[21] +MBP1_MYSPE[21] # ... using these as subsetting expressions, we can pull the pairscore # from the MDM -BLOSUM62[MBP1_SACCE[13], MBP1_YFO[21]] +BLOSUM62[MBP1_SACCE[13], MBP1_MYSPE[21]] # First we build an empty matrix that will hold all pairscores ... -dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_YFO)), - nrow = length(MBP1_SACCE), ncol = length(MBP1_YFO)) +dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_MYSPE)), + nrow = length(MBP1_SACCE), ncol = length(MBP1_MYSPE)) # ... then we loop over the sequences and store the scores in the matrix. # for (i in 1:length(MBP1_SACCE)) { - for (j in 1:length(MBP1_YFO)) { - dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_YFO[j]] + for (j in 1:length(MBP1_MYSPE)) { + dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_MYSPE[j]] } } @@ -80,7 +80,7 @@ for (i in 1:length(MBP1_SACCE)) { dotMat[1:10, 1:10] # Rows in this matrix correspond to an amino acid from MBP1_SACCE, columns in -# the matrix correspond to an amino acid from MBP1_YFO. +# the matrix correspond to an amino acid from MBP1_MYSPE. # To plot this, we use the image() function. Here, with default parameters. @@ -110,13 +110,13 @@ image(x = 1:200, y = 1:200, dotMat[1:200, 1:200], ylim=c(200,1)) # ... and labels! Axis labels would be nice ... image(x = 1:200, y = 1:200, dotMat[1:200, 1:200], ylim=c(200,1), - xlab = "MBP1_YFO", ylab = "MBP1_SACCE" ) + xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE" ) # ... and why don't we have axis-numbers on all four sides? Go, make that right # too ... len <- 200 image(x = 1:len, y = 1:len, dotMat[1:len, 1:len], ylim=c(len,1), - xlab = "MBP1_YFO", ylab = "MBP1_SACCE", axes = FALSE) + xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE", axes = FALSE) box() axis(1, at = c(1, seq(10, len, by=10))) axis(2, at = c(1, seq(10, len, by=10))) @@ -129,8 +129,8 @@ axis(4, at = c(1, seq(10, len, by=10))) # utilities file and called it dotPlot2(). Why not dotPlot() ... that's because # there already is a dotplot function in the seqinr package: -dotPlot(MBP1_SACCE, MBP1_YFO) # seqinr -dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO") # Our's +dotPlot(MBP1_SACCE, MBP1_MYSPE) # seqinr +dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE") # Our's # Which one do you prefer? You can probably see the block patterns that arise # from segments of repetitive, low complexity sequence. But you probably have to @@ -153,7 +153,7 @@ myFilter[5, ] <- c( 0, 0, 0, 0, 1) # I have added the option to read such filters (or others that you could define on your own) as a parameter of the function. -dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO", f = myFilter) +dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE", f = myFilter) # I think the result shows quite nicely how the two sequences are globally # related and where the regions of sequence similarity are. Play with this a bit diff --git a/BIN-ALI-Optimal_sequence_alignment.R b/BIN-ALI-Optimal_sequence_alignment.R index ac173b1..8f0f6a4 100644 --- a/BIN-ALI-Optimal_sequence_alignment.R +++ b/BIN-ALI-Optimal_sequence_alignment.R @@ -52,8 +52,8 @@ toString(s) # using the Biostrings function toString() sel <- myDB$protein$name == "MBP1_SACCE" aaMBP1_SACCE <- AAString(myDB$protein$sequence[sel]) -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel]) ?pairwiseAlignment @@ -61,7 +61,7 @@ aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) # Global optimal alignment with end-gap penalties is default. (like EMBOSS needle) ali1 <- pairwiseAlignment( aaMBP1_SACCE, - aaMBP1_YFO, + aaMBP1_MYSPE, substitutionMatrix = "BLOSUM62", gapOpening = 10, gapExtension = 0.5) @@ -110,7 +110,7 @@ percentID(ali1) # Compare with local optimal alignment (like EMBOSS Water) ali2 <- pairwiseAlignment( aaMBP1_SACCE, - aaMBP1_YFO, + aaMBP1_MYSPE, type = "local", substitutionMatrix = "BLOSUM62", gapOpening = 50, @@ -135,7 +135,7 @@ percentID(ali2) # PART FOUR: APSES Domain annotation by alignment # ============================================================================== -# In this section we define the YFO APSES sequence by performing a global, +# In this section we define the MYSPE APSES sequence by performing a global, # optimal sequence alignment of the yeast domain with the full length protein # sequence of the protein that was the most similar to the yeast APSES domain. # @@ -190,11 +190,11 @@ aaMB1_SACCE_APSES <- AAString(dbGetFeatureSequence(myDB, "MBP1_SACCE", "APSES fold")) -# To align, we need the YFO sequence. Here is it's definition again, just +# To align, we need the MYSPE sequence. Here is it's definition again, just # in case ... -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel]) # Now let's align these two sequences of very different length without end-gap # penalties using the "overlap" type. "overlap" turns the @@ -203,7 +203,7 @@ aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) aliApses <- pairwiseAlignment( aaMB1_SACCE_APSES, - aaMBP1_YFO, + aaMBP1_MYSPE, type = "overlap", substitutionMatrix = "BLOSUM62", gapOpening = 10, @@ -237,7 +237,7 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1 # right away and store it in myDB. Copy the code-template below to your # myCode.R file, edit it to replace the placeholder items with your data: # -# - The is to be replaced with the ID of MBP1_YFO +# - The is to be replaced with the ID of MBP1_MYSPE # - The is to be replaced with the ID of "APSES fold" # - and are to be replaced with the coordinates you got above # @@ -277,7 +277,7 @@ myDB$proteinAnnotation[nrow(myDB$proteinAnnotation), ] # If this is correct, save it save(myDB, file = "myDB.02.RData") # Note that it gets a new version number! -# Done with this part. Copy the sequence of the APSES domain of MBP1_ - you +# Done with this part. Copy the sequence of the APSES domain of MBP1_MYSPE - you # need it for the reverse BLAST search, and return to the course Wiki. diff --git a/BIN-FUNC-Domain_annotation.R b/BIN-FUNC-Domain_annotation.R index 9384510..250e0c8 100644 --- a/BIN-FUNC-Domain_annotation.R +++ b/BIN-FUNC-Domain_annotation.R @@ -43,7 +43,7 @@ save(myDB, file = "myDB.04.RData") # save the new version # from your myCode.R script. Here is again the table of feature IDs: myDB$feature[ , c("ID", "name", "description")] -# Add every SMART annotated feaure for MBP1_YFO to the database. If you make +# Add every SMART annotated feaure for MBP1_MYSPE to the database. If you make # mistakes, just reload the latest version (probably "myDB.04.RData"), then run # your corrected annotation script again. Execute ... myDB$proteinAnnotation diff --git a/BIN-YFO.R b/BIN-MYSPE.R similarity index 67% rename from BIN-YFO.R rename to BIN-MYSPE.R index 7cc06d7..7963185 100644 --- a/BIN-YFO.R +++ b/BIN-MYSPE.R @@ -1,15 +1,15 @@ -# BIN-YFO.R +# BIN-MYSPE.R # # Purpose: A Bioinformatics Course: -# R code accompanying the BIN-YFO unit +# R code accompanying the BIN-MYSPE unit # # Version: 1.0 # # Date: 2017 09 21 # Author: Boris Steipe (boris.steipe@utoronto.ca) # -# V 1.0 Final code, after rewriting BLAST parser and creating current YFOlist -# V 0.1 First code copied from BCH441_A03_makeYFOlist.R +# V 1.0 Final code, after rewriting BLAST parser and creating current MYSPElist +# V 0.1 First code copied from BCH441_A03_makeMYSPElist.R # # TODO: # @@ -23,17 +23,17 @@ # going on. That's not how it works ... # # ============================================================================== - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> --------------------------------------- #TOC> 1 Preparations 38 -#TOC> 2 Suitable YFO Species 50 -#TOC> 3 Adopt "YFO" 64 -#TOC> +#TOC> 2 Suitable MYSPE Species 50 +#TOC> 3 Adopt "MYSPE" 64 +#TOC> #TOC> ========================================================================== - + # = 1 Preparations ======================================================== # @@ -47,39 +47,39 @@ if (! exists("myStudentNumber")) { } -# = 2 Suitable YFO Species ================================================ +# = 2 Suitable MYSPE Species ============================================== # In this unit we will select one species from a list of genome sequenced fungi # and write it into your personalized profile file. This species will be called -# "YFO" (Your Favourite Organism) for other learning units and exercises. +# "MYSPE" (Your Favourite Organism) for other learning units and exercises. # A detailed description of the process of compiling the list of genome # sequenced fungi with protein annotations and Mbp1 homologues is in the file -# ABC-makeYFOlist.R +# ABC-makeMYSPElist.R -# Task: Study ABC-makeYFOlist.R, it implements a rather typical workflow of +# Task: Study ABC-makeMYSPElist.R, it implements a rather typical workflow of # selecting and combining data from various public-domain data resources. -# = 3 Adopt "YFO" ========================================================= +# = 3 Adopt "MYSPE" ======================================================= # In the code below, we load the resulting vector of species name, then pick one # of them in a random but reproducible way, determined by your student number. -load("data/YFOspecies.RData") # load the species names -set.seed(myStudentNumber) # seed the random number generator -YFO <- sample(YFOspecies, 1) # pick a species at random +load("data/MYSPEspecies.RData") # load the species names +set.seed(myStudentNumber) # seed the random number generator +MYSPE <- sample(MYSPEspecies, 1) # pick a species at random # write the result to your personalized profile data so we can use the result in # other functions -cat(sprintf("YFO <- \"%s\"\n", YFO), file = ".myProfile.R", append = TRUE) +cat(sprintf("MYSPE <- \"%s\"\n", MYSPE), file = ".myProfile.R", append = TRUE) -YFO # so, which species is it ... ? -biCode(YFO) # and what is it's "BiCode" ... ? +MYSPE # so, which species is it ... ? +biCode(MYSPE) # and what is it's "BiCode" ... ? # Task: Note down the species name and its five letter label on your Student # Wiki user page. Use this species whenever this or future assignments refer -# to YFO. In code, we will automatically load it from your.myProfile.R file. +# to MYSPE. In code, we will automatically load it from your.myProfile.R file. # [END] diff --git a/BIN-PHYLO-Data_preparation.R b/BIN-PHYLO-Data_preparation.R index cd712a5..c376ae5 100644 --- a/BIN-PHYLO-Data_preparation.R +++ b/BIN-PHYLO-Data_preparation.R @@ -41,7 +41,7 @@ list.files(pattern = "myDB.*") load("myDB.05.RData") # The database contains the ten Mbp1 orthologues from the reference species -# and the Mbp1 RBM for YFO. +# and the Mbp1 RBM for MYSPE. # # We will construct a phylogenetic tree from the proteins' APSES domains. # You have annotated their ranges as a feature. diff --git a/BIN-PHYLO-Tree_analysis.R b/BIN-PHYLO-Tree_analysis.R index 3eb0073..918e62a 100644 --- a/BIN-PHYLO-Tree_analysis.R +++ b/BIN-PHYLO-Tree_analysis.R @@ -156,7 +156,7 @@ layout(matrix(1), widths=1.0, heights=1.0) # ... or we can plot the tree so it corresponds as well as possible to a # predefined tip ordering. Here we use the ordering that NCBI Global Tree # returns for the reference species - we have used it above to make the vector -# apsMbp1Names. You inserted your YFO name into that vector - but you should +# apsMbp1Names. You inserted your MYSPE name into that vector - but you should # move it to its correct position in the cladogram. # (Nb. we need to reverse the ordering for the plot. This is why we use the diff --git a/BIN-SEQA-Comparison.R b/BIN-SEQA-Comparison.R index 175183b..b057e08 100644 --- a/BIN-SEQA-Comparison.R +++ b/BIN-SEQA-Comparison.R @@ -39,7 +39,7 @@ help(package = seqinr) # shows the available functions ?computePI # This takes as input a vector of upper-case AA codes -# Let's retrieve the YFO sequence from our datamodel +# Let's retrieve the MYSPE sequence from our datamodel # (assuming it is the last one that was added): db$protein[nrow(db$protein), "sequence"] diff --git a/BIN-Storing_data.R b/BIN-Storing_data.R index fc1f0f1..55a5d0e 100644 --- a/BIN-Storing_data.R +++ b/BIN-Storing_data.R @@ -23,9 +23,9 @@ # going on. That's not how it works ... # # ============================================================================== - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> ------------------------------------------------------------ #TOC> 1 A Relational Datamodel in R: review 55 @@ -48,9 +48,9 @@ #TOC> 3.3 Create an R script to create the database 522 #TOC> 3.3.1 Check and validate 542 #TOC> 3.4 Task: submit for credit (part 2/2) 583 -#TOC> +#TOC> #TOC> ========================================================================== - + # = 1 A Relational Datamodel in R: review ================================= @@ -203,7 +203,7 @@ str(philDB) # go back, re-read, play with it, and ask for help. This is essential. -# === 1.1.1 completing the database +# === 1.1.1 completing the database # Next I'll add one more person, and create the other two tables: @@ -362,7 +362,7 @@ dbSanitizeSequence(x) # == 2.3 Create a protein table for our data model ========================= -# === 2.3.1 Initialize the database +# === 2.3.1 Initialize the database # The function dbInit contains all the code to return a list of empty @@ -374,7 +374,7 @@ myDB <- dbInit() str(myDB) -# === 2.3.2 Add data +# === 2.3.2 Add data # fromJSON() returns a dataframe that we can readily process to add data @@ -421,7 +421,7 @@ source("./scripts/ABC-createRefDB.R") str(myDB) -# === 2.4.1 Examples of navigating the database +# === 2.4.1 Examples of navigating the database # You can look at the contents of the tables in the usual way we access @@ -468,8 +468,8 @@ myDB$taxonomy$species[sel] # = 3 Add your own data =================================================== -# You have chosen an organism as "YFO", and you final task will be to find the -# protein in YFO that is most similar to yeast Mbp1 and enter its information +# You have chosen an organism as "MYSPE", and you final task will be to find the +# protein in MYSPE that is most similar to yeast Mbp1 and enter its information # into the database. @@ -483,7 +483,7 @@ myDB$taxonomy$species[sel] # Protein BLAST. # - Enter NP_010227 into the "Query Sequence" field. # - Choose "Reference proteins (refseq_protein)" as the "Database". -# - Paste the YFO species name into the "Organism" field. +# - Paste the MYSPE species name into the "Organism" field. # # - Click "BLAST". @@ -493,28 +493,28 @@ myDB$taxonomy$species[sel] # Otherwise, look for the top-hit in the "Alignments" section. In some cases # there will be more than one hit with nearly similar E-values. If this is the -# case for YFO, choose the one with the higher degree of similarity (more +# case for MYSPE, choose the one with the higher degree of similarity (more # identities) with the N-terminus of the query - i.e. the Query sequence of # the first ~ 100 amino acids. # - Follow the link to the protein data page, linked from "Sequence ID". # - From there, in a separate tab, open the link to the taxonomy database page -# for YFO which is linked from the "ORGANISM" record. +# for MYSPE which is linked from the "ORGANISM" record. # == 3.2 Put the information into JSON files =============================== # - Next make a copy of the file "./data/MBP1_SACCE.json" in your project -# directory and give it a new name that corresponds to YFO - e.g. if -# YFO is called "Crptycoccus neoformans", your file should be called +# directory and give it a new name that corresponds to MYSPE - e.g. if +# MYSPE is called "Crptycoccus neoformans", your file should be called # "MBP1_CRYNE.json"; in that case "MBP1_CRYNE" would also be the # "name" of your protein. Open the file in the RStudio editor and replace # all of the MBP1_SACCE data with the corresponding data of your protein. # -# - Do a similar thing for the YFO taxonomy entry. Copy -# "./data/refTaxonomy.json" and make a new file named "YFOtaxonomy.json". -# Create a valid JSON file with only one single entry - that of YFO. +# - Do a similar thing for the MYSPE taxonomy entry. Copy +# "./data/refTaxonomy.json" and make a new file named "MYSPEtaxonomy.json". +# Create a valid JSON file with only one single entry - that of MYSPE. # # - Validate your two files online at https://jsonlint.com/ @@ -529,7 +529,7 @@ myDB$taxonomy$species[sel] # - than add the two commands that add your protein and taxonomy data, # they should look like: # myDB <- dbAddProtein( myDB, fromJSON("MBP1_.json")) -# myDB <- dbAddTaxonomy( myDB, fromJSON("YFOtaxonomy.json")) +# myDB <- dbAddTaxonomy( myDB, fromJSON("MYSPEtaxonomy.json")) # # - save the file and source() it: # source("makeProteinDB.R") @@ -539,12 +539,12 @@ myDB$taxonomy$species[sel] # in any of the JSON files. Later you will add more information ... -# === 3.3.1 Check and validate +# === 3.3.1 Check and validate -# Is your protein named according to the pattern "MBP1_"? It should be. +# Is your protein named according to the pattern "MBP1_MYSPE"? It should be. # And does the taxonomy table contain the systematic name? It should be the same -# that you get when you type YFO into the console. +# that you get when you type MYSPE into the console. # Let's compute sequence lengths on the fly (with the function nchar() ), and # open this with the table viewer function View() @@ -562,18 +562,18 @@ View(cbind(myDB$protein[ , c("ID", "name", "RefSeqID")], myDB$protein$sequence[nrow(myDB$protein)] # If not, don't continue! Fix the problem first. -# Let me repeat: If this does not give you the right sequence of the YFO +# Let me repeat: If this does not give you the right sequence of the MYSPE # Mbp1 homologue, DO NOT CONTINUE. Fix the problem. -# Is that the right taxonomy ID and binomial name for YFO? -sel <- myDB$taxonomy$species == YFO +# Is that the right taxonomy ID and binomial name for MYSPE? +sel <- myDB$taxonomy$species == MYSPE myDB$taxonomy[sel, ] # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE. # Fix the problem first. -# Does this give you the right refseq ID for MBP1_? -sel <- myDB$protein$name == paste0("MBP1_", biCode(YFO)) +# Does this give you the right refseq ID for MBP1_MYSPE? +sel <- myDB$protein$name == paste0("MBP1_", biCode(MYSPE)) myDB$protein$RefSeqID[sel] # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE. @@ -589,8 +589,8 @@ myDB$protein$RefSeqID[sel] # page on the Student Wiki # - Execute the two commands below and show the result on your submission page -biCode(myDB$taxonomy$species) %in% biCode(YFO) -myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == YFO)] +biCode(myDB$taxonomy$species) %in% biCode(MYSPE) +myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == MYSPE)] # That is all. diff --git a/data/MYSPEspecies.RData b/data/MYSPEspecies.RData new file mode 100644 index 0000000000000000000000000000000000000000..0fbbccbe24c2d0fad75d54831289ea205b52df82 GIT binary patch literal 2565 zcmV+g3i|aQiwFP!0000019e#KcHFiRRUH3m(k7O6wUTTn-51E8zJVlv_1JRvcvZ(q zPk&%REC~q&SO8p$`<{KBp56gT0F;z-Y)j_F$6zpX=MFx-`trB$zI=CZaPZ{d+o#_g zJoy&CK7E3JzBzb$@F)EI^VM(b)oa%XE``JYpW*L+2kggxT{2zBg8h5RS}Wb~`C0nx zg>K{b!}F3^$y>MFNY@zKj==m}d0{m`wq*UUo(6Tk%IB%74e1xC%a3O5Xmc}zNd_OvO+{i+(rENFDaiK?Rj?ZygTS}#1DZ^fZ z`OMS|D`92YF2gXdeRR8RTgx(A@-PRrEQNh+J#A_kr%VuIyL~+JEJxsF zE8?WZd25B)FeeMy)=SN5xsh?=*%Wo{rc{herbIkLOBbuc4fgOR-oVKP+uVqq6iTtB z)W%6~cIkqCzF;2rTNz$;-qQMo&;>4(d2AOAykKo%q{jLUgL&{Q$ItQ17lO4rLA%3B zClm7QLN7>DpowvU)WXq(1KWP^M#(KEegUs*o! zC}P$$*iEI>ykw0vT;b&dym74V?O=asMkh)uT-@Xs z19pL^*b*5d;m{NZ^GnbW;*%)E(eBcxSe;-EsH$Ktv)w2K_aN4J1rKc{;wxME=X*Gu zD*~hS>qQ^k8)cEQIuFPf_eNB*ETyL0h&OR`37?J*YQ0ETun0)l6;l~Po`Nvly|D#i zp+Qnf1;-@rKfSbF;|(X027A_>GD_$~XtN0ps!J*I*UJ_z7QMk?Z==EIf|W!X(dgsL zZX+zrDUe5OdBX~s0%LL6H5G_vIFx7+=qx7N;uYh*Q-Th|mKB2`_;1YmS7O6#H>B_u z4A#X^m_QZMRHQv;ugG2m01o8-(}ERa79YKmjWR`_RzAO-*=t=Ggmr^6HV7`2^5gWi z6}3U)aPmFx>J1CT8p(EoquRNyiN>5vKnJejAkMh<_pe*uHHhbw;q%vbeHMU&V113P zQWzvG6i_d7PM#&S7H*PU#|auLGEILt&o#Kbt+0>GyA#@Mq=DBW_(pDUpG3z?i))|A zfYUd01+qtLQAjFq2&YK2ZwzjYBN&#TUmlNtlD^li^0GD-u@jd)N^m1km7EzpJ%7_W z7Sf~7J;7{BtcaPrE=Zjbc^;9*2~@O)b5G+46f~S3p)tw)#akxYnrS^mBhrONjePwU zu>xb2*_$rs{`p(tk*4Y>(lCi_MBH-z76+pxDNu>@%5)=V?*u$K6n_M(gls6O(vRV| zc!xS?8dSfUh`MnAeOme*5|WBdMYXxLK4bBtzq=q9AbJcHVHYp!^gUCs$Lu?-2_=F2 zm{!2|Y!Auc0wpimb|MDm?^WBucY}YRRD3qk@LRKEy%wnvl`KiZ`Fm5dl0FdHRBXFV zN&fyTEDr$GqL;|zn2!l6#0^4gA`23T1C`e7CI|T;!ITMOtbSme2iI(9Ei;O~Q+x;c zaNKWSS3T&_Ceg6?AV6sjmIofpUpps~HhP$qAh%b{e!MfHtI7w3^v^r(< z^DCiP-MCPnsj-m1G^S})qOaMNKy9kzJ`^!-Wg^0P4)yCs>@de_c9W!f4j?CxpQ)~& z0fgk&^45IqZ>|Nq69#io)U_A@e9iEa{Cd5OJj8h~7tj zTCva_Cc*UxUz|J`q*}j^2Yx{?WlhSR5cPR^p&{U#(&*s`+7ubDAFrSY0#{Q5BTI;) zj0AH~+@XO9*MwUNWk+Fzq)OsDF6y#B7#(*UdYMgYq zAoZ-%NzPV5@>E3LV!lk#O|TmY28FhXv^+uUHq$OAE1bV?X)5qU zSM+i=M%2m_!%dQ$%O?J~GFT0vhROly8tG;g3N{y(5A(M2W)fvD`w?YVYr`DrTSS_- zEyPA5;Rj>Dz!(C78xy)860)yCKuq>{zG^l1W2cQYgtkJd&mT-yt!=1ZAA?FsYSxqL zKU{Ul=;3}*+eNZJ2YOlYixmx=kdprsLnr0>ju2hA%j)^rrwU3P0-_iT9={cOFGZA0 zpDH-rQ)aQTV~JHD$HI?X^qj0w5i1reQ@deZ7J2hEliq|)5ulBN9EkonU2{=nt&Hr& zXwqA2UWx6Nj?yoV%Wtx7?ije-N9Ba|LC>*S>p?fMj-yiu9q+irPmhhd0n6!(#~)qF zaBn6B7}lcXgA?ci-0*wGLx*dD+gDT(VMd)jD6B;$4SZ{P64tXd`Z+d820M8^J6$)- zSF8{C24xzhxyY67IxHEgF7nUGUCh6ABlpyb^(Sd4Zu7^oH8ps?vhr?8FJ+qFrpJ;s zg=Oejt!opHK3}(`?9ifwn?_%uO~p1cORvQ>^-5!{^fsuyR-0u7yxgG9q*2>>5di5)fF%O|<0v8pWeeQ-N;QhX=KIoX~>zZ>HqMorHFZ zN-sxCkUD^}k)SL(oT|0Q2&X=g~Gt)TSSEhVV+@w?T zJeq&*4FuuAXY@e9Vx}B_j*T8Ml-klCkxfis!77!#1pJDX1l+a6QGG?MgZ^dA++QW= z*TS7qI#T2$Cb^GuYQ$|k(qF!G(ElhwVkgpl{Kl*e`c~09XqR>8r@wdQJ@&Guy^HJ3 be}`~$)Sv_c8oJhlga7>>?EI@qgctw-|GAnuOmK*6BW7`p!pDQn{2FRAI|JAbrJ3H6DvWy$vIWOIEs|@F@GHx7ja&9re zY>c;^TN))xLyM2_(;hB_t=ZCeCWh}v=Z+g$2)4BCMmR3?XwC6CPHRi46f9-fOE90A znt@!g-Cto_>3AiqOxtA`=Cu!Qw{2@#W=kICpq8bu53Q$7E#s64Vr;jMN1o*fylh3B zv^a0AFdOD%A=`SXSuHm*PCT2UuHBT1amkd3XK3kSRk*<(-ozU?xnP@Xv6Dh6wv^g9 z>CG-((9ajl<9;i{tIk_mzYw~>g))!rqJbBzEsWGyzhN*Bp5^#Cp7}zsb|+|eSm|U! zo?ghtkX>$Bvo5W7oW46_%?+&_C(RPLliVAdTydD7y^tKg##1Whys0}b+!8+K6~=fe zqa75V6u?|+VMVlWNEO8WhE5b?Y5ZQa8wKMKp2_wRGYD`V^}ZtN~RO%w@J4rQjaKI zw0^zlqdTK4QdZ{y`TWj^N|vS6lpFCTjxOQT(Lt>j=?WGB3AH68od|6XQVc4=_Fa-aNS^rXOnC*rX-h#op z7zz`pLYj)S=j$?W=oHBg=>bB1Ua1gAou~iC# zgoOg?WzNa7gx11MlIu7@Lq(?P_vg6=m$w!6fqAz=n~gN^S_EIq4epcZcxiF%6B%&& znyx_hXe|m!1rFg9iT1U@t#Jgy67a7*#v*p&vPTJS1gerVqo?PuTgO6r z^tmUPO^FpTbK3=}Ga}C-(l~*Nc7N_^9D#y{(<3w{nZJ0$L|ZehhiF8)(5R8G-yl|C ztTKDk<=j7iLp;({9Yq=@v5kmZ&fnl*v?K*8kzSc@S!%8C;;`CEHHK!2F$RJNRz!50r|}CK`TYcC6PTHKLLwNjQIJYF5$*LYs*dp!^@Gea62pc|+pTAksps~h6Wq)f1`bq-F_6bY?P8U5^o zP^@lTsL#|`$X^=Mv?|fp?1MmUs^mTtF>Yld!g&t$>qhJ_$7*(+qjAzvxi?6)ejg9~oM6hDlsh5n^YTJNz&EAQ!x6M8GG0GkK@kM5rUpiq5JedY z=AgJm1C!*;QI1h78uN9w0?|O6f^4AisiTZ6f3hND5z(miIz%bRqt1*0OCZ!Z>2N{n zS*Ment%Br*daFVO=3wWov32SNR+%P-KueLhsyaTz;YP6=;go=8OxkYM)BH_az=gX# zdcLU32B;P_t_LL_?PEIWd_~zecKF16nWCFu*Afg0Z4+sEg4S)OT~1awf8Wwn;EAs2 zX6aH{i3#uWPc9yvf>vj8aN>(|0jk{%Jm%~x^S1(^RrJClsW`NF%~?2BlKR1D49N0 zaJr|=Vq?b=t3Zy0AGzo`S)(FWELNs=!@4Z;=4&Rs37aB78wEKK{c*bHqR3hq*^AMn zx7NH8+btcXUmTa;WZm2`aJi4l3G0KNW3$$SZekrrrw}^cafzQE8g&hp(;1IHxRl}E zObRfpMac&z&;_{R_l$=Q*8;b%s3O9QI(tx9i%c5$*778*XKVCxY>*6g@_cr>ZkVrF zAMg#zG)i-kE8TTiGE`mUpOd?of9po>sTJ!_(oo#yk7a9W@O)+E?UG)~G`~rYC2I=H z(6d_CCLVpZZcEvrMF}^JzCxRdZDf{Si!17t##-rZP?agOaukh@rhAtUP3>bKGk0-C z6|0eMcn|>pWz2XNoodGySG`z6V>)W~_$m?$=T(KIf%Inm%N(V4 zj+P*UBBEn>k71xo-8LQ%43SWlL-`Aw4Tl!o)JJ=RVpc0 zu0^hOq$j>gU2fs|B0B^)*TO51&SuiP&Hkl15gfX9@HU+&^dw ZaC6k41Oghm)`Nrp{U0Nr0o;NZ004#0?&tsj