diff --git a/.init.R b/.init.R index 60c20cb..33551f1 100644 --- a/.init.R +++ b/.init.R @@ -24,6 +24,15 @@ if (! file.exists(".myProfile.R")) { rm(e, n, conn) } +# Patch YFO -> MYSPE if necessary: +tmp <- readLines(".myProfile.R") +if (length(grep("^YFO", tmp)) > 0) { + idx <- grep("^YFO", tmp) + tmp[idx] <- gsub("^YFO", "MYSPE", tmp[idx]) + writeLines(tmp, ".myProfile.R") +} +rm(tmp) + source(".myProfile.R") source(".utilities.R") diff --git a/ABC-makeYFOlist.R b/ABC-makeMYSPElist.R similarity index 96% rename from ABC-makeYFOlist.R rename to ABC-makeMYSPElist.R index 505d7b0..4b7cc2d 100644 --- a/ABC-makeYFOlist.R +++ b/ABC-makeMYSPElist.R @@ -1,9 +1,9 @@ -# ABC_makeYFOlist.R +# ABC_makeMYSPElist.R # # Purpose: Create a list of genome sequenced fungi with protein annotations and # Mbp1 homologues. # -# Version: 1.1 +# Version: 1.1.1 # # Date: 2016 09 - 2017 09 # Author: Boris Steipe (boris.steipe@utoronto.ca) @@ -29,9 +29,9 @@ # those parts. If you only want to study the general workflow, just load() # the respective intermediate results. # - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> --------------------------------------------------- #TOC> 1 The strategy 54 @@ -44,17 +44,17 @@ #TOC> 3.2 Identify species in "hits" 202 #TOC> 4 Intersect GOLD and BLAST species 247 #TOC> 5 Cleanup and finish 265 -#TOC> +#TOC> #TOC> ========================================================================== - + #TOC> #TOC> # = 1 The strategy ======================================================== -# This script will create a list of "YFO" species and save it in an R object -# YFOspecies that is stored in the data subdirectory of this project from where +# This script will create a list of "MYSPE" species and save it in an R object +# MYSPEspecies that is stored in the data subdirectory of this project from where # it can be loaded. The strategy is as follows: we download a list of all # genome projects and then select species for which protein annotations are # available - i.e. these are all genome-sequenced species that have been @@ -251,7 +251,7 @@ length(BLASTspecies) # etc. See here: ?union -YFOspecies <- intersect(GOLDspecies, BLASTspecies) +MYSPEspecies <- intersect(GOLDspecies, BLASTspecies) # Again: interpret this: # - what is the number of GOLDspecies? @@ -272,9 +272,9 @@ YFOspecies <- intersect(GOLDspecies, BLASTspecies) REFspecies -YFOspecies <- sort(setdiff(YFOspecies, REFspecies)) +MYSPEspecies <- sort(setdiff(MYSPEspecies, REFspecies)) -# save(YFOspecies, file = "data/YFOspecies.RData") +# save(MYSPEspecies, file = "data/MYSPEspecies.RData") diff --git a/BIN-ALI-Dotplot.R b/BIN-ALI-Dotplot.R index dd2fffd..d17361f 100644 --- a/BIN-ALI-Dotplot.R +++ b/BIN-ALI-Dotplot.R @@ -46,31 +46,31 @@ data(BLOSUM62) sel <- myDB$protein$name == "MBP1_SACCE" MBP1_SACCE <- s2c(myDB$protein$sequence[sel]) -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -MBP1_YFO <- s2c(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +MBP1_MYSPE <- s2c(myDB$protein$sequence[sel]) # Check that we have two character vectors of the expected length. str(MBP1_SACCE) -str(MBP1_YFO) +str(MBP1_MYSPE) # How do we get the pairscore values? Consider: a single pair of amino acids can -# be obtained from sequence SACCE and YFO eg. from position 13 and 21 ... +# be obtained from sequence SACCE and MYSPE eg. from position 13 and 21 ... MBP1_SACCE[13] -MBP1_YFO[21] +MBP1_MYSPE[21] # ... using these as subsetting expressions, we can pull the pairscore # from the MDM -BLOSUM62[MBP1_SACCE[13], MBP1_YFO[21]] +BLOSUM62[MBP1_SACCE[13], MBP1_MYSPE[21]] # First we build an empty matrix that will hold all pairscores ... -dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_YFO)), - nrow = length(MBP1_SACCE), ncol = length(MBP1_YFO)) +dotMat <- matrix(numeric(length(MBP1_SACCE) * length(MBP1_MYSPE)), + nrow = length(MBP1_SACCE), ncol = length(MBP1_MYSPE)) # ... then we loop over the sequences and store the scores in the matrix. # for (i in 1:length(MBP1_SACCE)) { - for (j in 1:length(MBP1_YFO)) { - dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_YFO[j]] + for (j in 1:length(MBP1_MYSPE)) { + dotMat[i, j] <- BLOSUM62[MBP1_SACCE[i], MBP1_MYSPE[j]] } } @@ -80,7 +80,7 @@ for (i in 1:length(MBP1_SACCE)) { dotMat[1:10, 1:10] # Rows in this matrix correspond to an amino acid from MBP1_SACCE, columns in -# the matrix correspond to an amino acid from MBP1_YFO. +# the matrix correspond to an amino acid from MBP1_MYSPE. # To plot this, we use the image() function. Here, with default parameters. @@ -110,13 +110,13 @@ image(x = 1:200, y = 1:200, dotMat[1:200, 1:200], ylim=c(200,1)) # ... and labels! Axis labels would be nice ... image(x = 1:200, y = 1:200, dotMat[1:200, 1:200], ylim=c(200,1), - xlab = "MBP1_YFO", ylab = "MBP1_SACCE" ) + xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE" ) # ... and why don't we have axis-numbers on all four sides? Go, make that right # too ... len <- 200 image(x = 1:len, y = 1:len, dotMat[1:len, 1:len], ylim=c(len,1), - xlab = "MBP1_YFO", ylab = "MBP1_SACCE", axes = FALSE) + xlab = "MBP1_MYSPE", ylab = "MBP1_SACCE", axes = FALSE) box() axis(1, at = c(1, seq(10, len, by=10))) axis(2, at = c(1, seq(10, len, by=10))) @@ -129,8 +129,8 @@ axis(4, at = c(1, seq(10, len, by=10))) # utilities file and called it dotPlot2(). Why not dotPlot() ... that's because # there already is a dotplot function in the seqinr package: -dotPlot(MBP1_SACCE, MBP1_YFO) # seqinr -dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO") # Our's +dotPlot(MBP1_SACCE, MBP1_MYSPE) # seqinr +dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE") # Our's # Which one do you prefer? You can probably see the block patterns that arise # from segments of repetitive, low complexity sequence. But you probably have to @@ -153,7 +153,7 @@ myFilter[5, ] <- c( 0, 0, 0, 0, 1) # I have added the option to read such filters (or others that you could define on your own) as a parameter of the function. -dotPlot2(MBP1_SACCE, MBP1_YFO, xlab = "SACCE", ylab = "YFO", f = myFilter) +dotPlot2(MBP1_SACCE, MBP1_MYSPE, xlab = "SACCE", ylab = "MYSPE", f = myFilter) # I think the result shows quite nicely how the two sequences are globally # related and where the regions of sequence similarity are. Play with this a bit diff --git a/BIN-ALI-Optimal_sequence_alignment.R b/BIN-ALI-Optimal_sequence_alignment.R index ac173b1..8f0f6a4 100644 --- a/BIN-ALI-Optimal_sequence_alignment.R +++ b/BIN-ALI-Optimal_sequence_alignment.R @@ -52,8 +52,8 @@ toString(s) # using the Biostrings function toString() sel <- myDB$protein$name == "MBP1_SACCE" aaMBP1_SACCE <- AAString(myDB$protein$sequence[sel]) -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel]) ?pairwiseAlignment @@ -61,7 +61,7 @@ aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) # Global optimal alignment with end-gap penalties is default. (like EMBOSS needle) ali1 <- pairwiseAlignment( aaMBP1_SACCE, - aaMBP1_YFO, + aaMBP1_MYSPE, substitutionMatrix = "BLOSUM62", gapOpening = 10, gapExtension = 0.5) @@ -110,7 +110,7 @@ percentID(ali1) # Compare with local optimal alignment (like EMBOSS Water) ali2 <- pairwiseAlignment( aaMBP1_SACCE, - aaMBP1_YFO, + aaMBP1_MYSPE, type = "local", substitutionMatrix = "BLOSUM62", gapOpening = 50, @@ -135,7 +135,7 @@ percentID(ali2) # PART FOUR: APSES Domain annotation by alignment # ============================================================================== -# In this section we define the YFO APSES sequence by performing a global, +# In this section we define the MYSPE APSES sequence by performing a global, # optimal sequence alignment of the yeast domain with the full length protein # sequence of the protein that was the most similar to the yeast APSES domain. # @@ -190,11 +190,11 @@ aaMB1_SACCE_APSES <- AAString(dbGetFeatureSequence(myDB, "MBP1_SACCE", "APSES fold")) -# To align, we need the YFO sequence. Here is it's definition again, just +# To align, we need the MYSPE sequence. Here is it's definition again, just # in case ... -sel <- myDB$protein$name == paste("MBP1_", biCode(YFO), sep = "") -aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) +sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "") +aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel]) # Now let's align these two sequences of very different length without end-gap # penalties using the "overlap" type. "overlap" turns the @@ -203,7 +203,7 @@ aaMBP1_YFO <- AAString(myDB$protein$sequence[sel]) aliApses <- pairwiseAlignment( aaMB1_SACCE_APSES, - aaMBP1_YFO, + aaMBP1_MYSPE, type = "overlap", substitutionMatrix = "BLOSUM62", gapOpening = 10, @@ -237,7 +237,7 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1 # right away and store it in myDB. Copy the code-template below to your # myCode.R file, edit it to replace the placeholder items with your data: # -# - The is to be replaced with the ID of MBP1_YFO +# - The is to be replaced with the ID of MBP1_MYSPE # - The is to be replaced with the ID of "APSES fold" # - and are to be replaced with the coordinates you got above # @@ -277,7 +277,7 @@ myDB$proteinAnnotation[nrow(myDB$proteinAnnotation), ] # If this is correct, save it save(myDB, file = "myDB.02.RData") # Note that it gets a new version number! -# Done with this part. Copy the sequence of the APSES domain of MBP1_ - you +# Done with this part. Copy the sequence of the APSES domain of MBP1_MYSPE - you # need it for the reverse BLAST search, and return to the course Wiki. diff --git a/BIN-FUNC-Domain_annotation.R b/BIN-FUNC-Domain_annotation.R index 9384510..250e0c8 100644 --- a/BIN-FUNC-Domain_annotation.R +++ b/BIN-FUNC-Domain_annotation.R @@ -43,7 +43,7 @@ save(myDB, file = "myDB.04.RData") # save the new version # from your myCode.R script. Here is again the table of feature IDs: myDB$feature[ , c("ID", "name", "description")] -# Add every SMART annotated feaure for MBP1_YFO to the database. If you make +# Add every SMART annotated feaure for MBP1_MYSPE to the database. If you make # mistakes, just reload the latest version (probably "myDB.04.RData"), then run # your corrected annotation script again. Execute ... myDB$proteinAnnotation diff --git a/BIN-YFO.R b/BIN-MYSPE.R similarity index 67% rename from BIN-YFO.R rename to BIN-MYSPE.R index 7cc06d7..7963185 100644 --- a/BIN-YFO.R +++ b/BIN-MYSPE.R @@ -1,15 +1,15 @@ -# BIN-YFO.R +# BIN-MYSPE.R # # Purpose: A Bioinformatics Course: -# R code accompanying the BIN-YFO unit +# R code accompanying the BIN-MYSPE unit # # Version: 1.0 # # Date: 2017 09 21 # Author: Boris Steipe (boris.steipe@utoronto.ca) # -# V 1.0 Final code, after rewriting BLAST parser and creating current YFOlist -# V 0.1 First code copied from BCH441_A03_makeYFOlist.R +# V 1.0 Final code, after rewriting BLAST parser and creating current MYSPElist +# V 0.1 First code copied from BCH441_A03_makeMYSPElist.R # # TODO: # @@ -23,17 +23,17 @@ # going on. That's not how it works ... # # ============================================================================== - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> --------------------------------------- #TOC> 1 Preparations 38 -#TOC> 2 Suitable YFO Species 50 -#TOC> 3 Adopt "YFO" 64 -#TOC> +#TOC> 2 Suitable MYSPE Species 50 +#TOC> 3 Adopt "MYSPE" 64 +#TOC> #TOC> ========================================================================== - + # = 1 Preparations ======================================================== # @@ -47,39 +47,39 @@ if (! exists("myStudentNumber")) { } -# = 2 Suitable YFO Species ================================================ +# = 2 Suitable MYSPE Species ============================================== # In this unit we will select one species from a list of genome sequenced fungi # and write it into your personalized profile file. This species will be called -# "YFO" (Your Favourite Organism) for other learning units and exercises. +# "MYSPE" (Your Favourite Organism) for other learning units and exercises. # A detailed description of the process of compiling the list of genome # sequenced fungi with protein annotations and Mbp1 homologues is in the file -# ABC-makeYFOlist.R +# ABC-makeMYSPElist.R -# Task: Study ABC-makeYFOlist.R, it implements a rather typical workflow of +# Task: Study ABC-makeMYSPElist.R, it implements a rather typical workflow of # selecting and combining data from various public-domain data resources. -# = 3 Adopt "YFO" ========================================================= +# = 3 Adopt "MYSPE" ======================================================= # In the code below, we load the resulting vector of species name, then pick one # of them in a random but reproducible way, determined by your student number. -load("data/YFOspecies.RData") # load the species names -set.seed(myStudentNumber) # seed the random number generator -YFO <- sample(YFOspecies, 1) # pick a species at random +load("data/MYSPEspecies.RData") # load the species names +set.seed(myStudentNumber) # seed the random number generator +MYSPE <- sample(MYSPEspecies, 1) # pick a species at random # write the result to your personalized profile data so we can use the result in # other functions -cat(sprintf("YFO <- \"%s\"\n", YFO), file = ".myProfile.R", append = TRUE) +cat(sprintf("MYSPE <- \"%s\"\n", MYSPE), file = ".myProfile.R", append = TRUE) -YFO # so, which species is it ... ? -biCode(YFO) # and what is it's "BiCode" ... ? +MYSPE # so, which species is it ... ? +biCode(MYSPE) # and what is it's "BiCode" ... ? # Task: Note down the species name and its five letter label on your Student # Wiki user page. Use this species whenever this or future assignments refer -# to YFO. In code, we will automatically load it from your.myProfile.R file. +# to MYSPE. In code, we will automatically load it from your.myProfile.R file. # [END] diff --git a/BIN-PHYLO-Data_preparation.R b/BIN-PHYLO-Data_preparation.R index cd712a5..c376ae5 100644 --- a/BIN-PHYLO-Data_preparation.R +++ b/BIN-PHYLO-Data_preparation.R @@ -41,7 +41,7 @@ list.files(pattern = "myDB.*") load("myDB.05.RData") # The database contains the ten Mbp1 orthologues from the reference species -# and the Mbp1 RBM for YFO. +# and the Mbp1 RBM for MYSPE. # # We will construct a phylogenetic tree from the proteins' APSES domains. # You have annotated their ranges as a feature. diff --git a/BIN-PHYLO-Tree_analysis.R b/BIN-PHYLO-Tree_analysis.R index 3eb0073..918e62a 100644 --- a/BIN-PHYLO-Tree_analysis.R +++ b/BIN-PHYLO-Tree_analysis.R @@ -156,7 +156,7 @@ layout(matrix(1), widths=1.0, heights=1.0) # ... or we can plot the tree so it corresponds as well as possible to a # predefined tip ordering. Here we use the ordering that NCBI Global Tree # returns for the reference species - we have used it above to make the vector -# apsMbp1Names. You inserted your YFO name into that vector - but you should +# apsMbp1Names. You inserted your MYSPE name into that vector - but you should # move it to its correct position in the cladogram. # (Nb. we need to reverse the ordering for the plot. This is why we use the diff --git a/BIN-SEQA-Comparison.R b/BIN-SEQA-Comparison.R index 175183b..b057e08 100644 --- a/BIN-SEQA-Comparison.R +++ b/BIN-SEQA-Comparison.R @@ -39,7 +39,7 @@ help(package = seqinr) # shows the available functions ?computePI # This takes as input a vector of upper-case AA codes -# Let's retrieve the YFO sequence from our datamodel +# Let's retrieve the MYSPE sequence from our datamodel # (assuming it is the last one that was added): db$protein[nrow(db$protein), "sequence"] diff --git a/BIN-Storing_data.R b/BIN-Storing_data.R index fc1f0f1..55a5d0e 100644 --- a/BIN-Storing_data.R +++ b/BIN-Storing_data.R @@ -23,9 +23,9 @@ # going on. That's not how it works ... # # ============================================================================== - + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> ------------------------------------------------------------ #TOC> 1 A Relational Datamodel in R: review 55 @@ -48,9 +48,9 @@ #TOC> 3.3 Create an R script to create the database 522 #TOC> 3.3.1 Check and validate 542 #TOC> 3.4 Task: submit for credit (part 2/2) 583 -#TOC> +#TOC> #TOC> ========================================================================== - + # = 1 A Relational Datamodel in R: review ================================= @@ -203,7 +203,7 @@ str(philDB) # go back, re-read, play with it, and ask for help. This is essential. -# === 1.1.1 completing the database +# === 1.1.1 completing the database # Next I'll add one more person, and create the other two tables: @@ -362,7 +362,7 @@ dbSanitizeSequence(x) # == 2.3 Create a protein table for our data model ========================= -# === 2.3.1 Initialize the database +# === 2.3.1 Initialize the database # The function dbInit contains all the code to return a list of empty @@ -374,7 +374,7 @@ myDB <- dbInit() str(myDB) -# === 2.3.2 Add data +# === 2.3.2 Add data # fromJSON() returns a dataframe that we can readily process to add data @@ -421,7 +421,7 @@ source("./scripts/ABC-createRefDB.R") str(myDB) -# === 2.4.1 Examples of navigating the database +# === 2.4.1 Examples of navigating the database # You can look at the contents of the tables in the usual way we access @@ -468,8 +468,8 @@ myDB$taxonomy$species[sel] # = 3 Add your own data =================================================== -# You have chosen an organism as "YFO", and you final task will be to find the -# protein in YFO that is most similar to yeast Mbp1 and enter its information +# You have chosen an organism as "MYSPE", and you final task will be to find the +# protein in MYSPE that is most similar to yeast Mbp1 and enter its information # into the database. @@ -483,7 +483,7 @@ myDB$taxonomy$species[sel] # Protein BLAST. # - Enter NP_010227 into the "Query Sequence" field. # - Choose "Reference proteins (refseq_protein)" as the "Database". -# - Paste the YFO species name into the "Organism" field. +# - Paste the MYSPE species name into the "Organism" field. # # - Click "BLAST". @@ -493,28 +493,28 @@ myDB$taxonomy$species[sel] # Otherwise, look for the top-hit in the "Alignments" section. In some cases # there will be more than one hit with nearly similar E-values. If this is the -# case for YFO, choose the one with the higher degree of similarity (more +# case for MYSPE, choose the one with the higher degree of similarity (more # identities) with the N-terminus of the query - i.e. the Query sequence of # the first ~ 100 amino acids. # - Follow the link to the protein data page, linked from "Sequence ID". # - From there, in a separate tab, open the link to the taxonomy database page -# for YFO which is linked from the "ORGANISM" record. +# for MYSPE which is linked from the "ORGANISM" record. # == 3.2 Put the information into JSON files =============================== # - Next make a copy of the file "./data/MBP1_SACCE.json" in your project -# directory and give it a new name that corresponds to YFO - e.g. if -# YFO is called "Crptycoccus neoformans", your file should be called +# directory and give it a new name that corresponds to MYSPE - e.g. if +# MYSPE is called "Crptycoccus neoformans", your file should be called # "MBP1_CRYNE.json"; in that case "MBP1_CRYNE" would also be the # "name" of your protein. Open the file in the RStudio editor and replace # all of the MBP1_SACCE data with the corresponding data of your protein. # -# - Do a similar thing for the YFO taxonomy entry. Copy -# "./data/refTaxonomy.json" and make a new file named "YFOtaxonomy.json". -# Create a valid JSON file with only one single entry - that of YFO. +# - Do a similar thing for the MYSPE taxonomy entry. Copy +# "./data/refTaxonomy.json" and make a new file named "MYSPEtaxonomy.json". +# Create a valid JSON file with only one single entry - that of MYSPE. # # - Validate your two files online at https://jsonlint.com/ @@ -529,7 +529,7 @@ myDB$taxonomy$species[sel] # - than add the two commands that add your protein and taxonomy data, # they should look like: # myDB <- dbAddProtein( myDB, fromJSON("MBP1_.json")) -# myDB <- dbAddTaxonomy( myDB, fromJSON("YFOtaxonomy.json")) +# myDB <- dbAddTaxonomy( myDB, fromJSON("MYSPEtaxonomy.json")) # # - save the file and source() it: # source("makeProteinDB.R") @@ -539,12 +539,12 @@ myDB$taxonomy$species[sel] # in any of the JSON files. Later you will add more information ... -# === 3.3.1 Check and validate +# === 3.3.1 Check and validate -# Is your protein named according to the pattern "MBP1_"? It should be. +# Is your protein named according to the pattern "MBP1_MYSPE"? It should be. # And does the taxonomy table contain the systematic name? It should be the same -# that you get when you type YFO into the console. +# that you get when you type MYSPE into the console. # Let's compute sequence lengths on the fly (with the function nchar() ), and # open this with the table viewer function View() @@ -562,18 +562,18 @@ View(cbind(myDB$protein[ , c("ID", "name", "RefSeqID")], myDB$protein$sequence[nrow(myDB$protein)] # If not, don't continue! Fix the problem first. -# Let me repeat: If this does not give you the right sequence of the YFO +# Let me repeat: If this does not give you the right sequence of the MYSPE # Mbp1 homologue, DO NOT CONTINUE. Fix the problem. -# Is that the right taxonomy ID and binomial name for YFO? -sel <- myDB$taxonomy$species == YFO +# Is that the right taxonomy ID and binomial name for MYSPE? +sel <- myDB$taxonomy$species == MYSPE myDB$taxonomy[sel, ] # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE. # Fix the problem first. -# Does this give you the right refseq ID for MBP1_? -sel <- myDB$protein$name == paste0("MBP1_", biCode(YFO)) +# Does this give you the right refseq ID for MBP1_MYSPE? +sel <- myDB$protein$name == paste0("MBP1_", biCode(MYSPE)) myDB$protein$RefSeqID[sel] # If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE. @@ -589,8 +589,8 @@ myDB$protein$RefSeqID[sel] # page on the Student Wiki # - Execute the two commands below and show the result on your submission page -biCode(myDB$taxonomy$species) %in% biCode(YFO) -myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == YFO)] +biCode(myDB$taxonomy$species) %in% biCode(MYSPE) +myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == MYSPE)] # That is all. diff --git a/data/MYSPEspecies.RData b/data/MYSPEspecies.RData new file mode 100644 index 0000000..0fbbccb Binary files /dev/null and b/data/MYSPEspecies.RData differ diff --git a/data/YFOspecies.RData b/data/YFOspecies.RData deleted file mode 100644 index 11cba92..0000000 Binary files a/data/YFOspecies.RData and /dev/null differ