set to active and update submission tasks
This commit is contained in:
parent
cfbfee9dba
commit
03674b57dc
@ -1,25 +1,21 @@
|
|||||||
# tocID <- "BIN-Storing_data.R"
|
# tocID <- "BIN-Storing_data.R"
|
||||||
#
|
#
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
# PATIENCE ... #
|
|
||||||
# Do not yet work wih this code. Updates in progress. Thank you. #
|
|
||||||
# boris.steipe@utoronto.ca #
|
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
#
|
|
||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-Storing_data unit
|
# R code accompanying the BIN-Storing_data unit
|
||||||
#
|
#
|
||||||
# Version: 1.1
|
# Version: 1.2
|
||||||
#
|
#
|
||||||
# Date: 2017 10 08
|
# Date: 2017-10 - 2020-09
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
|
# V 1.2 2020 updates. Finally removed stringAsFactors :-)
|
||||||
# V 1.1 Add instructions to retrieve UniProt ID from ID mapping service.
|
# V 1.1 Add instructions to retrieve UniProt ID from ID mapping service.
|
||||||
# V 1.0 First live version, complete rebuilt. Now using JSON data sources.
|
# V 1.0 First live version, complete rebuilt. Now using JSON data sources.
|
||||||
# V 0.1 First code copied from BCH441_A03_makeYFOlist.R
|
# V 0.1 First code copied from BCH441_A03_makeYFOlist.R
|
||||||
#
|
#
|
||||||
# TODO:
|
# TODO:
|
||||||
#
|
# The sameSpecies() approach is a bit of a hack - can we solve the
|
||||||
|
# species vs. strain issue in a more principled way?
|
||||||
#
|
#
|
||||||
# == HOW TO WORK WITH LEARNING UNIT FILES ======================================
|
# == HOW TO WORK WITH LEARNING UNIT FILES ======================================
|
||||||
#
|
#
|
||||||
@ -33,30 +29,30 @@
|
|||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -----------------------------------------------------------------------
|
#TOC> -----------------------------------------------------------------------
|
||||||
#TOC> 1 A Relational Datamodel in R: review 57
|
#TOC> 1 A Relational Datamodel in R: review 59
|
||||||
#TOC> 1.1 Building a sample database structure 97
|
#TOC> 1.1 Building a sample database structure 99
|
||||||
#TOC> 1.1.1 completing the database 208
|
#TOC> 1.1.1 completing the database 205
|
||||||
#TOC> 1.2 Querying the database 243
|
#TOC> 1.2 Querying the database 238
|
||||||
#TOC> 1.3 Task: submit for credit (part 1/2) 272
|
#TOC> 1.3 Task: submit for credit (part 1/2) 269
|
||||||
#TOC> 2 Implementing the protein datamodel 284
|
#TOC> 2 Implementing the protein datamodel 291
|
||||||
#TOC> 2.1 JSON formatted source data 310
|
#TOC> 2.1 JSON formatted source data 317
|
||||||
#TOC> 2.2 "Sanitizing" sequence data 350
|
#TOC> 2.2 "Sanitizing" sequence data 358
|
||||||
#TOC> 2.3 Create a protein table for our data model 370
|
#TOC> 2.3 Create a protein table for our data model 380
|
||||||
#TOC> 2.3.1 Initialize the database 372
|
#TOC> 2.3.1 Initialize the database 382
|
||||||
#TOC> 2.3.2 Add data 384
|
#TOC> 2.3.2 Add data 394
|
||||||
#TOC> 2.4 Complete the database 404
|
#TOC> 2.4 Complete the database 414
|
||||||
#TOC> 2.4.1 Examples of navigating the database 431
|
#TOC> 2.4.1 Examples of navigating the database 441
|
||||||
#TOC> 2.5 Updating the database 463
|
#TOC> 2.5 Updating the database 473
|
||||||
#TOC> 3 Add your own data 475
|
#TOC> 3 Add your own data 485
|
||||||
#TOC> 3.1 Find a protein 483
|
#TOC> 3.1 Find a protein 493
|
||||||
#TOC> 3.2 Put the information into JSON files 512
|
#TOC> 3.2 Put the information into JSON files 523
|
||||||
#TOC> 3.3 Create an R script to create your own database 535
|
#TOC> 3.3 Create an R script to create your own database 546
|
||||||
#TOC> 3.3.1 Check and validate 555
|
#TOC> 3.3.1 Check and validate 569
|
||||||
#TOC> 3.4 Task: submit for credit (part 2/2) 596
|
#TOC> 3.4 Task: submit for credit (part 2/2) 614
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
@ -111,8 +107,7 @@ x <- data.frame(id = c(1,2),
|
|||||||
name = c("Laozi", "Martin Heidegger"),
|
name = c("Laozi", "Martin Heidegger"),
|
||||||
born = c(NA, "1889"),
|
born = c(NA, "1889"),
|
||||||
died = c("531 BCE", "1976"),
|
died = c("531 BCE", "1976"),
|
||||||
school = c("Daoism", "Phenomenology"),
|
school = c("Daoism", "Phenomenology"))
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
str(x)
|
str(x)
|
||||||
|
|
||||||
# Lets add the dataframe to the philDB list and call it "person" there.
|
# Lets add the dataframe to the philDB list and call it "person" there.
|
||||||
@ -131,7 +126,6 @@ philDB$person$name[1] # Laozi
|
|||||||
# task: Write an expression that returns all "school" entries from the
|
# task: Write an expression that returns all "school" entries from the
|
||||||
# person table.
|
# person table.
|
||||||
|
|
||||||
|
|
||||||
# Let's now add another person. There are several ways to do this, the
|
# Let's now add another person. There are several ways to do this, the
|
||||||
# conceptually cleanest is to create a one-row dataframe with the data, and
|
# conceptually cleanest is to create a one-row dataframe with the data, and
|
||||||
# rbind() it to the existing dataframe. Doing this, we must take care that
|
# rbind() it to the existing dataframe. Doing this, we must take care that
|
||||||
@ -145,8 +139,14 @@ rbind(x, y)
|
|||||||
rbind(x, y)
|
rbind(x, y)
|
||||||
|
|
||||||
# All clear? That's good - this behaviour provides us with a sanity check on the
|
# All clear? That's good - this behaviour provides us with a sanity check on the
|
||||||
# operation.
|
# operation. Incidentally: rbind(x, y) did NOT change the table ...
|
||||||
|
x
|
||||||
|
# rather rbind() had the chnaged table as its return value and that's why it
|
||||||
|
# was printed. To actually change the table, you need to ASSIGN the return
|
||||||
|
# value of rbind() ... like so:
|
||||||
|
x <- rbind(x, y)
|
||||||
|
|
||||||
|
# To continue ...
|
||||||
(x <- data.frame(id = 2,
|
(x <- data.frame(id = 2,
|
||||||
name = "Zhuangzi",
|
name = "Zhuangzi",
|
||||||
born = "369 BCE",
|
born = "369 BCE",
|
||||||
@ -159,21 +159,13 @@ philDB$person <- rbind(philDB$person, x)
|
|||||||
# ... and examine the result:
|
# ... and examine the result:
|
||||||
str(philDB)
|
str(philDB)
|
||||||
|
|
||||||
# Now one thing you should note is that we had forgotten to declare
|
# We made a serious error in our data! Did you spot it?
|
||||||
# stringsAsFactors = FALSE when we created x - but this did not damage
|
|
||||||
# the database. This is because the existing columns had type chr and the
|
|
||||||
# implicit coercion, e.g. ...
|
|
||||||
as.character(x$name)
|
|
||||||
# happened to do the right thing. Don't rely on that. The Right Way is to
|
|
||||||
# turn factors off, even when you are making just a single row.
|
|
||||||
|
|
||||||
# But we made a serious error in our data! Did you spot it?
|
|
||||||
#
|
#
|
||||||
# If not, look at ...
|
# If not, look at ...
|
||||||
philDB$person$id
|
philDB$person$id
|
||||||
# ... does that look oK?
|
# ... does that look oK?
|
||||||
#
|
#
|
||||||
# Absolutely not! id is the Primary Key in the table, and it has to be
|
# Absolutely not! "id" is the Primary Key in the table, and it has to be
|
||||||
# unique. How can we guarantee it to be unique? Certainly not when we
|
# unique. How can we guarantee it to be unique? Certainly not when we
|
||||||
# enter it by hand. We need a function that generates a unique key. Here's
|
# enter it by hand. We need a function that generates a unique key. Here's
|
||||||
# a simple version, without any error-checking. It assumes that a column
|
# a simple version, without any error-checking. It assumes that a column
|
||||||
@ -202,16 +194,15 @@ x <- data.frame(id = autoincrement(philDB$person),
|
|||||||
name = "Zhuangzi",
|
name = "Zhuangzi",
|
||||||
born = "369 BCE",
|
born = "369 BCE",
|
||||||
died = "286 BCE",
|
died = "286 BCE",
|
||||||
school = "Daoism",
|
school = "Daoism")
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
philDB$person <- rbind(philDB$person, x)
|
philDB$person <- rbind(philDB$person, x)
|
||||||
str(philDB)
|
str(philDB)
|
||||||
|
|
||||||
# So far so good. Be honest with yourself. If you didn't follow any of this,
|
# So far so good. Be honest with yourself. If you didn't follow any of this,
|
||||||
# go back, re-read, play with it, and ask for help. This is essential.
|
# go back, re-read, play with it, and ask for help. These are the foundations.
|
||||||
|
|
||||||
|
|
||||||
# === 1.1.1 completing the database
|
# === 1.1.1 completing the database
|
||||||
|
|
||||||
|
|
||||||
# Next I'll add one more person, and create the other two tables:
|
# Next I'll add one more person, and create the other two tables:
|
||||||
@ -220,11 +211,10 @@ x <- data.frame(id = autoincrement(philDB$person),
|
|||||||
name = "Kongzi",
|
name = "Kongzi",
|
||||||
born = "551 BCE",
|
born = "551 BCE",
|
||||||
died = "479 BCE",
|
died = "479 BCE",
|
||||||
school = "Confucianism",
|
school = "Confucianism")
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
philDB$person <- rbind(philDB$person, x)
|
philDB$person <- rbind(philDB$person, x)
|
||||||
|
|
||||||
|
# a table of major works ...
|
||||||
philDB[["books"]] <- data.frame(id = 1:5,
|
philDB[["books"]] <- data.frame(id = 1:5,
|
||||||
title = c("Zhuangzi",
|
title = c("Zhuangzi",
|
||||||
"Analects",
|
"Analects",
|
||||||
@ -235,13 +225,12 @@ philDB[["books"]] <- data.frame(id = 1:5,
|
|||||||
"220 BCE",
|
"220 BCE",
|
||||||
"1927",
|
"1927",
|
||||||
"530 BCE",
|
"530 BCE",
|
||||||
"1959"),
|
"1959"))
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
|
|
||||||
|
# a "join" table that links works and their author ...
|
||||||
philDB[["works"]] <- data.frame(id = 1:5,
|
philDB[["works"]] <- data.frame(id = 1:5,
|
||||||
personID = c(3, 4, 2, 1, 2),
|
personID = c(3, 4, 2, 1, 2),
|
||||||
bookID = c(1, 2, 3, 4, 5),
|
bookID = c(1, 2, 3, 4, 5))
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
|
|
||||||
str(philDB)
|
str(philDB)
|
||||||
|
|
||||||
@ -261,8 +250,10 @@ philDB$books$title
|
|||||||
# author:
|
# author:
|
||||||
(sel <- order(philDB$person$name)) # check out ?order and describe to
|
(sel <- order(philDB$person$name)) # check out ?order and describe to
|
||||||
# someone you know what it does, so that
|
# someone you know what it does, so that
|
||||||
# you are sure you understand it.
|
# you are sure you understand it. Its
|
||||||
(pID <- philDB$person$id[sel])
|
# indirection can be a bit tricky to
|
||||||
|
# understand.
|
||||||
|
( pID <- philDB$person$id[sel] )
|
||||||
sel <- numeric() # initialize the vector
|
sel <- numeric() # initialize the vector
|
||||||
for (ID in pID) {
|
for (ID in pID) {
|
||||||
sel <- which(philDB$works$personID == ID) # get all rows for which
|
sel <- which(philDB$works$personID == ID) # get all rows for which
|
||||||
@ -278,13 +269,23 @@ for (ID in pID) {
|
|||||||
# == 1.3 Task: submit for credit (part 1/2) ================================
|
# == 1.3 Task: submit for credit (part 1/2) ================================
|
||||||
|
|
||||||
|
|
||||||
# Write and submit code that adds another philosopher to the datamodel:
|
# Write code that adds another philosopher to the datamodel:
|
||||||
# Immanuel Kant, (1724 - 1804), Enlightenment Philosophy.
|
# Immanuel Kant, (1724 - 1804), Enlightenment Philosophy.
|
||||||
# Works: Critique of Pure Reason (1781), Critique of Judgement (1790)
|
# Works: Critique of Pure Reason (1781), Critique of Judgement (1790)
|
||||||
# Write and submit code that lists the books in alphabetical order,
|
# Paste your code into your submission page. Enclose it in <pre> ... </pre>
|
||||||
# followed by the author and the year of publishing. Format your output like:
|
# tags.
|
||||||
# "Analects" - Kongzi (220 BCE)
|
#
|
||||||
# Show the result.
|
# Write and submit code that lists the philosophical schools in
|
||||||
|
# alphabetical order, and the books associated with them, also
|
||||||
|
# alphabetically. Format your output like:
|
||||||
|
# Confucianism
|
||||||
|
# Analects - (220 BCE)
|
||||||
|
# Daoism
|
||||||
|
# Daodejing - (530 BCE)
|
||||||
|
# ... etc.
|
||||||
|
#
|
||||||
|
# Show the output of your code. Make sure the code itself is enclosed
|
||||||
|
# in <pre> ... </pre> tags.
|
||||||
|
|
||||||
|
|
||||||
# = 2 Implementing the protein datamodel ==================================
|
# = 2 Implementing the protein datamodel ==================================
|
||||||
@ -296,13 +297,13 @@ for (ID in pID) {
|
|||||||
# mistakes.
|
# mistakes.
|
||||||
# - Data needs to be captured in a human-readable form so it can be verified
|
# - Data needs to be captured in a human-readable form so it can be verified
|
||||||
# and validated;
|
# and validated;
|
||||||
# - Some aspects of the database should _never_ be done by hand because they
|
# - Some aspects of the database should _never_ be done by hand because
|
||||||
# errors are easy to make and hard to see. That essentially includes
|
# errors are easy to make and hard to see. That essentially includes
|
||||||
# every operation that has to do with abstract, primary keys;
|
# every operation that has to do with abstract, primary keys;
|
||||||
# - Elementary operations we need to support are: adding data, selecting
|
# - Elementary operations we need to support are: adding data, selecting
|
||||||
# data, modifying data and deleting data.
|
# data, modifying data and deleting data.
|
||||||
|
|
||||||
# We will therefore construct our database in the following way:
|
# We will therefore construct our protein database in the following way:
|
||||||
# - For each table, we will keep the primary information in JSON files. There
|
# - For each table, we will keep the primary information in JSON files. There
|
||||||
# it is easy to read, edit if needed, and modify it.
|
# it is easy to read, edit if needed, and modify it.
|
||||||
# - We will use simple scripts to read the JSON data and assemble it in
|
# - We will use simple scripts to read the JSON data and assemble it in
|
||||||
@ -334,8 +335,9 @@ file.show("./data/MBP1_SACCE.json")
|
|||||||
# sanitize the sequence at some point. But since we need to do that
|
# sanitize the sequence at some point. But since we need to do that
|
||||||
# anyway, it is easier to see the whole sequence if we store it in chunks.
|
# anyway, it is easier to see the whole sequence if we store it in chunks.
|
||||||
|
|
||||||
# Let's make sure the "jsonlite" package exists on your computer, then we'll
|
# The .utilities.R script that get's loaded whenever you open this project
|
||||||
# explore how it reads this data.
|
# has already made sure the "jsonlite" package exists on your computer. This
|
||||||
|
# package supports our work with .json formatted data.
|
||||||
|
|
||||||
if (! requireNamespace("jsonlite", quietly = TRUE)) {
|
if (! requireNamespace("jsonlite", quietly = TRUE)) {
|
||||||
install.packages("jsonlite")
|
install.packages("jsonlite")
|
||||||
@ -365,17 +367,19 @@ dbSanitizeSequence
|
|||||||
dbSanitizeSequence(c("GAA", "ttc"))
|
dbSanitizeSequence(c("GAA", "ttc"))
|
||||||
dbSanitizeSequence("MsnQ00%0 I@#>YSary S
|
dbSanitizeSequence("MsnQ00%0 I@#>YSary S
|
||||||
G1 V2DV3Y>")
|
G1 V2DV3Y>")
|
||||||
x <- " 1 msnqiysary sgvdvyefih stgsimkrkk ddwvnathil kaanfakakr trilekevlk
|
x <- "
|
||||||
|
1 msnqiysary sgvdvyefih stgsimkrkk ddwvnathil kaanfakakr trilekevlk
|
||||||
61 ethekvqggf gkyqgtwvpl niakqlaekf svydqlkplf dftqtdgsas pppapkhhha
|
61 ethekvqggf gkyqgtwvpl niakqlaekf svydqlkplf dftqtdgsas pppapkhhha
|
||||||
121 skvdrkkair sastsaimet krnnkkaeen qfqsskilgn ptaaprkrgr pvgstrgsrr
|
121 skvdrkkair sastsaimet krnnkkaeen qfqsskilgn ptaaprkrgr pvgstrgsrr
|
||||||
..." # copy/paste from Genbank
|
...
|
||||||
|
" # copy/paste from Genbank
|
||||||
|
|
||||||
dbSanitizeSequence(x)
|
dbSanitizeSequence(x)
|
||||||
|
|
||||||
|
|
||||||
# == 2.3 Create a protein table for our data model =========================
|
# == 2.3 Create a protein table for our data model =========================
|
||||||
|
|
||||||
# === 2.3.1 Initialize the database
|
# === 2.3.1 Initialize the database
|
||||||
|
|
||||||
|
|
||||||
# The function dbInit contains all the code to return a list of empty
|
# The function dbInit contains all the code to return a list of empty
|
||||||
@ -387,7 +391,7 @@ myDB <- dbInit()
|
|||||||
str(myDB)
|
str(myDB)
|
||||||
|
|
||||||
|
|
||||||
# === 2.3.2 Add data
|
# === 2.3.2 Add data
|
||||||
|
|
||||||
|
|
||||||
# fromJSON() returns a dataframe that we can readily process to add data
|
# fromJSON() returns a dataframe that we can readily process to add data
|
||||||
@ -434,7 +438,7 @@ source("./scripts/ABC-createRefDB.R")
|
|||||||
str(myDB)
|
str(myDB)
|
||||||
|
|
||||||
|
|
||||||
# === 2.4.1 Examples of navigating the database
|
# === 2.4.1 Examples of navigating the database
|
||||||
|
|
||||||
|
|
||||||
# You can look at the contents of the tables in the usual way we access
|
# You can look at the contents of the tables in the usual way we access
|
||||||
@ -481,9 +485,9 @@ myDB$taxonomy$species[sel]
|
|||||||
# = 3 Add your own data ===================================================
|
# = 3 Add your own data ===================================================
|
||||||
|
|
||||||
|
|
||||||
# You have chosen an organism as "MYSPE", and you final task will be to find the
|
# You have defined a genome sequence fungus as "MYSPE", and your final task
|
||||||
# protein in MYSPE that is most similar to yeast Mbp1 and enter its information
|
# will be to find the protein in MYSPE that is most similar to yeast Mbp1, and
|
||||||
# into the database.
|
# to enter its information into the database.
|
||||||
|
|
||||||
|
|
||||||
# == 3.1 Find a protein ====================================================
|
# == 3.1 Find a protein ====================================================
|
||||||
@ -495,22 +499,23 @@ myDB$taxonomy$species[sel]
|
|||||||
# - Navigate to https://blast.ncbi.nlm.nih.gov/Blast.cgi and click on
|
# - Navigate to https://blast.ncbi.nlm.nih.gov/Blast.cgi and click on
|
||||||
# Protein BLAST.
|
# Protein BLAST.
|
||||||
# - Enter NP_010227 into the "Query Sequence" field.
|
# - Enter NP_010227 into the "Query Sequence" field.
|
||||||
# - Choose "Reference proteins (refseq_protein)" as the "Database".
|
# - Choose "Reference proteins (refseq_protein)" as the "Database" in the
|
||||||
|
# "Choose Search Set" section.
|
||||||
# - Paste the MYSPE species name into the "Organism" field.
|
# - Paste the MYSPE species name into the "Organism" field.
|
||||||
#
|
#
|
||||||
# - Click "BLAST".
|
# - Click the "BLAST" button.
|
||||||
|
|
||||||
# You will probably get more than one result. If you get dozens of results or
|
# You will probably get more than one result. If you get dozens of results or
|
||||||
# more, or if you get no results, something went wrong. Reconsider whether the
|
# more, or if you get no results, something went wrong. Reconsider whether the
|
||||||
# problem was with your input, try something different, or ask for help.
|
# problem was with your input, try something different, or ask for help.
|
||||||
|
|
||||||
# Otherwise, look for the top-hit in the "Alignments" section. In some cases
|
# Otherwise, look for the top-hit in the "Descriptions" tab In some cases
|
||||||
# there will be more than one hit with nearly similar E-values. If this is the
|
# there will be more than one hit with nearly similar E-values. If this is the
|
||||||
# case for MYSPE, choose the one with the higher degree of similarity (more
|
# case for MYSPE, choose the one with the higher degree of similarity (more
|
||||||
# identities) with the N-terminus of the query - i.e. the Query sequence of
|
# identities) with the N-terminus of the query - i.e. the Query sequence of
|
||||||
# the first ~ 100 amino acids.
|
# the first ~ 100 amino acids.
|
||||||
|
|
||||||
# - Follow the link to the protein data page, linked from "Sequence ID".
|
# - Follow the link to the protein data page, linked from "Accession".
|
||||||
# - From there, in a separate tab, open the link to the taxonomy database page
|
# - From there, in a separate tab, open the link to the taxonomy database page
|
||||||
# for MYSPE which is linked from the "ORGANISM" record.
|
# for MYSPE which is linked from the "ORGANISM" record.
|
||||||
|
|
||||||
@ -550,15 +555,18 @@ myDB$taxonomy$species[sel]
|
|||||||
# myDB <- dbAddProtein( myDB, fromJSON("MBP1_<code>.json"))
|
# myDB <- dbAddProtein( myDB, fromJSON("MBP1_<code>.json"))
|
||||||
# myDB <- dbAddTaxonomy( myDB, fromJSON("MYSPEtaxonomy.json"))
|
# myDB <- dbAddTaxonomy( myDB, fromJSON("MYSPEtaxonomy.json"))
|
||||||
#
|
#
|
||||||
# - save the file and source() it:
|
# - save the file in the ./myScripts/ folder and source() it:
|
||||||
# source("makeProteinDB.R")
|
# source("./myScripts/makeProteinDB.R")
|
||||||
|
|
||||||
# This command needs to be executed whenever you recreate
|
# This command needs to be executed whenever you recreate
|
||||||
# the database. In particular, whenver you have added or modified data
|
# the database. In particular, whenver you have added or modified data
|
||||||
# in any of the JSON files. Later you will add more information ...
|
# in any of the JSON files. Later you will add more information ...
|
||||||
|
|
||||||
|
# Remember this principle. Don't rely on objects in memory - you might
|
||||||
|
# "break" them with a code experiment. But always have a script with
|
||||||
|
# which you can create what you need.
|
||||||
|
|
||||||
# === 3.3.1 Check and validate
|
# === 3.3.1 Check and validate
|
||||||
|
|
||||||
|
|
||||||
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
|
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
|
||||||
@ -585,7 +593,11 @@ myDB$protein$sequence[nrow(myDB$protein)]
|
|||||||
# Mbp1 homologue, DO NOT CONTINUE. Fix the problem.
|
# Mbp1 homologue, DO NOT CONTINUE. Fix the problem.
|
||||||
|
|
||||||
# Is that the right taxonomy ID and binomial name for MYSPE?
|
# Is that the right taxonomy ID and binomial name for MYSPE?
|
||||||
sel <- myDB$taxonomy$species == MYSPE
|
# This question may be a bit non-trivial ... MYSPE is a species, but the
|
||||||
|
# recorded taxonomy ID may be a strain. We have a utility function,
|
||||||
|
# sameSpecies() that normalizes organism name to the binomial species.
|
||||||
|
#
|
||||||
|
sel <- sameSpecies(myDB$taxonomy$species, MYSPE)
|
||||||
myDB$taxonomy[sel, ]
|
myDB$taxonomy[sel, ]
|
||||||
|
|
||||||
# If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE.
|
# If not, or if the result was "<0 rows> ... " then DO NOT CONTINUE.
|
||||||
@ -605,11 +617,13 @@ myDB$protein$RefSeqID[sel]
|
|||||||
# - On your submission page, note the E-value of your protein and link
|
# - On your submission page, note the E-value of your protein and link
|
||||||
# to its NCBI protein database page.
|
# to its NCBI protein database page.
|
||||||
# - Copy and paste the contents of your two JSON files on your submission
|
# - Copy and paste the contents of your two JSON files on your submission
|
||||||
# page on the Student Wiki
|
# page on the Student Wiki. Make sure they are enclosed in <pre> ... </pre>
|
||||||
# - Execute the two commands below and show the result on your submission page
|
# tags.
|
||||||
|
# - Execute the three commands below and show the result on your submission page
|
||||||
|
|
||||||
biCode(myDB$taxonomy$species) %in% biCode(MYSPE)
|
biCode(myDB$taxonomy$species) %in% biCode(MYSPE)
|
||||||
myDB$protein$taxonomyID %in% myDB$taxonomy$ID[(myDB$taxonomy$species == MYSPE)]
|
sel <- sameSpecies(myDB$taxonomy$species, MYSPE)
|
||||||
|
myDB$protein$taxonomyID %in% myDB$taxonomy$ID[sel]
|
||||||
|
|
||||||
# That is all.
|
# That is all.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user