New unit, and update to annotation file logic
This commit is contained in:
parent
2433bc8652
commit
206e2e14bb
@ -3,12 +3,14 @@
|
|||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.
|
# R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.
|
||||||
#
|
#
|
||||||
# Version: 1.0.1
|
# Version: 1.1
|
||||||
#
|
#
|
||||||
# Date: 2017 09 - 2017 10
|
# Date: 2017 09 - 2017 11
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.1 Update annotation file logic - it could already have been
|
||||||
|
# prepared in the BIN-FUNC-Annotation unit.
|
||||||
# 1.0.1 bugfix
|
# 1.0.1 bugfix
|
||||||
# 1.0 First 2017 live version.
|
# 1.0 First 2017 live version.
|
||||||
# 0.1 First code copied from 2016 material.
|
# 0.1 First code copied from 2016 material.
|
||||||
@ -27,14 +29,18 @@
|
|||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -------------------------------------------------------
|
#TOC> --------------------------------------------------------------------
|
||||||
#TOC> 1 Prepare 45
|
#TOC> 1 Prepare 48
|
||||||
#TOC> 2 Biostrings Pairwise Alignment 53
|
#TOC> 2 Biostrings Pairwise Alignment 56
|
||||||
#TOC> 2.1 Optimal global alignment 70
|
#TOC> 2.1 Optimal global alignment 73
|
||||||
#TOC> 2.2 Optimal local alignment 133
|
#TOC> 2.2 Optimal local alignment 136
|
||||||
#TOC> 3 APSES Domain annotation by alignment 157
|
#TOC> 3 APSES Domain annotation by alignment 160
|
||||||
#TOC> 4 Update your database script 238
|
#TOC> 4 Update your database script 241
|
||||||
|
#TOC> 4.1 Preparing an annotation file ... 247
|
||||||
|
#TOC> 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit 249
|
||||||
|
#TOC> 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit 292
|
||||||
|
#TOC> 4.2 Execute and Validate 316
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -236,38 +242,90 @@ aliApses@subject@range@start + aliApses@subject@range@width - 1
|
|||||||
|
|
||||||
|
|
||||||
# Since we have this feature defined now, we can create a feature annotation
|
# Since we have this feature defined now, we can create a feature annotation
|
||||||
# right away and store it in myDB. Follow the following steps carefully:
|
# right away and store it in myDB.
|
||||||
|
|
||||||
|
# == 4.1 Preparing an annotation file ... ==================================
|
||||||
|
#
|
||||||
|
# === 4.1.1 If you HAVE NOT done the BIN-FUNC-Annotation unit
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# - Make a copy of the file "./data/refAnnotations.json" in your project
|
# You DON'T already have a file called "<MYSPE>-Annotations.json" in the
|
||||||
# directory and give it a new name that corresponds to MYSPE - e.g. if
|
# ./data/ directory:
|
||||||
# MYSPE is called "Crptycoccus neoformans", your file should be called
|
#
|
||||||
# "CRYNEAnnotations.json"; in that case "MBP1_CRYNE" would also be the
|
# - Make a copy of the file "./data/refAnnotations.json" and put it in your
|
||||||
# "name" of your protein. Open the file in the RStudio editor and delete
|
# project directory.
|
||||||
# all annotations but one for an "APSES fold". Edit that annotation to
|
#
|
||||||
# correspond to the your MBP1_MYSPE protein and enter the start end end
|
# - Give it a name that is structured like "<MYSPE>-Annotations.json" - e.g.
|
||||||
# coordinates you have just discovered for the APSES domain in your
|
# if MYSPE is called "Crptycoccus neoformans", your file should be called
|
||||||
# sequence. Save your file.
|
# "CRYNE-Annotations.json" (and the "name" of your Mbp1 orthologue is
|
||||||
|
# "MBP1_CRYNE").
|
||||||
|
#
|
||||||
|
# - Open the file in the RStudio editor and delete all blocks for
|
||||||
|
# the Mbp1 protein annotations except the first one.
|
||||||
|
#
|
||||||
|
# - From that block, delete all lines except for the line that says:
|
||||||
|
#
|
||||||
|
# {"pName" : "MBP1_SACCE", "fName" : "APSES fold", "start" : "4", "end" : "102"},
|
||||||
|
#
|
||||||
|
# - Then delete the comma at the end of the line (your file will just have
|
||||||
|
# this one annotation).
|
||||||
|
#
|
||||||
|
# - Edit that annotation: change MBP1_SACCE to MBP1_<MYSPE> and change the
|
||||||
|
# "start" and "end" features to the coordinates you just discovered for the
|
||||||
|
# APSES domain in your sequence.
|
||||||
|
#
|
||||||
|
# - Save your file.
|
||||||
|
#
|
||||||
|
## - Validate your file online at https://jsonlint.com/
|
||||||
|
#
|
||||||
|
# - Update your "makeProteinDB.R" script to load your new
|
||||||
|
# annotation when you recreate the database. Open the script in the
|
||||||
|
# RStudio editor, and add the following command at the end:
|
||||||
|
#
|
||||||
|
# myDB <- dbAddAnnotation(myDB, fromJSON("<MYSPE>-Annotations.json"))
|
||||||
|
#
|
||||||
|
# - save and close the file.
|
||||||
|
#
|
||||||
|
# Then SKIP the next section.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# === 4.1.2 If you HAVE done the BIN-FUNC-Annotation unit
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# You DO already have a file called "<MYSPE>-Annotations.json" in the
|
||||||
|
# ./data/ directory:
|
||||||
|
#
|
||||||
|
# - Open the file in the RStudio editor.
|
||||||
|
#
|
||||||
|
# - Below the last feature lines (but before the closing "]") add the
|
||||||
|
# following feature line (without the "#")
|
||||||
|
#
|
||||||
|
# {"pName" : "MBP1_SACCE", "fName" : "APSES fold", "start" : "4", "end" : "102"}
|
||||||
|
#
|
||||||
|
# - Edit that annotation: change MBP1_SACCE to MBP1_<MYSPE> and change the
|
||||||
|
# "start" and "end" features to the coordinates you just discovered for the
|
||||||
|
# APSES domain in your sequence.
|
||||||
|
#
|
||||||
|
# - Add a comma after the preceding feature line.
|
||||||
|
#
|
||||||
|
# - Save your file.
|
||||||
#
|
#
|
||||||
# - Validate your file online at https://jsonlint.com/
|
# - Validate your file online at https://jsonlint.com/
|
||||||
#
|
#
|
||||||
# - Next, you need to update your "makeProteinDB.R" script to load the
|
|
||||||
# annotation when you recreate the database. Open the script in the
|
|
||||||
# RStudio ediotr, and add the following command at the end:
|
|
||||||
#
|
#
|
||||||
# myDB <- dbAddAnnotation(myDB, fromJSON("<MYSPE>Annotations.json"))
|
# == 4.2 Execute and Validate ==============================================
|
||||||
#
|
#
|
||||||
# - save the file and source() it:
|
# - source() your database creation script:
|
||||||
#
|
#
|
||||||
# source("makeProteinDB.R")
|
# source("makeProteinDB.R")
|
||||||
#
|
#
|
||||||
# This should run without errors or warnings. If it doesn't work and you
|
# This should run without errors or warnings. If it doesn't work and you
|
||||||
# can't figure out quickly what's happeneing, ask on the mailing list for
|
# can't figure out quickly what's happening, ask on the mailing list for
|
||||||
# help.
|
# help.
|
||||||
#
|
#
|
||||||
# - Confirm
|
# - Confirm
|
||||||
# The following commands should retrieve the correct start and end
|
# The following commands should retrieve the correct start and end
|
||||||
# coordinates for the MBP1_MYSPE APSES domain:
|
# coordinates and sequence of the MBP1_MYSPE APSES domain:
|
||||||
|
|
||||||
sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
|
sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
|
||||||
aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
|
aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
|
||||||
@ -276,7 +334,7 @@ aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
|
|||||||
(proID <- myDB$protein$ID[myDB$protein$name == "MBP1_<MYSSPE>"]) # <<< EDIT
|
(proID <- myDB$protein$ID[myDB$protein$name == "MBP1_<MYSSPE>"]) # <<< EDIT
|
||||||
(ftrID <- myDB$feature$ID[myDB$feature$name == "APSES fold"])
|
(ftrID <- myDB$feature$ID[myDB$feature$name == "APSES fold"])
|
||||||
(fanID <- myDB$annotation$ID[myDB$annotation$proteinID == proID &
|
(fanID <- myDB$annotation$ID[myDB$annotation$proteinID == proID &
|
||||||
myDB$annotation$featureID == ftrID])
|
myDB$annotation$featureID == ftrID])
|
||||||
(start <- myDB$annotation$start[myDB$annotation$ID == fanID])
|
(start <- myDB$annotation$start[myDB$annotation$ID == fanID])
|
||||||
(end <- myDB$annotation$end[myDB$annotation$ID == fanID])
|
(end <- myDB$annotation$end[myDB$annotation$ID == fanID])
|
||||||
(apses <- substr(myDB$protein$sequence[myDB$protein$ID == proID],
|
(apses <- substr(myDB$protein$sequence[myDB$protein$ID == proID],
|
||||||
@ -284,5 +342,4 @@ aaMBP1_MYSPE <- AAString(myDB$protein$sequence[sel])
|
|||||||
end))
|
end))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# [END]
|
# [END]
|
||||||
|
@ -3,65 +3,160 @@
|
|||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-FUNC-Domain_annotation unit.
|
# R code accompanying the BIN-FUNC-Domain_annotation unit.
|
||||||
#
|
#
|
||||||
# Version: 0.1
|
# Version: 1.0
|
||||||
#
|
#
|
||||||
# Date: 2017 08 28
|
# Date: 2017 11 13
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
|
# 1.0 Live version 2017
|
||||||
# 0.1 First code copied from 2016 material.
|
# 0.1 First code copied from 2016 material.
|
||||||
|
|
||||||
#
|
#
|
||||||
# TODO:
|
# TODO:
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
||||||
|
#
|
||||||
# If there are portions you don't understand, use R's help system, Google for an
|
# If there are portions you don't understand, use R's help system, Google for an
|
||||||
# answer, or ask your instructor. Don't continue if you don't understand what's
|
# answer, or ask your instructor. Don't continue if you don't understand what's
|
||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
# = 1 SMART Domain annotations
|
|
||||||
|
|
||||||
# Plot domain annotations as colored rectangles on a sequence.
|
#TOC> ==========================================================================
|
||||||
# Step one: enter your domain annotations as features into the database.
|
#TOC>
|
||||||
#
|
#TOC> Section Title Line
|
||||||
# == Update myDB
|
#TOC> -----------------------------------------------------------------------------------
|
||||||
# If the reference database has changed, we need to merge it in with myDB.
|
#TOC> 1 Update your database script 41
|
||||||
load("myDB.03.RData") # load the previous version of myDB
|
#TOC> 1.1 Preparing an annotation file ... 47
|
||||||
# the new version of refDB was loaded when you
|
#TOC> 1.1.1 If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit 49
|
||||||
# pulled it from GitHub, and then typed init()
|
#TOC> 1.1.2 If you HAVE done the BIN-ALI-Optimal_sequence_alignment 93
|
||||||
myDB <- dbMerge(myDB) # merge the two databases and update myDB with the result
|
#TOC> 1.2 Execute and Validate 119
|
||||||
save(myDB, file = "myDB.04.RData") # save the new version
|
#TOC> 2 Plot Annotations 144
|
||||||
|
#TOC>
|
||||||
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
# == Update myDB
|
|
||||||
|
|
||||||
# Every annotated feature requires its own entry in the database. You have added
|
# = 1 Update your database script =========================================
|
||||||
# the feature for the "APSES fold" before, so you can copy and edit that code
|
|
||||||
# from your myCode.R script. Here is again the table of feature IDs:
|
|
||||||
myDB$feature[ , c("ID", "name", "description")]
|
|
||||||
|
|
||||||
# Add every SMART annotated feaure for MBP1_MYSPE to the database. If you make
|
|
||||||
# mistakes, just reload the latest version (probably "myDB.04.RData"), then run
|
# Since you have recorded domain features at the SMART database, we can store
|
||||||
# your corrected annotation script again. Execute ...
|
# the feature annotations in myDB.
|
||||||
myDB$proteinAnnotation
|
|
||||||
# ... to confirm.
|
# == 1.1 Preparing an annotation file ... ==================================
|
||||||
#
|
#
|
||||||
# Once you are sure your annotations are correct, save the database again.
|
# === 1.1.1 If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit
|
||||||
save(myDB, file = "myDB.05.RData") # save the new version
|
|
||||||
#
|
#
|
||||||
# Now let's plot the annotations.
|
|
||||||
#
|
#
|
||||||
|
# You DON'T already have a file called "<MYSPE>-Annotations.json" in the
|
||||||
|
# ./data/ directory:
|
||||||
|
#
|
||||||
|
# - Make a copy of the file "./data/refAnnotations.json" and put it in your
|
||||||
|
# project directory.
|
||||||
|
#
|
||||||
|
# - Give it a name that is structured like "<MYSPE>-Annotations.json" - e.g.
|
||||||
|
# if MYSPE is called "Crptycoccus neoformans", your file should be called
|
||||||
|
# "CRYNE-Annotations.json" (and the "name" of your Mbp1 orthologue is
|
||||||
|
# "MBP1_CRYNE").
|
||||||
|
#
|
||||||
|
# - Open the file in the RStudio editor and delete all blocks for
|
||||||
|
# the Mbp1 protein annotations except the first one.
|
||||||
|
#
|
||||||
|
# - From that block, delete all lines that have annotations you did not
|
||||||
|
# find in SMART for MBP1_MYSPE.
|
||||||
|
#
|
||||||
|
# - Make enough copies of the "Ankyrin fold" and "low complexity" region
|
||||||
|
# lines to have a line for each feature you found.
|
||||||
|
#
|
||||||
|
# - Then delete the comma at the end of the last line.
|
||||||
|
#
|
||||||
|
# - Edit the annotations: change MBP1_SACCE to MBP1_<MYSPE> everywhere
|
||||||
|
# and change the "start" and "end" features to the coordinates you
|
||||||
|
# recorded in the SMART database.
|
||||||
|
#
|
||||||
|
# - Save your file.
|
||||||
|
#
|
||||||
|
# - Validate your file online at https://jsonlint.com/
|
||||||
|
#
|
||||||
|
# - Update your "makeProteinDB.R" script to load your new
|
||||||
|
# annotation when you recreate the database. Open the script in the
|
||||||
|
# RStudio editor, and add the following command at the end:
|
||||||
|
#
|
||||||
|
# myDB <- dbAddAnnotation(myDB, fromJSON("<MYSPE>-Annotations.json"))
|
||||||
|
#
|
||||||
|
# - save and close the file.
|
||||||
|
#
|
||||||
|
# Then SKIP the next section.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# === 1.1.2 If you HAVE done the BIN-ALI-Optimal_sequence_alignment
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# You DO already have a file called "<MYSPE>-Annotations.json" in the
|
||||||
|
# ./data/ directory:
|
||||||
|
#
|
||||||
|
# - Open the file in the RStudio editor.
|
||||||
|
#
|
||||||
|
# - Make as many copies of the "APSES fold" line as you have found
|
||||||
|
# features in SMART.
|
||||||
|
#
|
||||||
|
# - Add a comma after every line except for the last one
|
||||||
|
#
|
||||||
|
# - Edit the annotations but include only features that are in the
|
||||||
|
# myDB$feature table. Check which features are in the databse by executing
|
||||||
|
#
|
||||||
|
# myDB$feature$name
|
||||||
|
#
|
||||||
|
# - Update the "start" and "end" coordinates for each feature to the
|
||||||
|
# values you found.
|
||||||
|
#
|
||||||
|
# - Save your file.
|
||||||
|
#
|
||||||
|
# - Validate your file online at https://jsonlint.com/
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# == 1.2 Execute and Validate ==============================================
|
||||||
|
#
|
||||||
|
# - source() your database creation script:
|
||||||
|
#
|
||||||
|
# source("makeProteinDB.R")
|
||||||
|
#
|
||||||
|
# This should run without errors or warnings. If it doesn't work and you
|
||||||
|
# can't figure out quickly what's happening, ask on the mailing list for
|
||||||
|
# help.
|
||||||
|
#
|
||||||
|
# - Confirm
|
||||||
|
# The following commands should retrieve all of the features that have been
|
||||||
|
# annotated for MBP1_MYSPE
|
||||||
|
|
||||||
|
sel <- myDB$protein$name == paste("MBP1_", biCode(MYSPE), sep = "")
|
||||||
|
|
||||||
|
(proID <- myDB$protein$ID[sel])
|
||||||
|
(fanIDs <- myDB$annotation$ID[myDB$annotation$proteinID == proID])
|
||||||
|
(ftrIDs <- unique(myDB$annotation$featureID[fanIDs]))
|
||||||
|
myDB$feature$name[ftrIDs] # This should list ALL of your annotated features
|
||||||
|
# (once). If not, consider what could have gone wrong
|
||||||
|
# and ask on the list if you have difficulties fixing
|
||||||
|
# it.
|
||||||
|
|
||||||
|
|
||||||
|
# = 2 Plot Annotations ====================================================
|
||||||
|
|
||||||
|
# In this section we will plot domain annotations as colored rectangles on a
|
||||||
|
# sequence, as an example for using the R plotting system for generic, data
|
||||||
|
# driven images.
|
||||||
|
|
||||||
# We need a small utility function that draws the annotation boxes on a
|
# We need a small utility function that draws the annotation boxes on a
|
||||||
# representation of sequence. It will accept the left and right boundaries, the
|
# representation of sequence. It should accept the start and end coordinates,
|
||||||
# height and the color of the box and plot it using R's rect() function.
|
# the y value where it should be plotted and the color of the box, and plot a
|
||||||
|
# rectangle using R's rect() function.
|
||||||
|
|
||||||
drawBox <- function(xLeft, xRight, y, colour) {
|
drawBox <- function(xStart, xEnd, y, myCol) {
|
||||||
# Draw a box from xLeft to xRight at y, filled with colour
|
# Draw a box from xStart to xEnd at y, filled with colour myCol
|
||||||
rect(xLeft, (y - 0.1), xRight, (y + 0.1),
|
delta <- 0.1
|
||||||
border = "black", col = colour)
|
rect(xStart, (y - delta), xEnd, (y + delta),
|
||||||
|
border = "black", col = myCol)
|
||||||
}
|
}
|
||||||
|
|
||||||
# test this:
|
# test this:
|
||||||
@ -71,10 +166,10 @@ drawBox(-1, 1, 0.0, "peachpuff")
|
|||||||
# Next, we define a function to plot annotations for one protein: the name of
|
# Next, we define a function to plot annotations for one protein: the name of
|
||||||
# the protein, a horizontal grey line for its length, and all of its features.
|
# the protein, a horizontal grey line for its length, and all of its features.
|
||||||
|
|
||||||
plotProtein <- function(DB, ID, y) {
|
plotProtein <- function(DB, name, y) {
|
||||||
# DB: protein database, probably you want myDB
|
# DB: protein database
|
||||||
# ID: the ID of the protein to plot.
|
# name: the name of the protein in the database.
|
||||||
# y: where to draw the plot
|
# y: height where to draw the plot
|
||||||
#
|
#
|
||||||
# Define colors: we create a vector of color values, one for
|
# Define colors: we create a vector of color values, one for
|
||||||
# each feature, and we give it names of the feature ID. Then we
|
# each feature, and we give it names of the feature ID. Then we
|
||||||
@ -89,58 +184,79 @@ plotProtein <- function(DB, ID, y) {
|
|||||||
space="Lab",
|
space="Lab",
|
||||||
interpolate="linear")(nrow(DB$feature))
|
interpolate="linear")(nrow(DB$feature))
|
||||||
# B: Features may overlap, so we make the colors transparent by setting
|
# B: Features may overlap, so we make the colors transparent by setting
|
||||||
# their "alpha channel" to 1/2 (hex: 7F)
|
# their "alpha channel" to 1/3 (hex: 55)
|
||||||
ftrCol <- paste(ftrCol, "7F", sep = "")
|
ftrCol <- paste0(ftrCol, "55")
|
||||||
# C: we asssign names
|
# C: we asssign names
|
||||||
names(ftrCol) <- DB$feature$ID
|
names(ftrCol) <- DB$feature$ID
|
||||||
# E.g. color for the third feature: ftrCol[ DB$feature$ID[3] ]
|
# E.g. color for the third feature: ftrCol[ DB$feature$ID[3] ]
|
||||||
|
|
||||||
# find the row-index of the protein ID in the protein table of DB
|
# find the row-index of the protein ID in the protein table of DB
|
||||||
iProtein <- which(DB$protein$ID == ID)
|
iProtein <- which(DB$protein$name == name)
|
||||||
|
|
||||||
# write the name of the protein
|
# write the name of the protein
|
||||||
text(-30, y, adj=1, labels=DB$protein$name[iProtein], cex=0.75 )
|
text(-30, y, adj=1, labels=name, cex=0.75 )
|
||||||
|
|
||||||
#draw a line from 0 to nchar(sequence-of-the-protein)
|
#draw a line from 0 to nchar(sequence-of-the-protein)
|
||||||
lines(c(0, nchar(DB$protein$sequence[iProtein])), c(y, y),
|
lines(c(0, nchar(DB$protein$sequence[iProtein])), c(y, y),
|
||||||
lwd=3, col="#999999")
|
lwd=3, col="#999999")
|
||||||
|
|
||||||
# get the rows of feature annotations for the protein
|
# get the rows of feature annotations for the protein
|
||||||
iFtr <- which(DB$proteinAnnotation$protein.ID == ID)
|
iFtr <- which(DB$annotation$proteinID == DB$protein$ID[iProtein])
|
||||||
|
|
||||||
# draw a colored box for each feature
|
# draw a colored box for each feature
|
||||||
for (i in iFtr) {
|
for (i in iFtr) {
|
||||||
drawBox(DB$proteinAnnotation$start[i],
|
drawBox(DB$annotation$start[i],
|
||||||
DB$proteinAnnotation$end[i],
|
DB$annotation$end[i],
|
||||||
y,
|
y,
|
||||||
ftrCol[ DB$proteinAnnotation$feature.ID[i] ])
|
ftrCol[ DB$annotation$featureID[i] ])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Plot each annotated protein:
|
# Plot each annotated protein:
|
||||||
# Get the rows of all unique annotated protein IDs in the protein table
|
# Get the rows of all unique annotated Mbp1 proteins in myDB
|
||||||
iRows <- which(myDB$protein$ID %in% unique(myDB$proteinAnnotation$protein.ID))
|
|
||||||
|
iRows <- grep("^MBP1_", myDB$protein$name)
|
||||||
|
|
||||||
# define the size of the plot-frame to accomodate all proteins
|
# define the size of the plot-frame to accomodate all proteins
|
||||||
yMax <- length(iRows) * 1.1
|
yMax <- length(iRows) * 1.1
|
||||||
xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1 # longest sequence
|
xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1 # longest sequence
|
||||||
|
|
||||||
# plot an empty frame
|
# plot an empty frame
|
||||||
plot(1,1, xlim=c(-200, xMax), ylim=c(0, yMax),
|
plot(1, 1,
|
||||||
type="n", axes=FALSE, bty="n", xlab="sequence position", ylab="")
|
xlim = c(-200, xMax + 100),
|
||||||
|
ylim = c(0, yMax),
|
||||||
|
type = "n",
|
||||||
|
axes = FALSE,
|
||||||
|
bty = "n",
|
||||||
|
main = "Mbp1 orthologue domain annotations",
|
||||||
|
xlab = "sequence position",
|
||||||
|
ylab="")
|
||||||
axis(1, at = seq(0, xMax, by = 100))
|
axis(1, at = seq(0, xMax, by = 100))
|
||||||
|
myCol <- colorRampPalette(c("#f2003c", "#F0A200",
|
||||||
|
"#f0ea00", "#62C923",
|
||||||
|
"#0A9A9B", "#1958C3",
|
||||||
|
"#8000D3", "#D0007F"),
|
||||||
|
space="Lab",
|
||||||
|
interpolate="linear")(nrow(myDB$feature))
|
||||||
|
myCol <- paste0(myCol, "55")
|
||||||
|
legend(xMax - 150, 6,
|
||||||
|
legend = myDB$feature$name,
|
||||||
|
cex = 0.7,
|
||||||
|
fill = myCol)
|
||||||
|
|
||||||
|
|
||||||
# Finally, iterate over all proteins and call plotProtein()
|
# Finally, iterate over all proteins and call plotProtein()
|
||||||
for (i in 1:length(iRows)) {
|
for (i in seq_along(iRows)) {
|
||||||
plotProtein(myDB, myDB$protein$ID[iRows[i]], i)
|
plotProtein(myDB, myDB$protein$name[iRows[i]], i)
|
||||||
}
|
}
|
||||||
|
|
||||||
# The plot shows clearly what is variable and what is constant about the
|
# The plot shows what is variable and what is constant about the annotations in
|
||||||
# annotations in a group of related proteins. Print the plot and bring it to
|
# a group of related proteins. Your MBP1_MYSPE annotations should appear at the
|
||||||
# class for the next quiz.
|
# top.
|
||||||
#
|
|
||||||
|
|
||||||
# = 1 Tasks
|
|
||||||
|
|
||||||
|
# Task:
|
||||||
|
# Put a copy of the plot into your journal and interpret it with respect
|
||||||
|
# to MBP1_MYSPE, i.e. and note what you learn about MBP1_MYSPE from the plot.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user