minor updates

This commit is contained in:
hyginn 2020-09-25 15:24:51 +10:00
parent abe30a5fb6
commit b42adac3f3

View File

@ -1,20 +1,15 @@
# tocID <- "BIN-FUNC-Domain_annotation.R" # tocID <- "BIN-FUNC-Domain_annotation.R"
# #
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
#
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-FUNC-Domain_annotation unit. # R code accompanying the BIN-FUNC-Domain_annotation unit.
# #
# Version: 1.0 # Version: 1.1
# #
# Date: 2017 11 13 # Date: 2017-11 - 2020-09
# Author: Boris Steipe (boris.steipe@utoronto.ca) # Author: Boris Steipe (boris.steipe@utoronto.ca)
# #
# Versions: # Versions:
# 1.1 2020 Updates
# 1.0 Live version 2017 # 1.0 Live version 2017
# 0.1 First code copied from 2016 material. # 0.1 First code copied from 2016 material.
# #
@ -31,16 +26,16 @@
#TOC> ========================================================================== #TOC> ==========================================================================
#TOC> #TOC>
#TOC> Section Title Line #TOC> Section Title Line
#TOC> ----------------------------------------------------------------------------------- #TOC> ---------------------------------------------------------------------
#TOC> 1 Update your database script 41 #TOC> 1 Update your database script 42
#TOC> 1.1 Preparing an annotation file ... 47 #TOC> 1.1 Preparing an annotation file ... 49
#TOC> 1.1.1 If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit 49 #TOC> 1.1.1 BEFORE "BIN-ALI-Optimal_sequence_alignment" 52
#TOC> 1.1.2 If you HAVE done the BIN-ALI-Optimal_sequence_alignment 93 #TOC> 1.1.2 AFTER "BIN-ALI-Optimal_sequence_alignment" 97
#TOC> 1.2 Execute and Validate 119 #TOC> 1.2 Execute and Validate 124
#TOC> 2 Plot Annotations 144 #TOC> 2 Plot Annotations 149
#TOC> #TOC>
#TOC> ========================================================================== #TOC> ==========================================================================
@ -48,12 +43,15 @@
# Since you have recorded domain features at the SMART database, we can store # Since you have recorded domain features at the SMART database, we can store
# the feature annotations in myDB. # the feature annotations in myDB ...
# == 1.1 Preparing an annotation file ... ================================== # == 1.1 Preparing an annotation file ... ==================================
# === 1.1.1 BEFORE "BIN-ALI-Optimal_sequence_alignment"
# #
# === 1.1.1 If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit # IF YOU HAVE NOT YET COMPLETED THE BIN-ALI-OPTIMAL_SEQUENCE_ALIGNMENT UNIT:
#
# #
# You DON'T already have a file called "<MYSPE>-Annotations.json" in the # You DON'T already have a file called "<MYSPE>-Annotations.json" in the
# ./data/ directory: # ./data/ directory:
@ -96,10 +94,11 @@
# Then SKIP the next section. # Then SKIP the next section.
# #
# #
# === 1.1.2 If you HAVE done the BIN-ALI-Optimal_sequence_alignment # === 1.1.2 AFTER "BIN-ALI-Optimal_sequence_alignment"
# #
# IF YOU HAVE ALREADY COMPLETED THE BIN-ALI-OPTIMAL_SEQUENCE_ALIGNMENT UNIT:
# #
# You DO already have a file called "<MYSPE>-Annotations.json" in the # You SHOULD have a file called "<MYSPE>-Annotations.json" in the
# ./data/ directory: # ./data/ directory:
# #
# - Open the file in the RStudio editor. # - Open the file in the RStudio editor.
@ -129,8 +128,8 @@
# source("makeProteinDB.R") # source("makeProteinDB.R")
# #
# This should run without errors or warnings. If it doesn't work and you # This should run without errors or warnings. If it doesn't work and you
# can't figure out quickly what's happening, ask on the mailing list for # can't figure out quickly what's happening, ask for help on the
# help. # Discussion Board.
# #
# - Confirm # - Confirm
# The following commands should retrieve all of the features that have been # The following commands should retrieve all of the features that have been
@ -150,7 +149,7 @@ myDB$feature$name[ftrIDs] # This should list ALL of your annotated features
# = 2 Plot Annotations ==================================================== # = 2 Plot Annotations ====================================================
# In this section we will plot domain annotations as colored rectangles on a # In this section we will plot domain annotations as colored rectangles on a
# sequence, as an example for using the R plotting system for generic, data # sequence, as an example of using the R plotting system for generic, data
# driven images. # driven images.
# We need a small utility function that draws the annotation boxes on a # We need a small utility function that draws the annotation boxes on a
@ -158,10 +157,10 @@ myDB$feature$name[ftrIDs] # This should list ALL of your annotated features
# the y value where it should be plotted and the color of the box, and plot a # the y value where it should be plotted and the color of the box, and plot a
# rectangle using R's rect() function. # rectangle using R's rect() function.
drawBox <- function(xStart, xEnd, y, myCol) { drawBox <- function(xStart, xEnd, y, myCol, DELTA = 0.2) {
# Draw a box from xStart to xEnd at y, filled with colour myCol # Draw a box from xStart to xEnd at y, filled with colour myCol
delta <- 0.1 # The box spans y - DELTA to y + DELTA (height 2 * DELTA)
rect(xStart, (y - delta), xEnd, (y + delta), rect(xStart, (y - DELTA), xEnd, (y + DELTA),
border = "black", col = myCol) border = "black", col = myCol)
} }
@ -228,6 +227,8 @@ yMax <- length(iRows) * 1.1
xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1 # longest sequence xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1 # longest sequence
# plot an empty frame # plot an empty frame
oPar <- par(mar = c(4.2, 0.1, 3, 0.1)) # save the current plot parameters and
# decrease margins
plot(1, 1, plot(1, 1,
xlim = c(-200, xMax + 100), xlim = c(-200, xMax + 100),
ylim = c(0, yMax), ylim = c(0, yMax),
@ -236,6 +237,7 @@ plot(1, 1,
bty = "n", bty = "n",
main = "Mbp1 orthologue domain annotations", main = "Mbp1 orthologue domain annotations",
xlab = "sequence position", xlab = "sequence position",
cex.axis = 0.8,
ylab="") ylab="")
axis(1, at = seq(0, xMax, by = 100)) axis(1, at = seq(0, xMax, by = 100))
myCol <- colorRampPalette(c("#f2003c", "#F0A200", myCol <- colorRampPalette(c("#f2003c", "#F0A200",
@ -250,11 +252,12 @@ legend(xMax - 150, 6,
cex = 0.7, cex = 0.7,
fill = myCol) fill = myCol)
# Finally, iterate over all proteins and call plotProtein() # Finally, iterate over all proteins and call plotProtein()
for (i in seq_along(iRows)) { for (i in seq_along(iRows)) {
plotProtein(myDB, myDB$protein$name[iRows[i]], i) plotProtein(myDB, myDB$protein$name[iRows[i]], i)
} }
par(oPar) # reset the plot parameters
# The plot shows what is variable and what is constant about the annotations in # The plot shows what is variable and what is constant about the annotations in
# a group of related proteins. Your MBP1_MYSPE annotations should appear at the # a group of related proteins. Your MBP1_MYSPE annotations should appear at the
@ -264,6 +267,9 @@ for (i in seq_along(iRows)) {
# Put a copy of the plot into your journal and interpret it with respect # Put a copy of the plot into your journal and interpret it with respect
# to MBP1_MYSPE, i.e. note what you learn about MBP1_MYSPE from the plot. # to MBP1_MYSPE, i.e. note what you learn about MBP1_MYSPE from the plot.
# Task:
# It would be better to align the motif borders, at least approximately (not
# all proteins have all motifs). How would you go about doing that?
# [END] # [END]