minor updates
This commit is contained in:
		@@ -1,20 +1,15 @@
 | 
				
			|||||||
# tocID <- "BIN-FUNC-Domain_annotation.R"
 | 
					# tocID <- "BIN-FUNC-Domain_annotation.R"
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# ---------------------------------------------------------------------------- #
 | 
					 | 
				
			||||||
#  PATIENCE  ...                                                               #
 | 
					 | 
				
			||||||
#    Do not yet work with this code. Updates in progress. Thank you.           #
 | 
					 | 
				
			||||||
#    boris.steipe@utoronto.ca                                                  #
 | 
					 | 
				
			||||||
# ---------------------------------------------------------------------------- #
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Purpose:  A Bioinformatics Course:
 | 
					# Purpose:  A Bioinformatics Course:
 | 
				
			||||||
#              R code accompanying the BIN-FUNC-Domain_annotation unit.
 | 
					#              R code accompanying the BIN-FUNC-Domain_annotation unit.
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Version:  1.0
 | 
					# Version:  1.1
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Date:     2017  11 13
 | 
					# Date:     2017-11  -  2020-09
 | 
				
			||||||
# Author:   Boris Steipe (boris.steipe@utoronto.ca)
 | 
					# Author:   Boris Steipe (boris.steipe@utoronto.ca)
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Versions:
 | 
					# Versions:
 | 
				
			||||||
 | 
					#           1.1    2020 Updates
 | 
				
			||||||
#           1.0    Live version 2017
 | 
					#           1.0    Live version 2017
 | 
				
			||||||
#           0.1    First code copied from 2016 material.
 | 
					#           0.1    First code copied from 2016 material.
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
@@ -31,16 +26,16 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#TOC> ==========================================================================
 | 
					#TOC> ==========================================================================
 | 
				
			||||||
#TOC>
 | 
					#TOC> 
 | 
				
			||||||
#TOC>   Section  Title                                                               Line
 | 
					#TOC>   Section  Title                                                 Line
 | 
				
			||||||
#TOC> -----------------------------------------------------------------------------------
 | 
					#TOC> ---------------------------------------------------------------------
 | 
				
			||||||
#TOC>   1        Update your database script                                           41
 | 
					#TOC>   1        Update your database script                             42
 | 
				
			||||||
#TOC>   1.1      Preparing an annotation file ...                                      47
 | 
					#TOC>   1.1        Preparing an annotation file ...                      49
 | 
				
			||||||
#TOC>   1.1.1    If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit      49
 | 
					#TOC>   1.1.1          BEFORE  "BIN-ALI-Optimal_sequence_alignment"      52
 | 
				
			||||||
#TOC>   1.1.2    If you HAVE done the BIN-ALI-Optimal_sequence_alignment               93
 | 
					#TOC>   1.1.2          AFTER "BIN-ALI-Optimal_sequence_alignment"        97
 | 
				
			||||||
#TOC>   1.2      Execute and Validate                                                 119
 | 
					#TOC>   1.2        Execute and Validate                                 124
 | 
				
			||||||
#TOC>   2        Plot Annotations                                                     144
 | 
					#TOC>   2        Plot Annotations                                       149
 | 
				
			||||||
#TOC>
 | 
					#TOC> 
 | 
				
			||||||
#TOC> ==========================================================================
 | 
					#TOC> ==========================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -48,12 +43,15 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Since you have recorded domain features at the SMART database, we can store
 | 
					# Since you have recorded domain features at the SMART database, we can store
 | 
				
			||||||
# the feature annotations in myDB.
 | 
					# the feature annotations in myDB ...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ==   1.1  Preparing an annotation file ...  ==================================
 | 
					# ==   1.1  Preparing an annotation file ...  ==================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ===   1.1.1  BEFORE  "BIN-ALI-Optimal_sequence_alignment"
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# ===  1.1.1  If you HAVE NOT done the BIN-ALI-Optimal_sequence_alignment unit
 | 
					#   IF YOU HAVE NOT YET COMPLETED THE BIN-ALI-OPTIMAL_SEQUENCE_ALIGNMENT UNIT:
 | 
				
			||||||
#
 | 
					 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#   You DON'T already have a file called "<MYSPE>-Annotations.json" in the
 | 
					#   You DON'T already have a file called "<MYSPE>-Annotations.json" in the
 | 
				
			||||||
#   ./data/ directory:
 | 
					#   ./data/ directory:
 | 
				
			||||||
@@ -96,10 +94,11 @@
 | 
				
			|||||||
# Then SKIP the next section.
 | 
					# Then SKIP the next section.
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# ===  1.1.2  If you HAVE done the BIN-ALI-Optimal_sequence_alignment
 | 
					# ===   1.1.2  AFTER "BIN-ALI-Optimal_sequence_alignment"  
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					#   IF YOU HAVE ALREADY COMPLETED THE BIN-ALI-OPTIMAL_SEQUENCE_ALIGNMENT UNIT:
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#   You DO already have a file called "<MYSPE>-Annotations.json" in the
 | 
					#   You SHOULD have a file called "<MYSPE>-Annotations.json" in the
 | 
				
			||||||
#   ./data/ directory:
 | 
					#   ./data/ directory:
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#   - Open the file in the RStudio editor.
 | 
					#   - Open the file in the RStudio editor.
 | 
				
			||||||
@@ -129,8 +128,8 @@
 | 
				
			|||||||
#     source("makeProteinDB.R")
 | 
					#     source("makeProteinDB.R")
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#     This should run without errors or warnings. If it doesn't work and you
 | 
					#     This should run without errors or warnings. If it doesn't work and you
 | 
				
			||||||
#     can't figure out quickly what's happening, ask on the mailing list for
 | 
					#     can't figure out quickly what's happening, ask for help on the
 | 
				
			||||||
#     help.
 | 
					#     Discussion Board.
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#   - Confirm
 | 
					#   - Confirm
 | 
				
			||||||
#     The following commands should retrieve all of the features that have been
 | 
					#     The following commands should retrieve all of the features that have been
 | 
				
			||||||
@@ -150,7 +149,7 @@ myDB$feature$name[ftrIDs] # This should list ALL of your annotated features
 | 
				
			|||||||
# =    2  Plot Annotations  ====================================================
 | 
					# =    2  Plot Annotations  ====================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# In this section we will plot domain annotations as colored rectangles on a
 | 
					# In this section we will plot domain annotations as colored rectangles on a
 | 
				
			||||||
# sequence, as an example for using the R plotting system for generic, data
 | 
					# sequence, as an example of using the R plotting system for generic, data
 | 
				
			||||||
# driven images.
 | 
					# driven images.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# We need a small utility function that draws the annotation boxes on a
 | 
					# We need a small utility function that draws the annotation boxes on a
 | 
				
			||||||
@@ -158,10 +157,10 @@ myDB$feature$name[ftrIDs] # This should list ALL of your annotated features
 | 
				
			|||||||
# the y value where it should be plotted and the color of the box, and plot a
 | 
					# the y value where it should be plotted and the color of the box, and plot a
 | 
				
			||||||
# rectangle using R's rect() function.
 | 
					# rectangle using R's rect() function.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
drawBox <- function(xStart, xEnd, y, myCol) {
 | 
					drawBox <- function(xStart, xEnd, y, myCol, DELTA = 0.2) {
 | 
				
			||||||
  # Draw a box from xStart to xEnd at y, filled with colour myCol
 | 
					  # Draw a box from xStart to xEnd at y, filled with colour myCol
 | 
				
			||||||
  delta <- 0.1
 | 
					  # The height of the box is y +- DELTA
 | 
				
			||||||
  rect(xStart, (y - delta), xEnd, (y + delta),
 | 
					  rect(xStart, (y - DELTA), xEnd, (y + DELTA),
 | 
				
			||||||
       border = "black", col = myCol)
 | 
					       border = "black", col = myCol)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -228,6 +227,8 @@ yMax <- length(iRows) * 1.1
 | 
				
			|||||||
xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1  # longest sequence
 | 
					xMax <- max(nchar(myDB$protein$sequence[iRows])) * 1.1  # longest sequence
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# plot an empty frame
 | 
					# plot an empty frame
 | 
				
			||||||
 | 
					oPar <- par(mar = c(4.2, 0.1, 3, 0.1))  # save the current plot parameters and
 | 
				
			||||||
 | 
					                                      # decrease margins
 | 
				
			||||||
plot(1, 1,
 | 
					plot(1, 1,
 | 
				
			||||||
     xlim = c(-200, xMax + 100),
 | 
					     xlim = c(-200, xMax + 100),
 | 
				
			||||||
     ylim = c(0, yMax),
 | 
					     ylim = c(0, yMax),
 | 
				
			||||||
@@ -236,6 +237,7 @@ plot(1, 1,
 | 
				
			|||||||
     bty = "n",
 | 
					     bty = "n",
 | 
				
			||||||
     main = "Mbp1 orthologue domain annotations",
 | 
					     main = "Mbp1 orthologue domain annotations",
 | 
				
			||||||
     xlab = "sequence position",
 | 
					     xlab = "sequence position",
 | 
				
			||||||
 | 
					     cex.axis = 0.8,
 | 
				
			||||||
     ylab="")
 | 
					     ylab="")
 | 
				
			||||||
axis(1, at = seq(0, xMax, by = 100))
 | 
					axis(1, at = seq(0, xMax, by = 100))
 | 
				
			||||||
myCol <- colorRampPalette(c("#f2003c", "#F0A200",
 | 
					myCol <- colorRampPalette(c("#f2003c", "#F0A200",
 | 
				
			||||||
@@ -250,11 +252,12 @@ legend(xMax - 150, 6,
 | 
				
			|||||||
       cex = 0.7,
 | 
					       cex = 0.7,
 | 
				
			||||||
       fill = myCol)
 | 
					       fill = myCol)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
# Finally, iterate over all proteins and call plotProtein()
 | 
					# Finally, iterate over all proteins and call plotProtein()
 | 
				
			||||||
for (i in seq_along(iRows)) {
 | 
					for (i in seq_along(iRows)) {
 | 
				
			||||||
  plotProtein(myDB, myDB$protein$name[iRows[i]], i)
 | 
					  plotProtein(myDB, myDB$protein$name[iRows[i]], i)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					par(oPar)  # reset the plot parameters
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# The plot shows what is variable and what is constant about the annotations in
 | 
					# The plot shows what is variable and what is constant about the annotations in
 | 
				
			||||||
# a group of related proteins. Your MBP1_MYSPE annotations should appear at the
 | 
					# a group of related proteins. Your MBP1_MYSPE annotations should appear at the
 | 
				
			||||||
@@ -264,6 +267,9 @@ for (i in seq_along(iRows)) {
 | 
				
			|||||||
#    Put a copy of the plot into your journal and interpret it with respect
 | 
					#    Put a copy of the plot into your journal and interpret it with respect
 | 
				
			||||||
#    to MBP1_MYSPE, i.e. note what you learn about MBP1_MYSPE from the plot.
 | 
					#    to MBP1_MYSPE, i.e. note what you learn about MBP1_MYSPE from the plot.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Task:
 | 
				
			||||||
 | 
					#    It would be better to align the motif borders, at least approximately (not
 | 
				
			||||||
 | 
					#    all proteins have all motifs). How would you go about doing that?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# [END]
 | 
					# [END]
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user