Maintenance. Use AACOLS.
This commit is contained in:
parent
80079f9c5d
commit
f3a436cb6e
@ -1,19 +1,14 @@
|
|||||||
# tocID <- "BIN-SEQA-Composition.R"
|
# tocID <- "BIN-SEQA-Composition.R"
|
||||||
#
|
#
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
# PATIENCE ... #
|
|
||||||
# Do not yet work wih this code. Updates in progress. Thank you. #
|
|
||||||
# boris.steipe@utoronto.ca #
|
|
||||||
# ---------------------------------------------------------------------------- #
|
|
||||||
#
|
|
||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the BIN-SEQA-Comparison unit
|
# R code accompanying the BIN-SEQA-Comparison unit
|
||||||
#
|
#
|
||||||
# Version: 1.1
|
# Version: 1.2
|
||||||
#
|
#
|
||||||
# Date: 2017 11 - 2019 01
|
# Date: 2017-11 - 2020-09
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
|
# 1.2 2020 Maintenance
|
||||||
# 1.1 Change from require() to requireNamespace(),
|
# 1.1 Change from require() to requireNamespace(),
|
||||||
# use <package>::<function>() idiom throughout,
|
# use <package>::<function>() idiom throughout,
|
||||||
# use Biocmanager:: not biocLite()
|
# use Biocmanager:: not biocLite()
|
||||||
@ -38,14 +33,14 @@
|
|||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------------------
|
#TOC> ----------------------------------------------------------
|
||||||
#TOC> 1 Preparation 47
|
#TOC> 1 Preparation 48
|
||||||
#TOC> 2 Aggregate properties 68
|
#TOC> 2 Aggregate properties 69
|
||||||
#TOC> 3 Sequence Composition Enrichment 111
|
#TOC> 3 Sequence Composition Enrichment 113
|
||||||
#TOC> 3.1 Barplot, and side-by-side barplot 134
|
#TOC> 3.1 Barplot, and side-by-side barplot 136
|
||||||
#TOC> 3.2 Plotting ratios 169
|
#TOC> 3.2 Plotting ratios 171
|
||||||
#TOC> 3.3 Plotting log ratios 185
|
#TOC> 3.3 Plotting log ratios 188
|
||||||
#TOC> 3.4 Sort by frequency 200
|
#TOC> 3.4 Sort by frequency 204
|
||||||
#TOC> 3.5 Color by amino acid type 215
|
#TOC> 3.5 Color by amino acid type 221
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -94,19 +89,20 @@ s <- unlist(s) # strsplit() returns a list! Why?
|
|||||||
seqinr::s2c(mySeq)
|
seqinr::s2c(mySeq)
|
||||||
|
|
||||||
|
|
||||||
seqinr::computePI(s2c(mySeq)) # isoelectric point
|
seqinr::computePI(seqinr::s2c(mySeq)) # isoelectric point
|
||||||
seqinr::pmw(s2c(mySeq)) # molecular weight
|
seqinr::pmw(seqinr::s2c(mySeq)) # molecular weight
|
||||||
seqinr::AAstat(s2c(mySeq)) # This also plots the distribution of
|
seqinr::AAstat(seqinr::s2c(mySeq)) # This also plots the distribution of
|
||||||
# values along the sequence
|
# values along the sequence
|
||||||
|
|
||||||
# A true Labor of Love has gone into the
|
# A true Labor of Love has gone into the
|
||||||
# compilation of the "aaindex" data:
|
# compilation of the "aaindex" data:
|
||||||
|
|
||||||
?aaindex
|
?seqinr::aaindex
|
||||||
data(aaindex) # "attach" the dataset - i.e. make it accessible as an
|
data(aaindex, package = "seqinr") # "attach" the dataset - i.e. make it
|
||||||
# R object
|
# accessible as an R object
|
||||||
|
|
||||||
length(aaindex)
|
length(aaindex) # no seqinr:: needed for the dataset since we just
|
||||||
|
# "attached" it with data()
|
||||||
|
|
||||||
# Here are all the index descriptions
|
# Here are all the index descriptions
|
||||||
for (i in 1:length(aaindex)) {
|
for (i in 1:length(aaindex)) {
|
||||||
@ -133,7 +129,7 @@ refData # ... in %
|
|||||||
|
|
||||||
|
|
||||||
# tabulate the amino acid counts in mySeq
|
# tabulate the amino acid counts in mySeq
|
||||||
(obsData <- table(s2c(mySeq))) # counts
|
(obsData <- table(seqinr::s2c(mySeq))) # counts
|
||||||
(obsData <- 100 * (obsData / sum(obsData))) # frequencies
|
(obsData <- 100 * (obsData / sum(obsData))) # frequencies
|
||||||
|
|
||||||
|
|
||||||
@ -180,12 +176,13 @@ legend (x = 1, y = 12,
|
|||||||
barplot(obsData / refData,
|
barplot(obsData / refData,
|
||||||
col = "#CCCCCC",
|
col = "#CCCCCC",
|
||||||
ylab = "Sequence / Average",
|
ylab = "Sequence / Average",
|
||||||
|
ylim = c(0, 2.5),
|
||||||
cex.names = 0.7)
|
cex.names = 0.7)
|
||||||
abline(h = 1, col="#BB0000")
|
abline(h = 1, col="#BB0000")
|
||||||
abline(h = c(1/3, 3), lty = 2, col="#BB000055")
|
abline(h = c(1/2, 2), lty = 2, col="#BB000055")
|
||||||
|
|
||||||
# ... but ratios are not very good here, since the difference in height on the
|
# ... but ratios are not very good here, since the difference in height on the
|
||||||
# plot now depends on the order we compare in: ratios of 1/3 and 3 (dotted
|
# plot now depends on the order we compare in: ratios of 1/2 and 2 (dotted
|
||||||
# lines) are exactly the same fold-difference !
|
# lines) are exactly the same fold-difference !
|
||||||
|
|
||||||
# == 3.3 Plotting log ratios ===============================================
|
# == 3.3 Plotting log ratios ===============================================
|
||||||
@ -196,69 +193,53 @@ abline(h = c(1/3, 3), lty = 2, col="#BB000055")
|
|||||||
barplot(log(obsData / refData),
|
barplot(log(obsData / refData),
|
||||||
col = "#CCCCCC",
|
col = "#CCCCCC",
|
||||||
ylab = "log(Sequence / Average)",
|
ylab = "log(Sequence / Average)",
|
||||||
|
ylim = log(c(1/3, 3)),
|
||||||
cex.names = 0.7)
|
cex.names = 0.7)
|
||||||
abline(h = log(1), col="#BB0000")
|
abline(h = log(1), col="#BB0000")
|
||||||
abline(h = log(c(1/3, 3)), lty = 2, col="#BB000055")
|
abline(h = log(c(1/2, 2)), lty = 2, col="#BB000055")
|
||||||
|
|
||||||
# Note how the three-fold difference lines are now the same distance from the
|
# Note how the two-fold difference lines are now the same distance from the
|
||||||
# line of equal ratio.
|
# line of equal ratio.
|
||||||
|
|
||||||
# == 3.4 Sort by frequency =================================================
|
# == 3.4 Sort by frequency =================================================
|
||||||
|
|
||||||
barplot(sort(log(obsData / refData), decreasing = TRUE),
|
barplot(sort(log(obsData / refData), decreasing = TRUE),
|
||||||
ylim = c(-3.5,2),
|
ylim = log(c(1/3, 3)),
|
||||||
col = "#CCCCCC",
|
col = "#CCCCCC",
|
||||||
ylab = "log(Sequence / Average)",
|
ylab = "log(Sequence / Average)",
|
||||||
cex.names = 0.7)
|
cex.names = 0.7)
|
||||||
abline(h = log(1), col="#BB0000")
|
abline(h = log(1), col="#BB0000")
|
||||||
abline(h = log(c(1/3, 3)), lty = 2, col="#BB000055")
|
abline(h = log(c(1/2, 2)), lty = 2, col="#BB000055")
|
||||||
|
|
||||||
arrows(4, 1.8, 0, 1.8, length = 0.07)
|
yTxt <- log(0.9)
|
||||||
text(5.5, 1.8, "Enriched", cex = 0.7)
|
arrows(4, yTxt, 0, yTxt, length = 0.07)
|
||||||
arrows(20, 1.8, 24, 1.8, length = 0.07)
|
text(5.5, yTxt, "Enriched", cex = 0.7)
|
||||||
text(19.5, 1.8, "Depleted", pos = 2, cex = 0.7)
|
yTxt <- log(1.1)
|
||||||
|
arrows(20, yTxt, 24, yTxt, length = 0.07)
|
||||||
|
text(19.5, yTxt, "Depleted", pos = 2, cex = 0.7)
|
||||||
|
|
||||||
# == 3.5 Color by amino acid type ==========================================
|
# == 3.5 Color by amino acid type ==========================================
|
||||||
|
|
||||||
# color the bars by type.
|
# Color the bars by amino acid type. Use AACOLS , defined in the .utilities.R
|
||||||
# define colors
|
# script, or define your own.
|
||||||
AAcol <- character()
|
|
||||||
AAcol["A"] <- "#AABBAA"
|
|
||||||
AAcol["C"] <- "#FFEE77"
|
|
||||||
AAcol["D"] <- "#DD6600"
|
|
||||||
AAcol["E"] <- "#DD3300"
|
|
||||||
AAcol["F"] <- "#767D38"
|
|
||||||
AAcol["G"] <- "#BBBBCC"
|
|
||||||
AAcol["H"] <- "#A2A1FD"
|
|
||||||
AAcol["I"] <- "#70B6C6"
|
|
||||||
AAcol["K"] <- "#4563BB"
|
|
||||||
AAcol["L"] <- "#80C6B6"
|
|
||||||
AAcol["M"] <- "#AFCC34"
|
|
||||||
AAcol["N"] <- "#BB88CC"
|
|
||||||
AAcol["P"] <- "#7292B7"
|
|
||||||
AAcol["Q"] <- "#8866BB"
|
|
||||||
AAcol["R"] <- "#74A0FF"
|
|
||||||
AAcol["S"] <- "#9999CC"
|
|
||||||
AAcol["T"] <- "#99AADD"
|
|
||||||
AAcol["V"] <- "#9DB500"
|
|
||||||
AAcol["W"] <- "#76AD48"
|
|
||||||
AAcol["Y"] <- "#44CA97"
|
|
||||||
|
|
||||||
barplot(rep(1, 20), names.arg = names(AAcol), col = AAcol, cex.names = 0.5)
|
barplot(rep(1, 20), names.arg = names(AACOLS), col = AACOLS, cex.names = 0.5)
|
||||||
|
|
||||||
lR <- sort(log(obsData / refData), decreasing = TRUE)
|
lR <- sort(log(obsData / refData), decreasing = TRUE)
|
||||||
barplot(lR,
|
barplot(lR,
|
||||||
ylim = c(-3.5,2),
|
ylim = log(c(1/3, 3)),
|
||||||
col = AAcol[names(lR)],
|
col = AACOLS[names(lR)],
|
||||||
ylab = "log(Sequence / Average)",
|
ylab = "log(Sequence / Average)",
|
||||||
cex.names = 0.7)
|
cex.names = 0.7)
|
||||||
abline(h = log(1), col="#00000055")
|
abline(h = log(1), col="#00000055")
|
||||||
abline(h = log(c(1/3, 3)), lty = 2, col="#00000033")
|
abline(h = log(c(1/2, 2)), lty = 2, col="#00000033")
|
||||||
|
|
||||||
arrows(4, 1.8, 0, 1.8, length = 0.07)
|
yTxt <- log(0.9)
|
||||||
text(5.5, 1.8, "Enriched", cex = 0.7)
|
arrows(4, yTxt, 0, yTxt, length = 0.07)
|
||||||
arrows(20, 1.8, 24, 1.8, length = 0.07)
|
text(5.5, yTxt, "Enriched", cex = 0.7)
|
||||||
text(19.5, 1.8, "Depleted", pos = 2, cex = 0.7)
|
yTxt <- log(1.1)
|
||||||
|
arrows(20, yTxt, 24, yTxt, length = 0.07)
|
||||||
|
text(19.5, yTxt, "Depleted", pos = 2, cex = 0.7)
|
||||||
|
|
||||||
|
|
||||||
# Task:
|
# Task:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user