stylisitc updates
This commit is contained in:
parent
67c92a49d9
commit
ca0d78c4b9
56
RPR-SX-PDB.R
56
RPR-SX-PDB.R
@ -9,13 +9,14 @@
|
|||||||
# Purpose: A Bioinformatics Course:
|
# Purpose: A Bioinformatics Course:
|
||||||
# R code accompanying the RPR-SX-PDB unit.
|
# R code accompanying the RPR-SX-PDB unit.
|
||||||
#
|
#
|
||||||
# Version: 1.2
|
# Version: 1.3
|
||||||
#
|
#
|
||||||
# Date: 2017 10 - 2019 11
|
# Date: 2017-10 - 2020-09
|
||||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
# 1.2 Maintenance
|
# 1.3 2020 Maintenance
|
||||||
|
# 1.2 2019 Maintenance
|
||||||
# 1.1 Change from require() to requireNamespace(),
|
# 1.1 Change from require() to requireNamespace(),
|
||||||
# use <package>::<function>() idiom throughout
|
# use <package>::<function>() idiom throughout
|
||||||
# 1.0 First live version, completely refactores 2016 code
|
# 1.0 First live version, completely refactores 2016 code
|
||||||
@ -57,9 +58,9 @@
|
|||||||
|
|
||||||
|
|
||||||
# In this example of protein structure interpretation, we ...
|
# In this example of protein structure interpretation, we ...
|
||||||
# - load the library "bio3D" which supports work with
|
# - download the package bio3D:: which supports work with
|
||||||
# protein structure files,
|
# protein structure files,
|
||||||
# - explore some elementary functions of the library
|
# - explore some elementary functions of the package
|
||||||
# - explore plotting of density values with scatterplots
|
# - explore plotting of density values with scatterplots
|
||||||
# - assemble a script to see whether H-bond lengths systematically differ
|
# - assemble a script to see whether H-bond lengths systematically differ
|
||||||
# between alpha-helical and beta-sheet structures.
|
# between alpha-helical and beta-sheet structures.
|
||||||
@ -77,7 +78,7 @@ if (! requireNamespace("bio3d", quietly = TRUE)) {
|
|||||||
# data(package = "bio3d") # available datasets
|
# data(package = "bio3d") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# bio3d can load molecules directly from the PDB servers, you don't _have_ to
|
# bio3d:: can load molecules directly from the PDB servers, you don't _have_ to
|
||||||
# store them locally, but you could.
|
# store them locally, but you could.
|
||||||
#
|
#
|
||||||
# But before you _load_ a file, have a look what such a file contains. I have
|
# But before you _load_ a file, have a look what such a file contains. I have
|
||||||
@ -87,8 +88,8 @@ file.show("./data/1BM8.pdb")
|
|||||||
# Have a look at the header section, the atom records, the coordinate records
|
# Have a look at the header section, the atom records, the coordinate records
|
||||||
# etc.
|
# etc.
|
||||||
#
|
#
|
||||||
# Task: Answer the following questions:
|
# Task:
|
||||||
#
|
# Answer the following questions:
|
||||||
# What is the resolution of the structure?
|
# What is the resolution of the structure?
|
||||||
# Is the first residue in the SEQRES section also the first residue
|
# Is the first residue in the SEQRES section also the first residue
|
||||||
# with an ATOM record?
|
# with an ATOM record?
|
||||||
@ -119,7 +120,7 @@ str(apses)
|
|||||||
i <- 5
|
i <- 5
|
||||||
apses$atom[i,]
|
apses$atom[i,]
|
||||||
apses$atom[i, c("x", "y", "z")] # here we are selecting with column names!
|
apses$atom[i, c("x", "y", "z")] # here we are selecting with column names!
|
||||||
apses$xyz[c(i*3-2, i*3-1, i*3)] # here we are selcting with row numbers
|
apses$xyz[c(i*3-2, i*3-1, i*3)] # here we are selecting with row numbers
|
||||||
|
|
||||||
# all atoms of a residue ...
|
# all atoms of a residue ...
|
||||||
i <- 48
|
i <- 48
|
||||||
@ -140,11 +141,12 @@ length(apses$seqres)
|
|||||||
length(apses$atom$resid[apses$calpha])
|
length(apses$atom$resid[apses$calpha])
|
||||||
|
|
||||||
# Are the sequences the same?
|
# Are the sequences the same?
|
||||||
sum(apses$seqres == apses$atom$resid[apses$calpha])
|
any(apses$seqres != apses$atom$resid[apses$calpha])
|
||||||
|
|
||||||
# get a list of all CA atoms of arginine residues
|
# get a list of all CA atoms of arginine residues
|
||||||
|
cols <- c("eleno", "elety", "resid", "chain", "resno", "insert")
|
||||||
sel <- apses$atom$resid == "ARG" & apses$atom$elety == "CA"
|
sel <- apses$atom$resid == "ARG" & apses$atom$elety == "CA"
|
||||||
apses$atom[sel, c("eleno", "elety", "resid", "chain", "resno", "insert")]
|
apses$atom[sel, cols]
|
||||||
|
|
||||||
# The introduction to bio3d tutorial at
|
# The introduction to bio3d tutorial at
|
||||||
# http://thegrantlab.org/bio3d/tutorials/structure-analysis
|
# http://thegrantlab.org/bio3d/tutorials/structure-analysis
|
||||||
@ -647,33 +649,34 @@ hist(dE, col="#00AA70")
|
|||||||
|
|
||||||
|
|
||||||
# For better comparison, plot both in the
|
# For better comparison, plot both in the
|
||||||
# same window:
|
# same window (use the "add = TRUE" parameter in hist() ):
|
||||||
|
|
||||||
hist(dH, col="#DD0055")
|
hist(dH, col="#DD0055")
|
||||||
hist(dE, col="#00AA70", add=TRUE)
|
hist(dE, col="#00AA70", add = TRUE)
|
||||||
|
|
||||||
# ... oops, we dind't reset the graphics parameters. You can either close the
|
# ... oops, we dind't reset the graphics parameters. You can either use
|
||||||
# window, a new window will open with default parameters, or ...
|
# the broom icon of the Plot tab to clear asll plots, or ...
|
||||||
par(opar) # ... reset the graphics parameters
|
par(opar) # ... reset the graphics parameters
|
||||||
|
|
||||||
hist(dH, col="#DD0055")
|
hist(dH, col = "#DD0055")
|
||||||
hist(dE, col="#00AA70", add=TRUE)
|
hist(dE, col = "#00AA70", add = TRUE)
|
||||||
|
|
||||||
# We see that the leftmost column of the sheet bonds hides the helix bonds in
|
# We see that the leftmost column of the sheet bonds hides the helix bonds in
|
||||||
# that column. Not good. But we can make the colours transparent! We just need to
|
# that column. Not good. But we can make the colours transparent! We just need to
|
||||||
# add a fourth set of two hexadecimal-numbers to the #RRGGBB triplet. Lets use
|
# add a fourth set of two hexadecimal-numbers to the #RRGGBB triplet. (This
|
||||||
# 2/3 transparent, in hexadecimal, 1/3 of 256 is x55 - i.e. an RGB triplet
|
# sets the transparency of the colour, it is called the "alpha channel"). Lets
|
||||||
|
# use 2/3 transparent, in hexadecimal, 1/3 of 256 is x55 - i.e. an RGB triplet
|
||||||
# specied as #RRGGBB55 is only 33% opaque:
|
# specied as #RRGGBB55 is only 33% opaque:
|
||||||
|
|
||||||
hist(dH, col="#DD005555")
|
hist(dH, col = "#DD005555")
|
||||||
hist(dE, col="#00AA7055", add=TRUE)
|
hist(dE, col = "#00AA7055", add = TRUE)
|
||||||
|
|
||||||
# To finalize the plots, let's do two more things: Explicitly define the breaks,
|
# To finalize the plots, let's do two more things: Explicitly define the breaks,
|
||||||
# to make sure they match up - otherwise they would not need to... like in this
|
# to make sure they match up - otherwise they would not need to... like in this
|
||||||
# example:
|
# example:
|
||||||
|
|
||||||
hist(dH, col="#DD005555")
|
hist(dH, col ="#DD005555")
|
||||||
hist(dE[dE < 3], col="#00AA7055", add=TRUE)
|
hist(dE[dE < 3], col = "#00AA7055", add = TRUE)
|
||||||
|
|
||||||
# Breaks are a parameter in hist() that can either be a scalar, to define how
|
# Breaks are a parameter in hist() that can either be a scalar, to define how
|
||||||
# many columns you want, or a vector, that defines the actual breakpoints.
|
# many columns you want, or a vector, that defines the actual breakpoints.
|
||||||
@ -687,9 +690,10 @@ hist(dE, col="#00AA7055", breaks=brk, add=TRUE)
|
|||||||
# impression of the relative frequency, but it is of course skewed by observing
|
# impression of the relative frequency, but it is of course skewed by observing
|
||||||
# relatively more or less of one type of secondary structure in a protein. As
|
# relatively more or less of one type of secondary structure in a protein. As
|
||||||
# part of the hist() function we can rescale the values so that the sum over all
|
# part of the hist() function we can rescale the values so that the sum over all
|
||||||
# is one: set the prameter freq=FALSE.
|
# is one: set the parameter freq=FALSE. And we explicitly set y-axis limits
|
||||||
|
# to accommodate bot distributions.
|
||||||
|
|
||||||
hist(dH, col="#DD005555", breaks=brk, freq=FALSE)
|
hist(dH, col="#DD005555", breaks=brk, freq=FALSE, ylim = c(0, 4))
|
||||||
hist(dE, col="#00AA7055", breaks=brk, freq=FALSE, add=TRUE)
|
hist(dE, col="#00AA7055", breaks=brk, freq=FALSE, add=TRUE)
|
||||||
|
|
||||||
# Adding labels and legend ...
|
# Adding labels and legend ...
|
||||||
@ -757,7 +761,7 @@ legend('topright',
|
|||||||
border = NA,
|
border = NA,
|
||||||
inset = 0.1)
|
inset = 0.1)
|
||||||
|
|
||||||
# It looks more and more that the distribution is indeed different. Our sample
|
# It's apparent that the distributions are indeed different. Our sample
|
||||||
# is large, but derives from a single protein. To do database scale statistics,
|
# is large, but derives from a single protein. To do database scale statistics,
|
||||||
# we should look at many more proteins. To give you a sense of how, let's do
|
# we should look at many more proteins. To give you a sense of how, let's do
|
||||||
# this for just ten proteins, randomly selected from non-homologous,
|
# this for just ten proteins, randomly selected from non-homologous,
|
||||||
|
Loading…
Reference in New Issue
Block a user