Added package information code after every library() call.
This commit is contained in:
parent
e57db9e9a0
commit
4479fa2d4d
@ -23,28 +23,31 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ---------------------------------------------
|
#TOC> ---------------------------------------------
|
||||||
#TOC> 1 Packages 41
|
#TOC> 1 Preparations 41
|
||||||
#TOC> 2 Defining the APSES domain 50
|
#TOC> 2 Defining the APSES domain 54
|
||||||
#TOC> 3 Executing the BLAST search 72
|
#TOC> 3 Executing the BLAST search 76
|
||||||
#TOC> 4 Analysing results 94
|
#TOC> 4 Analysing results 98
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# = 1 Preparations ========================================================
|
||||||
# = 1 Packages ============================================================
|
|
||||||
|
|
||||||
if (!require(Biostrings, quietly=TRUE)) {
|
if (!require(Biostrings, quietly=TRUE)) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
source("https://bioconductor.org/biocLite.R")
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# = 2 Defining the APSES domain ===========================================
|
# = 2 Defining the APSES domain ===========================================
|
||||||
|
@ -10,27 +10,34 @@
|
|||||||
#
|
#
|
||||||
# Versions:
|
# Versions:
|
||||||
# 0.1 First code copied from 2016 material.
|
# 0.1 First code copied from 2016 material.
|
||||||
|
#
|
||||||
#
|
#
|
||||||
# TODO:
|
# TODO:
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
||||||
|
#
|
||||||
# If there are portions you don't understand, use R's help system, Google for an
|
# If there are portions you don't understand, use R's help system, Google for an
|
||||||
# answer, or ask your instructor. Don't continue if you don't understand what's
|
# answer, or ask your instructor. Don't continue if you don't understand what's
|
||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
# = 1 ___Section___
|
# = 1 ___Section___
|
||||||
|
|
||||||
# First, we install and load the Biostrings package.
|
# First, we install and load the Biostrings package.
|
||||||
if (!require(Biostrings, quietly=TRUE)) {
|
if (!require(Biostrings, quietly=TRUE)) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
if (! exists("biocLite")) {
|
||||||
|
source("https://bioconductor.org/biocLite.R")
|
||||||
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Let's load BLOSUM62
|
# Let's load BLOSUM62
|
||||||
data(BLOSUM62)
|
data(BLOSUM62)
|
||||||
|
@ -22,22 +22,21 @@
|
|||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -------------------------------------------------------
|
#TOC> -------------------------------------------------------
|
||||||
#TOC> 1 Prepare 41
|
#TOC> 1 Prepare 45
|
||||||
#TOC> 2 Biostrings Pairwise Alignment 49
|
#TOC> 2 Biostrings Pairwise Alignment 53
|
||||||
#TOC> 2.1 Optimal global alignment 60
|
#TOC> 2.1 Optimal global alignment 70
|
||||||
#TOC> 2.2 Optimal local alignment 123
|
#TOC> 2.2 Optimal local alignment 133
|
||||||
#TOC> 3 APSES Domain annotation by alignment 147
|
#TOC> 3 APSES Domain annotation by alignment 157
|
||||||
#TOC> 4 Update your database script 228
|
#TOC> 4 Update your database script 238
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Prepare =============================================================
|
# = 1 Prepare =============================================================
|
||||||
|
|
||||||
# You need to recreate the protein database that you have constructed in the
|
# You need to recreate the protein database that you have constructed in the
|
||||||
@ -49,13 +48,19 @@ source("makeProteinDB.R")
|
|||||||
# = 2 Biostrings Pairwise Alignment =======================================
|
# = 2 Biostrings Pairwise Alignment =======================================
|
||||||
|
|
||||||
if (!require(Biostrings, quietly=TRUE)) {
|
if (!require(Biostrings, quietly=TRUE)) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
if (! exists("biocLite")) {
|
||||||
|
source("https://bioconductor.org/biocLite.R")
|
||||||
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
# Biostrings stores sequences in "XString" objects. Once we have onverted our
|
|
||||||
# traget sequences to AAString objects, the alignment itself is straightforward.
|
# Biostrings stores sequences in "XString" objects. Once we have converted our
|
||||||
|
# target sequences to AAString objects, the alignment itself is straightforward.
|
||||||
|
|
||||||
# == 2.1 Optimal global alignment ==========================================
|
# == 2.1 Optimal global alignment ==========================================
|
||||||
|
|
||||||
|
@ -23,18 +23,17 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------
|
#TOC> ----------------------------------------
|
||||||
#TOC> 1 Amino Acid Properties 40
|
#TOC> 1 Amino Acid Properties 43
|
||||||
#TOC> 2 Mutation Data matrix 150
|
#TOC> 2 Mutation Data matrix 163
|
||||||
#TOC> 3 Background score 188
|
#TOC> 3 Background score 205
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Amino Acid Properties ===============================================
|
# = 1 Amino Acid Properties ===============================================
|
||||||
@ -46,6 +45,10 @@ if (!require(seqinr)) {
|
|||||||
install.packages("seqinr")
|
install.packages("seqinr")
|
||||||
library(seqinr)
|
library(seqinr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = seqinr) # basic information
|
||||||
|
# browseVignettes("seqinr") # available vignettes
|
||||||
|
# data(package = "seqinr") # available datasets
|
||||||
|
|
||||||
# A true Labor of Love has gone into the compilation of the seqinr "aaindex"
|
# A true Labor of Love has gone into the compilation of the seqinr "aaindex"
|
||||||
# data:
|
# data:
|
||||||
@ -128,6 +131,12 @@ if (!require(ggtern)) {
|
|||||||
install.packages("ggtern")
|
install.packages("ggtern")
|
||||||
library(ggtern)
|
library(ggtern)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = ggtern) # basic information
|
||||||
|
# browseVignettes("ggtern") # available vignettes
|
||||||
|
# data(package = "ggtern") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# collect into data frame, normalize to (0.05, 0.95)
|
# collect into data frame, normalize to (0.05, 0.95)
|
||||||
myDat <- data.frame("phi" = 0.9*(((Y$I-min(Y$I))/(max(Y$I)-min(Y$I))))+0.05,
|
myDat <- data.frame("phi" = 0.9*(((Y$I-min(Y$I))/(max(Y$I)-min(Y$I))))+0.05,
|
||||||
@ -154,12 +163,16 @@ ggtern(data = myDat,
|
|||||||
# The Biostrings package contains the most common mutation data matrices.
|
# The Biostrings package contains the most common mutation data matrices.
|
||||||
|
|
||||||
if (!require(Biostrings, quietly=TRUE)) {
|
if (!require(Biostrings, quietly=TRUE)) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
if (! exists("biocLite")) {
|
||||||
|
source("https://bioconductor.org/biocLite.R")
|
||||||
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
data(package = "Biostrings")
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
# Let's load the BLOSUM62 mutation data matrix from the package
|
# Let's load the BLOSUM62 mutation data matrix from the package
|
||||||
data(BLOSUM62)
|
data(BLOSUM62)
|
||||||
|
@ -24,18 +24,16 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -------------------------------------------
|
#TOC> -------------------------------------------
|
||||||
#TOC> 1 Identifier mapping 41
|
#TOC> 1 Identifier mapping 45
|
||||||
#TOC> 2 Cross-referencing tables 142
|
#TOC> 2 Cross-referencing tables 151
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Identifier mapping ==================================================
|
# = 1 Identifier mapping ==================================================
|
||||||
@ -59,6 +57,11 @@ if (!require(httr, quietly=TRUE)) {
|
|||||||
install.packages("httr")
|
install.packages("httr")
|
||||||
library(httr)
|
library(httr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = httr) # basic information
|
||||||
|
# browseVignettes("httr") # available vignettes
|
||||||
|
# data(package = "httr") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# We will walk through the process with the refSeqID
|
# We will walk through the process with the refSeqID
|
||||||
# of yeast Mbp1 and Swi4, and we will also enter a dummy ID to check what
|
# of yeast Mbp1 and Swi4, and we will also enter a dummy ID to check what
|
||||||
@ -68,7 +71,7 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036"
|
|||||||
|
|
||||||
# The UniProt ID mapping service API is very straightforward to use: just define
|
# The UniProt ID mapping service API is very straightforward to use: just define
|
||||||
# the URL of the server and send a list of items labelled as "query" in the body
|
# the URL of the server and send a list of items labelled as "query" in the body
|
||||||
# of the request.
|
# of the request. GET() and POST() are functions from httr.
|
||||||
|
|
||||||
URL <- "http://www.uniprot.org/mapping/"
|
URL <- "http://www.uniprot.org/mapping/"
|
||||||
response <- POST(URL,
|
response <- POST(URL,
|
||||||
|
@ -39,9 +39,15 @@ if (!require(Rphylip, quietly=TRUE)) {
|
|||||||
install.packages("Rphylip")
|
install.packages("Rphylip")
|
||||||
library(Rphylip)
|
library(Rphylip)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = Rphylip) # basic information
|
||||||
|
# browseVignettes("Rphylip") # available vignettes
|
||||||
|
# data(package = "Rphylip") # available datasets
|
||||||
|
|
||||||
# This will install Rphylip, as well as its dependency, the package "ape".
|
# This will install Rphylip, as well as its dependency, the package "ape".
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# The next part may be tricky. You will need to figure out where
|
# The next part may be tricky. You will need to figure out where
|
||||||
# on your computer Phylip has been installed and define the path
|
# on your computer Phylip has been installed and define the path
|
||||||
# to the proml program that calculates a maximum-likelihood tree.
|
# to the proml program that calculates a maximum-likelihood tree.
|
||||||
|
@ -154,11 +154,17 @@ ENSPsel
|
|||||||
# day), simply a few lines of sample code to get you started on the specific use
|
# day), simply a few lines of sample code to get you started on the specific use
|
||||||
# case of retrieving descriptions for Ensembl protein IDs.
|
# case of retrieving descriptions for Ensembl protein IDs.
|
||||||
|
|
||||||
if (!require(biomaRt)) {
|
if (!require(biomaRt, quietly=TRUE)) {
|
||||||
source("http://bioconductor.org/biocLite.R")
|
if (! exists("biocLite")) {
|
||||||
|
source("https://bioconductor.org/biocLite.R")
|
||||||
|
}
|
||||||
biocLite("biomaRt")
|
biocLite("biomaRt")
|
||||||
library("biomaRt")
|
library(biomaRt)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = biomaRt) # basic information
|
||||||
|
# browseVignettes("biomaRt") # available vignettes
|
||||||
|
# data(package = "biomaRt") # available datasets
|
||||||
|
|
||||||
# define which dataset to use ...
|
# define which dataset to use ...
|
||||||
myMart <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
|
myMart <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
|
||||||
|
@ -32,8 +32,11 @@ if (!require(seqinr, quietly=TRUE)) {
|
|||||||
install.packages("seqinr")
|
install.packages("seqinr")
|
||||||
library(seqinr)
|
library(seqinr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = seqinr) # basic information
|
||||||
|
# browseVignettes("seqinr") # available vignettes
|
||||||
|
# data(package = "seqinr") # available datasets
|
||||||
|
|
||||||
help(package = seqinr) # shows the available functions
|
|
||||||
|
|
||||||
# Let's try a simple function
|
# Let's try a simple function
|
||||||
?computePI
|
?computePI
|
||||||
|
@ -23,26 +23,29 @@
|
|||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------
|
#TOC> ----------------------------------------------
|
||||||
#TOC> 1 Prepare 52
|
#TOC> 1 Prepare 56
|
||||||
#TOC> 2 Storing Sequence 66
|
#TOC> 2 Storing Sequence 74
|
||||||
#TOC> 3 String properties 95
|
#TOC> 3 String properties 103
|
||||||
#TOC> 4 Substrings 102
|
#TOC> 4 Substrings 110
|
||||||
#TOC> 5 Creating strings: sprintf() 108
|
#TOC> 5 Creating strings: sprintf() 116
|
||||||
#TOC> 6 Changing strings 139
|
#TOC> 6 Changing strings 147
|
||||||
#TOC> 6.1 stringi and stringr 191
|
#TOC> 6.1 stringi and stringr 199
|
||||||
#TOC> 6.2 dbSanitizeSequence() 201
|
#TOC> 6.2 dbSanitizeSequence() 209
|
||||||
#TOC> 7 Permuting and sampling 213
|
#TOC> 7 Permuting and sampling 221
|
||||||
#TOC> 7.1 Permutations 220
|
#TOC> 7.1 Permutations 228
|
||||||
#TOC> 7.2 Sampling 263
|
#TOC> 7.2 Sampling 271
|
||||||
#TOC> 7.2.1 Equiprobable characters 265
|
#TOC> 7.2.1 Equiprobable characters 273
|
||||||
#TOC> 7.2.2 Defined probability vector 300
|
#TOC> 7.2.2 Defined probability vector 313
|
||||||
#TOC> 8 Tasks 328
|
#TOC> 8 Tasks 341
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
@ -54,13 +57,17 @@
|
|||||||
# Much basic sequence handling is supported by the Bioconductor package
|
# Much basic sequence handling is supported by the Bioconductor package
|
||||||
# Biostrings.
|
# Biostrings.
|
||||||
|
|
||||||
if (! require(Biostrings)) {
|
if (! require(Biostrings, quietly=TRUE)) {
|
||||||
if (! exists("biocLite")) {
|
if (! exists("biocLite")) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
source("https://bioconductor.org/biocLite.R")
|
||||||
}
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# = 2 Storing Sequence ====================================================
|
# = 2 Storing Sequence ====================================================
|
||||||
@ -262,7 +269,7 @@ sum(d <= 2.5) # 276. 276 of our 10000 samples are just as bunched near the
|
|||||||
|
|
||||||
# == 7.2 Sampling ==========================================================
|
# == 7.2 Sampling ==========================================================
|
||||||
|
|
||||||
# === 7.2.1 Equiprobable characters
|
# === 7.2.1 Equiprobable characters
|
||||||
|
|
||||||
# Assume you need a large random-nucleotide string for some statistical model.
|
# Assume you need a large random-nucleotide string for some statistical model.
|
||||||
# How to create such a string? sample() can easily create it:
|
# How to create such a string? sample() can easily create it:
|
||||||
@ -280,10 +287,15 @@ sum(table(v)[c("G", "C")]) # 51 is close to expected
|
|||||||
# What's the number of CpG motifs? Easy to check with the stringi
|
# What's the number of CpG motifs? Easy to check with the stringi
|
||||||
# stri_match_all() function
|
# stri_match_all() function
|
||||||
|
|
||||||
if (! require(stringi)) {
|
if (! require(stringi, quietly=TRUE)) {
|
||||||
install.packages("stringi")
|
install.packages("stringi")
|
||||||
library(stringi)
|
library(stringi)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = stringi) # basic information
|
||||||
|
# browseVignettes("stringi") # available vignettes
|
||||||
|
# data(package = "stringi") # available datasets
|
||||||
|
|
||||||
|
|
||||||
(x <- stri_match_all(mySeq, regex = "CG"))
|
(x <- stri_match_all(mySeq, regex = "CG"))
|
||||||
length(unlist(x))
|
length(unlist(x))
|
||||||
@ -297,7 +309,7 @@ length(unlist(x))
|
|||||||
# of the smaller number of Cs and Gs - before biology even comes into play. How
|
# of the smaller number of Cs and Gs - before biology even comes into play. How
|
||||||
# do we account for that?
|
# do we account for that?
|
||||||
|
|
||||||
# === 7.2.2 Defined probability vector
|
# === 7.2.2 Defined probability vector
|
||||||
|
|
||||||
# This is where we need to know how to create samples with specific probability
|
# This is where we need to know how to create samples with specific probability
|
||||||
# distributions. A crude hack would be to create a sampling source vector with
|
# distributions. A crude hack would be to create a sampling source vector with
|
||||||
|
@ -24,36 +24,35 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
|
||||||
#TOC>
|
|
||||||
#TOC> Section Title Line
|
|
||||||
#TOC> ------------------------------------------------------------
|
|
||||||
#TOC> 1 A Relational Datamodel in R: review 58
|
|
||||||
#TOC> 1.1 Building a sample database structure 98
|
|
||||||
#TOC> 1.1.1 completing the database 209
|
|
||||||
#TOC> 1.2 Querying the database 244
|
|
||||||
#TOC> 1.3 Task: submit for credit (part 1/2) 273
|
|
||||||
#TOC> 2 Implementing the protein datamodel 285
|
|
||||||
#TOC> 2.1 JSON formatted source data 311
|
|
||||||
#TOC> 2.2 "Sanitizing" sequence data 346
|
|
||||||
#TOC> 2.3 Create a protein table for our data model 366
|
|
||||||
#TOC> 2.3.1 Initialize the database 368
|
|
||||||
#TOC> 2.3.2 Add data 380
|
|
||||||
#TOC> 2.4 Complete the database 400
|
|
||||||
#TOC> 2.4.1 Examples of navigating the database 427
|
|
||||||
#TOC> 2.5 Updating the database 459
|
|
||||||
#TOC> 3 Add your own data 471
|
|
||||||
#TOC> 3.1 Find a protein 479
|
|
||||||
#TOC> 3.2 Put the information into JSON files 508
|
|
||||||
#TOC> 3.3 Create an R script to create the database 531
|
|
||||||
#TOC> 3.3.1 Check and validate 551
|
|
||||||
#TOC> 3.4 Task: submit for credit (part 2/2) 592
|
|
||||||
#TOC>
|
|
||||||
#TOC> ==========================================================================
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#TOC> ==========================================================================
|
||||||
|
#TOC>
|
||||||
|
#TOC> Section Title Line
|
||||||
|
#TOC> -----------------------------------------------------------------
|
||||||
|
#TOC> 1 A Relational Datamodel in R: review 62
|
||||||
|
#TOC> 1.1 Building a sample database structure 102
|
||||||
|
#TOC> 1.1.1 completing the database 213
|
||||||
|
#TOC> 1.2 Querying the database 248
|
||||||
|
#TOC> 1.3 Task: submit for credit (part 1/2) 277
|
||||||
|
#TOC> 2 Implementing the protein datamodel 289
|
||||||
|
#TOC> 2.1 JSON formatted source data 315
|
||||||
|
#TOC> 2.2 "Sanitizing" sequence data 355
|
||||||
|
#TOC> 2.3 Create a protein table for our data model 375
|
||||||
|
#TOC> 2.3.1 Initialize the database 377
|
||||||
|
#TOC> 2.3.2 Add data 389
|
||||||
|
#TOC> 2.4 Complete the database 409
|
||||||
|
#TOC> 2.4.1 Examples of navigating the database 436
|
||||||
|
#TOC> 2.5 Updating the database 468
|
||||||
|
#TOC> 3 Add your own data 480
|
||||||
|
#TOC> 3.1 Find a protein 488
|
||||||
|
#TOC> 3.2 Put the information into JSON files 517
|
||||||
|
#TOC> 3.3 Create an R script to create your own database 540
|
||||||
|
#TOC> 3.3.1 Check and validate 560
|
||||||
|
#TOC> 3.4 Task: submit for credit (part 2/2) 601
|
||||||
|
#TOC>
|
||||||
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
# = 1 A Relational Datamodel in R: review =================================
|
# = 1 A Relational Datamodel in R: review =================================
|
||||||
|
|
||||||
@ -206,7 +205,7 @@ str(philDB)
|
|||||||
# go back, re-read, play with it, and ask for help. This is essential.
|
# go back, re-read, play with it, and ask for help. This is essential.
|
||||||
|
|
||||||
|
|
||||||
# === 1.1.1 completing the database
|
# === 1.1.1 completing the database
|
||||||
|
|
||||||
|
|
||||||
# Next I'll add one more person, and create the other two tables:
|
# Next I'll add one more person, and create the other two tables:
|
||||||
@ -331,10 +330,15 @@ file.edit("./data/MBP1_SACCE.json")
|
|||||||
|
|
||||||
# Let's load the "jsonlite" package and have a look at how it reads this data.
|
# Let's load the "jsonlite" package and have a look at how it reads this data.
|
||||||
|
|
||||||
if (! require("jsonlite", quietly = TRUE)) {
|
if (! require(jsonlite, quietly=TRUE)) {
|
||||||
install.packages("jsonlite")
|
install.packages("jsonlite")
|
||||||
library(jsonlite)
|
library(jsonlite)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = jsonlite) # basic information
|
||||||
|
# browseVignettes("jsonlite") # available vignettes
|
||||||
|
# data(package = "jsonlite") # available datasets
|
||||||
|
|
||||||
|
|
||||||
x <- fromJSON("./data/MBP1_SACCE.json")
|
x <- fromJSON("./data/MBP1_SACCE.json")
|
||||||
str(x)
|
str(x)
|
||||||
@ -365,7 +369,7 @@ dbSanitizeSequence(x)
|
|||||||
|
|
||||||
# == 2.3 Create a protein table for our data model =========================
|
# == 2.3 Create a protein table for our data model =========================
|
||||||
|
|
||||||
# === 2.3.1 Initialize the database
|
# === 2.3.1 Initialize the database
|
||||||
|
|
||||||
|
|
||||||
# The function dbInit contains all the code to return a list of empty
|
# The function dbInit contains all the code to return a list of empty
|
||||||
@ -377,7 +381,7 @@ myDB <- dbInit()
|
|||||||
str(myDB)
|
str(myDB)
|
||||||
|
|
||||||
|
|
||||||
# === 2.3.2 Add data
|
# === 2.3.2 Add data
|
||||||
|
|
||||||
|
|
||||||
# fromJSON() returns a dataframe that we can readily process to add data
|
# fromJSON() returns a dataframe that we can readily process to add data
|
||||||
@ -424,7 +428,7 @@ source("./scripts/ABC-createRefDB.R")
|
|||||||
str(myDB)
|
str(myDB)
|
||||||
|
|
||||||
|
|
||||||
# === 2.4.1 Examples of navigating the database
|
# === 2.4.1 Examples of navigating the database
|
||||||
|
|
||||||
|
|
||||||
# You can look at the contents of the tables in the usual way we access
|
# You can look at the contents of the tables in the usual way we access
|
||||||
@ -528,10 +532,10 @@ myDB$taxonomy$species[sel]
|
|||||||
# - Validate your two files online at https://jsonlint.com/
|
# - Validate your two files online at https://jsonlint.com/
|
||||||
|
|
||||||
|
|
||||||
# == 3.3 Create an R script to create the database =========================
|
# == 3.3 Create an R script to create your own database ====================
|
||||||
|
|
||||||
|
|
||||||
# Next: to create the database.
|
# Next: to create your own database.
|
||||||
# - Make a new R script, call it "makeProteinDB.R"
|
# - Make a new R script, call it "makeProteinDB.R"
|
||||||
# - enter the following expression as the first command:
|
# - enter the following expression as the first command:
|
||||||
# source("./scripts/ABC-createRefDB.R")
|
# source("./scripts/ABC-createRefDB.R")
|
||||||
@ -548,7 +552,7 @@ myDB$taxonomy$species[sel]
|
|||||||
# in any of the JSON files. Later you will add more information ...
|
# in any of the JSON files. Later you will add more information ...
|
||||||
|
|
||||||
|
|
||||||
# === 3.3.1 Check and validate
|
# === 3.3.1 Check and validate
|
||||||
|
|
||||||
|
|
||||||
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
|
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.
|
||||||
|
@ -22,22 +22,21 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------------------
|
#TOC> ----------------------------------------------------------
|
||||||
#TOC> 1 Storing the genetic code 43
|
#TOC> 1 Storing the genetic code 47
|
||||||
#TOC> 1.1 Genetic code in Biostrings 61
|
#TOC> 1.1 Genetic code in Biostrings 65
|
||||||
#TOC> 2 Working with the genetic code 88
|
#TOC> 2 Working with the genetic code 97
|
||||||
#TOC> 2.1 Translate a sequence. 117
|
#TOC> 2.1 Translate a sequence. 126
|
||||||
#TOC> 3 An alternative representation: 3D array 199
|
#TOC> 3 An alternative representation: 3D array 208
|
||||||
#TOC> 3.1 Print a Genetic code table 232
|
#TOC> 3.1 Print a Genetic code table 241
|
||||||
#TOC> 4 Tasks 258
|
#TOC> 4 Tasks 267
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Storing the genetic code ============================================
|
# = 1 Storing the genetic code ============================================
|
||||||
@ -64,13 +63,18 @@ x["TAA"]
|
|||||||
# available in the Bioconductor "Biostrings" package:
|
# available in the Bioconductor "Biostrings" package:
|
||||||
|
|
||||||
|
|
||||||
if (! require(Biostrings)) {
|
if (! require(Biostrings, quietly=TRUE)) {
|
||||||
if (! exists("biocLite")) {
|
if (! exists("biocLite")) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
source("https://bioconductor.org/biocLite.R")
|
||||||
}
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# The standard genetic code vector
|
# The standard genetic code vector
|
||||||
GENETIC_CODE
|
GENETIC_CODE
|
||||||
|
@ -23,26 +23,25 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ------------------------------------------------------
|
#TOC> ------------------------------------------------------
|
||||||
#TOC> 1 Review 48
|
#TOC> 1 Review 52
|
||||||
#TOC> 2 DEGREE DISTRIBUTIONS 192
|
#TOC> 2 DEGREE DISTRIBUTIONS 201
|
||||||
#TOC> 2.1 Random graph 198
|
#TOC> 2.1 Random graph 207
|
||||||
#TOC> 2.2 scale-free graph (Barabasi-Albert) 242
|
#TOC> 2.2 scale-free graph (Barabasi-Albert) 251
|
||||||
#TOC> 2.3 Random geometric graph 304
|
#TOC> 2.3 Random geometric graph 313
|
||||||
#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 424
|
#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 433
|
||||||
#TOC> 3.1 Basics 427
|
#TOC> 3.1 Basics 436
|
||||||
#TOC> 3.2 Components 499
|
#TOC> 3.2 Components 508
|
||||||
#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 518
|
#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 527
|
||||||
#TOC> 4.1 Diameter 553
|
#TOC> 4.1 Diameter 562
|
||||||
#TOC> 5 GRAPH CLUSTERING 621
|
#TOC> 5 GRAPH CLUSTERING 630
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Review ==============================================================
|
# = 1 Review ==============================================================
|
||||||
@ -121,10 +120,15 @@ set.seed(112358)
|
|||||||
# standard package for work with graphs in r is "igraph". We'll go into more
|
# standard package for work with graphs in r is "igraph". We'll go into more
|
||||||
# details of the igraph package a bit later, for now we just use it to plot:
|
# details of the igraph package a bit later, for now we just use it to plot:
|
||||||
|
|
||||||
if (!require(igraph)) {
|
if (! require(igraph, quietly=TRUE)) {
|
||||||
install.packages("igraph")
|
install.packages("igraph")
|
||||||
library(igraph)
|
library(igraph)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = igraph) # basic information
|
||||||
|
# browseVignettes("igraph") # available vignettes
|
||||||
|
# data(package = "igraph") # available datasets
|
||||||
|
|
||||||
|
|
||||||
myG <- graph_from_adjacency_matrix(myRandAM, mode = "undirected")
|
myG <- graph_from_adjacency_matrix(myRandAM, mode = "undirected")
|
||||||
set.seed(112358)
|
set.seed(112358)
|
||||||
|
@ -22,31 +22,29 @@
|
|||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -----------------------------------------------------------------------
|
#TOC> -----------------------------------------------------------------------
|
||||||
#TOC> 1 Introduction 50
|
#TOC> 1 Introduction 54
|
||||||
#TOC> 2 Three fundamental distributions 113
|
#TOC> 2 Three fundamental distributions 117
|
||||||
#TOC> 2.1 The Poisson Distribution 116
|
#TOC> 2.1 The Poisson Distribution 120
|
||||||
#TOC> 2.2 The uniform distribution 169
|
#TOC> 2.2 The uniform distribution 173
|
||||||
#TOC> 2.3 The Normal Distribution 189
|
#TOC> 2.3 The Normal Distribution 193
|
||||||
#TOC> 3 quantile-quantile comparison 230
|
#TOC> 3 quantile-quantile comparison 234
|
||||||
#TOC> 3.1 qqnorm() 240
|
#TOC> 3.1 qqnorm() 244
|
||||||
#TOC> 3.2 qqplot() 300
|
#TOC> 3.2 qqplot() 304
|
||||||
#TOC> 4 Quantifying the difference 317
|
#TOC> 4 Quantifying the difference 321
|
||||||
#TOC> 4.1 Chi2 test for discrete distributions 351
|
#TOC> 4.1 Chi2 test for discrete distributions 355
|
||||||
#TOC> 4.2 Kullback-Leibler divergence 435
|
#TOC> 4.2 Kullback-Leibler divergence 446
|
||||||
#TOC> 4.2.1 An example from tossing dice 446
|
#TOC> 4.2.1 An example from tossing dice 457
|
||||||
#TOC> 4.2.2 An example from lognormal distributions 568
|
#TOC> 4.2.2 An example from lognormal distributions 579
|
||||||
#TOC> 4.3 Kolmogorov-Smirnov test for continuous distributions 609
|
#TOC> 4.3 Kolmogorov-Smirnov test for continuous distributions 620
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Introduction ========================================================
|
# = 1 Introduction ========================================================
|
||||||
|
|
||||||
# The space of possible outcomes of events is called a probability distribution
|
# The space of possible outcomes of events is called a probability distribution
|
||||||
@ -372,12 +370,19 @@ myBreaks <- c(myBreaks, maxX) # ... and one that contains the outliers
|
|||||||
hist(rG1.5, breaks = myBreaks, col = myCols[4])
|
hist(rG1.5, breaks = myBreaks, col = myCols[4])
|
||||||
|
|
||||||
# ... but basic R has no inbuilt function to stack histogram bars side-by-side.
|
# ... but basic R has no inbuilt function to stack histogram bars side-by-side.
|
||||||
# We use the multhist() function in the plotrix package:
|
# We use the multhist() function in the plotrix package: check out the
|
||||||
|
# package information - plotrix has _many_ useful utilities to enhance
|
||||||
|
# plots or produce informative visualizations.
|
||||||
|
|
||||||
if (!require(plotrix)) {
|
if (! require(plotrix, quietly=TRUE)) {
|
||||||
install.packages("plotrix")
|
install.packages("plotrix")
|
||||||
library(plotrix)
|
library(plotrix)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = plotrix) # basic information
|
||||||
|
# browseVignettes("plotrix") # available vignettes
|
||||||
|
# data(package = "plotrix") # available datasets
|
||||||
|
|
||||||
|
|
||||||
h <- multhist(list(rL1, rL2, rG1.2, rG1.5, rG1.9 ),
|
h <- multhist(list(rL1, rL2, rG1.2, rG1.5, rG1.9 ),
|
||||||
breaks = myBreaks,
|
breaks = myBreaks,
|
||||||
@ -436,14 +441,14 @@ chisq.test(countsL1, countsG1.9, simulate.p.value = TRUE, B = 10000)
|
|||||||
|
|
||||||
# For discrete probability distributions, there is a much better statistic, the
|
# For discrete probability distributions, there is a much better statistic, the
|
||||||
# Kullback-Leibler divergence (or relative entropy). It is based on information
|
# Kullback-Leibler divergence (or relative entropy). It is based on information
|
||||||
# theory, and evaluates how different each matching pair of outcomem categories
|
# theory, and evaluates how different the matched pairs of outcome categories
|
||||||
# are. Its inputs are the probability mass functions (p.m.f.) of the two
|
# are. Its inputs are the probability mass functions (p.m.f.) of the two
|
||||||
# functions to be compared. A probability mass function is the probability of
|
# functions to be compared. A probability mass function is the probability of
|
||||||
# every outcome the process can have. Kullback-Leibler divergence therefore can
|
# every outcome the process can have. Kullback-Leibler divergence therefore can
|
||||||
# be applied to discrete distributions. But we need to talk a bit about
|
# be applied to discrete distributions. But we need to talk a bit about
|
||||||
# converting counts to p.m.f.'s.
|
# converting counts to p.m.f.'s.
|
||||||
|
|
||||||
# === 4.2.1 An example from tossing dice
|
# === 4.2.1 An example from tossing dice
|
||||||
|
|
||||||
# The p.m.f of an honest die is (1:1/6, 2:1/6, 3:1/6, 4:1/6, 5:1/6, 6:1/6). But
|
# The p.m.f of an honest die is (1:1/6, 2:1/6, 3:1/6, 4:1/6, 5:1/6, 6:1/6). But
|
||||||
# there is an issue when we convert sampled counts to frequencies, and estimate
|
# there is an issue when we convert sampled counts to frequencies, and estimate
|
||||||
@ -565,7 +570,7 @@ abline(v = KLdiv(rep(1/6, 6), pmfPC(counts, 1:6)), col="firebrick")
|
|||||||
# somewhat but not drastically atypical.
|
# somewhat but not drastically atypical.
|
||||||
|
|
||||||
|
|
||||||
# === 4.2.2 An example from lognormal distributions
|
# === 4.2.2 An example from lognormal distributions
|
||||||
|
|
||||||
# We had compared a set of lognormal and gamma distributions above, now we
|
# We had compared a set of lognormal and gamma distributions above, now we
|
||||||
# can use KL-divergence to quantify their similarity:
|
# can use KL-divergence to quantify their similarity:
|
||||||
|
@ -23,27 +23,26 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ---------------------------------------------------------
|
#TOC> ---------------------------------------------------------
|
||||||
#TOC> 1 The Biostrings package 53
|
#TOC> 1 The Biostrings package 57
|
||||||
#TOC> 2 Getting Data into Biostrings Objects 82
|
#TOC> 2 Getting Data into Biostrings Objects 91
|
||||||
#TOC> 3 Working with Biostrings Objects 102
|
#TOC> 3 Working with Biostrings Objects 111
|
||||||
#TOC> 3.1 Properties 105
|
#TOC> 3.1 Properties 114
|
||||||
#TOC> 3.2 Subsetting 142
|
#TOC> 3.2 Subsetting 151
|
||||||
#TOC> 3.3 Operators 154
|
#TOC> 3.3 Operators 163
|
||||||
#TOC> 3.4 Transformations 161
|
#TOC> 3.4 Transformations 170
|
||||||
#TOC> 4 Getting Data out of Biostrings Objects 168
|
#TOC> 4 Getting Data out of Biostrings Objects 177
|
||||||
#TOC> 5 More 177
|
#TOC> 5 More 186
|
||||||
#TOC> 5.1 Views 179
|
#TOC> 5.1 Views 188
|
||||||
#TOC> 5.2 Iranges 191
|
#TOC> 5.2 Iranges 200
|
||||||
#TOC> 5.3 StringSets 197
|
#TOC> 5.3 StringSets 206
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# This is a very brief introduction to the biostrings package, other units will
|
# This is a very brief introduction to the biostrings package, other units will
|
||||||
@ -55,15 +54,20 @@
|
|||||||
|
|
||||||
# First, we install and load the Biostrings package from bioconductor
|
# First, we install and load the Biostrings package from bioconductor
|
||||||
|
|
||||||
if (!require(Biostrings, quietly=TRUE)) {
|
if (! require(Biostrings, quietly=TRUE)) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
if (! exists("biocLite")) {
|
||||||
|
source("https://bioconductor.org/biocLite.R")
|
||||||
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Examine the package information:
|
||||||
|
library(help = Biostrings) # basic information
|
||||||
|
browseVignettes("Biostrings") # available vignettes
|
||||||
|
data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# This is a large collection of tools ...
|
|
||||||
help(package = "Biostrings")
|
|
||||||
|
|
||||||
# At its core, Biostrings objects are "classes" of type XString (you can think
|
# At its core, Biostrings objects are "classes" of type XString (you can think
|
||||||
# of a "class" in R as a special kind of list), that can take on particular
|
# of a "class" in R as a special kind of list), that can take on particular
|
||||||
|
@ -22,25 +22,24 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> --------------------------------------------------------
|
#TOC> --------------------------------------------------------
|
||||||
#TOC> 1 Designing a computational experiment 53
|
#TOC> 1 Designing a computational experiment 57
|
||||||
#TOC> 2 Setting up the tools 69
|
#TOC> 2 Setting up the tools 73
|
||||||
#TOC> 2.1 Natural and alternative genetic codes 72
|
#TOC> 2.1 Natural and alternative genetic codes 76
|
||||||
#TOC> 2.2 Effect of mutations 126
|
#TOC> 2.2 Effect of mutations 135
|
||||||
#TOC> 2.2.1 reverse-translate 137
|
#TOC> 2.2.1 reverse-translate 146
|
||||||
#TOC> 2.2.2 Randomly mutate 162
|
#TOC> 2.2.2 Randomly mutate 171
|
||||||
#TOC> 2.2.3 Forward- translate 187
|
#TOC> 2.2.3 Forward- translate 196
|
||||||
#TOC> 2.2.4 measure effect 205
|
#TOC> 2.2.4 measure effect 214
|
||||||
#TOC> 3 Run the experiment 252
|
#TOC> 3 Run the experiment 261
|
||||||
#TOC> 4 Task solutions 339
|
#TOC> 4 Task solutions 348
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# This unit demonstrates R code to simulate alternate genetic codes and evaluate
|
# This unit demonstrates R code to simulate alternate genetic codes and evaluate
|
||||||
@ -71,14 +70,19 @@
|
|||||||
|
|
||||||
# == 2.1 Natural and alternative genetic codes =============================
|
# == 2.1 Natural and alternative genetic codes =============================
|
||||||
|
|
||||||
# Load the code from the Biostrings package
|
# Load genetic code tables from the Biostrings package
|
||||||
if (! require(Biostrings)) {
|
if (! require(Biostrings, quietly=TRUE)) {
|
||||||
if (! exists("biocLite")) {
|
if (! exists("biocLite")) {
|
||||||
source("https://bioconductor.org/biocLite.R")
|
source("https://bioconductor.org/biocLite.R")
|
||||||
}
|
}
|
||||||
biocLite("Biostrings")
|
biocLite("Biostrings")
|
||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = Biostrings) # basic information
|
||||||
|
# browseVignettes("Biostrings") # available vignettes
|
||||||
|
# data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# There are many ways to generate alternative codes. The simplest way is to
|
# There are many ways to generate alternative codes. The simplest way is to
|
||||||
# randomly assign amino acids to codons. A more sophisticated way is to keep the
|
# randomly assign amino acids to codons. A more sophisticated way is to keep the
|
||||||
|
@ -23,27 +23,33 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ---------------------------------------------------------------
|
#TOC> ---------------------------------------------------------------
|
||||||
#TOC> 1 Constructing a POST command from a Web query 40
|
#TOC> 1 Constructing a POST command from a Web query 44
|
||||||
#TOC> 1.1 Task - fetchPrositeFeatures() function 134
|
#TOC> 1.1 Task - fetchPrositeFeatures() function 145
|
||||||
#TOC> 2 Task solutions 142
|
#TOC> 2 Task solutions 153
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Constructing a POST command from a Web query ========================
|
# = 1 Constructing a POST command from a Web query ========================
|
||||||
|
|
||||||
|
|
||||||
if (!require(httr)) {
|
if (! require(httr, quietly=TRUE)) {
|
||||||
install.packages("httr")
|
install.packages("httr")
|
||||||
library(httr)
|
library(httr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = httr) # basic information
|
||||||
|
# browseVignettes("httr") # available vignettes
|
||||||
|
# data(package = "httr") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# We have reverse engineered the Web form for a ScanProsite request, and can now
|
# We have reverse engineered the Web form for a ScanProsite request, and can now
|
||||||
# construct a POST request. The command is similar to GET(), but we need an
|
# construct a POST request. The command is similar to GET(), but we need an
|
||||||
|
55
RPR-SX-PDB.R
55
RPR-SX-PDB.R
@ -24,27 +24,26 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------------
|
#TOC> ----------------------------------------------------
|
||||||
#TOC> 1 Introduction to the bio3D package 59
|
#TOC> 1 Introduction to the bio3D package 63
|
||||||
#TOC> 2 A Ramachandran plot 148
|
#TOC> 2 A Ramachandran plot 151
|
||||||
#TOC> 3 Density plots 224
|
#TOC> 3 Density plots 227
|
||||||
#TOC> 3.1 Density-based colours 238
|
#TOC> 3.1 Density-based colours 241
|
||||||
#TOC> 3.2 Plotting with smoothScatter() 257
|
#TOC> 3.2 Plotting with smoothScatter() 260
|
||||||
#TOC> 3.3 Plotting hexbins 272
|
#TOC> 3.3 Plotting hexbins 275
|
||||||
#TOC> 3.4 Plotting density contours 291
|
#TOC> 3.4 Plotting density contours 299
|
||||||
#TOC> 3.4.1 ... as overlay on a colored grid 321
|
#TOC> 3.4.1 ... as overlay on a colored grid 333
|
||||||
#TOC> 3.4.2 ... as filled countour 338
|
#TOC> 3.4.2 ... as filled countour 350
|
||||||
#TOC> 3.4.3 ... as a perspective plot 369
|
#TOC> 3.4.3 ... as a perspective plot 381
|
||||||
#TOC> 4 cis-peptide bonds 387
|
#TOC> 4 cis-peptide bonds 399
|
||||||
#TOC> 5 H-bond lengths 402
|
#TOC> 5 H-bond lengths 414
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# In this example of protein structure interpretation, we ...
|
# In this example of protein structure interpretation, we ...
|
||||||
@ -59,16 +58,15 @@
|
|||||||
# = 1 Introduction to the bio3D package ===================================
|
# = 1 Introduction to the bio3D package ===================================
|
||||||
|
|
||||||
|
|
||||||
if(!require(bio3d)) {
|
if (! require(bio3d, quietly=TRUE)) {
|
||||||
install.packages("bio3d", dependencies=TRUE)
|
install.packages("bio3d")
|
||||||
library(bio3d)
|
library(bio3d)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = bio3d) # basic information
|
||||||
|
# browseVignettes("bio3d") # available vignettes
|
||||||
|
# data(package = "bio3d") # available datasets
|
||||||
|
|
||||||
lbio3d() # ... lists the newly installed functions,
|
|
||||||
# they all have help files associated.
|
|
||||||
# More information is available in the so-called
|
|
||||||
# "vignettes" that are distributed with most R packages:
|
|
||||||
vignette("bio3d_vignettes")
|
|
||||||
|
|
||||||
# bio3d can load molecules directly from the PDB servers, you don't _have_ to
|
# bio3d can load molecules directly from the PDB servers, you don't _have_ to
|
||||||
# store them locally, but you could.
|
# store them locally, but you could.
|
||||||
@ -273,10 +271,15 @@ abline(v = 0, lwd = 0.5, col = "#00000044")
|
|||||||
|
|
||||||
# If we wish to approximate values in a histogram-like fashion, we can use
|
# If we wish to approximate values in a histogram-like fashion, we can use
|
||||||
# hexbin()
|
# hexbin()
|
||||||
if (!require(hexbin)) {
|
if (! require(hexbin, quietly=TRUE)) {
|
||||||
install.packages("hexbin")
|
install.packages("hexbin")
|
||||||
library(hexbin)
|
library(hexbin)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = hexbin) # basic information
|
||||||
|
# browseVignettes("hexbin") # available vignettes
|
||||||
|
# data(package = "hexbin") # available datasets
|
||||||
|
|
||||||
|
|
||||||
myColorRamp <- colorRampPalette(c("#EEEEEE",
|
myColorRamp <- colorRampPalette(c("#EEEEEE",
|
||||||
"#3399CC",
|
"#3399CC",
|
||||||
@ -301,10 +304,14 @@ plot(hexbin(phi, psi, xbins = 10),
|
|||||||
# distributions. But for 2D data like our phi-psi plots, we need a function from
|
# distributions. But for 2D data like our phi-psi plots, we need a function from
|
||||||
# the MASS package: kde2d()
|
# the MASS package: kde2d()
|
||||||
|
|
||||||
if (!require(MASS)) {
|
if (! require(MASS, quietly=TRUE)) {
|
||||||
install.packages("MASS")
|
install.packages("MASS")
|
||||||
library(MASS)
|
library(MASS)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = MASS) # basic information
|
||||||
|
# browseVignettes("MASS") # available vignettes
|
||||||
|
# data(package = "MASS") # available datasets
|
||||||
|
|
||||||
?kde2d
|
?kde2d
|
||||||
dPhiPsi <-kde2d(phi, psi,
|
dPhiPsi <-kde2d(phi, psi,
|
||||||
|
@ -23,18 +23,17 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ----------------------------------------------------
|
#TOC> ----------------------------------------------------
|
||||||
#TOC> 1 UniProt files via GET 40
|
#TOC> 1 UniProt files via GET 44
|
||||||
#TOC> 1.1 Task - fetchUniProtSeq() function 98
|
#TOC> 1.1 Task - fetchUniProtSeq() function 107
|
||||||
#TOC> 2 Task solutions 105
|
#TOC> 2 Task solutions 114
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 UniProt files via GET ===============================================
|
# = 1 UniProt files via GET ===============================================
|
||||||
@ -49,10 +48,15 @@
|
|||||||
# a Web browser. Since this is a short and simple request, the GET verb is the
|
# a Web browser. Since this is a short and simple request, the GET verb is the
|
||||||
# right tool:
|
# right tool:
|
||||||
|
|
||||||
if (!require(httr)) {
|
if (! require(httr, quietly=TRUE)) {
|
||||||
install.packages("httr")
|
install.packages("httr")
|
||||||
library(httr)
|
library(httr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = httr) # basic information
|
||||||
|
# browseVignettes("httr") # available vignettes
|
||||||
|
# data(package = "httr") # available datasets
|
||||||
|
|
||||||
|
|
||||||
# The UniProt ID for Mbp1 is ...
|
# The UniProt ID for Mbp1 is ...
|
||||||
|
|
||||||
|
@ -23,27 +23,30 @@
|
|||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -------------------------------------------
|
#TOC> -------------------------------------------
|
||||||
#TOC> 1 Unit Tests with testthat 39
|
#TOC> 1 Unit Tests with testthat 43
|
||||||
#TOC> 2 Organizing your tests 148
|
#TOC> 2 Organizing your tests 156
|
||||||
#TOC> 3 Task solutions 173
|
#TOC> 3 Task solutions 181
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Unit Tests with testthat ============================================
|
# = 1 Unit Tests with testthat ============================================
|
||||||
|
|
||||||
# The testthat package supports writing and executing unit tests in many ways.
|
# The testthat package supports writing and executing unit tests in many ways.
|
||||||
|
|
||||||
if (!require(testthat)) {
|
if (! require(testthat, quietly=TRUE)) {
|
||||||
install.packages("testthat")
|
install.packages("testthat")
|
||||||
library(testthat)
|
library(testthat)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = testthat) # basic information
|
||||||
|
# browseVignettes("testthat") # available vignettes
|
||||||
|
# data(package = "testthat") # available datasets
|
||||||
|
|
||||||
# An atomic test consists of an expectation about the behaviour of a function or
|
# An atomic test consists of an expectation about the behaviour of a function or
|
||||||
# the existence of an object. testthat provides a number of useful expectations:
|
# the existence of an object. testthat provides a number of useful expectations:
|
||||||
|
@ -23,18 +23,17 @@
|
|||||||
# going on. That's not how it works ...
|
# going on. That's not how it works ...
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -----------------------------------------------------
|
#TOC> -----------------------------------------------------
|
||||||
#TOC> 1 Working with NCBI eUtils 40
|
#TOC> 1 Working with NCBI eUtils 44
|
||||||
#TOC> 1.1 Task - fetchNCBItaxData() function 149
|
#TOC> 1.1 Task - fetchNCBItaxData() function 162
|
||||||
#TOC> 2 Task solutions 156
|
#TOC> 2 Task solutions 169
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 1 Working with NCBI eUtils ============================================
|
# = 1 Working with NCBI eUtils ============================================
|
||||||
@ -44,19 +43,28 @@
|
|||||||
# To begin, we load some libraries with functions
|
# To begin, we load some libraries with functions
|
||||||
# we need...
|
# we need...
|
||||||
|
|
||||||
# httr sends and receives information via the http
|
# ... the package httr, which sends and receives information via the http
|
||||||
# protocol, just like a Web browser.
|
# protocol, just like a Web browser.
|
||||||
if (!require(httr, quietly=TRUE)) {
|
if (! require(httr, quietly=TRUE)) {
|
||||||
install.packages("httr")
|
install.packages("httr")
|
||||||
library(httr)
|
library(httr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = httr) # basic information
|
||||||
|
# browseVignettes("httr") # available vignettes
|
||||||
|
# data(package = "httr") # available datasets
|
||||||
|
|
||||||
# NCBI's eUtils send information in XML format; we
|
|
||||||
|
# ...plus the package xml2: NCBI's eUtils send information in XML format so we
|
||||||
# need to be able to parse XML.
|
# need to be able to parse XML.
|
||||||
if (!require(xml2)) {
|
if (! require(xml2, quietly=TRUE)) {
|
||||||
install.packages("xml2")
|
install.packages("xml2")
|
||||||
library(xml2)
|
library(xml2)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = xml2) # basic information
|
||||||
|
# browseVignettes("xml2") # available vignettes
|
||||||
|
# data(package = "xml2") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
#
|
#
|
||||||
# ToDo:
|
# ToDo:
|
||||||
# Notes:
|
# Notes:
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
setwd("<your/project/directory>")
|
setwd("<your/project/directory>")
|
||||||
@ -24,10 +24,16 @@ setwd("<your/project/directory>")
|
|||||||
# ==== PACKAGES ==============================================================
|
# ==== PACKAGES ==============================================================
|
||||||
# Load all required packages.
|
# Load all required packages.
|
||||||
|
|
||||||
if (!require(RUnit, quietly=TRUE)) {
|
if (! require(seqinr, quietly=TRUE)) {
|
||||||
install.packages("RUnit")
|
install.packages("seqinr")
|
||||||
library(RUnit)
|
library(seqinr)
|
||||||
}
|
}
|
||||||
|
# Package information:
|
||||||
|
# library(help = seqinr) # basic information
|
||||||
|
# browseVignettes("seqinr") # available vignettes
|
||||||
|
# data(package = "seqinr") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ==== FUNCTIONS =============================================================
|
# ==== FUNCTIONS =============================================================
|
||||||
@ -43,9 +49,9 @@ myFunction <- function(a, b=1) {
|
|||||||
# b: ...
|
# b: ...
|
||||||
# Value:
|
# Value:
|
||||||
# result: ...
|
# result: ...
|
||||||
|
|
||||||
# code ...
|
# code ...
|
||||||
|
|
||||||
return(result)
|
return(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user