diff --git a/BIN-ALI-BLAST.R b/BIN-ALI-BLAST.R index 0bdcfc6..f5dad1a 100644 --- a/BIN-ALI-BLAST.R +++ b/BIN-ALI-BLAST.R @@ -23,28 +23,31 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> --------------------------------------------- -#TOC> 1 Packages 41 -#TOC> 2 Defining the APSES domain 50 -#TOC> 3 Executing the BLAST search 72 -#TOC> 4 Analysing results 94 +#TOC> 1 Preparations 41 +#TOC> 2 Defining the APSES domain 54 +#TOC> 3 Executing the BLAST search 76 +#TOC> 4 Analysing results 98 #TOC> #TOC> ========================================================================== - - -# = 1 Packages ============================================================ +# = 1 Preparations ======================================================== if (!require(Biostrings, quietly=TRUE)) { source("https://bioconductor.org/biocLite.R") biocLite("Biostrings") library(Biostrings) } +# Package information: +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets # = 2 Defining the APSES domain =========================================== diff --git a/BIN-ALI-Dotplot.R b/BIN-ALI-Dotplot.R index d17361f..ab12437 100644 --- a/BIN-ALI-Dotplot.R +++ b/BIN-ALI-Dotplot.R @@ -10,27 +10,34 @@ # # Versions: # 0.1 First code copied from 2016 material. - +# # # TODO: # # # == DO NOT SIMPLY source() THIS FILE! ======================================= - +# # If there are portions you don't understand, use R's help system, Google for an # answer, or ask your instructor. Don't continue if you don't understand what's # going on. That's not how it works ... - +# # ============================================================================== # = 1 ___Section___ # First, we install and load the Biostrings package. 
if (!require(Biostrings, quietly=TRUE)) { - source("https://bioconductor.org/biocLite.R") + if (! exists("biocLite")) { + source("https://bioconductor.org/biocLite.R") + } biocLite("Biostrings") library(Biostrings) } +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets + + # Let's load BLOSUM62 data(BLOSUM62) diff --git a/BIN-ALI-Optimal_sequence_alignment.R b/BIN-ALI-Optimal_sequence_alignment.R index 20cdcfb..b418451 100644 --- a/BIN-ALI-Optimal_sequence_alignment.R +++ b/BIN-ALI-Optimal_sequence_alignment.R @@ -22,22 +22,21 @@ # # ============================================================================== + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ------------------------------------------------------- -#TOC> 1 Prepare 41 -#TOC> 2 Biostrings Pairwise Alignment 49 -#TOC> 2.1 Optimal global alignment 60 -#TOC> 2.2 Optimal local alignment 123 -#TOC> 3 APSES Domain annotation by alignment 147 -#TOC> 4 Update your database script 228 +#TOC> 1 Prepare 45 +#TOC> 2 Biostrings Pairwise Alignment 53 +#TOC> 2.1 Optimal global alignment 70 +#TOC> 2.2 Optimal local alignment 133 +#TOC> 3 APSES Domain annotation by alignment 157 +#TOC> 4 Update your database script 238 #TOC> #TOC> ========================================================================== - - # = 1 Prepare ============================================================= # You need to recreate the protein database that you have constructed in the @@ -49,13 +48,19 @@ source("makeProteinDB.R") # = 2 Biostrings Pairwise Alignment ======================================= if (!require(Biostrings, quietly=TRUE)) { - source("https://bioconductor.org/biocLite.R") + if (! 
exists("biocLite")) { + source("https://bioconductor.org/biocLite.R") + } biocLite("Biostrings") library(Biostrings) } +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets -# Biostrings stores sequences in "XString" objects. Once we have onverted our -# traget sequences to AAString objects, the alignment itself is straightforward. + +# Biostrings stores sequences in "XString" objects. Once we have converted our +# target sequences to AAString objects, the alignment itself is straightforward. # == 2.1 Optimal global alignment ========================================== diff --git a/BIN-ALI-Similarity.R b/BIN-ALI-Similarity.R index 7c44015..370f9cd 100644 --- a/BIN-ALI-Similarity.R +++ b/BIN-ALI-Similarity.R @@ -23,18 +23,17 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ---------------------------------------- -#TOC> 1 Amino Acid Properties 40 -#TOC> 2 Mutation Data matrix 150 -#TOC> 3 Background score 188 +#TOC> 1 Amino Acid Properties 43 +#TOC> 2 Mutation Data matrix 163 +#TOC> 3 Background score 205 #TOC> #TOC> ========================================================================== - - # = 1 Amino Acid Properties =============================================== @@ -46,6 +45,10 @@ if (!require(seqinr)) { install.packages("seqinr") library(seqinr) } +# Package information: +# library(help = seqinr) # basic information +# browseVignettes("seqinr") # available vignettes +# data(package = "seqinr") # available datasets # A true Labor of Love has gone into the compilation of the seqinr "aaindex" # data: @@ -128,6 +131,12 @@ if (!require(ggtern)) { install.packages("ggtern") library(ggtern) } +# Package information: +# library(help = ggtern) # basic information +# 
browseVignettes("ggtern") # available vignettes +# data(package = "ggtern") # available datasets + + # collect into data frame, normalize to (0.05, 0.95) myDat <- data.frame("phi" = 0.9*(((Y$I-min(Y$I))/(max(Y$I)-min(Y$I))))+0.05, @@ -154,12 +163,16 @@ ggtern(data = myDat, # The Biostrings package contains the most common mutation data matrices. if (!require(Biostrings, quietly=TRUE)) { - source("https://bioconductor.org/biocLite.R") + if (! exists("biocLite")) { + source("https://bioconductor.org/biocLite.R") + } biocLite("Biostrings") library(Biostrings) } - -data(package = "Biostrings") +# Package information: +# library(help=Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets # Let's load the BLOSUM62 mutation data matrix from the package data(BLOSUM62) diff --git a/BIN-Data_integration.R b/BIN-Data_integration.R index af47d83..b514c9b 100644 --- a/BIN-Data_integration.R +++ b/BIN-Data_integration.R @@ -24,18 +24,16 @@ # going on. That's not how it works ... 
# # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ------------------------------------------- -#TOC> 1 Identifier mapping 41 -#TOC> 2 Cross-referencing tables 142 +#TOC> 1 Identifier mapping 45 +#TOC> 2 Cross-referencing tables 151 #TOC> #TOC> ========================================================================== - - - # = 1 Identifier mapping ================================================== @@ -59,6 +57,11 @@ if (!require(httr, quietly=TRUE)) { install.packages("httr") library(httr) } +# Package information: +# library(help = httr) # basic information +# browseVignettes("httr") # available vignettes +# data(package = "httr") # available datasets + # We will walk through the process with the refSeqID # of yeast Mbp1 and Swi4, and we will also enter a dummy ID to check what @@ -68,7 +71,7 @@ myQueryIDs <- "NP_010227 NP_00000 NP_011036" # The UniProt ID mapping service API is very straightforward to use: just define # the URL of the server and send a list of items labelled as "query" in the body -# of the request. +# of the request. GET() and POST() are functions from httr. URL <- "http://www.uniprot.org/mapping/" response <- POST(URL, diff --git a/BIN-PHYLO-Tree_building.R b/BIN-PHYLO-Tree_building.R index dabf817..d9a43d6 100644 --- a/BIN-PHYLO-Tree_building.R +++ b/BIN-PHYLO-Tree_building.R @@ -39,9 +39,15 @@ if (!require(Rphylip, quietly=TRUE)) { install.packages("Rphylip") library(Rphylip) } +# Package information: +# library(help = Rphylip) # basic information +# browseVignettes("Rphylip") # available vignettes +# data(package = "Rphylip") # available datasets # This will install RPhylip, as well as its dependency, the package "ape". + + # The next part may be tricky. 
You will need to figure out where # on your computer Phylip has been installed and define the path # to the proml program that calculates a maximum-likelihood tree. diff --git a/BIN-PPI-Analysis.R b/BIN-PPI-Analysis.R index 678939e..e2ccd0a 100644 --- a/BIN-PPI-Analysis.R +++ b/BIN-PPI-Analysis.R @@ -154,11 +154,17 @@ ENSPsel # day), simply a few lines of sample code to get you started on the specific use # case of retrieving descriptions for ensembl protein IDs. -if (!require(biomaRt)) { - source("http://bioconductor.org/biocLite.R") +if (!require(biomaRt, quietly=TRUE)) { + if (! exists("biocLite")) { + source("https://bioconductor.org/biocLite.R") + } biocLite("biomaRt") - library("biomaRt") + library(biomaRt) } +# Package information: +# library(help = biomaRt) # basic information +# browseVignettes("biomaRt") # available vignettes +# data(package = "biomaRt") # available datasets # define which dataset to use ... myMart <- useMart("ensembl", dataset="hsapiens_gene_ensembl") diff --git a/BIN-SEQA-Comparison.R b/BIN-SEQA-Comparison.R index b057e08..2e5f8c2 100644 --- a/BIN-SEQA-Comparison.R +++ b/BIN-SEQA-Comparison.R @@ -32,8 +32,11 @@ if (!require(seqinr, quietly=TRUE)) { install.packages("seqinr") library(seqinr) } +# Package information: +# library(help = seqinr) # basic information +# browseVignettes("seqinr") # available vignettes +# data(package = "seqinr") # available datasets -help(package = seqinr) # shows the available functions # Let's try a simple function ?computePI diff --git a/BIN-Sequence.R b/BIN-Sequence.R index 8674594..18d924a 100644 --- a/BIN-Sequence.R +++ b/BIN-Sequence.R @@ -23,26 +23,29 @@ # # ============================================================================== + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> ---------------------------------------------- -#TOC> 1 Prepare 52 -#TOC> 2 Storing Sequence 66 -#TOC> 3 String properties 95 -#TOC> 4 
Substrings 102 -#TOC> 5 Creating strings: sprintf() 108 -#TOC> 6 Changing strings 139 -#TOC> 6.1 stringi and stringr 191 -#TOC> 6.2 dbSanitizeSequence() 201 -#TOC> 7 Permuting and sampling 213 -#TOC> 7.1 Permutations 220 -#TOC> 7.2 Sampling 263 -#TOC> 7.2.1 Equiprobable characters 265 -#TOC> 7.2.2 Defined probability vector 300 -#TOC> 8 Tasks 328 -#TOC> +#TOC> 1 Prepare 56 +#TOC> 2 Storing Sequence 74 +#TOC> 3 String properties 103 +#TOC> 4 Substrings 110 +#TOC> 5 Creating strings: sprintf() 116 +#TOC> 6 Changing strings 147 +#TOC> 6.1 stringi and stringr 199 +#TOC> 6.2 dbSanitizeSequence() 209 +#TOC> 7 Permuting and sampling 221 +#TOC> 7.1 Permutations 228 +#TOC> 7.2 Sampling 271 +#TOC> 7.2.1 Equiprobable characters 273 +#TOC> 7.2.2 Defined probability vector 313 +#TOC> 8 Tasks 341 +#TOC> #TOC> ========================================================================== + + # # # @@ -54,13 +57,17 @@ # Much basic sequence handling is supported by the Bioconductor package # Biostrings. -if (! require(Biostrings)) { +if (! require(Biostrings, quietly=TRUE)) { if (! exists("biocLite")) { source("https://bioconductor.org/biocLite.R") } biocLite("Biostrings") library(Biostrings) } +# Package information: +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets # = 2 Storing Sequence ==================================================== @@ -262,7 +269,7 @@ sum(d <= 2.5) # 276. 276 of our 10000 samples are just as bunched near the # == 7.2 Sampling ========================================================== -# === 7.2.1 Equiprobable characters +# === 7.2.1 Equiprobable characters # Assume you need a large random-nucleotide string for some statistical model. # How to create such a string? sample() can easily create it: @@ -280,10 +287,15 @@ sum(table(v)[c("G", "C")]) # 51 is close to expected # What's the number of CpG motifs? 
Easy to check with the stringi # stri_match_all() function -if (! require(stringi)) { +if (! require(stringi, quietly=TRUE)) { install.packages("stringi") library(stringi) } +# Package information: +# library(help = stringi) # basic information +# browseVignettes("stringi") # available vignettes +# data(package = "stringi") # available datasets + (x <- stri_match_all(mySeq, regex = "CG")) length(unlist(x)) @@ -297,7 +309,7 @@ length(unlist(x)) # of the smaller number of Cs and Gs - before biology even comes into play. How # do we account for that? -# === 7.2.2 Defined probability vector +# === 7.2.2 Defined probability vector # This is where we need to know how to create samples with specific probability # distributions. A crude hack would be to create a sampling source vector with diff --git a/BIN-Storing_data.R b/BIN-Storing_data.R index 64fbb0f..350a261 100644 --- a/BIN-Storing_data.R +++ b/BIN-Storing_data.R @@ -24,36 +24,35 @@ # going on. That's not how it works ... # # ============================================================================== - -#TOC> ========================================================================== -#TOC> -#TOC> Section Title Line -#TOC> ------------------------------------------------------------ -#TOC> 1 A Relational Datamodel in R: review 58 -#TOC> 1.1 Building a sample database structure 98 -#TOC> 1.1.1 completing the database 209 -#TOC> 1.2 Querying the database 244 -#TOC> 1.3 Task: submit for credit (part 1/2) 273 -#TOC> 2 Implementing the protein datamodel 285 -#TOC> 2.1 JSON formatted source data 311 -#TOC> 2.2 "Sanitizing" sequence data 346 -#TOC> 2.3 Create a protein table for our data model 366 -#TOC> 2.3.1 Initialize the database 368 -#TOC> 2.3.2 Add data 380 -#TOC> 2.4 Complete the database 400 -#TOC> 2.4.1 Examples of navigating the database 427 -#TOC> 2.5 Updating the database 459 -#TOC> 3 Add your own data 471 -#TOC> 3.1 Find a protein 479 -#TOC> 3.2 Put the information into JSON files 508 -#TOC> 3.3 Create an R 
script to create the database 531 -#TOC> 3.3.1 Check and validate 551 -#TOC> 3.4 Task: submit for credit (part 2/2) 592 -#TOC> -#TOC> ========================================================================== - +#TOC> ========================================================================== +#TOC> +#TOC> Section Title Line +#TOC> ----------------------------------------------------------------- +#TOC> 1 A Relational Datamodel in R: review 62 +#TOC> 1.1 Building a sample database structure 102 +#TOC> 1.1.1 completing the database 213 +#TOC> 1.2 Querying the database 248 +#TOC> 1.3 Task: submit for credit (part 1/2) 277 +#TOC> 2 Implementing the protein datamodel 289 +#TOC> 2.1 JSON formatted source data 315 +#TOC> 2.2 "Sanitizing" sequence data 355 +#TOC> 2.3 Create a protein table for our data model 375 +#TOC> 2.3.1 Initialize the database 377 +#TOC> 2.3.2 Add data 389 +#TOC> 2.4 Complete the database 409 +#TOC> 2.4.1 Examples of navigating the database 436 +#TOC> 2.5 Updating the database 468 +#TOC> 3 Add your own data 480 +#TOC> 3.1 Find a protein 488 +#TOC> 3.2 Put the information into JSON files 517 +#TOC> 3.3 Create an R script to create your own database 540 +#TOC> 3.3.1 Check and validate 560 +#TOC> 3.4 Task: submit for credit (part 2/2) 601 +#TOC> +#TOC> ========================================================================== + # = 1 A Relational Datamodel in R: review ================================= @@ -206,7 +205,7 @@ str(philDB) # go back, re-read, play with it, and ask for help. This is essential. -# === 1.1.1 completing the database +# === 1.1.1 completing the database # Next I'll add one more person, and create the other two tables: @@ -331,10 +330,15 @@ file.edit("./data/MBP1_SACCE.json") # Let's load the "jsonlite" package and have a look at how it reads this data. -if (! require("jsonlite", quietly = TRUE)) { +if (! 
require(jsonlite, quietly=TRUE)) { install.packages("jsonlite") library(jsonlite) } +# Package information: +# library(help = jsonlite) # basic information +# browseVignettes("jsonlite") # available vignettes +# data(package = "jsonlite") # available datasets + x <- fromJSON("./data/MBP1_SACCE.json") str(x) @@ -365,7 +369,7 @@ dbSanitizeSequence(x) # == 2.3 Create a protein table for our data model ========================= -# === 2.3.1 Initialize the database +# === 2.3.1 Initialize the database # The function dbInit contains all the code to return a list of empty @@ -377,7 +381,7 @@ myDB <- dbInit() str(myDB) -# === 2.3.2 Add data +# === 2.3.2 Add data # fromJSON() returns a dataframe that we can readily process to add data @@ -424,7 +428,7 @@ source("./scripts/ABC-createRefDB.R") str(myDB) -# === 2.4.1 Examples of navigating the database +# === 2.4.1 Examples of navigating the database # You can look at the contents of the tables in the usual way we access @@ -528,10 +532,10 @@ myDB$taxonomy$species[sel] # - Validate your two files online at https://jsonlint.com/ -# == 3.3 Create an R script to create the database ========================= +# == 3.3 Create an R script to create your own database ==================== -# Next: to create the database. +# Next: to create your own database. # - Make a new R script, call it "makeProteinDB.R" # - enter the following expression as the first command: # source("./scripts/ABC-createRefDB.R") @@ -548,7 +552,7 @@ myDB$taxonomy$species[sel] # in any of the JSON files. Later you will add more information ... -# === 3.3.1 Check and validate +# === 3.3.1 Check and validate # Is your protein named according to the pattern "MBP1_MYSPE"? It should be. diff --git a/FND-Genetic_code.R b/FND-Genetic_code.R index b25730f..74b8556 100644 --- a/FND-Genetic_code.R +++ b/FND-Genetic_code.R @@ -22,22 +22,21 @@ # going on. That's not how it works ... 
# # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ---------------------------------------------------------- -#TOC> 1 Storing the genetic code 43 -#TOC> 1.1 Genetic code in Biostrings 61 -#TOC> 2 Working with the genetic code 88 -#TOC> 2.1 Translate a sequence. 117 -#TOC> 3 An alternative representation: 3D array 199 -#TOC> 3.1 Print a Genetic code table 232 -#TOC> 4 Tasks 258 +#TOC> 1 Storing the genetic code 47 +#TOC> 1.1 Genetic code in Biostrings 65 +#TOC> 2 Working with the genetic code 97 +#TOC> 2.1 Translate a sequence. 126 +#TOC> 3 An alternative representation: 3D array 208 +#TOC> 3.1 Print a Genetic code table 241 +#TOC> 4 Tasks 267 #TOC> #TOC> ========================================================================== - - # = 1 Storing the genetic code ============================================ @@ -64,13 +63,18 @@ x["TAA"] # available in the Bioconductor "Biostrings" package: -if (! require(Biostrings)) { +if (! require(Biostrings, quietly=TRUE)) { if (! exists("biocLite")) { source("https://bioconductor.org/biocLite.R") } biocLite("Biostrings") library(Biostrings) } +# Package information: +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets + # The standard genetic code vector GENETIC_CODE diff --git a/FND-MAT-Graphs_and_networks.R b/FND-MAT-Graphs_and_networks.R index 5b33c06..7af6e22 100644 --- a/FND-MAT-Graphs_and_networks.R +++ b/FND-MAT-Graphs_and_networks.R @@ -23,26 +23,25 @@ # going on. That's not how it works ... 
# # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ------------------------------------------------------ -#TOC> 1 Review 48 -#TOC> 2 DEGREE DISTRIBUTIONS 192 -#TOC> 2.1 Random graph 198 -#TOC> 2.2 scale-free graph (Barabasi-Albert) 242 -#TOC> 2.3 Random geometric graph 304 -#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 424 -#TOC> 3.1 Basics 427 -#TOC> 3.2 Components 499 -#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 518 -#TOC> 4.1 Diameter 553 -#TOC> 5 GRAPH CLUSTERING 621 +#TOC> 1 Review 52 +#TOC> 2 DEGREE DISTRIBUTIONS 201 +#TOC> 2.1 Random graph 207 +#TOC> 2.2 scale-free graph (Barabasi-Albert) 251 +#TOC> 2.3 Random geometric graph 313 +#TOC> 3 A CLOSER LOOK AT THE igraph PACKAGE 433 +#TOC> 3.1 Basics 436 +#TOC> 3.2 Components 508 +#TOC> 4 RANDOM GRAPHS AND GRAPH METRICS 527 +#TOC> 4.1 Diameter 562 +#TOC> 5 GRAPH CLUSTERING 630 #TOC> #TOC> ========================================================================== - - # = 1 Review ============================================================== @@ -121,10 +120,15 @@ set.seed(112358) # standard package for work with graphs in r is "igraph". We'll go into more # details of the igraph package a bit later, for now we just use it to plot: -if (!require(igraph)) { +if (! 
require(igraph, quietly=TRUE)) { install.packages("igraph") library(igraph) } +# Package information: +# library(help = igraph) # basic information +# browseVignettes("igraph") # available vignettes +# data(package = "igraph") # available datasets + myG <- graph_from_adjacency_matrix(myRandAM, mode = "undirected") set.seed(112358) diff --git a/FND-STA-Probability_distribution.R b/FND-STA-Probability_distribution.R index 98a580a..98b944b 100644 --- a/FND-STA-Probability_distribution.R +++ b/FND-STA-Probability_distribution.R @@ -22,31 +22,29 @@ # # ============================================================================== + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> ----------------------------------------------------------------------- -#TOC> 1 Introduction 50 -#TOC> 2 Three fundamental distributions 113 -#TOC> 2.1 The Poisson Distribution 116 -#TOC> 2.2 The uniform distribution 169 -#TOC> 2.3 The Normal Distribution 189 -#TOC> 3 quantile-quantile comparison 230 -#TOC> 3.1 qqnorm() 240 -#TOC> 3.2 qqplot() 300 -#TOC> 4 Quantifying the difference 317 -#TOC> 4.1 Chi2 test for discrete distributions 351 -#TOC> 4.2 Kullback-Leibler divergence 435 -#TOC> 4.2.1 An example from tossing dice 446 -#TOC> 4.2.2 An example from lognormal distributions 568 -#TOC> 4.3 Kolmogorov-Smirnov test for continuous distributions 609 -#TOC> +#TOC> 1 Introduction 54 +#TOC> 2 Three fundamental distributions 117 +#TOC> 2.1 The Poisson Distribution 120 +#TOC> 2.2 The uniform distribution 173 +#TOC> 2.3 The Normal Distribution 193 +#TOC> 3 quantile-quantile comparison 234 +#TOC> 3.1 qqnorm() 244 +#TOC> 3.2 qqplot() 304 +#TOC> 4 Quantifying the difference 321 +#TOC> 4.1 Chi2 test for discrete distributions 355 +#TOC> 4.2 Kullback-Leibler divergence 446 +#TOC> 4.2.1 An example from tossing dice 457 +#TOC> 4.2.2 An example from lognormal distributions 579 +#TOC> 4.3 Kolmogorov-Smirnov test for continuous 
distributions 620 +#TOC> #TOC> ========================================================================== - - - # = 1 Introduction ======================================================== # The space of possible outcomes of events is called a probability distribution @@ -372,12 +370,19 @@ myBreaks <- c(myBreaks, maxX) # ... and one that contains the outliers hist(rG1.5, breaks = myBreaks, col = myCols[4]) # ... but basic R has no inbuilt function to stack histogram bars side-by-side. -# We use the multhist() function in the plotrix package: +# We use the multhist() function in the plotrix package: check out the +# package information - plotrix has _many_ useful utilities to enhance +# plots or produce informative visualizations. -if (!require(plotrix)) { +if (! require(plotrix, quietly=TRUE)) { install.packages("plotrix") library(plotrix) } +# Package information: +# library(help = plotrix) # basic information +# browseVignettes("plotrix") # available vignettes +# data(package = "plotrix") # available datasets + h <- multhist(list(rL1, rL2, rG1.2, rG1.5, rG1.9 ), breaks = myBreaks, @@ -436,14 +441,14 @@ chisq.test(countsL1, countsG1.9, simulate.p.value = TRUE, B = 10000) # For discrete probability distributions, there is a much better statistic, the # Kullback-Leibler divergence (or relative entropy). It is based in information -# theory, and evaluates how different each matching pair of outcomem categories +# theory, and evaluates how different the matched pairs of outcome categories # are. Its inputs are the probability mass functions (p.m.f.) of the two # functions to be compared. A probability mass function is the probability of # every outcome the process can have. Kullback-Leibler divergence therefore can # be applied to discrete distributions. But we need to talk a bit about # converting counts to p.m.f.'s. 
-# === 4.2.1 An example from tossing dice +# === 4.2.1 An example from tossing dice # The p.m.f of an honest die is (1:1/6, 2:1/6, 3:1/6, 4:1/6, 5:1/6, 6:1/6). But # there is an issue when we convert sampled counts to frequencies, and estimate @@ -565,7 +570,7 @@ abline(v = KLdiv(rep(1/6, 6), pmfPC(counts, 1:6)), col="firebrick") # somewhat but not drastically atypical. -# === 4.2.2 An example from lognormal distributions +# === 4.2.2 An example from lognormal distributions # We had compared a set of lognormal and gamma distributions above, now we # can use KL-divergence to quantify their similarity: diff --git a/RPR-Biostrings.R b/RPR-Biostrings.R index b936af3..a158b0a 100644 --- a/RPR-Biostrings.R +++ b/RPR-Biostrings.R @@ -23,27 +23,26 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> --------------------------------------------------------- -#TOC> 1 The Biostrings package 53 -#TOC> 2 Getting Data into Biostrings Objects 82 -#TOC> 3 Working with Biostrings Objects 102 -#TOC> 3.1 Properties 105 -#TOC> 3.2 Subsetting 142 -#TOC> 3.3 Operators 154 -#TOC> 3.4 Transformations 161 -#TOC> 4 Getting Data out of Biostrings Objects 168 -#TOC> 5 More 177 -#TOC> 5.1 Views 179 -#TOC> 5.2 Iranges 191 -#TOC> 5.3 StringSets 197 +#TOC> 1 The Biostrings package 57 +#TOC> 2 Getting Data into Biostrings Objects 91 +#TOC> 3 Working with Biostrings Objects 111 +#TOC> 3.1 Properties 114 +#TOC> 3.2 Subsetting 151 +#TOC> 3.3 Operators 163 +#TOC> 3.4 Transformations 170 +#TOC> 4 Getting Data out of Biostrings Objects 177 +#TOC> 5 More 186 +#TOC> 5.1 Views 188 +#TOC> 5.2 Iranges 200 +#TOC> 5.3 StringSets 206 #TOC> #TOC> ========================================================================== - - # This is a very brief introduction to the biostrings package, other units will @@ -55,15 +54,20 
@@ # First, we install and load the Biostrings package from bioconductor -if (!require(Biostrings, quietly=TRUE)) { - source("https://bioconductor.org/biocLite.R") +if (! require(Biostrings, quietly=TRUE)) { + if (! exists("biocLite")) { + source("https://bioconductor.org/biocLite.R") + } biocLite("Biostrings") library(Biostrings) } +# Examine the package information: +library(help = Biostrings) # basic information +browseVignettes("Biostrings") # available vignettes +data(package = "Biostrings") # available datasets + + -# This is a large collection of tools ... -help(package = "Biostrings") # At its core, Biostrings objects are "classes" of type XString (you can think # of a "class" in R as a special kind of list), that can take on particular diff --git a/RPR-Genetic_code_optimality.R b/RPR-Genetic_code_optimality.R index a2345b5..3c99cea 100644 --- a/RPR-Genetic_code_optimality.R +++ b/RPR-Genetic_code_optimality.R @@ -22,25 +22,24 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> -------------------------------------------------------- -#TOC> 1 Designing a computational experiment 53 -#TOC> 2 Setting up the tools 69 -#TOC> 2.1 Natural and alternative genetic codes 72 -#TOC> 2.2 Effect of mutations 126 -#TOC> 2.2.1 reverse-translate 137 -#TOC> 2.2.2 Randomly mutate 162 -#TOC> 2.2.3 Forward- translate 187 -#TOC> 2.2.4 measure effect 205 -#TOC> 3 Run the experiment 252 -#TOC> 4 Task solutions 339 +#TOC> 1 Designing a computational experiment 57 +#TOC> 2 Setting up the tools 73 +#TOC> 2.1 Natural and alternative genetic codes 76 +#TOC> 2.2 Effect of mutations 135 +#TOC> 2.2.1 reverse-translate 146 +#TOC> 2.2.2 Randomly mutate 171 +#TOC> 2.2.3 Forward- translate 196 +#TOC> 2.2.4 measure effect 214 +#TOC> 3 Run the experiment 261 +#TOC> 4 Task solutions 348 #TOC> #TOC> 
========================================================================== - - # This unit demonstrates R code to simulate alternate genetic codes and evaluate @@ -71,14 +70,19 @@ # == 2.1 Natural and alternative genetic codes ============================= -# Load the code from the Biostrings package -if (! require(Biostrings)) { +# Load genetic code tables from the Biostrings package +if (! require(Biostrings, quietly=TRUE)) { if (! exists("biocLite")) { source("https://bioconductor.org/biocLite.R") } biocLite("Biostrings") library(Biostrings) } +# Package information: +# library(help = Biostrings) # basic information +# browseVignettes("Biostrings") # available vignettes +# data(package = "Biostrings") # available datasets + # There are many ways to generate alternative codes. The simplest way is to # randomly assign amino acids to codons. A more sophisticated way is to keep the diff --git a/RPR-PROSITE_POST.R b/RPR-PROSITE_POST.R index 4b66124..612ffc8 100644 --- a/RPR-PROSITE_POST.R +++ b/RPR-PROSITE_POST.R @@ -23,27 +23,33 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> --------------------------------------------------------------- -#TOC> 1 Constructing a POST command from a Web query 40 -#TOC> 1.1 Task - fetchPrositeFeatures() function 134 -#TOC> 2 Task solutions 142 +#TOC> 1 Constructing a POST command from a Web query 44 +#TOC> 1.1 Task - fetchPrositeFeatures() function 145 +#TOC> 2 Task solutions 153 #TOC> #TOC> ========================================================================== - - # = 1 Constructing a POST command from a Web query ======================== -if (!require(httr)) { +if (! 
require(httr, quietly=TRUE)) { install.packages("httr") library(httr) } +# Package information: +# library(help = httr) # basic information +# browseVignettes("httr") # available vignettes +# data(package = "httr") # available datasets + + + # We have reverse engineered the Web form for a ScanProsite request, and can now # construct a POST request. The command is similar to GET(), but we need an diff --git a/RPR-SX-PDB.R b/RPR-SX-PDB.R index ca6b7e4..a1f3dcd 100644 --- a/RPR-SX-PDB.R +++ b/RPR-SX-PDB.R @@ -24,27 +24,26 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ---------------------------------------------------- -#TOC> 1 Introduction to the bio3D package 59 -#TOC> 2 A Ramachandran plot 148 -#TOC> 3 Density plots 224 -#TOC> 3.1 Density-based colours 238 -#TOC> 3.2 Plotting with smoothScatter() 257 -#TOC> 3.3 Plotting hexbins 272 -#TOC> 3.4 Plotting density contours 291 -#TOC> 3.4.1 ... as overlay on a colored grid 321 -#TOC> 3.4.2 ... as filled countour 338 -#TOC> 3.4.3 ... as a perspective plot 369 -#TOC> 4 cis-peptide bonds 387 -#TOC> 5 H-bond lengths 402 +#TOC> 1 Introduction to the bio3D package 63 +#TOC> 2 A Ramachandran plot 151 +#TOC> 3 Density plots 227 +#TOC> 3.1 Density-based colours 241 +#TOC> 3.2 Plotting with smoothScatter() 260 +#TOC> 3.3 Plotting hexbins 275 +#TOC> 3.4 Plotting density contours 299 +#TOC> 3.4.1 ... as overlay on a colored grid 333 +#TOC> 3.4.2 ... as filled countour 350 +#TOC> 3.4.3 ... as a perspective plot 381 +#TOC> 4 cis-peptide bonds 399 +#TOC> 5 H-bond lengths 414 #TOC> #TOC> ========================================================================== - - # In this example of protein structure interpretation, we ... 
@@ -59,16 +58,15 @@ # = 1 Introduction to the bio3D package =================================== -if(!require(bio3d)) { - install.packages("bio3d", dependencies=TRUE) +if (! require(bio3d, quietly=TRUE)) { + install.packages("bio3d") library(bio3d) } +# Package information: +# library(help = bio3d) # basic information +# browseVignettes("bio3d") # available vignettes +# data(package = "bio3d") # available datasets -lbio3d() # ... lists the newly installed functions, -# they all have help files associated. -# More information is available in the so-called -# "vignettes" that are distributed with most R packages: -vignette("bio3d_vignettes") # bio3d can load molecules directly from the PDB servers, you don't _have_ to # store them locally, but you could. @@ -273,10 +271,15 @@ abline(v = 0, lwd = 0.5, col = "#00000044") # If we wish to approximate values in a histogram-like fashion, we can use # hexbin() -if (!require(hexbin)) { +if (! require(hexbin, quietly=TRUE)) { install.packages("hexbin") library(hexbin) } +# Package information: +# library(help = hexbin) # basic information +# browseVignettes("hexbin") # available vignettes +# data(package = "hexbin") # available datasets + myColorRamp <- colorRampPalette(c("#EEEEEE", "#3399CC", @@ -301,10 +304,14 @@ plot(hexbin(phi, psi, xbins = 10), # distributions. But for 2D data like or phi-psi plots, we need a function from # the MASS package: kde2d() -if (!require(MASS)) { +if (! require(MASS, quietly=TRUE)) { install.packages("MASS") library(MASS) } +# Package information: +# library(help = MASS) # basic information +# browseVignettes("MASS") # available vignettes +# data(package = "MASS") # available datasets ?kde2d dPhiPsi <-kde2d(phi, psi, diff --git a/RPR-UniProt_GET.R b/RPR-UniProt_GET.R index 95bce7c..79557f2 100644 --- a/RPR-UniProt_GET.R +++ b/RPR-UniProt_GET.R @@ -23,18 +23,17 @@ # going on. That's not how it works ... 
# # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ---------------------------------------------------- -#TOC> 1 UniProt files via GET 40 -#TOC> 1.1 Task - fetchUniProtSeq() function 98 -#TOC> 2 Task solutions 105 +#TOC> 1 UniProt files via GET 44 +#TOC> 1.1 Task - fetchUniProtSeq() function 107 +#TOC> 2 Task solutions 114 #TOC> #TOC> ========================================================================== - - # = 1 UniProt files via GET =============================================== @@ -49,10 +48,15 @@ # a Web browser. Since this is a short and simple request, the GET verb is the # right tool: -if (!require(httr)) { +if (! require(httr, quietly=TRUE)) { install.packages("httr") library(httr) } +# Package information: +# library(help = httr) # basic information +# browseVignettes("httr") # available vignettes +# data(package = "httr") # available datasets + # The UniProt ID for Mbp1 is ... diff --git a/RPR-Unit_testing.R b/RPR-Unit_testing.R index b8fcbe2..c3d9111 100644 --- a/RPR-Unit_testing.R +++ b/RPR-Unit_testing.R @@ -23,27 +23,30 @@ # # ============================================================================== + #TOC> ========================================================================== -#TOC> +#TOC> #TOC> Section Title Line #TOC> ------------------------------------------- -#TOC> 1 Unit Tests with testthat 39 -#TOC> 2 Organizing your tests 148 -#TOC> 3 Task solutions 173 -#TOC> +#TOC> 1 Unit Tests with testthat 43 +#TOC> 2 Organizing your tests 156 +#TOC> 3 Task solutions 181 +#TOC> #TOC> ========================================================================== - - # = 1 Unit Tests with testthat ============================================ # The testthat package supports writing and executing unit tests in many ways. -if (!require(testthat)) { +if (! 
require(testthat, quietly=TRUE)) { install.packages("testthat") library(testthat) } +# Package information: +# library(help = testthat) # basic information +# browseVignettes("testthat") # available vignettes +# data(package = "testthat") # available datasets # An atomic test consists of an expectation about the bahaviour of a function or # the existence of an object. testthat provides a number of useful expectations: diff --git a/RPR-eUtils_XML.R b/RPR-eUtils_XML.R index 34ab151..bdc5eaa 100644 --- a/RPR-eUtils_XML.R +++ b/RPR-eUtils_XML.R @@ -23,18 +23,17 @@ # going on. That's not how it works ... # # ============================================================================== - + + #TOC> ========================================================================== #TOC> #TOC> Section Title Line #TOC> ----------------------------------------------------- -#TOC> 1 Working with NCBI eUtils 40 -#TOC> 1.1 Task - fetchNCBItaxData() function 149 -#TOC> 2 Task solutions 156 +#TOC> 1 Working with NCBI eUtils 44 +#TOC> 1.1 Task - fetchNCBItaxData() function 162 +#TOC> 2 Task solutions 169 #TOC> #TOC> ========================================================================== - - # = 1 Working with NCBI eUtils ============================================ @@ -44,19 +43,28 @@ # To begin, we load some libraries with functions # we need... -# httr sends and receives information via the http +# ... the package httr, which sends and receives information via the http # protocol, just like a Web browser. -if (!require(httr, quietly=TRUE)) { +if (! require(httr, quietly=TRUE)) { install.packages("httr") library(httr) } +# Package information: +# library(help = httr) # basic information +# browseVignettes("httr") # available vignettes +# data(package = "httr") # available datasets -# NCBI's eUtils send information in XML format; we + +# ...plus the package xml2: NCBI's eUtils send information in XML format so we # need to be able to parse XML. -if (!require(xml2)) { +if (! 
require(xml2, quietly=TRUE)) { install.packages("xml2") library(xml2) } +# Package information: +# library(help = xml2) # basic information +# browseVignettes("xml2") # available vignettes +# data(package = "xml2") # available datasets diff --git a/scriptTemplate.R b/scriptTemplate.R index a394a34..1db9d7a 100644 --- a/scriptTemplate.R +++ b/scriptTemplate.R @@ -11,7 +11,7 @@ # # ToDo: # Notes: -# +# # ============================================================================== setwd("") @@ -24,10 +24,16 @@ setwd("") # ==== PACKAGES ============================================================== # Load all required packages. -if (!require(RUnit, quietly=TRUE)) { - install.packages("RUnit") - library(RUnit) +if (! require(seqinr, quietly=TRUE)) { + install.packages("seqinr") + library(seqinr) } +# Package information: +# library(help = seqinr) # basic information +# browseVignettes("seqinr") # available vignettes +# data(package = "seqinr") # available datasets + + # ==== FUNCTIONS ============================================================= @@ -43,9 +49,9 @@ myFunction <- function(a, b=1) { # b: ... # Value: # result: ... - + # code ... - + return(result) }