2020 updates - deactivate for maintenance

This commit is contained in:
hyginn 2020-09-18 21:56:30 +10:00
parent 89bdd14d1c
commit 37ef655d47
42 changed files with 447 additions and 243 deletions

42
.init.R
View File

@ -1,42 +0,0 @@
# .init.R
# Functions to initialize this collection of learning units
# Boris Steipe
# ====================================================================
# Provide the user's personal script file: copy the template .tmp.R to
# myScript.R, but only if a template exists and no copy has been made yet.
if (file.exists(".tmp.R") && ! file.exists("myScript.R")) {
  file.copy(".tmp.R", "myScript.R")
}

# First run only: ask for the profile values and store them as R assignments
# in .myProfile.R. The temporary objects e, n and conn are removed afterwards.
if (! file.exists(".myProfile.R")) {
  cat("\nPlease enter the requested values correctly, no spaces, and\n",
      "press <enter>.\n",
      sep = "")
  e <- readline("Please enter your UofT eMail address: ")
  n <- readline("Please enter your Student Number: ")
  conn <- file(".myProfile.R")
  writeLines(text = c(sprintf("myEMail <- \"%s\"", e),
                      sprintf("myStudentNumber <- %d", as.numeric(n))),
             con = conn)
  close(conn)
  rm(list = c("e", "n", "conn"))
}

# Patch YFO -> MYSPE if necessary: rewrite every profile line that begins
# with "YFO" so that it begins with "MYSPE" instead.
# Note: idx is only created when a patch is applied, and it is not removed.
tmp <- readLines(".myProfile.R")
if (any(grepl("^YFO", tmp))) {
  idx <- grep("^YFO", tmp)
  tmp[idx] <- sub("^YFO", "MYSPE", tmp[idx])
  writeLines(tmp, ".myProfile.R")
}
rm(tmp)

# Load the profile values and the utility functions, then open the
# introductory script in the editor.
source(".myProfile.R")
source(".utilities.R")
file.edit("ABC-units.R")
# [End]

View File

@ -181,19 +181,29 @@ fetchMSAmotif <- function(ali, mot) {
# ====== PDB ID selection ====================================================== # ====== PDB ID selection ======================================================
selectPDBrep <- function(n, seed = as.numeric(Sys.time())) {
  # Draw n PDB IDs at random from a list of high-resolution, non-homologous,
  # single domain, single chain structure files that represent a CATH
  # topology group.
  #
  # Parameters:
  #    n     num   number of IDs to return
  #    seed  num   a seed for the RNG (defaults to the current system time)
  #
  # Value:   char  PDB IDs
  #
  # Note: the list is loaded from an RData file in the "./data" directory.
  #       If you use this function for a course submission, it MUST be
  #       invoked as:
  #
  #           selectPDBrep(n, seed = myStudentNumber)
  #
  #       ... and myStudentNumber MUST be correctly initialized.

  load("./data/pdbRep.RData")   # loads pdbRep
  nAvailable <- length(pdbRep)
  if (nAvailable < n) {
    stop(sprintf("There are only %d PDB IDs in the table to choose from.",
                 nAvailable))
  }
  set.seed(seed)                # makes the selection reproducible per seed
  sample(pdbRep, n)
}

View File

@ -2,11 +2,16 @@
# #
# Purpose: A Bioinformatics Course: R code for learning units # Purpose: A Bioinformatics Course: R code for learning units
# #
# Version: 0.1 # Version: 4.0
# #
# Date: 2017 08 18 # Date: 2020 09 16
# Author: Boris Steipe (boris.steipe@utoronto.ca) # Author: Boris Steipe (boris.steipe@utoronto.ca)
# #
# Versions:
# V 4.0 2020 version
# V 3.0 2019 version
# V 2.0 2018 version
# V 1.0 2017 version
# V 0.1 First code # V 0.1 First code
# #
# TODO: # TODO:
@ -14,23 +19,19 @@
# #
# == HOW TO WORK WITH LEARNING UNIT FILES ====================================== # == HOW TO WORK WITH LEARNING UNIT FILES ======================================
# #
# Expect that the learning unit files will be continuously updated. # The R-scripts and datasets in this project will be continuously updated,
# # and updates will be posted on GitHub. To bring your version into the latest
# state use the Git-pane (top left) and "pull" (blue downward arrow) from the
# repository. However, this will overwrite locally edited versions of files.
# If you wish to edit any of the code, for example to add your own comments and # To edit code and experiment with it, for example to add your own comments and
# examples, save any edited version under a different name. Otherwise you will # examples, save your edited version into the "myScripts" folder. Otherwise you
# have problems with git when you update the project to a new version. # may have problems with git when you update the project to a new version. It's
# good practice to change the filename, for example by prepending your initials.
# This helps distinguish the files you are working with e.g. in a list of
# recent files. For example if your name is Honjo Tasuku, your edited
# BIN-Sequence.R might be named HT-BIN-Sequence.R
# DO NOT SIMPLY source() THESE FILES!
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
#
# While this file itself should not be edited by you this is YOUR project
# directory, and files that you create (notes etc.) will not be harmed when you
# pull updated version of the master, or other new files, from github.
#
# If you pull from github and get the following type of error ... # If you pull from github and get the following type of error ...
# --------------- # ---------------
# error: Your local changes to the following files would be # error: Your local changes to the following files would be
@ -41,8 +42,11 @@
# ... then, you need to bring the offending file into its original state. # ... then, you need to bring the offending file into its original state.
# Open the Commit window, select the file, and click on the Revert button. # Open the Commit window, select the file, and click on the Revert button.
# #
# Of course, you can save a local copy under a different name before you revert, # When working with these scripts DO NOT SIMPLY source() THESE FILES!
# in case you want to keep your changes.
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
# #
# #
# ============================================================================== # ==============================================================================

View File

@ -1,4 +1,10 @@
# BIN-ALI-BLAST.R # tocID <- "BIN-ALI-BLAST.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-BLAST unit. # R code accompanying the BIN-ALI-BLAST unit.

View File

@ -1,4 +1,10 @@
# BIN-ALI-Dotplot.R # tocID <- "BIN-ALI-Dotplot.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Dotplot unit. # R code accompanying the BIN-ALI-Dotplot unit.

View File

@ -1,4 +1,10 @@
# BIN-ALI-MSA.R # tocID <- "BIN-ALI-MSA.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-MSA unit. # R code accompanying the BIN-ALI-MSA unit.

View File

@ -1,4 +1,10 @@
# BIN-ALI-Optimal_sequence_alignment.R # tocID <- "BIN-ALI-Optimal_sequence_alignment.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Optimal_sequence_alignment unit. # R code accompanying the BIN-ALI-Optimal_sequence_alignment unit.

View File

@ -1,4 +1,10 @@
# BIN-ALI-Similarity.R # tocID <- "BIN-ALI-Similarity.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-Similarity unit. # R code accompanying the BIN-ALI-Similarity unit.

View File

@ -1,4 +1,10 @@
# BIN-Data_integration.R # tocID <- "BIN-Data_integration.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-Data_integration unit. # R code accompanying the BIN-Data_integration unit.

View File

@ -1,4 +1,10 @@
# BIN-FUNC-Domain_annotation.R # tocID <- "BIN-FUNC-Domain_annotation.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-FUNC-Domain_annotation unit. # R code accompanying the BIN-FUNC-Domain_annotation unit.

View File

@ -1,4 +1,10 @@
# BIN-FUNC_Semantic_similarity.R # tocID <- "BIN-FUNC_Semantic_similarity.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-FUNC_Semantic_similarity unit. # R code accompanying the BIN-FUNC_Semantic_similarity unit.
@ -158,27 +164,9 @@ myEnr <- GOenrichment(mySet, allGenes)
sort(myEnr$p.values) # Any significantly enriched terms? All of these are ... sort(myEnr$p.values) # Any significantly enriched terms? All of these are ...
#Yes: most significantly enriched is GO:0071931. What is this? #Most significantly enriched is GO:0071931. What is this?
getGOTerm("GO:0071931") # ... makes sense. annotate::getGOTerm("GO:0071931") # ... makes sense.
(fullSet <- myEnr$genes$`GO:0071931`) # What genes are annotated to this term?
intersect(mySet, fullSet) # These are in both sets
setdiff(mySet, fullSet) # These mySet members are not annotated to that term
setdiff(fullSet, mySet) # These are annotated to that term but not in mySet.
# ... that's the most interesting set. From a set of
# genes we have identified a function that they
# share, and that shared function has allowed us
# to identify
# What are these genes?
# Select annotations from the annotation database:
AnnotationDbi::select(org.Sc.sgd.db,
keys = setdiff(fullSet, mySet),
columns = c("COMMON", "DESCRIPTION"))
# Note that these annotations are partially redundant to several different
# aliases of the same three genes.

View File

@ -1,4 +1,10 @@
# BIN-MYSPE.R # tocID <- "BIN-MYSPE.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-MYSPE unit # R code accompanying the BIN-MYSPE unit

View File

@ -1,4 +1,10 @@
# BIN-PHYLO-Data_preparation.R # tocID <- "BIN-PHYLO-Data_preparation.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Data_preparation unit. # R code accompanying the BIN-PHYLO-Data_preparation unit.

View File

@ -1,4 +1,10 @@
# BIN-PHYLO-Tree_analysis.R # tocID <- "BIN-PHYLO-Tree_analysis.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Tree_analysis unit. # R code accompanying the BIN-PHYLO-Tree_analysis unit.

View File

@ -1,4 +1,10 @@
# BIN-PHYLO-Tree_building.R # tocID <- "BIN-PHYLO-Tree_building.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PHYLO-Tree_building unit. # R code accompanying the BIN-PHYLO-Tree_building unit.

View File

@ -1,4 +1,10 @@
# BIN-PPI-Analysis.R # tocID <- "BIN-PPI-Analysis.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-PPI-Analysis unit. # R code accompanying the BIN-PPI-Analysis unit.

View File

@ -1,4 +1,10 @@
# BIN-SEQA-Composition.R # tocID <- "BIN-SEQA-Composition.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-SEQA-Comparison unit # R code accompanying the BIN-SEQA-Comparison unit

View File

@ -1,4 +1,10 @@
# BIN-Sequence.R # tocID <- "BIN-Sequence.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-Sequence unit. # R code accompanying the BIN-Sequence unit.

View File

@ -1,4 +1,10 @@
# BIN-Storing_data.R # tocID <- "BIN-Storing_data.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the BIN-Storing_data unit # R code accompanying the BIN-Storing_data unit

View File

@ -1,4 +1,10 @@
# FND-Genetic_code.R # tocID <- "FND-Genetic_code.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-Genetic_code unit. # R code accompanying the FND-Genetic_code unit.

View File

@ -1,4 +1,10 @@
# FND-MAT-Graphs_and_networks.R # tocID <- "FND-MAT-Graphs_and_networks.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-MAT-Graphs_and_networks unit. # R code accompanying the FND-MAT-Graphs_and_networks unit.
@ -280,7 +286,7 @@ plot(GBA,
vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1], vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
vertex.size = 200 + (30 * igraph::degree(GBA)), vertex.size = 200 + (30 * igraph::degree(GBA)),
vertex.label = NA) vertex.label = NA)
par(oPar) # restore grphics state par(oPar) # restore graphics state
# This is a very obviously different graph! Some biological networks have # This is a very obviously different graph! Some biological networks have
# features that look like that - but in my experience the hub nodes are usually # features that look like that - but in my experience the hub nodes are usually

View File

@ -1,14 +1,21 @@
# FND-STA-Information_theory.R # tocID <- "FND-STA-Information_theory.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-STA-Information_theory unit. # R code accompanying the FND-STA-Information_theory unit.
# #
# Version: 0.2 # Version: 0.2.1
# #
# Date: 2017 MM DD # Date: 2017 - 2019
# Author: Boris Steipe (boris.steipe@utoronto.ca) # Author: Boris Steipe (boris.steipe@utoronto.ca)
# #
# Versions: # Versions:
# 0.2.1 Maintenance
# 0.2 Under development # 0.2 Under development
# 0.1 First code copied from 2016 material. # 0.1 First code copied from 2016 material.
# #
@ -58,11 +65,33 @@ AAref["Y"] <- 0.0294
sum(AAref) sum(AAref)
# Function to calculate Shannon entropy # Function to calculate Shannon entropy
H <- function(pmf) {
  # Calculate Shannon entropy
  # Parameters:
  #   pmf  (numeric)  probability mass function: a vector of states and
  #                   associated probabilities. Each element of
  #                   pmf must be in (0, 1] and sum(pmf) must be 1.
  # Value:
  #   Shannon entropy in bits.
  # Errors:
  #   stops if pmf is not numeric, is empty, contains NA, has an element
  #   outside (0, 1], or does not sum to 1 (within all.equal() tolerance).
  # Examples:
  #   H(c(A=0.25, C=0.25, G=0.25, T=0.25))  # 2 bits entropy in a random
  #                                         # nucleotide sequence
  #   H(1)     # If all elements are the same, entropy is zero
  #
  # all.equal() returns TRUE or a character description of the difference,
  # never FALSE. The test must therefore be !isTRUE(...): isFALSE(...) would
  # never trigger and vectors that do not sum to 1 would be accepted.
  if (!is.numeric(pmf) || length(pmf) == 0 || anyNA(pmf) ||
      any(pmf <= 0 | pmf > 1) ||
      !isTRUE(all.equal(1.0, sum(pmf)))) {
    stop("Input is not a discrete probability distribution.")
  }
  -sum(pmf * log2(pmf))
}
# Why use all.equal()? Exact comparisons with floating point numbers are
# brittle. Consider for example:
1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6 == 1
print(1/6 + 1/6 + 1/6 + 1/6 + 1/6 + 1/6, digits = 22) # 0.9999999999999998889777
# all.equal() tests for _near_ equality with tolerance of ~ 1.5e-8
# Entropy of the database frequencies (in bits): # Entropy of the database frequencies (in bits):
(Href <- H(AAref)) (Href <- H(AAref))

View File

@ -1,4 +1,10 @@
# FND-STA-Probability_distribution.R # tocID <- "FND-STA-Probability_distribution.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-STA-Probability_distribution unit. # R code accompanying the FND-STA-Probability_distribution unit.

View File

@ -1,4 +1,10 @@
# FND-STA-Significance.R # tocID <- "FND-STA-Significance.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-STA-Significance unit. # R code accompanying the FND-STA-Significance unit.

View File

@ -1,2 +1,4 @@
# ABC-units # ABC-units
A Bioinformatics Course: R modules for learning units A Bioinformatics Course: R modules for learning units
Follow the instructions in the learning unit to install your local copy of this R-project.

View File

@ -1,4 +1,10 @@
# RPR-Biostrings.R # tocID <- "RPR-Biostrings.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Biostrings unit. # R code accompanying the RPR-Biostrings unit.

View File

@ -1,4 +1,10 @@
# RPR-FASTA.R # tocID <- "RPR-FASTA.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-FASTA unit. # R code accompanying the RPR-FASTA unit.

View File

@ -1,4 +1,10 @@
# RPR_GEO2R.R # tocID <- "RPR_GEO2R.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR_GEO2R unit. # R code accompanying the RPR_GEO2R unit.

View File

@ -1,4 +1,10 @@
# RPR-Genetic_code_optimality.R # tocID <- "RPR-Genetic_code_optimality.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Genetic_code_optimality unit. # R code accompanying the RPR-Genetic_code_optimality unit.

View File

@ -1,4 +1,10 @@
# RPR-Introduction.R # tocID <- "RPR-Introduction.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Introduction unit # R code accompanying the RPR-Introduction unit

View File

@ -1,4 +1,10 @@
# RPR-PROSITE_POST.R # tocID <- "RPR-PROSITE_POST.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit. # R code accompanying the RPR-Scripting_data_downloads unit.

View File

@ -1,4 +1,10 @@
# RPR-RegEx.R # tocID <- "RPR-RegEx.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-RegEx unit # R code accompanying the RPR-RegEx unit

View File

@ -1,4 +1,10 @@
# RPR-SX-PDB.R # tocID <- "RPR-SX-PDB.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-SX-PDB unit. # R code accompanying the RPR-SX-PDB unit.

View File

@ -1,4 +1,10 @@
# RPR-UniProt_GET.R # tocID <- "RPR-UniProt_GET.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit. # R code accompanying the RPR-Scripting_data_downloads unit.

View File

@ -1,4 +1,10 @@
# RPR-Unit_testing.R # tocID <- "RPR-Unit_testing.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Unit_testing unit. # R code accompanying the RPR-Unit_testing unit.
@ -29,10 +35,10 @@
#TOC> #TOC>
#TOC> Section Title Line #TOC> Section Title Line
#TOC> ------------------------------------------------- #TOC> -------------------------------------------------
#TOC> 1 Unit Tests with testthat 40 #TOC> 1 Unit Tests with testthat 46
#TOC> 2 Organizing your tests 159 #TOC> 2 Organizing your tests 165
#TOC> 2.1 Testing scripts 183 #TOC> 2.1 Testing scripts 189
#TOC> 3 Task solutions 198 #TOC> 3 Task solutions 204
#TOC> #TOC>
#TOC> ========================================================================== #TOC> ==========================================================================

View File

@ -1,4 +1,10 @@
# RPR-eUtils_and_XML.R # tocID <- "RPR-eUtils_and_XML.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
# #
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the RPR-Scripting_data_downloads unit. # R code accompanying the RPR-Scripting_data_downloads unit.

View File

@ -8,7 +8,7 @@
# http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi # http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
# #
# For the data model, see # For the data model, see
# https://docs.google.com/drawings/d/1uupNvz18_FYFwyyVPebTM0CUxcJCPDQuxuIJGpjWQWg # https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
# For the schema, see dbInit() in ./scripts/ABC-dbUtilities.R # For the schema, see dbInit() in ./scripts/ABC-dbUtilities.R
# #
# ============================================================================== # ==============================================================================

View File

@ -1,12 +1,35 @@
# ABC-dbUtilities.R # tocID <- "scripts/ABC-dbUtilities.R"
#
# database utilities for ABC learning units # database utilities for ABC learning units
# #
# ============================================================================== # ==============================================================================
#
# ====== PACKAGES ============================================================== #TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -------------------------------------------------
#TOC> 1 PACKAGES 32
#TOC> 2 FUNCTIONS 50
#TOC> 2.01 dbSanitizeSequence() 53
#TOC> 2.02 dbConfirmUnique() 88
#TOC> 2.03 dbInit() 106
#TOC> 2.04 dbAutoincrement() 147
#TOC> 2.05 dbAddProtein() 160
#TOC> 2.06 dbAddFeature() 180
#TOC> 2.07 dbAddTaxonomy() 199
#TOC> 2.08 dbAddAnnotation() 215
#TOC> 2.09 dbFetchUniProtSeq() 243
#TOC> 2.10 dbFetchPrositeFeatures() 267
#TOC> 2.11 node2text() 311
#TOC> 2.12 dbFetchNCBItaxData() 323
#TOC> 2.13 UniProtIDmap() 362
#TOC> 3 TESTS 399
#TOC>
#TOC> ==========================================================================
# = 1 PACKAGES ============================================================
if (! requireNamespace("jsonlite", quietly = TRUE)) { if (! requireNamespace("jsonlite", quietly = TRUE)) {
@ -24,9 +47,10 @@ if (! requireNamespace("xml2", quietly = TRUE)) {
} }
# ====== FUNCTIONS ============================================================= # = 2 FUNCTIONS ===========================================================
# == 2.01 dbSanitizeSequence() =============================================
dbSanitizeSequence <- function(s, unambiguous = TRUE) { dbSanitizeSequence <- function(s, unambiguous = TRUE) {
# Remove FASTA header lines, if any, # Remove FASTA header lines, if any,
# flatten any structure that s has, # flatten any structure that s has,
@ -61,6 +85,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
} }
# == 2.02 dbConfirmUnique() ================================================
dbConfirmUnique <- function(x) { dbConfirmUnique <- function(x) {
# x is a vector of logicals. # x is a vector of logicals.
# returns x if x has exactly one TRUE element. # returns x if x has exactly one TRUE element.
@ -78,24 +103,27 @@ dbConfirmUnique <- function(x) {
} }
# == 2.03 dbInit() =========================================================
dbInit <- function() { dbInit <- function() {
# Return an empty instance of the protein database # Return an empty instance of the protein database
# Open the link and study the schema:
# https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
db <- list() db <- list()
db$version <- "1.0"
db$protein <- data.frame( db$protein <- data.frame(
ID = numeric(), ID = numeric(),
name = character(), name = character(),
RefSeqID = character(), RefSeqID = character(),
UniProtID = character(), UniProtID = character(),
taxonomyID = numeric(), taxonomyID = numeric(),
sequence = character(), sequence = character())
stringsAsFactors = FALSE)
db$taxonomy <- data.frame( db$taxonomy <- data.frame(
ID = numeric(), ID = numeric(),
species = character(), species = character())
stringsAsFactors = FALSE)
db$annotation <- data.frame( db$annotation <- data.frame(
@ -103,21 +131,20 @@ dbInit <- function() {
proteinID = numeric(), proteinID = numeric(),
featureID = numeric(), featureID = numeric(),
start = numeric(), start = numeric(),
end = numeric(), end = numeric())
stringsAsFactors = FALSE)
db$feature <- data.frame( db$feature <- data.frame(
ID = numeric(), ID = numeric(),
name = character(), name = character(),
description = character(), description = character(),
sourceDB = character(), sourceDB = character(),
accession = character(), accession = character())
stringsAsFactors = FALSE)
return(db) return(db)
} }
# == 2.04 dbAutoincrement() ================================================
dbAutoincrement <- function(tb) { dbAutoincrement <- function(tb) {
# Return a unique integer that can be used as a primary key # Return a unique integer that can be used as a primary key
# Value: # Value:
@ -130,6 +157,7 @@ dbAutoincrement <- function(tb) {
} }
# == 2.05 dbAddProtein() ===================================================
dbAddProtein <- function(db, jsonDF) { dbAddProtein <- function(db, jsonDF) {
# Add one or more protein entries to the database db. # Add one or more protein entries to the database db.
# Parameters: # Parameters:
@ -142,14 +170,14 @@ dbAddProtein <- function(db, jsonDF) {
RefSeqID = jsonDF$RefSeqID[i], RefSeqID = jsonDF$RefSeqID[i],
UniProtID = jsonDF$UniProtID[i], UniProtID = jsonDF$UniProtID[i],
taxonomyID = jsonDF$taxonomyID[i], taxonomyID = jsonDF$taxonomyID[i],
sequence = dbSanitizeSequence(jsonDF$sequence[i]), sequence = dbSanitizeSequence(jsonDF$sequence[i]))
stringsAsFactors = FALSE)
db$protein <- rbind(db$protein, x) db$protein <- rbind(db$protein, x)
} }
return(db) return(db)
} }
# == 2.06 dbAddFeature() ===================================================
dbAddFeature <- function(db, jsonDF) { dbAddFeature <- function(db, jsonDF) {
# Add one or more feature entries to the database db. # Add one or more feature entries to the database db.
# Parameters: # Parameters:
@ -161,14 +189,14 @@ dbAddFeature <- function(db, jsonDF) {
name = jsonDF$name[i], name = jsonDF$name[i],
description = jsonDF$description[i], description = jsonDF$description[i],
sourceDB = jsonDF$sourceDB[i], sourceDB = jsonDF$sourceDB[i],
accession = jsonDF$accession[i], accession = jsonDF$accession[i])
stringsAsFactors = FALSE)
db$feature <- rbind(db$feature, x) db$feature <- rbind(db$feature, x)
} }
return(db) return(db)
} }
# == 2.07 dbAddTaxonomy() ==================================================
dbAddTaxonomy <- function(db, jsonDF) { dbAddTaxonomy <- function(db, jsonDF) {
# Add one or more taxonomy entries to the database db. # Add one or more taxonomy entries to the database db.
# Parameters: # Parameters:
@ -178,13 +206,13 @@ dbAddTaxonomy <- function(db, jsonDF) {
for (i in seq_len(nrow(jsonDF))) { for (i in seq_len(nrow(jsonDF))) {
x <- data.frame( x <- data.frame(
ID = jsonDF$ID[i], ID = jsonDF$ID[i],
species = jsonDF$species[i], species = jsonDF$species[i])
stringsAsFactors = FALSE)
db$taxonomy <- rbind(db$taxonomy, x) db$taxonomy <- rbind(db$taxonomy, x)
} }
return(db) return(db)
} }
# == 2.08 dbAddAnnotation() ================================================
dbAddAnnotation <- function(db, jsonDF) { dbAddAnnotation <- function(db, jsonDF) {
# Add one or more annotation entries to the database db. # Add one or more annotation entries to the database db.
# Parameters: # Parameters:
@ -205,14 +233,14 @@ dbAddAnnotation <- function(db, jsonDF) {
proteinID = pID, proteinID = pID,
featureID = fID, featureID = fID,
start = as.integer(jsonDF$start[i]), start = as.integer(jsonDF$start[i]),
end = as.integer(jsonDF$end[i]), end = as.integer(jsonDF$end[i]))
stringsAsFactors = FALSE)
db$annotation <- rbind(db$annotation, x) db$annotation <- rbind(db$annotation, x)
} }
return(db) return(db)
} }
# == 2.09 dbFetchUniProtSeq() ==============================================
dbFetchUniProtSeq <- function(ID) { dbFetchUniProtSeq <- function(ID) {
# Fetch a protein sequence from UniProt. # Fetch a protein sequence from UniProt.
# Parameters: # Parameters:
@ -236,6 +264,7 @@ dbFetchUniProtSeq <- function(ID) {
} }
# == 2.10 dbFetchPrositeFeatures() =========================================
dbFetchPrositeFeatures <- function(ID) { dbFetchPrositeFeatures <- function(ID) {
# Fetch feature annotations from ScanProsite. # Fetch feature annotations from ScanProsite.
# Parameters: # Parameters:
@ -272,14 +301,14 @@ dbFetchPrositeFeatures <- function(ID) {
start = as.numeric(tokens[4]), start = as.numeric(tokens[4]),
end = as.numeric(tokens[5]), end = as.numeric(tokens[5]),
psID = tokens[6], psID = tokens[6],
psName = tokens[7], psName = tokens[7]))
stringsAsFactors = FALSE))
} }
} }
return(myFeatures) return(myFeatures)
} }
# == 2.11 node2text() ======================================================
node2text <- function(doc, tag) { node2text <- function(doc, tag) {
# an extractor function for the contents of elements # an extractor function for the contents of elements
# between given tags in an XML response. # between given tags in an XML response.
@ -291,6 +320,7 @@ node2text <- function(doc, tag) {
} }
# == 2.12 dbFetchNCBItaxData() =============================================
dbFetchNCBItaxData <- function(ID) { dbFetchNCBItaxData <- function(ID) {
# Fetch feature taxID and Organism from the NCBI. # Fetch feature taxID and Organism from the NCBI.
# Parameters: # Parameters:
@ -329,6 +359,7 @@ dbFetchNCBItaxData <- function(ID) {
# == 2.13 UniProtIDmap() ===================================================
UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") { UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
# Use UniProt ID mapping service to map one or more IDs # Use UniProt ID mapping service to map one or more IDs
# Parameters: # Parameters:
@ -351,8 +382,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
if (httr::status_code(response) == 200) { # 200: oK if (httr::status_code(response) == 200) { # 200: oK
myMap <- read.delim(file = textConnection(httr::content(response)), myMap <- read.delim(file = textConnection(httr::content(response)),
sep = "\t", sep = "\t")
stringsAsFactors = FALSE)
myMap <- myMap[ , c(1,3)] myMap <- myMap[ , c(1,3)]
colnames(myMap) <- c("From", "To") colnames(myMap) <- c("From", "To")
} else { } else {
@ -366,7 +396,7 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
} }
# ====== TESTS ================================================================= # = 3 TESTS ===============================================================
if (FALSE) { if (FALSE) {
if (! requireNamespace("testthat", quietly = TRUE)) { if (! requireNamespace("testthat", quietly = TRUE)) {

View File

@ -1,4 +1,4 @@
# ABC-makeScCCnet.R # tocID <- "scripts/ABC-makeScCCnet.R"
# #
# Create a subnetwork of high-confidence yeast genes with a "mitotic cell cycle" # Create a subnetwork of high-confidence yeast genes with a "mitotic cell cycle"
# GOSlim annotation. # GOSlim annotation.

View File

@ -1,4 +1,4 @@
# ABC-writeALN.R # tocID <- "scripts/ABC-writeALN.R"
# #
# ToDo: calculate consensus line # ToDo: calculate consensus line
# append sequence numbers # append sequence numbers

View File

@ -40,7 +40,7 @@ writeMFA <- function(ali,
if (is.na(blockWidth)) { if (is.na(blockWidth)) {
stop("PANIC: parameter \"blockWidth\" must be numeric.") stop("PANIC: parameter \"blockWidth\" must be numeric.")
} }
if (blockWidth < 1){ if (! blockWidth > 0){
stop("PANIC: parameter \"blockWidth\" must be greater than zero.") stop("PANIC: parameter \"blockWidth\" must be greater than zero.")
} }
@ -105,7 +105,7 @@ writeMFA <- function(ali,
txt <- c(txt, "") # append an empty line for readability txt <- c(txt, "") # append an empty line for readability
} }
writeLines(txt, con= myCon) writeLines(txt, con = myCon)
} }

View File

@ -357,20 +357,23 @@ parseBLASTalignment <- function(hit) {
# ==== TESTS =================================================================== # ==== TESTS ===================================================================
# define query: if (FALSE) {
# q <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain # define query:
# "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ", q <- paste("IYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHI", # Mbp1 APSES domain
# "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP", "LKAANFAKAKRTRILEKEVLKETHEKVQGGFGKYQ",
# sep="") "GTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASP",
# or ... sep="")
# q <- "NP_010227" # refseq ID # or ...
# q <- "NP_010227" # refseq ID
# test <- BLAST(q,
# nHits = 100, test <- BLAST(q,
# E = 0.001, nHits = 100,
# rid = "", E = 0.001,
# limits = "txid4751[ORGN]") rid = "",
# length(test$hits) limits = "txid4751[ORGN]")
str(test)
length(test$hits)
}
# [END] # [END]