add digest:: and jsonlite::, and sameSpecies(),
This commit is contained in:
parent
b1d712891f
commit
cfbfee9dba
85
.utilities.R
85
.utilities.R
@ -6,7 +6,7 @@
|
|||||||
# Date: 2017-09 - 2020-09
|
# Date: 2017-09 - 2020-09
|
||||||
# Author: Boris Steipe
|
# Author: Boris Steipe
|
||||||
#
|
#
|
||||||
# V 1.4 Maintenance
|
# V 1.4 Maintenance, and new validation utilities
|
||||||
# V 1.3.1 prefix Biostrings:: to subseq()
|
# V 1.3.1 prefix Biostrings:: to subseq()
|
||||||
# V 1.3 load msa support functions
|
# V 1.3 load msa support functions
|
||||||
# V 1.2 update database utilities to support 2017 version of JSON sources
|
# V 1.2 update database utilities to support 2017 version of JSON sources
|
||||||
@ -23,19 +23,21 @@
|
|||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> -----------------------------------------------------------
|
#TOC> -----------------------------------------------------------
|
||||||
#TOC> 1 SCRIPTS TO SOURCE 42
|
#TOC> 1 SCRIPTS TO SOURCE 45
|
||||||
#TOC> 2 SUPPORT FUNCTIONS 49
|
#TOC> 2 PACKAGES 51
|
||||||
#TOC> 2.1 objectInfo() 52
|
#TOC> 3 SUPPORT FUNCTIONS 62
|
||||||
#TOC> 2.2 biCode() 80
|
#TOC> 3.1 objectInfo() 65
|
||||||
#TOC> 2.3 pBar() 114
|
#TOC> 3.2 biCode() 93
|
||||||
#TOC> 2.4 waitTimer() 136
|
#TOC> 3.3 sameSpecies() 127
|
||||||
#TOC> 2.5 fetchMSAmotif() 164
|
#TOC> 3.4 pBar() 146
|
||||||
#TOC> 2.6 H() (Shannon entropy) 208
|
#TOC> 3.5 waitTimer() 168
|
||||||
#TOC> 3 DATA 222
|
#TOC> 3.6 fetchMSAmotif() 196
|
||||||
#TOC> 3.1 REFspecies 224
|
#TOC> 3.7 H() (Shannon entropy) 240
|
||||||
#TOC> 4 FUNCTIONS TO CUSTOMIZE ASSIGNMENTS 239
|
#TOC> 4 DATA 254
|
||||||
#TOC> 4.1 getMYSPE() 242
|
#TOC> 4.1 REFspecies 256
|
||||||
#TOC> 4.2 selectPDBrep() 251
|
#TOC> 5 FUNCTIONS TO CUSTOMIZE ASSIGNMENTS 271
|
||||||
|
#TOC> 5.1 getMYSPE() 274
|
||||||
|
#TOC> 5.2 selectPDBrep() 283
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -46,11 +48,21 @@ source("./scripts/ABC-dbUtilities.R")
|
|||||||
source("./scripts/ABC-writeALN.R")
|
source("./scripts/ABC-writeALN.R")
|
||||||
source("./scripts/ABC-writeMFA.R")
|
source("./scripts/ABC-writeMFA.R")
|
||||||
|
|
||||||
|
# = 2 PACKAGES ============================================================
|
||||||
|
|
||||||
# = 2 SUPPORT FUNCTIONS ===================================================
|
if (! requireNamespace("digest", quietly = TRUE)) {
|
||||||
|
install.packages("digest")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! requireNamespace("jsonlite", quietly = TRUE)) {
|
||||||
|
install.packages("jsonlite")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.1 objectInfo() ======================================================
|
# = 3 SUPPORT FUNCTIONS ===================================================
|
||||||
|
|
||||||
|
|
||||||
|
# == 3.1 objectInfo() ======================================================
|
||||||
objectInfo <- function(x) {
|
objectInfo <- function(x) {
|
||||||
# Function to combine various information items about R objects
|
# Function to combine various information items about R objects
|
||||||
#
|
#
|
||||||
@ -78,7 +90,7 @@ objectInfo <- function(x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.2 biCode() ==========================================================
|
# == 3.2 biCode() ==========================================================
|
||||||
biCode <- function(s) {
|
biCode <- function(s) {
|
||||||
# Make a 5 character "biCode" from a binomial name by concatenating
|
# Make a 5 character "biCode" from a binomial name by concatenating
|
||||||
# the uppercased first three letters of the first word and the first
|
# the uppercased first three letters of the first word and the first
|
||||||
@ -112,8 +124,27 @@ biCode <- function(s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.3 pBar() ============================================================
|
# == 3.3 sameSpecies() =====================================================
|
||||||
pBar <- function(i, l, nCh = 50) {
|
sameSpecies <- function(a, b) {
  # Compare two vectors of species names at the level of the binomial name.
  #
  # Parameters:
  #    a, b   two vectors that contain binomial species names and maybe
  #           additional strain information after the binomial name.
  # Value:
  #    a boolean vector, TRUE where the species in a is the same as the
  #    species in b.
  # Errors:
  #    stops if any element of a or b (including NA) does not begin with
  #    two whitespace-separated words.
  # Note:
  #    The usual vector recycling applies. Length is not checked.
  #    The comparison is case-sensitive.

  # Reduce each element to "Genus species": drop leading whitespace, keep the
  # first two words, and join them with exactly one space. Normalizing the
  # separator matters: otherwise "Homo\tsapiens" and "Homo sapiens" would both
  # pass validation and yet compare as different species.
  toBinomial <- function(x) {
    sub("^\\s*(\\S+)\\s+(\\S+).*$", "\\1 \\2", x)
  }

  # Elements that did not match the pattern above are returned unchanged by
  # sub(), so they are caught here. grepl() returns FALSE for NA.
  isBinomial <- function(x) {
    grepl("^\\S+ \\S+$", x)
  }

  a <- toBinomial(a)
  b <- toBinomial(b)

  if (! all(isBinomial(a))) {
    stop("\"a\" contains elements that are not binomial names.")
  }
  if (! all(isBinomial(b))) {
    stop("\"b\" contains elements that are not binomial names.")
  }

  return(a == b)
}
|
||||||
|
|
||||||
|
|
||||||
|
# == 3.4 pBar() ============================================================
|
||||||
|
pBar <- function(i, l, nCh = 50) {
|
||||||
# Draw a progress bar in the console
|
# Draw a progress bar in the console
|
||||||
# i: the current iteration
|
# i: the current iteration
|
||||||
# l: the total number of iterations
|
# l: the total number of iterations
|
||||||
@ -134,7 +165,7 @@ pBar <- function(i, l, nCh = 50) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.4 waitTimer() =======================================================
|
# == 3.5 waitTimer() =======================================================
|
||||||
waitTimer <- function(t, nIntervals = 50) {
|
waitTimer <- function(t, nIntervals = 50) {
|
||||||
# pause and wait for t seconds and display a progress bar as
|
# pause and wait for t seconds and display a progress bar as
|
||||||
# you are waiting
|
# you are waiting
|
||||||
@ -162,7 +193,7 @@ waitTimer <- function(t, nIntervals = 50) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.5 fetchMSAmotif() ===================================================
|
# == 3.6 fetchMSAmotif() ===================================================
|
||||||
fetchMSAmotif <- function(ali, mot) {
|
fetchMSAmotif <- function(ali, mot) {
|
||||||
# Retrieve a subset from ali that spans the sequence in mot.
|
# Retrieve a subset from ali that spans the sequence in mot.
|
||||||
# Biostrings package must be installed.
|
# Biostrings package must be installed.
|
||||||
@ -206,7 +237,7 @@ fetchMSAmotif <- function(ali, mot) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 2.6 H() (Shannon entropy) =============================================
|
# == 3.7 H() (Shannon entropy) =============================================
|
||||||
H <- function(x, N) {
|
H <- function(x, N) {
|
||||||
# calculate the Shannon entropy of the vector x given N possible states
|
# calculate the Shannon entropy of the vector x given N possible states
|
||||||
# (in bits).
|
# (in bits).
|
||||||
@ -220,9 +251,9 @@ H <- function(x, N) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
# = 3 DATA ================================================================
|
# = 4 DATA ================================================================
|
||||||
|
|
||||||
# == 3.1 REFspecies ========================================================
|
# == 4.1 REFspecies ========================================================
|
||||||
# 10 species of fungi for reference analysis.
|
# 10 species of fungi for reference analysis.
|
||||||
# http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
|
# http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
|
||||||
REFspecies <- c("Aspergillus nidulans",
|
REFspecies <- c("Aspergillus nidulans",
|
||||||
@ -237,10 +268,10 @@ REFspecies <- c("Aspergillus nidulans",
|
|||||||
"Wallemia mellicola")
|
"Wallemia mellicola")
|
||||||
|
|
||||||
|
|
||||||
# = 4 FUNCTIONS TO CUSTOMIZE ASSIGNMENTS ==================================
|
# = 5 FUNCTIONS TO CUSTOMIZE ASSIGNMENTS ==================================
|
||||||
|
|
||||||
|
|
||||||
# == 4.1 getMYSPE() ========================================================
|
# == 5.1 getMYSPE() ========================================================
|
||||||
getMYSPE <- function(x) {
|
getMYSPE <- function(x) {
|
||||||
dat <- readRDS("./data/sDat.rds")
|
dat <- readRDS("./data/sDat.rds")
|
||||||
map <- readRDS("./data/MYSPEmap.rds")
|
map <- readRDS("./data/MYSPEmap.rds")
|
||||||
@ -249,7 +280,7 @@ getMYSPE <- function(x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# == 4.2 selectPDBrep() ====================================================
|
# == 5.2 selectPDBrep() ====================================================
|
||||||
selectPDBrep <- function(n, seed) {
|
selectPDBrep <- function(n, seed) {
|
||||||
# Select n PDB IDs from a list of high-resolution, non-homologous, single
|
# Select n PDB IDs from a list of high-resolution, non-homologous, single
|
||||||
# domain, single chain structure files that represent a CATH topology
|
# domain, single chain structure files that represent a CATH topology
|
||||||
|
Loading…
Reference in New Issue
Block a user