235 lines
8.8 KiB
R
235 lines
8.8 KiB
R
# tocID <- "RPR-Unit_testing.R"
|
|
#
|
|
# Purpose: A Bioinformatics Course:
|
|
# R code accompanying the RPR-Unit_testing unit.
|
|
#
|
|
# Version: 1.2
|
|
#
|
|
# Date: 2017 10 - 2019 01
|
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
|
#
|
|
# Versions:
|
|
# 1.2 2020 Updates. Discuss local tests.
|
|
# 1.1 Change from require() to requireNamespace()
|
|
# 1.0 New code
|
|
#
|
|
#
|
|
# TODO:
|
|
#
|
|
#
|
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
|
#
|
|
# If there are portions you don't understand, use R's help system, Google for an
|
|
# answer, or ask your instructor. Don't continue if you don't understand what's
|
|
# going on. That's not how it works ...
|
|
#
|
|
# ==============================================================================
|
|
|
|
|
|
#TOC> ==========================================================================
|
|
#TOC>
|
|
#TOC> Section Title Line
|
|
#TOC> -------------------------------------------------
|
|
#TOC> 1 Unit Tests with testthat 42
|
|
#TOC> 2 Organizing your tests 165
|
|
#TOC> 2.1 Testing scripts 189
|
|
#TOC> 2.2 Rethinking testing 202
|
|
#TOC> 3 Task solutions 220
|
|
#TOC>
|
|
#TOC> ==========================================================================
|
|
|
|
|
|
# = 1 Unit Tests with testthat ============================================
|
|
|
|
# The testthat package supports writing and executing unit tests in many ways.
|
|
|
|
# Install testthat only if it is not yet available. requireNamespace() checks
# for the package without attaching it to the search path.
if (! requireNamespace("testthat", quietly = TRUE)) {
  install.packages("testthat")
}
|
|
# Package information:
|
|
# library(help = testthat) # basic information
|
|
# browseVignettes("testthat") # available vignettes
|
|
# data(package = "testthat") # available datasets
|
|
|
|
# testthat is one of those packages that we either use A LOT in a script,
|
|
# or not at all. Therefore it's more reasonable to depart from our usual
|
|
# <package>::<function>() idiom, and load the entire library. In fact, if
|
|
# we author packages, it is common practice to load testthat in the part
|
|
# of the package that automates testing.
|
|
|
|
library(testthat)
|
|
|
|
# An atomic test consists of an expectation about the behaviour of a function or
|
|
# the existence of an object. testthat provides a number of useful expectations:
|
|
|
|
# At the most basic level, you can use expect_true() and expect_false():
|
|
|
|
# Passes only if the file is present relative to the working directory.
expect_true(file.exists("./data/S288C_YDL056W_MBP1_coding.fsa"))
# Fails (unless a file of that name happens to exist): shows a failed test.
expect_true(file.exists("NO-SUCH-FILE.txt"))

# A plain NA is of type logical, not integer.
expect_false(is.integer(NA))
|
|
|
|
# More commonly, you will test for equality of an output with a given result.
|
|
# But you need to consider what it means for two numbers to be "equal" on a
|
|
# digital computer. Consider:
|
|
|
|
49*(1/49) == 1       # Surprised? Read FAQ 7.31
                     # https://cran.r-project.org/doc/FAQ/R-FAQ.html
49*(1/49) - 1        # NOT zero (but almost)

# This is really unpredictable ...
0.1 + 0.05 == 0.15
0.2 + 0.07 == 0.27
|
|
|
|
# It's easy to be caught on the wrong foot with numeric comparisons, therefore
|
|
# R uses the function all.equal() to test whether two numbers are equal for
|
|
# practical purposes up to machine precision.
|
|
49*(1/49) == 1
all.equal(49*(1/49), 1)

# The testthat function expect_equal() uses all.equal internally (testthat's
# 3rd edition compares with waldo::compare() instead, also with a tolerance):
expect_equal(49*(1/49), 1)

# ... which is reasonable, or, if things MUST be exactly the same ...
expect_identical(49*(1/49), 1)

# ... but consider:
expect_identical(2, 2L)  # one is typeof() "double", the other is "integer"
|
|
|
|
# Some very useful expectations are expect_warning(), and expect_error(), for
|
|
# constructing tests that check for erroneous output:
|
|
|
|
as.integer(c("1", "2", "three"))
expect_warning(as.integer(c("1", "2", "three")))  # Note that the warning is NOT
                                                  # printed.
1/"x"
expect_warning(1/"x")
expect_error(1/"x")  # Again: note that the error is NOT printed, and
                     # code execution will continue.
|
|
|
|
# Even better, you can check if the warning or error is what you expect it
|
|
# to be - because it could actually have occurred somewhere else in your code.
|
|
|
|
v <- c("1", "x")
log(v[1:2])
expect_error(log(v[1:2]), "non-numeric argument to mathematical function")
expect_error(log(v[1:2]), "non-numeric")  # We can abbreviate the error message.
expect_error(log(v[1,2]))                 # This appears OK, but ...
expect_error(log(v[1,2]), "non-numeric")  # ... it's actually a different error!
|
|
|
|
# Producing unit tests simply means: we define a function, and then we check
|
|
# whether all tests pass. Consider a function that is loaded on startup from
|
|
# the .utilities.R script:
|
|
|
|
biCode

# We could test it like so:

expect_equal(biCode(""), ".....")
expect_equal(biCode(" "), ".....")
expect_equal(biCode("123 12"), ".....")
expect_equal(biCode("h sapiens"), "H..SA")
expect_equal(biCode("homo sapiens"), "HOMSA")
expect_equal(biCode("[homo sapiens neanderthaliensis]"), "HOMSA")
expect_equal(biCode(c("Phascolarctos cinereus", "Macropus rufus")),
             c("PHACI", "MACRU"))
expect_error(biCode(), "argument \"s\" is missing, with no default")
|
|
|
|
# The test_that() function lets you group related tests, include an informative
|
|
# message which test is being executed, and run a number of tests that are
|
|
# passed to the function inside a code block - i.e. {...}
|
|
# test_that("<descriptive string>", {<code block>})
|
|
|
|
test_that("NA values are preserved", {
  # biCode() respects vector length: input and output must have the same
  # length. Therefore NA's can't be simply skipped, but must be properly
  # passed into output:
  expect_true(is.na(biCode(NA)))
  expect_equal(biCode(c("first", NA, "last")),
               c("FIRST", NA, "LAST."))
})
|
|
|
|
|
|
# Task: Write a function calcGC() that calculates GC content in a sequence.
|
|
# Hint: you could strsplit() the sequence into a vector, and count
|
|
# G's and C's; or you could use gsub("[AT]", "", <sequence>) to remove
|
|
# A's and T's, and use nchar() before and after to calculate the content
|
|
# from the length difference.
|
|
# Then write tests that:
|
|
# confirm that calcGC("AATT") is 0;
|
|
# confirm that calcGC("ATGC") is 0.5;
|
|
# confirm that calcGC("AC") is 0.5;
|
|
# confirm that calcGC("CGCG") is 1;
|
|
|
|
|
|
# = 2 Organizing your tests ===============================================
|
|
|
|
|
|
# Tests are only useful if they are actually executed and we need to make sure
|
|
# there are no barriers to do that. The testthat package supports automatic
|
|
# execution of tests:
|
|
# - put your tests into an R-script,
|
|
# - save your tests in a file called "test_<my-function-name>.R"
|
|
# - execute the test with test_file("test_<my-function-name>.R") ...
|
|
# ... or, if you are working on a project ...
|
|
# - place the file in a test-directory (e.g. the directory "tests" in this
|
|
# project),
|
|
# - execute all your tests with test_dir("<my-test-directory>")
|
|
|
|
# For example I have provided a "tests" directory with this project, and
|
|
# placed the file "test_biCode.R" inside.
|
|
file.show("./tests/test_biCode.R")

# Execute the file ...
test_file("./tests/test_biCode.R")

# ... or execute all the test files in the directory:
test_dir("./tests")
|
|
|
|
# == 2.1 Testing scripts ===================================================
|
|
|
|
# Scripts need special consideration since we do not necessarily source() them
|
|
# entirely. Therefore automated testing is not reasonable. What you can do
|
|
# instead is to place a conditional block at the end of your script, that
|
|
# never gets executed - then you can manually execute the code in the block
|
|
# whenever you wish to test your functions. For example:
|
|
|
|
# The condition is never TRUE, so nothing in this block runs when the script
# is source()'d; select and execute its contents manually to run the tests.
if (FALSE) {
  # ... your tests go here

}
|
|
|
|
# == 2.2 Rethinking testing ================================================
|
|
|
|
# However, it is important to keep in mind that different objectives lead to
|
|
# different ideas of what works best. There is never a "best" in and of itself,
|
|
# the question is always: "Best for what?" While automated unit testing is a
|
|
# great way to assure the integrity of packages and larger software artefacts as
|
|
# they are being developed, more loosely conceived aggregates of code - like the
|
|
# scripts for this course for example - have different objectives and in this
|
|
# case I find the testthat approach to actually be inferior. The reason is its
|
|
# tendency to physically separate code and tests. Keeping assets, and functions
|
|
# that operate on those assets separated is always poor design. I have found
|
|
# over time that a more stable approach is to move individual functions into
|
|
# their individual scripts, all in one folder, one function (and its helpers)
|
|
# per file, and examples, demos and tests in an if (FALSE) { ... } block, as
|
|
# explained above.
|
|
|
|
|
|
|
|
# = 3 Task solutions ======================================================
|
|
|
|
# calcGC(): fraction of G and C among the A, C, G, T characters of a sequence.
#
# Parameters:
#   s   chr  nucleotide sequence(s). gsub() and nchar() are vectorized, so a
#            character vector yields one value per element.
# Value:
#   num  (number of G and C) / (number of A, C, G and T), upper or lower case.
#        All other characters (gaps, whitespace, ambiguity codes ...) are
#        ignored. NaN if a sequence contains no A, C, G or T at all (0 / 0).
calcGC <- function(s) {
  # Drop everything that is not an unambiguous nucleotide.
  s <- gsub("[^agctAGCT]", "", s)
  # What remains after also removing A and T are the G's and C's.
  gc <- gsub("[atAT]", "", s)
  return(nchar(gc) / nchar(s))
}
|
|
|
|
# The four checks requested in the task above.
expect_equal(calcGC("AATT"), 0)
expect_equal(calcGC("ATGC"), 0.5)
expect_equal(calcGC("AC"), 0.5)
expect_equal(calcGC("CGCG"), 1)
|
|
|
|
|
|
|
|
# [END]
|