# tocID <- "RPR-Unit_testing.R"
#
# Purpose:  A Bioinformatics Course:
#              R code accompanying the RPR-Unit_testing unit.
#
# Version:  1.2
#
# Date:     2017 10 - 2019 01
# Author:   Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
#           1.2    2020 Updates. Discuss local tests.
#           1.1    Change from require() to requireNamespace()
#           1.0    New code
#
#
# TODO:
#
#
# == DO NOT SIMPLY  source()  THIS FILE! =======================================
#
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
#
# ==============================================================================


#TOC> ==========================================================================
#TOC>
#TOC>   Section  Title                            Line
#TOC> -------------------------------------------------
#TOC>   1        Unit Tests with testthat           42
#TOC>   2        Organizing your tests             165
#TOC>   2.1        Testing scripts                 189
#TOC>   2.2        Rethinking testing              202
#TOC>   3        Task solutions                    220
#TOC>
#TOC> ==========================================================================


# =    1  Unit Tests with testthat  ============================================

# The testthat package supports writing and executing unit tests in many ways.

# Install testthat only if it is not already available. requireNamespace()
# checks for the package without attaching it.
if (!requireNamespace("testthat", quietly = TRUE)) {
  install.packages("testthat")
}
# Package information:
#  library(help = testthat)       # basic information
#  browseVignettes("testthat")    # available vignettes
#  data(package = "testthat")     # available datasets

# testthat is one of those packages that we either use A LOT in a script,
# or not at all. Therefore it's more reasonable to depart from our usual
# package::function() idiom, and load the entire library. In fact, if
# we author packages, it is common practice to load testthat in the part
# of the package that automates testing.

library(testthat)

# An atomic test consists of an expectation about the behaviour of a function or
# the existence of an object.
# testthat provides a number of useful expectations:

# At the most basic level, you can use expect_true() and expect_false():

expect_true(file.exists("./data/S288C_YDL056W_MBP1_coding.fsa"))
expect_true(file.exists("NO-SUCH-FILE.txt"))
expect_false(is.integer(NA))

# More commonly, you will test for equality of an output with a given result.
# But you need to consider what it means for two numbers to be "equal" on a
# digital computer. Consider:

49*(1/49) == 1      # Surprised? Read FAQ 7.31
                    # https://cran.r-project.org/doc/FAQ/R-FAQ.html
49*(1/49) - 1       # NOT zero (but almost)

# This is really unpredictable ...
0.1 + 0.05 == 0.15
0.2 + 0.07 == 0.27

# It's easy to be caught on the wrong foot with numeric comparisons, therefore
# R uses the function all.equal() to test whether two numbers are equal for
# practical purposes up to machine precision.
49*(1/49) == 1
all.equal(49*(1/49), 1)

# The testthat function expect_equal() uses all.equal() internally (in the
# 3rd edition of testthat it uses waldo::compare() instead, which also applies
# a numeric tolerance):
expect_equal(49*(1/49), 1)

# ... which is reasonable, or, if things MUST be exactly the same ...
expect_identical(49*(1/49), 1)
# ... but consider:
expect_identical(2, 2L)  # one is typeof() "double", the other is "integer"

# Some very useful expectations are expect_warning(), and expect_error(), for
# constructing tests that check for erroneous output:

as.integer(c("1", "2", "three"))
expect_warning(as.integer(c("1", "2", "three")))  # Note that the warning is NOT
                                                  # printed.

1/"x"
expect_warning(1/"x")
expect_error(1/"x")  # Again: note that the error is NOT printed; as well,
                     # code execution will continue.

# Even better, you can check if the warning or error is what you expect it
# to be - because it could actually have occurred somewhere else in your code.

v <- c("1", "x")
log(v[1:2])
expect_error(log(v[1:2]), "non-numeric argument to mathematical function")
expect_error(log(v[1:2]), "non-numeric")  # We can abbreviate the error message.
expect_error(log(v[1,2]))                 # This appears OK, but ...
expect_error(log(v[1,2]), "non-numeric")  # ... it's actually a different error!

# Producing unit tests simply means: we define a function, and then we check
# whether all tests pass. Consider a function that is loaded on startup from
# the .utilities.R script:

biCode

# We could test it like so:

expect_equal(biCode(""), ".....")
expect_equal(biCode(" "), ".....")
expect_equal(biCode("123 12"), ".....")
expect_equal(biCode("h sapiens"), "H..SA")
expect_equal(biCode("homo sapiens"), "HOMSA")
expect_equal(biCode("[homo sapiens neanderthaliensis]"), "HOMSA")
expect_equal(biCode(c("Phascolarctos cinereus", "Macropus rufus")),
             c("PHACI", "MACRU"))
expect_error(biCode(), "argument \"s\" is missing, with no default")

# The test_that() function allows to group related tests, include an informative
# message which test is being executed, and run a number of tests that are
# passed to the function inside a code block - i.e. {...}
#   test_that("descriptive string", { code block })

test_that("NA values are preserved", {
  # biCode() respects vector length: input and output must have the same length.
  # Therefore NA's can't be simply skipped, but must be properly passed
  # into output:
  expect_true(is.na(biCode(NA)))
  expect_equal(biCode(c("first", NA, "last")),
               c("FIRST", NA, "LAST."))
})

# Task: Write a function calcGC() that calculates GC content in a sequence.
#       Hint: you could strsplit() the sequence into a vector, and count
#       G's and C's; or you could use gsub("[AT]", "", s) on a sequence s to
#       remove A's and T's, and use nchar() before and after to calculate the
#       content from the length difference.
#       Then write tests that:
#          confirm that calcGC("AATT") is 0;
#          confirm that calcGC("ATGC") is 0.5;
#          confirm that calcGC("AC")   is 0.5;
#          confirm that calcGC("CGCG") is 1;


# =    2  Organizing your tests  ===============================================

# Tests are only useful if they are actually executed and we need to make sure
# there are no barriers to do that.
# The testthat package supports automatic execution of tests:
#  - put your tests into an R-script,
#  - save your tests in a file whose name starts with "test_" and names the
#    function being tested, e.g. "test_myFunction.R",
#  - execute the test with test_file("test_myFunction.R") ...
#  ... or, if you are working on a project ...
#  - place the file in a test-directory (e.g. the directory "tests" in this
#    project),
#  - execute all your tests with test_dir(), passing the path of that directory.

# For example I have provided a "tests" directory with this project, and
# placed the file "test_biCode.R" inside.
file.show("./tests/test_biCode.R")

# Execute the file ...
test_file("./tests/test_biCode.R")

# ... or execute all the test files in the directory:
test_dir("./tests")


# ==   2.1  Testing scripts  ===================================================

# Scripts need special consideration since we do not necessarily source() them
# entirely. Therefore automated testing is not reasonable. What you can do
# instead is to place a conditional block at the end of your script, that
# never gets executed - then you can manually execute the code in the block
# whenever you wish to test your functions. For example:

if (FALSE) {
  # ... your tests go here

}


# ==   2.2  Rethinking testing  ================================================

# However, it is important to keep in mind that different objectives lead to
# different ideas of what works best. There is never a "best" in and of itself,
# the question is always: "Best for what?" While automated unit testing is a
# great way to assure the integrity of packages and larger software artefacts as
# they are being developed, more loosely conceived aggregates of code - like the
# scripts for this course for example - have different objectives and in this
# case I find the testthat approach to actually be inferior. The reason is its
# tendency to physically separate code and tests. Keeping assets, and functions
# that operate on those assets separated is always poor design.
# I have found over time that a more stable approach is to move individual
# functions into their individual scripts, all in one folder, one function
# (and its helpers) per file, and examples, demos and tests in an
# if (FALSE) { ... } block, as explained above.


# =    3  Task solutions  ======================================================

calcGC <- function(s) {
  # Purpose: calculate the GC content of a nucleotide sequence.
  # Parameters:
  #     s: chr  sequence string(s); upper- and lowercase letters are accepted.
  # Value:
  #     num  the fraction of G and C among the A, C, G and T characters of s.
  #          Any other character is discarded before counting. If no
  #          A/C/G/T characters remain, the result is 0/0, i.e. NaN.

  s <- gsub("[^agctAGCT]", "", s)          # keep only nucleotide letters
  nGC <- nchar(gsub("[atAT]", "", s))      # what is left after dropping A, T
  return(nGC / nchar(s))
}

expect_equal(calcGC("AATT"), 0)
expect_equal(calcGC("ATGC"), 0.5)
expect_equal(calcGC("AC"),   0.5)
expect_equal(calcGC("CGCG"), 1)


# [END]