bch441-work-abc-units/BIN-MYSPE.R

143 lines
5.1 KiB
R
Raw Normal View History

# tocID <- "BIN-MYSPE.R"
#
2017-09-12 20:09:20 +00:00
# Purpose: A Bioinformatics Course:
2017-10-04 03:38:48 +00:00
# R code accompanying the BIN-MYSPE unit
2017-09-12 20:09:20 +00:00
#
# Version: 1.1
2017-09-12 20:09:20 +00:00
#
# Date: 2020-09-18
2017-09-12 20:09:20 +00:00
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# V 1.1 2020 Workflow changes
2017-10-23 16:37:09 +00:00
# V 1.0.1 Move ABC-makeMYSPElist.R to ./scripts directory
# V 1.0 Final code, after rewriting BLAST parser and updating MYSPElist
2017-10-04 03:38:48 +00:00
# V 0.1 First code copied from BCH441_A03_makeMYSPElist.R
2017-09-12 20:09:20 +00:00
#
# TODO:
#
#
# == HOW TO WORK WITH LEARNING UNIT FILES ======================================
#
# DO NOT SIMPLY source() THESE FILES!
2017-09-21 21:49:55 +00:00
#
2017-09-12 20:09:20 +00:00
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
#
# ==============================================================================
2017-10-04 03:38:48 +00:00
2017-09-21 21:49:55 +00:00
#TOC> ==========================================================================
2020-09-21 04:54:30 +00:00
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------
#TOC> 1 Preparations 47
#TOC> 2 Suitable MYSPE Species 59
#TOC> 3 Adopt "MYSPE" 83
2020-09-21 04:54:30 +00:00
#TOC>
2017-09-21 21:49:55 +00:00
#TOC> ==========================================================================
2017-10-04 03:38:48 +00:00
2017-09-21 21:49:55 +00:00
# = 1 Preparations ========================================================
#
2017-09-12 20:09:20 +00:00
2017-09-21 21:49:55 +00:00
# Execute the two conditionals below:
2020-09-21 04:54:30 +00:00
if (! file.exists("./myScripts/.myProfile.R")) {
2017-09-21 21:49:55 +00:00
stop("PANIC: profile file does not exist. Fix problem or ask for help.")
}
if (! exists("myStudentNumber")) {
stop("PANIC: profile data wasn't loaded. Fix problem or ask for help.")
}
2017-09-12 20:09:20 +00:00
2017-10-04 03:38:48 +00:00
# = 2 Suitable MYSPE Species ==============================================
2017-09-12 20:09:20 +00:00
2017-09-21 21:49:55 +00:00
# In this unit we will select one species from a list of genome sequenced fungi
# and write it into your personalized profile file. This species will be called
2017-10-04 03:38:48 +00:00
# "MYSPE" (Your Favourite Organism) for other learning units and exercises.
2017-09-12 20:09:20 +00:00
2017-09-21 21:49:55 +00:00
# A detailed description of the process of compiling the list of genome
2017-09-12 20:09:20 +00:00
# sequenced fungi with protein annotations and Mbp1 homologues is in the file
# ./scripts/ABC-makeMYSPElist.R In brief, data for genome-sequenced fungi
# was retrieved from https://fungi.ensembl.org; a search for homologues to
# yeast Mbp1 was performed with BLAST at the NCBI, and the data was merged.
# A representative organism at each genus-level was chosen from those hits
# that actual;ly have a homologue. Finally, a mapping table was constructed to
# asymmetrically retrieve unique species: a student number will retrieve
# a species, but (public) knowledge of the species cannot reconstruct the
# student number.
2017-09-21 21:49:55 +00:00
2017-10-23 16:37:09 +00:00
# Task: Study ./scripts/ABC-makeMYSPElist.R, it implements a typical workflow
# of selecting and combining data from various data resources. Studying
# it will give you a better sense of how such workflows can be
# implemented in practice.
2017-09-21 21:49:55 +00:00
2017-10-04 03:38:48 +00:00
# = 3 Adopt "MYSPE" =======================================================
2017-09-21 21:49:55 +00:00
# Execute:
( MYSPE <- getMYSPE(myStudentNumber) )
2017-09-21 21:49:55 +00:00
# If this produced an error, this session has not been properly set up. You
# may not yet have run init() and edited .myProfile.R , or that file is not
# in your myScripts/ folder. Fix this, and execute source(".Rprofile") .
2017-09-12 20:09:20 +00:00
# If this produced NA, your Student Number may not be correct, or you are not
# in my class-list. Contact me.
# Otherwise, this should have printed a species name. Your unique species
# for this course.
2017-09-12 20:09:20 +00:00
2017-10-04 03:38:48 +00:00
biCode(MYSPE) # and what is it's "BiCode" ... ?
2017-09-12 20:09:20 +00:00
# Task: Note down the species name and its five letter BiCode on your Student
2017-09-21 21:49:55 +00:00
# Wiki user page. Use this species whenever this or future assignments refer
# to MYSPE. Whenever you start a session, it will automatically be loaded
# from myScripts/.myProfile.R and is available as MYSPE .
# Here is some more information:
fungiDat <- read.csv("data/Species.csv")
# number of sequenced fungal genomes:
nrow(fungiDat)
# sequenced genomes of species:
sel <- MYSPE == gsub("^(\\S+\\s\\S+).*$", "\\1", fungiDat$Name)
( x <- fungiDat[sel, "Name"] )
Nspc <- length(x) # save this for later ...
# sequenced genomes of genus:
sel <- gsub("\\s.*", "", MYSPE) == gsub("\\s.*", "", fungiDat$Name)
( x <- fungiDat[sel, "Name"] )
Ngen <- length(x) - Nspc
# order:
( x <- unique(fungiDat[sel, "Classification"]) )
Nord <- sum(fungiDat$Classification == x) - Ngen - Nspc
Nfng <- nrow(fungiDat) - Nord - Ngen - Nspc
# proportions
pCol <- c("#ed394e", "#ff9582", "#ffd5c4", "#f2f2f0")
2020-09-21 05:23:27 +00:00
oPar <- par(mar = c(0.1, 0.1, 2.5, 0.1))
pie(c(Nspc, Ngen, Nord, Nfng),
labels = "",
2020-09-21 05:23:27 +00:00
radius = 0.9,
main = "MYSPE in genome-sequenced fungi",
2020-09-21 05:06:27 +00:00
lty = 0, # no borders for wedges
col = pCol,
clockwise = TRUE,
init.angle = 90)
2020-09-21 05:23:27 +00:00
title(main = MYSPE, line = 0, cex.main = 0.7)
legend(x = 0.95, y = 0.8, # position
legend = c("Species", "Genus", "Order", "Fungi"),
2020-09-21 05:06:27 +00:00
y.intersp = 2, # line spacing for labels
cex = 0.8, # character size for labels
bty = "n", # "no" box around the legend
pt.cex = 2, # size of colour boxes
pch = 15,
col = pCol)
2020-09-21 05:06:27 +00:00
par(oPar)
2017-09-12 20:09:20 +00:00
# [END]