bch441-work-abc-units/BIN-MYSPE.R
2020-09-21 15:06:27 +10:00

149 lines
5.5 KiB
R

# tocID <- "BIN-MYSPE.R"
#
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work with this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
#
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-MYSPE unit
#
# Version: 1.1
#
# Date: 2020-09-18
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# V 1.1 2020 Workflow changes
# V 1.0.1 Move ABC-makeMYSPElist.R to ./scripts directory
# V 1.0 Final code, after rewriting BLAST parser and updating MYSPElist
# V 0.1 First code copied from BCH441_A03_makeMYSPElist.R
#
# TODO:
#
#
# == HOW TO WORK WITH LEARNING UNIT FILES ======================================
#
# DO NOT SIMPLY source() THESE FILES!
#
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
#
# ==============================================================================
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------
#TOC> 1 Preparations 47
#TOC> 2 Suitable MYSPE Species 59
#TOC> 3 Adopt "MYSPE" 83
#TOC>
#TOC> ==========================================================================
# = 1 Preparations ========================================================
#
# Execute the two conditionals below:
# Guard 1: the personal profile script has to be present in ./myScripts/ .
if (!file.exists("./myScripts/.myProfile.R")) {
  stop("PANIC: profile file does not exist. Fix problem or ask for help.")
}

# Guard 2: the variable myStudentNumber has to exist in this session.
if (!exists("myStudentNumber")) {
  stop("PANIC: profile data wasn't loaded. Fix problem or ask for help.")
}
# = 2 Suitable MYSPE Species ==============================================
# In this unit we will select one species from a list of genome sequenced fungi
# and write it into your personalized profile file. This species will be called
# "MYSPE" (Your Favourite Organism) for other learning units and exercises.
# A detailed description of the process of compiling the list of genome
# sequenced fungi with protein annotations and Mbp1 homologues is in the file
# ./scripts/ABC-makeMYSPElist.R In brief, data for genome-sequenced fungi
# was retrieved from https://fungi.ensembl.org; a search for homologues to
# yeast Mbp1 was performed with BLAST at the NCBI, and the data was merged.
# A representative organism at each genus-level was chosen from those hits
# that actually have a homologue. Finally, a mapping table was constructed to
# asymmetrically retrieve unique species: a student number will retrieve
# a species, but (public) knowledge of the species cannot reconstruct the
# student number.
# Task: Study ./scripts/ABC-makeMYSPElist.R, it implements a typical workflow
# of selecting and combining data from various data resources. Studying
# it will give you a better sense of how such workflows can be
# implemented in practice.
# = 3 Adopt "MYSPE" =======================================================
# Execute:
# Assign the result of getMYSPE() for your student number to MYSPE. The
# enclosing parentheses make R print the assigned value when this line is
# executed at the console.
( MYSPE <- getMYSPE(myStudentNumber) )
# If this produced an error, this session has not been properly set up. You
# may not yet have run init() and edited .myProfile.R , or that file is not
# in your myScripts/ folder. Fix this, and execute source(".Rprofile") .
# If this produced NA, your Student Number may not be correct, or you are not
# in my class-list. Contact me.
# Otherwise, this should have printed a species name. Your unique species
# for this course.
biCode(MYSPE) # and what is its "BiCode" ... ?
# Task: Note down the species name and its five letter BiCode on your Student
# Wiki user page. Use this species whenever this or future assignments refer
# to MYSPE. Whenever you start a session, it will automatically be loaded
# from myScripts/.myProfile.R and is available as MYSPE .
# Here is some more information:
# Read the species table. The code below uses its "Name" and
# "Classification" columns.
fungiDat <- read.csv("data/Species.csv")

# number of sequenced fungal genomes:
nrow(fungiDat)

# sequenced genomes of species:
# Reduce every Name to its first two whitespace-separated words and compare
# that to MYSPE.
sel <- MYSPE == gsub("^(\\S+\\s\\S+).*$", "\\1", fungiDat$Name)
( x <- fungiDat[sel, "Name"] )
Nspc <- length(x)          # save this for later ...

# sequenced genomes of genus:
# Compare only the first word of MYSPE with the first word of every Name.
sel <- gsub("\\s.*", "", MYSPE) == gsub("\\s.*", "", fungiDat$Name)
( x <- fungiDat[sel, "Name"] )
Ngen <- length(x) - Nspc   # same first word, minus the rows counted in Nspc

# order:
# x holds the distinct Classification value(s) of the rows selected above.
( x <- unique(fungiDat[sel, "Classification"]) )
# Count membership with %in% rather than == : if the selected rows carry more
# than one Classification value, == would silently recycle x along the column
# and miscount; == would also turn the sum into NA if any Classification is
# NA. %in% handles both cases and gives the same count when x is one value.
Nord <- sum(fungiDat$Classification %in% x) - Ngen - Nspc

# everything else in the table:
Nfng <- nrow(fungiDat) - Nord - Ngen - Nspc
# proportions
# One colour per wedge, in the order the counts are passed to pie() below.
pCol <- c("#ed394e", "#ff9582", "#ffd5c4", "#f2f2f0")

# Change the plot margins; par() hands back the previous settings, which are
# kept in oPar so they can be reinstated after plotting.
oPar <- par(mar = c(1.1, 0, 1.1, 1))

# Pie chart of the four counts: species, genus, order, remaining fungi.
pie(c(Nspc, Ngen, Nord, Nfng),
    main = "MYSPE in genome-sequenced fungi",
    sub = MYSPE,
    col = pCol,
    labels = "",          # wedges are explained by the legend instead
    lty = 0,              # no borders for wedges
    radius = 1,
    init.angle = 90,
    clockwise = TRUE)

# Legend with one filled square per wedge colour.
legend(x = 1.05, y = 0.8,                            # position
       legend = c("Species", "Genus", "Order", "Fungi"),
       col = pCol,
       pch = 15,
       pt.cex = 2,                                   # size of colour boxes
       cex = 0.8,                                    # character size for labels
       y.intersp = 2,                                # line spacing for labels
       bty = "n")                                    # "no" box around the legend

# Reinstate the graphics parameters saved above.
par(oPar)
# [END]