bch441-work-abc-units/BIN-MYSPE.R

# tocID <- "BIN-MYSPE.R"
#
# ---------------------------------------------------------------------------- #
#  PATIENCE  ...                                                               #
#    Do not yet work with this code. Updates in progress. Thank you.           #
#    boris.steipe@utoronto.ca                                                  #
# ---------------------------------------------------------------------------- #
#
# Purpose: A Bioinformatics Course:
#              R code accompanying the BIN-MYSPE unit
#
# Version: 1.1
#
# Date:    2020-09-18
# Author:  Boris Steipe (boris.steipe@utoronto.ca)
#
# V 1.1    2020 Workflow changes
# V 1.0.1  Move ABC-makeMYSPElist.R to ./scripts directory
# V 1.0    Final code, after rewriting BLAST parser and updating MYSPElist
# V 0.1    First code copied from BCH441_A03_makeMYSPElist.R
#
# TODO:
#
#
# == HOW TO WORK WITH LEARNING UNIT FILES ======================================
#
# DO NOT SIMPLY  source()  THESE FILES!
#
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
#  going on. That's not how it works ...
#
# ==============================================================================


#TOC> ==========================================================================
#TOC>
#TOC>   Section  Title                           Line
#TOC> -----------------------------------------------
#TOC>   1        Preparations                      47
#TOC>   2        Suitable MYSPE Species            59
#TOC>   3        Adopt "MYSPE"                     83
#TOC>
#TOC> ==========================================================================


# =    1  Preparations  ========================================================
#

# Execute both checks below. Each one aborts with an error message if the
# session has not been set up as expected.

# (1) The profile script has to be present in the myScripts folder, relative
#     to the current working directory.
if (!file.exists("./myScripts/.myProfile.R")) {
  stop("PANIC: profile file does not exist. Fix problem or ask for help.")
}

# (2) A variable named myStudentNumber has to exist in this session.
if (!exists("myStudentNumber")) {
  stop("PANIC: profile data wasn't loaded. Fix problem or ask for help.")
}


# =    2  Suitable MYSPE Species  ==============================================


# In this unit we will select one species from a list of genome sequenced fungi
# and write it into your personalized profile file. This species will be called
# "MYSPE" (Your Favourite Organism) for other learning units and exercises.

# A detailed description of the process of compiling the list of genome
# sequenced fungi with protein annotations and Mbp1 homologues is in the file
# ./scripts/ABC-makeMYSPElist.R  In brief, data for genome-sequenced fungi
# was retrieved from https://fungi.ensembl.org; a search for homologues to
# yeast Mbp1 was performed with BLAST at the NCBI, and the data was merged.
# A representative organism at each genus-level was chosen from those hits
# that actually have a homologue. Finally, a mapping table was constructed to
# asymmetrically retrieve unique species: a student number will retrieve
# a species, but (public) knowledge of the species cannot reconstruct the
# student number.

# Task: Study ./scripts/ABC-makeMYSPElist.R, it implements a typical workflow
#       of selecting and combining data from various data resources. Studying
#       it will give you a better sense of how such workflows can be
#       implemented in practice.


# =    3  Adopt "MYSPE"  =======================================================

# Execute both lines: the first retrieves the species for your student number
# and stores it in MYSPE, the second prints the stored value.
MYSPE <- getMYSPE(myStudentNumber)
MYSPE

# If this produced an error, this session has not been properly set up. You
# may not yet have run  init()  and edited  .myProfile.R , or that file is not
# in your  myScripts/  folder. Fix this, and execute  source(".Rprofile") .

# If this produced NA, your Student Number may not be correct, or you are not
# in my class-list. Contact me.
# Otherwise, this should have printed a species name. Your unique species
# for this course.

# Execute: biCode() (defined elsewhere in the course code) is applied to MYSPE;
# the task below refers to the result as the species' five letter BiCode.
biCode(MYSPE) # and what is its "BiCode" ... ?

# Task: Note down the species name and its five letter BiCode on your Student
# Wiki user page. Use this species whenever this or future assignments refer
# to MYSPE. Whenever you start a session, it will automatically be loaded
# from  myScripts/.myProfile.R  and is available as  MYSPE .

# Here is some more information:
# (the path is relative to the current working directory)
fungiDat <- read.csv("data/Species.csv")

# number of sequenced fungal genomes:
nrow(fungiDat)

# sequenced genomes of species:
# The regular expression keeps only the first two whitespace-separated words
# of each name; any further text after the second word is dropped before the
# comparison with MYSPE.
# %in% is used instead of == because %in% never returns NA. With ==, a missing
# value in MYSPE or in the Name column puts NA into sel, and NA in a logical
# row index adds spurious NA entries that would inflate the counts below.
sel <- gsub("^(\\S+\\s\\S+).*$", "\\1", fungiDat$Name) %in% MYSPE
( x <- fungiDat[sel, "Name"] )
Nspc <- length(x)  # save this for later ...

# sequenced genomes of genus:
# Everything from the first whitespace onward is removed on both sides, so
# only the first word of each name is compared.
sel <- gsub("\\s.*", "", fungiDat$Name) %in% gsub("\\s.*", "", MYSPE)
( x <- fungiDat[sel, "Name"] )
Ngen <- length(x) - Nspc   # same genus, but not counting the species itself

# order:
# (the Classification column is taken to represent the order here)
( x <- unique(fungiDat[sel, "Classification"]) )
# x can hold more than one value if the rows of the genus do not all share
# the same classification. == would then recycle x element by element and
# count the wrong rows; %in% tests membership in the whole set instead, and
# it does not turn the sum into NA when Classification has missing values.
Nord <- sum(fungiDat$Classification %in% x) - Ngen - Nspc
# all remaining entries of the table
Nfng <- nrow(fungiDat) - Nord - Ngen - Nspc

# proportions
# Pie chart of the four counts computed above: species, rest of the genus,
# rest of the order, and all remaining fungi - one colour per group.
pCol <- c("#ed394e", "#ff9582", "#ffd5c4", "#f2f2f0")

# par() returns the previous settings; they are kept in oPar so that they can
# be restored once the plot is finished.
oPar <- par(mar = c(1.1, 0, 1.1, 1))

pie(x = c(Nspc, Ngen, Nord, Nfng),
    labels = "",                 # no labels on the wedges, see legend
    col = pCol,
    lty = 0,                     # no borders for wedges
    radius = 1,
    init.angle = 90,             # first wedge starts at the top ...
    clockwise = TRUE,            # ... and wedges proceed clockwise
    main = "MYSPE in genome-sequenced fungi",
    sub = MYSPE)

legend(x = 1.05,                 # position, in plot coordinates
       y = 0.8,
       legend = c("Species", "Genus", "Order", "Fungi"),
       col = pCol,
       pch = 15,                 # plotting symbol for the colour boxes
       pt.cex = 2,               # size of colour boxes
       cex = 0.8,                # character size for labels
       y.intersp = 2,            # line spacing for labels
       bty = "n")                # "no" box around the legend

# restore the graphics parameters that were in effect before the plot
par(oPar)


# [END]