Maintenance

This commit is contained in:
hyginn 2020-09-24 08:55:14 +10:00
parent 4c793a6074
commit f8adefc6f9

View File

@ -1,20 +1,15 @@
# tocID <- "FND-Genetic_code.R" # tocID <- "FND-Genetic_code.R"
# #
# ---------------------------------------------------------------------------- #
# PATIENCE ... #
# Do not yet work wih this code. Updates in progress. Thank you. #
# boris.steipe@utoronto.ca #
# ---------------------------------------------------------------------------- #
#
# Purpose: A Bioinformatics Course: # Purpose: A Bioinformatics Course:
# R code accompanying the FND-Genetic_code unit. # R code accompanying the FND-Genetic_code unit.
# #
# Version: 1.1 # Version: 1.2
# #
# Date: 2017 10 - 2019 01 # Date: 2017 10 - 2019 01
# Author: Boris Steipe (boris.steipe@utoronto.ca) # Author: Boris Steipe (boris.steipe@utoronto.ca)
# #
# Versions: # Versions:
# 1.2 2020 Maintenance
# 1.1 Change from require() to requireNamespace(), # 1.1 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout, # use <package>::<function>() idiom throughout,
# use BiocManager:: not biocLite() # use BiocManager:: not biocLite()
@ -51,20 +46,26 @@
# = 1 Storing the genetic code ============================================ # = 1 Storing the genetic code ============================================
# The genetic code maps trinucleotide codons to amino acids. To store it, we # The genetic code maps trinucleotide codons to amino acids. To store it, we
# need some mechanism to associate these two informattion items. The most # need some mechanism to associate the two representations. The most
# convenient way to do that is a "named vector" which holds the amino acid # convenient way to do that is a "named vector" which holds the amino acid
# code and assigns the codons as names to its elements. # code and assigns the codons as names to its elements.
x <- c("M", "*") x <- c("M", "H", "H", "*", "*", "*")
names(x) <- c("ATG", "TAA") names(x) <- c("ATG", "CAC", "CAT", "TAA", "TAG", "TGA")
x x
# Then we can access the vector by the codon as name, and retrieve the # Then we can access the vector by the codon as name, and retrieve the
# amino acid. # amino acid ...
x["ATG"] x["ATG"]
x["CAC"]
x["TAA"] x["TAA"]
# ... or the names of elements, to retrieve the codon(s)
names(x)[x == "M"]
names(x)[x == "H"]
names(x)[x == "*"]
# == 1.1 Genetic code in Biostrings ======================================== # == 1.1 Genetic code in Biostrings ========================================
@ -103,33 +104,33 @@ Biostrings::getGeneticCode("12") # Alternative Yeast Nuclear
# to a "local" variable, rather than retrieving it from the package all the # to a "local" variable, rather than retrieving it from the package all the
# time. # time.
genCode <- Biostrings::GENETIC_CODE GC <- Biostrings::GENETIC_CODE
# This is a named vector of characters ... # This is a named vector of characters ...
str(genCode) str(GC)
# ... which also stores the alternative initiation codons TTG and CTG in # ... which also stores the alternative initiation codons TTG and CTG in
# an attribute of the vector. (Alternative initiation codons sometimes are # an attribute of the vector. (Alternative initiation codons sometimes are
# used instead of ATG to intiate translation, if if not ATG they are translated # used instead of ATG to initiate translation, if translation is not initiated
# with fMet.) # at ATG these are still translated with fMet.)
attr(genCode, "alt_init_codons") attr(GC, "alt_init_codons")
# But the key to use this vector is in the "names" which we use for subsetting # But the key to use this vector is in the "names" which we use for subsetting
# the list of amino acids in whatever way we need. # the list of amino acids in whatever way we need.
names(genCode) names(GC)
# The translation of "TGG" ... # The translation of "TGG" ...
genCode["TGG"] GC["TGG"]
# All stop codons # All stop codons
names(genCode)[genCode == "*"] names(GC)[GC == "*"]
# All start codons # All start codons
names(genCode)[genCode == "M"] # ... or names(GC)[GC == "M"] # ... or
c(names(genCode)[genCode == "M"], c(names(GC)[GC == "M"],
attr(genCode, "alt_init_codons")) attr(GC, "alt_init_codons"))
# == 2.1 Translate a sequence. ============================================= # == 2.1 Translate a sequence. =============================================
@ -148,7 +149,7 @@ mbp1 <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
# The reason for this is that the last character of the file is the letter "A" # The reason for this is that the last character of the file is the letter "A"
# and not a "\n" line break. This file is exactly how it was sent from the # and not a "\n" line break. This file is exactly how it was sent from the
# server; I think good, defensive programming practice would have been to # NCBI server; I think good, defensive programming practice would have been to
# include some kind of an end-marker in the file, like a final "\n". This helps # include some kind of an end-marker in the file, like a final "\n". This helps
# us recognize an incomplete transmission. Let's parse the actual sequence from # us recognize an incomplete transmission. Let's parse the actual sequence from
# the file, and then check for completeness. # the file, and then check for completeness.
@ -187,7 +188,7 @@ head(mbp1Codons)
mbp1AA <- character(834) mbp1AA <- character(834)
for (i in seq_along(mbp1Codons)) { for (i in seq_along(mbp1Codons)) {
mbp1AA[i] <- genCode[mbp1Codons[i]] mbp1AA[i] <- GC[mbp1Codons[i]]
} }
head(mbp1Codons) head(mbp1Codons)
@ -238,7 +239,7 @@ for (i in 1:4) {
dimnames(cCube)[[3]][k], dimnames(cCube)[[3]][k],
sep = "", sep = "",
collapse = "") collapse = "")
cCube[i, j, k] <- genCode[myCodon] cCube[i, j, k] <- GC[myCodon]
} }
} }
} }
@ -249,6 +250,7 @@ cCube["T", "T", "T"] # phenylalanine
cCube["T", "A", "G"] # stop (amber) cCube["T", "A", "G"] # stop (amber)
# == 3.1 Print a Genetic code table ======================================== # == 3.1 Print a Genetic code table ========================================
@ -265,13 +267,15 @@ cCube["T", "A", "G"] # stop (amber)
nuc <- c("T", "C", "A", "G") nuc <- c("T", "C", "A", "G")
for (i in nuc) { # (calling variables f, s, t to indicate first, second, and third position ...)
for (k in nuc) { for (f in nuc) { # first varies in blocks
for (j in nuc) { for (t in nuc) { # third varies in columns
cat(sprintf("%s%s%s: %s ", i, j, k, cCube[i, j, k])) for (s in nuc) { # second varies in rows
cat(sprintf("%s%s%s: %s ", f, s, t, cCube[f, s, t]))
} }
cat("\n") cat("\n")
} }
cat("\n")
} }
@ -335,7 +339,11 @@ for (i in nuc) {
# #
# #
# Solution: # Solution:
table(table(Biostrings::GENETIC_CODE)) ( x <- table(table(Biostrings::GENETIC_CODE)) )
# confirm
sum(x * as.numeric(names(x)))
# [END] # [END]