Maintenance

2020-09-24 08:55:14 +10:00
parent 4c793a6074
commit f8adefc6f9
1 changed files with 38 additions and 30 deletions
--- a/FND-Genetic_code.R
+++ b/FND-Genetic_code.R
@@ -1,20 +1,15 @@
 # tocID <- "FND-Genetic_code.R"
 #
-# ---------------------------------------------------------------------------- #
-#  PATIENCE  ...                                                               #
-#    Do not yet work wih this code. Updates in progress. Thank you.            #
-#    boris.steipe@utoronto.ca                                                  #
-# ---------------------------------------------------------------------------- #
-#
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-Genetic_code unit.
 #
-# Version:  1.1
+# Version:  1.2
 #
 # Date:     2017  10  -  2019  01
 # Author:   Boris Steipe (boris.steipe@utoronto.ca)
 #
 # Versions:
+#           1.2    2020 Maintenance
 #           1.1    Change from require() to requireNamespace(),
 #                      use <package>::<function>() idiom throughout,
 #                      use Biocmanager:: not biocLite()
@@ -51,20 +46,26 @@
 # =    1  Storing the genetic code  ============================================

 # The genetic code maps trinucleotide codons to amino acids. To store it, we
-# need some mechanism to associate these two informattion items. The most
+# need some mechanism to associate the two representations. The most
 # convenient way to do that is a "named vector" which holds the amino acid
 # code and assigns the codons as names to its elements.

-x <- c("M", "*")
-names(x) <- c("ATG", "TAA")
+x <- c("M", "H", "H", "*", "*", "*")
+names(x) <- c("ATG", "CAC", "CAT", "TAA", "TAG", "TGA")
 x

 # Then we can access the vector by the codon as name, and retrieve the
-# amino acid.
+# amino acid ...

 x["ATG"]
+x["CAC"]
 x["TAA"]

+# ... or the names of elements, to retrieve the codon(s)
+names(x)[x == "M"]
+names(x)[x == "H"]
+names(x)[x == "*"]
+

 # ==   1.1  Genetic code in Biostrings  ========================================

@@ -103,33 +104,33 @@ Biostrings::getGeneticCode("12")  # Alternative Yeast Nuclear
 # to a "local" variable, rather than retrieving it from the package all the
 # time.

-genCode <- Biostrings::GENETIC_CODE
+GC <- Biostrings::GENETIC_CODE

 # This is a named vector of characters ...

-str(genCode)
+str(GC)

 # ... which also stores the alternative initiation codons TTG and CTG in
 # an attribute of the vector. (Alternative initiation codons sometimes are
-# used instead of ATG to intiate translation, if if not ATG they are translated
-# with fMet.)
+# used instead of ATG to initiate translation; if translation is not initiated
+# at ATG, these are still translated with fMet.)

-attr(genCode, "alt_init_codons")
+attr(GC, "alt_init_codons")

 # But the key to use this vector is in the "names" which we use for subsetting
 # the list of amino acids in whatever way we need.
-names(genCode)
+names(GC)

 # The translation of "TGG" ...
-genCode["TGG"]
+GC["TGG"]

 # All stop codons
-names(genCode)[genCode == "*"]
+names(GC)[GC == "*"]

 # All start codons
-names(genCode)[genCode == "M"] # ... or
-c(names(genCode)[genCode == "M"],
-  attr(genCode, "alt_init_codons"))
+names(GC)[GC == "M"] # ... or
+c(names(GC)[GC == "M"],
+  attr(GC, "alt_init_codons"))


 # ==   2.1  Translate a sequence.  =============================================
@@ -148,7 +149,7 @@ mbp1 <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")

 # The reason for this is that the last character of the file is the letter "A"
 # and not a "\n" line break. This file is exactly how it was sent from the
-# server; I think good, defensive programming practice would have been to
+# NCBI server; I think good, defensive programming practice would have been to
 # include some kind of an end-marker in the file, like a final "\n". This helps
 # us recognize an incomplete transmission. Let's parse the actual sequence from
 # the file, and then check for completeness.
@@ -187,7 +188,7 @@ head(mbp1Codons)

 mbp1AA <- character(834)
 for (i in seq_along(mbp1Codons)) {
-  mbp1AA[i] <- genCode[mbp1Codons[i]]
+  mbp1AA[i] <- GC[mbp1Codons[i]]
 }

 head(mbp1Codons)
@@ -238,7 +239,7 @@ for (i in 1:4) {
                       dimnames(cCube)[[3]][k],
                       sep = "",
                       collapse = "")
-      cCube[i, j, k] <- genCode[myCodon]
+      cCube[i, j, k] <- GC[myCodon]
    }
  }
 }
@@ -249,6 +250,7 @@ cCube["T", "T", "T"] # phenylalanine
 cCube["T", "A", "G"] # stop (amber)


+
 # ==   3.1  Print a Genetic code table  ========================================


@@ -265,13 +267,15 @@ cCube["T", "A", "G"] # stop (amber)

 nuc <- c("T", "C", "A", "G")

-for (i in nuc) {
-  for (k in nuc) {
-    for (j in nuc) {
-      cat(sprintf("%s%s%s: %s   ", i, j, k, cCube[i, j, k]))
+# (calling variables f, s, t to indicate first, second, and third position ...)
+for (f in nuc) {      # first varies in blocks
+  for (t in nuc) {    # third varies in columns
+    for (s in nuc) {  # second varies in rows
+      cat(sprintf("%s%s%s: %s   ", f, s, t, cCube[f, s, t]))
    }
    cat("\n")
  }
+  cat("\n")
 }


@@ -335,7 +339,11 @@ for (i in nuc) {
 #
 #
 # Solution:
-table(table(Biostrings::GENETIC_CODE))
+( x <- table(table(Biostrings::GENETIC_CODE)) )
+
+# confirm: (number of amino acids) x (codons each) must sum to all 64 codons
+sum(x * as.numeric(names(x)))
+


 # [END]