bugfix

2017-11-03 15:08:17 -04:00
parent 1d6170f417
commit 67abde8920
1 changed files with 29 additions and 29 deletions
--- a/RPR-Biostrings.R
+++ b/RPR-Biostrings.R
@@ -29,18 +29,18 @@
 #TOC> 
 #TOC>   Section  Title                                     Line
 #TOC> ---------------------------------------------------------
-#TOC>   1        The Biostrings package                      57
+#TOC>   1        The Biostrings Package                      52
-#TOC>   2        Getting Data into Biostrings Objects        91
+#TOC>   2        Getting Data into Biostrings Objects        85
-#TOC>   3        Working with Biostrings Objects            111
+#TOC>   3        Working with Biostrings Objects            106
-#TOC>   3.1      Properties                                 114
+#TOC>   3.1      Properties                                 109
-#TOC>   3.2      Subsetting                                 151
+#TOC>   3.2      Subsetting                                 146
-#TOC>   3.3      Operators                                  163
+#TOC>   3.3      Operators                                  158
-#TOC>   3.4      Transformations                            170
+#TOC>   3.4      Transformations                            165
-#TOC>   4        Getting Data out of Biostrings Objects     177
+#TOC>   4        Getting Data out of Biostrings Objects     172
-#TOC>   5        More                                       186
+#TOC>   5        More                                       181
-#TOC>   5.1      Views                                      188
+#TOC>   5.1      Views                                      183
-#TOC>   5.2      Iranges                                    200
+#TOC>   5.2      Iranges                                    195
-#TOC>   5.3      StringSets                                 206
+#TOC>   5.3      StringSets                                 201
 #TOC> 
 #TOC> ==========================================================================
@@ -49,7 +49,7 @@
 # be using more of the biostrings functions.
-# =    1  The Biostrings package  ==============================================
+# =    1  The Biostrings Package  ==============================================
 # First, we install and load the Biostrings package from bioconductor
@@ -62,13 +62,12 @@ if (! require(Biostrings, quietly=TRUE)) {
  library(Biostrings)
 }
-# Examine the ackage information:
+# Examine the package information:
 library(help = Biostrings)       # basic information
 browseVignettes("Biostrings")    # available vignettes
 data(package = "Biostrings")     # available datasets
 # At its core, Biostrings objects are "classes" of type XString (you can think
 # of a "class" in R as a special kind of list), that can take on particular
 # flavours for RNA, DNA or amino acid sequence information.
@@ -89,17 +88,18 @@ DNAString("AUG")  # Error! No "U" in IUPAC DNA codes
 # Example: read FASTA. Extract sequence. Convert to DNAString object.
 x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
 x <- dbSanitizeSequence(x)
-myDNAseq <- DNAString(x)   # takes the nucleotide sequence and conerts into a
+myDNAseq <- DNAString(x)   # takes the nucleotide sequence and converts into a
                           # object of class DNAstring
-# Multi FASTA files can be read directly ...
+# Multi FASTA files can be read directly as a "XStringSet) ...
-readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa") # Note: XStringSet
+(myDNASet <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa"))
 # ... and if you subset one sequence from the set, you get an XString object
-(  x <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa")[[1]]  )
+# back again.
 (Xseq <- myDNASet[[1]])
-myDNAseq == x
+myDNAseq == Xseq           # the comparison evaluates to TRUE ...
-identical(myDNAseq, x)
+identical(myDNAseq, Xseq)  # ... and indeed the objects are deemed identical.
@@ -108,7 +108,7 @@ identical(myDNAseq, x)
 # ==   3.1  Properties  ========================================================
 str(myDNAseq)
-length(nchar(x))  # This gives you the _number of nucleotides_!
+length(myDNAseq)  # This gives you the _number of nucleotides_!
                  # By comparison ...
 length(x)         # ... is 1: one string only. To get the number of
                  # characters in a string, you need nchar().
@@ -130,15 +130,15 @@ sum(letterFrequency(myDNAseq, c("G", "C"))) / length(myDNAseq) # GC contents
 dinucleotideFrequency(myDNAseq)
 barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
-(x <- trinucleotideFrequency(myDNAseq))
+(triNuc <- trinucleotideFrequency(myDNAseq))
-barplot(sort(x), col="#4499EE33")
+barplot(sort(triNuc), col="#4499EE33")
-x[x == max(x)]
+triNuc[triNuc == max(triNuc)]
-x[x == min(x)]
+triNuc[triNuc == min(triNuc)]
-max(x) / min(x)  # AAA is more than 13 times as frequent as CGT
+max(triNuc) / min(triNuc)  # AAA is more than 13 times as frequent as CGT
 # compare to a shuffled sequence:
-(x <- trinucleotideFrequency(sample(myDNAseq)))
+(triNuc <- trinucleotideFrequency(sample(myDNAseq)))
-barplot(sort(x), col="#EEEE4433", add = TRUE)
+barplot(sort(triNuc), col="#EEEE4433", add = TRUE)
 # Interpret this plot.