bugfix
This commit is contained in:
		@@ -29,18 +29,18 @@
 | 
				
			|||||||
#TOC> 
 | 
					#TOC> 
 | 
				
			||||||
#TOC>   Section  Title                                     Line
 | 
					#TOC>   Section  Title                                     Line
 | 
				
			||||||
#TOC> ---------------------------------------------------------
 | 
					#TOC> ---------------------------------------------------------
 | 
				
			||||||
#TOC>   1        The Biostrings package                      57
 | 
					#TOC>   1        The Biostrings Package                      52
 | 
				
			||||||
#TOC>   2        Getting Data into Biostrings Objects        91
 | 
					#TOC>   2        Getting Data into Biostrings Objects        85
 | 
				
			||||||
#TOC>   3        Working with Biostrings Objects            111
 | 
					#TOC>   3        Working with Biostrings Objects            106
 | 
				
			||||||
#TOC>   3.1      Properties                                 114
 | 
					#TOC>   3.1      Properties                                 109
 | 
				
			||||||
#TOC>   3.2      Subsetting                                 151
 | 
					#TOC>   3.2      Subsetting                                 146
 | 
				
			||||||
#TOC>   3.3      Operators                                  163
 | 
					#TOC>   3.3      Operators                                  158
 | 
				
			||||||
#TOC>   3.4      Transformations                            170
 | 
					#TOC>   3.4      Transformations                            165
 | 
				
			||||||
#TOC>   4        Getting Data out of Biostrings Objects     177
 | 
					#TOC>   4        Getting Data out of Biostrings Objects     172
 | 
				
			||||||
#TOC>   5        More                                       186
 | 
					#TOC>   5        More                                       181
 | 
				
			||||||
#TOC>   5.1      Views                                      188
 | 
					#TOC>   5.1      Views                                      183
 | 
				
			||||||
#TOC>   5.2      Iranges                                    200
 | 
					#TOC>   5.2      Iranges                                    195
 | 
				
			||||||
#TOC>   5.3      StringSets                                 206
 | 
					#TOC>   5.3      StringSets                                 201
 | 
				
			||||||
#TOC> 
 | 
					#TOC> 
 | 
				
			||||||
#TOC> ==========================================================================
 | 
					#TOC> ==========================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -49,7 +49,7 @@
 | 
				
			|||||||
# be using more of the biostrings functions.
 | 
					# be using more of the biostrings functions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# =    1  The Biostrings package  ==============================================
 | 
					# =    1  The Biostrings Package  ==============================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# First, we install and load the Biostrings package from bioconductor
 | 
					# First, we install and load the Biostrings package from bioconductor
 | 
				
			||||||
@@ -62,13 +62,12 @@ if (! require(Biostrings, quietly=TRUE)) {
 | 
				
			|||||||
  library(Biostrings)
 | 
					  library(Biostrings)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Examine the ackage information:
 | 
					# Examine the package information:
 | 
				
			||||||
library(help = Biostrings)       # basic information
 | 
					library(help = Biostrings)       # basic information
 | 
				
			||||||
browseVignettes("Biostrings")    # available vignettes
 | 
					browseVignettes("Biostrings")    # available vignettes
 | 
				
			||||||
data(package = "Biostrings")     # available datasets
 | 
					data(package = "Biostrings")     # available datasets
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
# At its core, Biostrings objects are "classes" of type XString (you can think
 | 
					# At its core, Biostrings objects are "classes" of type XString (you can think
 | 
				
			||||||
# of a "class" in R as a special kind of list), that can take on particular
 | 
					# of a "class" in R as a special kind of list), that can take on particular
 | 
				
			||||||
# flavours for RNA, DNA or amino acid sequence information.
 | 
					# flavours for RNA, DNA or amino acid sequence information.
 | 
				
			||||||
@@ -89,17 +88,18 @@ DNAString("AUG")  # Error! No "U" in IUPAC DNA codes
 | 
				
			|||||||
# Example: read FASTA. Extract sequence. Convert to DNAString object.
 | 
					# Example: read FASTA. Extract sequence. Convert to DNAString object.
 | 
				
			||||||
x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
 | 
					x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
 | 
				
			||||||
x <- dbSanitizeSequence(x)
 | 
					x <- dbSanitizeSequence(x)
 | 
				
			||||||
myDNAseq <- DNAString(x)   # takes the nucleotide sequence and conerts into a
 | 
					myDNAseq <- DNAString(x)   # takes the nucleotide sequence and converts into a
 | 
				
			||||||
                           # object of class DNAstring
 | 
					                           # object of class DNAstring
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Multi FASTA files can be read directly ...
 | 
					# Multi FASTA files can be read directly as a "XStringSet) ...
 | 
				
			||||||
readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa") # Note: XStringSet
 | 
					(myDNASet <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ... and if you subset one sequence from the set, you get an XString object
 | 
					# ... and if you subset one sequence from the set, you get an XString object
 | 
				
			||||||
(  x <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa")[[1]]  )
 | 
					# back again.
 | 
				
			||||||
 | 
					(Xseq <- myDNASet[[1]])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
myDNAseq == x
 | 
					myDNAseq == Xseq           # the comparison evaluates to TRUE ...
 | 
				
			||||||
identical(myDNAseq, x)
 | 
					identical(myDNAseq, Xseq)  # ... and indeed the objects are deemed identical.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -108,7 +108,7 @@ identical(myDNAseq, x)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# ==   3.1  Properties  ========================================================
 | 
					# ==   3.1  Properties  ========================================================
 | 
				
			||||||
str(myDNAseq)
 | 
					str(myDNAseq)
 | 
				
			||||||
length(nchar(x))  # This gives you the _number of nucleotides_!
 | 
					length(myDNAseq)  # This gives you the _number of nucleotides_!
 | 
				
			||||||
                  # By comparison ...
 | 
					                  # By comparison ...
 | 
				
			||||||
length(x)         # ... is 1: one string only. To get the number of
 | 
					length(x)         # ... is 1: one string only. To get the number of
 | 
				
			||||||
                  # characters in a string, you need nchar().
 | 
					                  # characters in a string, you need nchar().
 | 
				
			||||||
@@ -130,15 +130,15 @@ sum(letterFrequency(myDNAseq, c("G", "C"))) / length(myDNAseq) # GC contents
 | 
				
			|||||||
dinucleotideFrequency(myDNAseq)
 | 
					dinucleotideFrequency(myDNAseq)
 | 
				
			||||||
barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
 | 
					barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
(x <- trinucleotideFrequency(myDNAseq))
 | 
					(triNuc <- trinucleotideFrequency(myDNAseq))
 | 
				
			||||||
barplot(sort(x), col="#4499EE33")
 | 
					barplot(sort(triNuc), col="#4499EE33")
 | 
				
			||||||
x[x == max(x)]
 | 
					triNuc[triNuc == max(triNuc)]
 | 
				
			||||||
x[x == min(x)]
 | 
					triNuc[triNuc == min(triNuc)]
 | 
				
			||||||
max(x) / min(x)  # AAA is more than 13 times as frequent as CGT
 | 
					max(triNuc) / min(triNuc)  # AAA is more than 13 times as frequent as CGT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# compare to a shuffled sequence:
 | 
					# compare to a shuffled sequence:
 | 
				
			||||||
(x <- trinucleotideFrequency(sample(myDNAseq)))
 | 
					(triNuc <- trinucleotideFrequency(sample(myDNAseq)))
 | 
				
			||||||
barplot(sort(x), col="#EEEE4433", add = TRUE)
 | 
					barplot(sort(triNuc), col="#EEEE4433", add = TRUE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Interpret this plot.
 | 
					# Interpret this plot.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user