bugfix
This commit is contained in:
parent
1d6170f417
commit
67abde8920
@ -29,18 +29,18 @@
|
|||||||
#TOC>
|
#TOC>
|
||||||
#TOC> Section Title Line
|
#TOC> Section Title Line
|
||||||
#TOC> ---------------------------------------------------------
|
#TOC> ---------------------------------------------------------
|
||||||
#TOC> 1 The Biostrings package 57
|
#TOC> 1 The Biostrings Package 52
|
||||||
#TOC> 2 Getting Data into Biostrings Objects 91
|
#TOC> 2 Getting Data into Biostrings Objects 85
|
||||||
#TOC> 3 Working with Biostrings Objects 111
|
#TOC> 3 Working with Biostrings Objects 106
|
||||||
#TOC> 3.1 Properties 114
|
#TOC> 3.1 Properties 109
|
||||||
#TOC> 3.2 Subsetting 151
|
#TOC> 3.2 Subsetting 146
|
||||||
#TOC> 3.3 Operators 163
|
#TOC> 3.3 Operators 158
|
||||||
#TOC> 3.4 Transformations 170
|
#TOC> 3.4 Transformations 165
|
||||||
#TOC> 4 Getting Data out of Biostrings Objects 177
|
#TOC> 4 Getting Data out of Biostrings Objects 172
|
||||||
#TOC> 5 More 186
|
#TOC> 5 More 181
|
||||||
#TOC> 5.1 Views 188
|
#TOC> 5.1 Views 183
|
||||||
#TOC> 5.2 Iranges 200
|
#TOC> 5.2 Iranges 195
|
||||||
#TOC> 5.3 StringSets 206
|
#TOC> 5.3 StringSets 201
|
||||||
#TOC>
|
#TOC>
|
||||||
#TOC> ==========================================================================
|
#TOC> ==========================================================================
|
||||||
|
|
||||||
@ -49,7 +49,7 @@
|
|||||||
# be using more of the biostrings functions.
|
# be using more of the biostrings functions.
|
||||||
|
|
||||||
|
|
||||||
# = 1 The Biostrings package ==============================================
|
# = 1 The Biostrings Package ==============================================
|
||||||
|
|
||||||
|
|
||||||
# First, we install and load the Biostrings package from bioconductor
|
# First, we install and load the Biostrings package from bioconductor
|
||||||
@ -62,13 +62,12 @@ if (! require(Biostrings, quietly=TRUE)) {
|
|||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Examine the ackage information:
|
# Examine the package information:
|
||||||
library(help = Biostrings) # basic information
|
library(help = Biostrings) # basic information
|
||||||
browseVignettes("Biostrings") # available vignettes
|
browseVignettes("Biostrings") # available vignettes
|
||||||
data(package = "Biostrings") # available datasets
|
data(package = "Biostrings") # available datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# At its core, Biostrings objects are "classes" of type XString (you can think
|
# At its core, Biostrings objects are "classes" of type XString (you can think
|
||||||
# of a "class" in R as a special kind of list), that can take on particular
|
# of a "class" in R as a special kind of list), that can take on particular
|
||||||
# flavours for RNA, DNA or amino acid sequence information.
|
# flavours for RNA, DNA or amino acid sequence information.
|
||||||
@ -89,17 +88,18 @@ DNAString("AUG") # Error! No "U" in IUPAC DNA codes
|
|||||||
# Example: read FASTA. Extract sequence. Convert to DNAString object.
|
# Example: read FASTA. Extract sequence. Convert to DNAString object.
|
||||||
x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
|
x <- readLines("./data/S288C_YDL056W_MBP1_coding.fsa")
|
||||||
x <- dbSanitizeSequence(x)
|
x <- dbSanitizeSequence(x)
|
||||||
myDNAseq <- DNAString(x) # takes the nucleotide sequence and conerts into a
|
myDNAseq <- DNAString(x) # takes the nucleotide sequence and converts into a
|
||||||
# object of class DNAstring
|
# object of class DNAstring
|
||||||
|
|
||||||
# Multi FASTA files can be read directly ...
|
# Multi FASTA files can be read directly as a "XStringSet) ...
|
||||||
readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa") # Note: XStringSet
|
(myDNASet <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa"))
|
||||||
|
|
||||||
# ... and if you subset one sequence from the set, you get an XString object
|
# ... and if you subset one sequence from the set, you get an XString object
|
||||||
( x <- readDNAStringSet("./data/S288C_YDL056W_MBP1_coding.fsa")[[1]] )
|
# back again.
|
||||||
|
(Xseq <- myDNASet[[1]])
|
||||||
|
|
||||||
myDNAseq == x
|
myDNAseq == Xseq # the comparison evaluates to TRUE ...
|
||||||
identical(myDNAseq, x)
|
identical(myDNAseq, Xseq) # ... and indeed the objects are deemed identical.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -108,7 +108,7 @@ identical(myDNAseq, x)
|
|||||||
|
|
||||||
# == 3.1 Properties ========================================================
|
# == 3.1 Properties ========================================================
|
||||||
str(myDNAseq)
|
str(myDNAseq)
|
||||||
length(nchar(x)) # This gives you the _number of nucleotides_!
|
length(myDNAseq) # This gives you the _number of nucleotides_!
|
||||||
# By comparison ...
|
# By comparison ...
|
||||||
length(x) # ... is 1: one string only. To get the number of
|
length(x) # ... is 1: one string only. To get the number of
|
||||||
# characters in a string, you need nchar().
|
# characters in a string, you need nchar().
|
||||||
@ -130,15 +130,15 @@ sum(letterFrequency(myDNAseq, c("G", "C"))) / length(myDNAseq) # GC contents
|
|||||||
dinucleotideFrequency(myDNAseq)
|
dinucleotideFrequency(myDNAseq)
|
||||||
barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
|
barplot(sort(dinucleotideFrequency(myDNAseq)), cex.names = 0.5)
|
||||||
|
|
||||||
(x <- trinucleotideFrequency(myDNAseq))
|
(triNuc <- trinucleotideFrequency(myDNAseq))
|
||||||
barplot(sort(x), col="#4499EE33")
|
barplot(sort(triNuc), col="#4499EE33")
|
||||||
x[x == max(x)]
|
triNuc[triNuc == max(triNuc)]
|
||||||
x[x == min(x)]
|
triNuc[triNuc == min(triNuc)]
|
||||||
max(x) / min(x) # AAA is more than 13 times as frequent as CGT
|
max(triNuc) / min(triNuc) # AAA is more than 13 times as frequent as CGT
|
||||||
|
|
||||||
# compare to a shuffled sequence:
|
# compare to a shuffled sequence:
|
||||||
(x <- trinucleotideFrequency(sample(myDNAseq)))
|
(triNuc <- trinucleotideFrequency(sample(myDNAseq)))
|
||||||
barplot(sort(x), col="#EEEE4433", add = TRUE)
|
barplot(sort(triNuc), col="#EEEE4433", add = TRUE)
|
||||||
|
|
||||||
# Interpret this plot.
|
# Interpret this plot.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user