Add section on GPL annotations to RPR-GEO2R
This commit is contained in:
parent
359434d863
commit
b17a9e202a
@ -238,7 +238,7 @@ for (i in seq_along(highScoringRanges$lengths)) {
|
||||
|
||||
# We computed a T-Coffee alignment at the EBI. msa has no native import function
|
||||
# so we need to improvise, and it's a bit of a pain to do - but a good
|
||||
# illustration of startegies to convert data into any kind of object:
|
||||
# illustration of strategies to convert data into any kind of object:
|
||||
# - read an .aln file
|
||||
# - adjust the sequence names
|
||||
# - convert to msaAAMultipleAlignment object
|
||||
@ -452,7 +452,7 @@ legend("bottomright",
|
||||
cex = 0.7,
|
||||
bty = "n")
|
||||
|
||||
# Your alignment is going to be differnte from mine, due to the inclusion of
|
||||
# Your alignment is going to be different from mine, due to the inclusion of
|
||||
# MYSPE - but what I see is that MUSCLE gives the highest score overall, and
|
||||
# achieves this with fewer indels than most, and the lowest number of gaps of
|
||||
# all algorithms.
|
||||
|
@ -311,7 +311,7 @@ for (ID in pID) {
|
||||
|
||||
|
||||
# Have a look at the structure of the yeast Mbp1 protein data:
|
||||
file.edit("./data/MBP1_SACCE.json")
|
||||
file.show("./data/MBP1_SACCE.json")
|
||||
|
||||
# - The whole thing is an array: [ ... ]. This is not necessary for a single
|
||||
# object, but we will have more objects in other files. And it's perfectly
|
||||
|
51
RPR-GEO2R.R
51
RPR-GEO2R.R
@ -32,17 +32,18 @@
|
||||
#TOC>
|
||||
#TOC> Section Title Line
|
||||
#TOC> --------------------------------------------------------------------
|
||||
#TOC> 1 Preparations 50
|
||||
#TOC> 2 Loading a GEO Dataset 81
|
||||
#TOC> 3 Column wise analysis - time points 151
|
||||
#TOC> 3.1 Task - Comparison of experiments 157
|
||||
#TOC> 3.2 Grouped Samples 204
|
||||
#TOC> 4 Row-wise Analysis: Expression Profiles 239
|
||||
#TOC> 4.1 Task - Read a table of features 274
|
||||
#TOC> 4.2 Selected Expression profiles 322
|
||||
#TOC> 5 Differential Expression 363
|
||||
#TOC> 5.1 Final task: Gene descriptions 487
|
||||
#TOC> 6 Improving on Discovery by Differential Expression 492
|
||||
#TOC> 1 Preparations 51
|
||||
#TOC> 2 Loading a GEO Dataset 82
|
||||
#TOC> 3 Column wise analysis - time points 152
|
||||
#TOC> 3.1 Task - Comparison of experiments 158
|
||||
#TOC> 3.2 Grouped Samples 205
|
||||
#TOC> 4 Row-wise Analysis: Expression Profiles 240
|
||||
#TOC> 4.1 Task - Read a table of features 275
|
||||
#TOC> 4.2 Selected Expression profiles 323
|
||||
#TOC> 5 Differential Expression 364
|
||||
#TOC> 5.1 Final task: Gene descriptions 488
|
||||
#TOC> 6 Improving on Discovery by Differential Expression 493
|
||||
#TOC> 7 Annotation data 575
|
||||
#TOC>
|
||||
#TOC> ==========================================================================
|
||||
|
||||
@ -264,7 +265,7 @@ file.show("./data/SGD_features.README.txt")
|
||||
# Note: the file as downloaded from SGD actually crashed RStudio due to an
|
||||
# unbalanced quotation mark which caused R to try and read the whole
|
||||
# of the subsequent file into a single string. This was caused by an
|
||||
# alias gene name (B"). I have removed this abomination,
|
||||
# alias gene name (B"). I have removed this abomination
|
||||
# by editing the file. The version in the ./data directory can be
|
||||
# read without issues.
|
||||
|
||||
@ -571,5 +572,31 @@ for (i in 1:length(myBottomC)) {
|
||||
# and explore. There is a learning curve - but the payoffs are
|
||||
# significant.
|
||||
|
||||
# = 7 Annotation data =====================================================
|
||||
#
|
||||
# Loading feature data "by hand", as we've done above, is usually not necessary
|
||||
# since GEO provides rich annotations in the GPL platform files, which are
|
||||
# associated with its Gene Expression Sets files. In the code above,
|
||||
# we used getGEO("GSE3635", GSEMatrix = TRUE, getGPL = FALSE), and the GPL
|
||||
# annotations were not loaded. We could use getGPL = TRUE instead ...
|
||||
|
||||
GSE3635annot <- getGEO("GSE3635", GSEMatrix = TRUE, getGPL = TRUE)
|
||||
GSE3635annot <- GSE3635annot[[1]]
|
||||
|
||||
# ... and the feature data is available in the GSE3635annot@featureData@data
|
||||
# slot:
|
||||
str(GSE3635annot@featureData@data)
|
||||
GSE3635annot@featureData@data[ 1:20 , ]
|
||||
|
||||
# ... or we could have identified the GPL file for this set:
|
||||
GSE3635@annotation # "GPL1914"
|
||||
|
||||
# ... and downloaded it directly from NCBI:
|
||||
GPL1914 <- getGEO("GPL1914")
|
||||
str(GPL1914)
|
||||
|
||||
# ... from which we can get the data - which is however NOT necessarily
|
||||
# matched to the rows of our expression dataset.
|
||||
|
||||
|
||||
# [END]
|
||||
|
@ -16,6 +16,7 @@
|
||||
#
|
||||
# TODO:
|
||||
# Confirm that SS residue numbers are indices
|
||||
# Set task seed from student number
|
||||
#
|
||||
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
||||
#
|
||||
@ -403,7 +404,7 @@ om <- c(360 + tor$omega[tor$omega < 0],
|
||||
hist(om, xlim=c(0,360))
|
||||
abline(v=180, col="red")
|
||||
|
||||
# Note: a cis-peptide bond will have an omega torsion angle of around 0°
|
||||
# Note: a cis-peptide bond will have an omega torsion angle around 0°
|
||||
|
||||
|
||||
# = 5 H-bond lengths ======================================================
|
||||
|
Loading…
Reference in New Issue
Block a user