Add section on GPL annotations to RPR-GEO2R

This commit is contained in:
hyginn 2018-01-28 18:23:21 -05:00
parent 359434d863
commit b17a9e202a
4 changed files with 53 additions and 25 deletions

View File

@ -238,7 +238,7 @@ for (i in seq_along(highScoringRanges$lengths)) {
# We computed a T-Coffee alignment at the EBI. msa has no native import function
# so we need to improvise, and it's a bit of a pain to do - but a good
# illustration of startegies to convert data into any kind of object:
# illustration of strategies to convert data into any kind of object:
# - read an .aln file
# - adjust the sequence names
# - convert to msaAAMultipleAlignment object
@ -452,7 +452,7 @@ legend("bottomright",
cex = 0.7,
bty = "n")
# Your alignment is going to be differnte from mine, due to the inclusion of
# Your alignment is going to be different from mine, due to the inclusion of
# MYSPE - but what I see is that MUSCLE gives the highest score overall, and
# achieves this with fewer indels then most, and the lowest number of gaps of
# all algorithms.

View File

@ -27,7 +27,7 @@
#TOC> ==========================================================================
#TOC>
#TOC>
#TOC> Section Title Line
#TOC> -----------------------------------------------------------------
#TOC> 1 A Relational Datamodel in R: review 62
@ -50,7 +50,7 @@
#TOC> 3.3 Create an R script to create your own database 540
#TOC> 3.3.1 Check and validate 560
#TOC> 3.4 Task: submit for credit (part 2/2) 601
#TOC>
#TOC>
#TOC> ==========================================================================
@ -205,7 +205,7 @@ str(philDB)
# go back, re-read, play with it, and ask for help. This is essential.
# === 1.1.1 completing the database
# === 1.1.1 completing the database
# Next I'll add one more person, and create the other two tables:
@ -311,7 +311,7 @@ for (ID in pID) {
# Have a look at the structure of the yeast Mbp1 protein data:
file.edit("./data/MBP1_SACCE.json")
file.show("./data/MBP1_SACCE.json")
# - The whole thing is an array: [ ... ]. This is not necessary for a single
# object, but we will have more objects in other files. And it's perfectly
@ -369,7 +369,7 @@ dbSanitizeSequence(x)
# == 2.3 Create a protein table for our data model =========================
# === 2.3.1 Initialize the database
# === 2.3.1 Initialize the database
# The function dbInit contains all the code to return a list of empty
@ -381,7 +381,7 @@ myDB <- dbInit()
str(myDB)
# === 2.3.2 Add data
# === 2.3.2 Add data
# fromJSON() returns a dataframe that we can readily process to add data
@ -428,7 +428,7 @@ source("./scripts/ABC-createRefDB.R")
str(myDB)
# === 2.4.1 Examples of navigating the database
# === 2.4.1 Examples of navigating the database
# You can look at the contents of the tables in the usual way we access
@ -552,7 +552,7 @@ myDB$taxonomy$species[sel]
# in any of the JSON files. Later you will add more information ...
# === 3.3.1 Check and validate
# === 3.3.1 Check and validate
# Is your protein named according to the pattern "MBP1_MYSPE"? It should be.

View File

@ -32,17 +32,18 @@
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------
#TOC> 1 Preparations 50
#TOC> 2 Loading a GEO Dataset 81
#TOC> 3 Column wise analysis - time points 151
#TOC> 3.1 Task - Comparison of experiments 157
#TOC> 3.2 Grouped Samples 204
#TOC> 4 Row-wise Analysis: Expression Profiles 239
#TOC> 4.1 Task - Read a table of features 274
#TOC> 4.2 Selected Expression profiles 322
#TOC> 5 Differential Expression 363
#TOC> 5.1 Final task: Gene descriptions 487
#TOC> 6 Improving on Discovery by Differential Expression 492
#TOC> 1 Preparations 51
#TOC> 2 Loading a GEO Dataset 82
#TOC> 3 Column wise analysis - time points 152
#TOC> 3.1 Task - Comparison of experiments 158
#TOC> 3.2 Grouped Samples 205
#TOC> 4 Row-wise Analysis: Expression Profiles 240
#TOC> 4.1 Task - Read a table of features 275
#TOC> 4.2 Selected Expression profiles 323
#TOC> 5 Differential Expression 364
#TOC> 5.1 Final task: Gene descriptions 488
#TOC> 6 Improving on Discovery by Differential Expression 493
#TOC> 7 Annotation data 575
#TOC>
#TOC> ==========================================================================
@ -74,8 +75,8 @@ if (! require(GEOquery, quietly=TRUE)) {
}
# Package information:
# library(help = GEOquery) # basic information
# browseVignettes("GEOquery") # available vignettes
# data(package = "GEOquery") # available datasets
# browseVignettes("GEOquery") # available vignettes
# data(package = "GEOquery") # available datasets
# = 2 Loading a GEO Dataset ===============================================
@ -264,7 +265,7 @@ file.show("./data/SGD_features.README.txt")
# Note: the file as downloaded from SGD actually crashed RStudio due to an
# unbalanced quotation mark which caused R to try and read the whole
# of the subsequent file into a single string. This was caused by an
# alias gene name (B"). I have removed this abomination,
# alias gene name (B"). I have removed this abomination
# by editing the file. The version in the ./data directory can be
# read without issues.
@ -571,5 +572,31 @@ for (i in 1:length(myBottomC)) {
# and explore. There is a learning curve - but the payoffs are
# significant.
# = 7 Annotation data =====================================================
#
# Loading feature data "by hand" as we've done above, is usually not necessary
# since GEO provides rich annotations in the GPL platform files, which are
# associated with its Gene Expression Sets files. In the code above,
# we used getGEO("GSE3635", GSEMatrix = TRUE, getGPL = FALSE), and the GPL
# annotations were not loaded. We could use getGPL = TRUE instead ...
GSE3635annot <- getGEO("GSE3635", GSEMatrix = TRUE, getGPL = TRUE)
GSE3635annot <- GSE3635annot[[1]]
# ... and the feature data is then available in the GSE3635@featureData@data
# slot:
str(GSE3635annot@featureData@data)
GSE3635annot@featureData@data[ 1:20 , ]
# ... or we could have identified the GPL file for this set:
GSE3635@annotation # "GPL1914"
# ... and downloaded it directly from NCBI:
GPL1914 <- getGEO("GPL1914")
str(GPL1914)
# ... from which we can get the data - which is however NOT necessarily
# matched to the rows of our expression dataset.
# [END]

View File

@ -16,6 +16,7 @@
#
# TODO:
# Confirm that SS residue numbers are indices
# Set task seed from student number
#
# == DO NOT SIMPLY source() THIS FILE! =======================================
#
@ -403,7 +404,7 @@ om <- c(360 + tor$omega[tor$omega < 0],
hist(om, xlim=c(0,360))
abline(v=180, col="red")
# Note: a cis-peptide bond will have an omega torsion angle of around 0°
# Note: a cis-peptide bond will have an omega torsion angle around 0°
# = 5 H-bond lengths ======================================================