bch441-work-abc-units/BIN-ALI-BLAST.R

101 lines
3.3 KiB
R
Raw Normal View History

2017-09-12 20:09:20 +00:00
# BIN-ALI-BLAST.R
#
# Purpose: A Bioinformatics Course:
# R code accompanying the BIN-ALI-BLAST unit.
#
# Version: 1.2
2017-09-12 20:09:20 +00:00
#
# Date: 2017 10 - 2019 01
2017-09-12 20:09:20 +00:00
# Author: Boris Steipe (boris.steipe@utoronto.ca)
#
# Versions:
# 1.2 Change from require() to requireNamespace(),
# use <package>::<function>() idiom throughout
2017-11-16 16:47:53 +00:00
# 1.1 Fixed parsing logic.
2017-10-23 16:37:09 +00:00
# 1.0 First live version 2017.
2017-09-12 20:09:20 +00:00
# 0.1 First code copied from 2016 material.
2017-10-23 16:37:09 +00:00
#
2017-09-12 20:09:20 +00:00
#
# TODO:
#
#
# == DO NOT SIMPLY source() THIS FILE! =======================================
2017-10-23 16:37:09 +00:00
#
2017-09-12 20:09:20 +00:00
# If there are portions you don't understand, use R's help system, Google for an
# answer, or ask your instructor. Don't continue if you don't understand what's
# going on. That's not how it works ...
#
# ==============================================================================
2017-10-23 16:37:09 +00:00
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> ---------------------------------------------------
#TOC> 1 Defining the APSES domain 42
#TOC> 2 Executing the BLAST search 64
#TOC> 3 Analysing results 86
#TOC>
2017-10-23 16:37:09 +00:00
#TOC> ==========================================================================
# = 1 Defining the APSES domain ===========================================
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# Load your protein database. The code that follows reads from an object named
# myDB, which is presumably created by this script - confirm in makeProteinDB.R.
source("makeProteinDB.R")
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# Get the APSES domain sequence of MBP1_MYSPE from its feature annotation. (You
# have entered this data in the BIN-ALI-Optimal_sequence_alignment unit.)
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# Retrieve the APSES domain sequence of your MBP1 protein from myDB. Each
# lookup is printed (outer parentheses) and then checked: the following steps
# only make sense if exactly one ID was found, so we stop with a clear message
# instead of silently passing empty or recycled vectors on to the BLAST query.

# Step 1: database ID of your protein. Edit the placeholder in the name so
# that it matches the name of your protein in the database.
(proID <- myDB$protein$ID[myDB$protein$name == "MBP1_<MYSSPE>"])  # <<< EDIT
if (length(proID) != 1 || is.na(proID)) {
  stop("Expected exactly one protein ID. Did you edit the protein name?")
}

# Step 2: ID of the feature named "APSES fold".
(ftrID <- myDB$feature$ID[myDB$feature$name == "APSES fold"])
if (length(ftrID) != 1 || is.na(ftrID)) {
  stop("Expected exactly one feature named \"APSES fold\" in myDB$feature.")
}

# Step 3: ID of the annotation that links this feature to this protein.
(fanID <- myDB$annotation$ID[myDB$annotation$proteinID == proID &
                             myDB$annotation$featureID == ftrID])
if (length(fanID) != 1 || is.na(fanID)) {
  stop("Expected exactly one \"APSES fold\" annotation for this protein.")
}

# Step 4: start and end of the annotated region; they are used as the
# substring bounds below.
(start <- myDB$annotation$start[myDB$annotation$ID == fanID])
(end   <- myDB$annotation$end[myDB$annotation$ID == fanID])

# Step 5: cut the annotated region out of the protein sequence.
(apses <- substr(myDB$protein$sequence[myDB$protein$ID == proID],
                 start,
                 end))
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# The MYSPE "apses" sequence is the sequence that we will use for our reverse
# BLAST search.
2017-09-12 20:09:20 +00:00
# = 2 Executing the BLAST search ==========================================
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# The ./scripts/BLAST.R code defines two functions to access the BLAST interface
# through its Web API, and to parse results. Have a look at the script, then
# source it:
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
source("./scripts/BLAST.R")  # makes BLAST() available for the search below
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# Use BLAST() to find the best match to the MYSPE APSES domain in Saccharomyces
# cerevisiae:
2017-09-12 20:09:20 +00:00
2017-11-16 16:47:53 +00:00
# Run the search and keep the result. The notes on nHits and E are assumptions
# based on the argument names - confirm them in ./scripts/BLAST.R.
BLASTresults <- BLAST(
  apses,                         # query: MYSPE APSES domain sequence
  db     = "refseq_protein",     # database to search in
  nHits  = 10,                   # presumably the maximum number of hits
  E      = 0.01,                 # presumably the E-value cutoff
  limits = "txid559292[ORGN]"    # S. cerevisiae S288c
)
2017-09-12 20:09:20 +00:00
# Count the hits that were returned. There should be at least one hit there.
# Ask for advice if the count is 0 or if this step fails.
length(BLASTresults$hits)
2017-09-12 20:09:20 +00:00
# = 3 Analysing results ===================================================
2017-09-12 20:09:20 +00:00
2017-11-16 16:47:53 +00:00
# Get the top hit: extract the first element of the hit list and print it.
# (Presumably the hits are ordered best-first - see ./scripts/BLAST.R.)
(topHit <- BLASTresults$hits[[1]])
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# What is the RefSeq ID of the top hit?
topHit$accession
2017-09-12 20:09:20 +00:00
2017-10-23 16:37:09 +00:00
# If this is "NP_010227.1" you have confirmed the RBM (Reciprocal Best Match)
# of the MYSPE APSES domain. If it is not, ask me for advice.
2017-09-12 20:09:20 +00:00
# [END]