2020-09-18 11:56:30 +00:00
|
|
|
# tocID <- "BIN-ALI-BLAST.R"
|
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
# Purpose: A Bioinformatics Course:
|
|
|
|
# R code accompanying the BIN-ALI-BLAST unit.
|
|
|
|
#
|
2021-09-16 05:29:19 +00:00
|
|
|
# ==============================================================================
|
|
|
|
#
|
|
|
|
# S T O P :
|
|
|
|
# =========
|
|
|
|
#
|
|
|
|
# 2021
|
|
|
|
# UPDATE WARNING!
|
|
|
|
# ---------------
|
|
|
|
#
|
|
|
|
# This file has not yet been updated for coursework. You may inspect it, but
|
|
|
|
# do NOT use it for actual coursework as long as this warning is here. Parts
|
|
|
|
# of the code and data will change, and if you use this outdated code it will
|
|
|
|
# break your setup and workflow.
|
|
|
|
#
|
|
|
|
# ==============================================================================
|
|
|
|
#
|
2020-09-25 11:50:42 +00:00
|
|
|
# Version: 1.3
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
2020-09-25 11:50:42 +00:00
|
|
|
# Date: 2017-10 - 2020-09
|
2017-09-12 20:09:20 +00:00
|
|
|
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
|
|
|
#
|
|
|
|
# Versions:
|
2020-09-25 11:50:42 +00:00
|
|
|
# 1.3 2020 Maintenance
|
2019-01-08 07:11:25 +00:00
|
|
|
# 1.2 Change from require() to requireNamespace(),
|
|
|
|
# use <package>::<function>() idiom throughout
|
2017-11-16 16:47:53 +00:00
|
|
|
# 1.1 Fixed parsing logic.
|
2017-10-23 16:37:09 +00:00
|
|
|
# 1.0 First live version 2017.
|
2017-09-12 20:09:20 +00:00
|
|
|
# 0.1 First code copied from 2016 material.
|
2017-10-23 16:37:09 +00:00
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
#
|
|
|
|
# TODO:
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# == DO NOT SIMPLY source() THIS FILE! =======================================
|
2017-10-23 16:37:09 +00:00
|
|
|
#
|
2017-09-12 20:09:20 +00:00
|
|
|
# If there are portions you don't understand, use R's help system, Google for an
|
|
|
|
# answer, or ask your instructor. Don't continue if you don't understand what's
|
|
|
|
# going on. That's not how it works ...
|
|
|
|
#
|
|
|
|
# ==============================================================================
|
2017-10-29 03:05:53 +00:00
|
|
|
|
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
#TOC> ==========================================================================
|
2021-09-16 05:29:19 +00:00
|
|
|
#TOC>
|
2019-01-08 07:11:25 +00:00
|
|
|
#TOC> Section Title Line
|
|
|
|
#TOC> ---------------------------------------------------
|
2020-09-25 11:50:42 +00:00
|
|
|
#TOC> 1 Defining the APSES domain 43
|
|
|
|
#TOC> 2 Executing the BLAST search 73
|
|
|
|
#TOC> 3 Analysing results 95
|
2021-09-16 05:29:19 +00:00
|
|
|
#TOC>
|
2017-10-23 16:37:09 +00:00
|
|
|
#TOC> ==========================================================================
|
|
|
|
|
|
|
|
|
2019-01-08 07:11:25 +00:00
|
|
|
# = 1 Defining the APSES domain ===========================================
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# Load your protein database
|
|
|
|
source("makeProteinDB.R")
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2020-09-25 11:50:42 +00:00
|
|
|
# Get the APSES domain sequence via your MBP1_MYSPE feature annotation. (You
|
|
|
|
# have entered this data into your database in the
|
|
|
|
# BIN-ALI-Optimal_sequence_alignment unit.)
|
|
|
|
|
|
|
|
( myOrth <- sprintf("MBP1_%s", biCode(MYSPE)) ) # If this is not the correct
|
|
|
|
# name of the Mbp1 orthologue
|
|
|
|
# in your protein
|
|
|
|
# database, DON'T continue. We
|
|
|
|
# need to fix this problem.
|
|
|
|
# Get in touch.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2020-09-25 11:50:42 +00:00
|
|
|
# Walk the database tables to extract the APSES domain sequence of myOrth.
# Each assignment is wrapped in ( ) so that the assigned value is also printed.
# NOTE(review): the steps below assume that every lookup returns exactly one
# value - confirm this from the printed output before continuing.
# ID of the protein whose name equals myOrth
(proID <- myDB$protein$ID[myDB$protein$name == myOrth])
|
2017-10-23 16:37:09 +00:00
|
|
|
# ID of the feature named "APSES fold"
(ftrID <- myDB$feature$ID[myDB$feature$name == "APSES fold"])
|
|
|
|
# ID of the annotation that matches both this protein and this feature
(fanID <- myDB$annotation$ID[myDB$annotation$proteinID == proID &
|
|
|
|
myDB$annotation$featureID == ftrID])
|
|
|
|
# "start" value stored for that annotation
(start <- myDB$annotation$start[myDB$annotation$ID == fanID])
|
|
|
|
# "end" value stored for that annotation
(end <- myDB$annotation$end[myDB$annotation$ID == fanID])
|
|
|
|
# Substring of the protein's sequence from position start to position end
(apses <- substr(myDB$protein$sequence[myDB$protein$ID == proID],
|
|
|
|
start,
|
|
|
|
end))
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# The MYSPE "apses" sequence is the sequence that we will use for our reverse
|
|
|
|
# BLAST search.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
|
2019-01-08 07:11:25 +00:00
|
|
|
# = 2 Executing the BLAST search ==========================================
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# The ./scripts/BLAST.R code defines two functions to access the BLAST interface
|
|
|
|
# through its Web API, and to parse results. Have a look at the script, then
|
|
|
|
# source it:
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
source("./scripts/BLAST.R")
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# Use BLAST() to find the best match to the MYSPE APSES domain in Saccharomyces
|
|
|
|
# cerevisiae:
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-11-16 16:47:53 +00:00
|
|
|
BLASTresults <- BLAST(apses, # MYSPE APSES domain sequence
|
|
|
|
db = "refseq_protein", # database to search in
|
|
|
|
nHits = 10, # presumably the max. number of hits to return - confirm in ./scripts/BLAST.R
|
|
|
|
E = 0.01, # presumably the E-value cutoff - confirm in ./scripts/BLAST.R
|
|
|
|
limits = "txid559292[ORGN]") # S. cerevisiae S288c
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
|
2019-01-08 07:11:25 +00:00
|
|
|
length(BLASTresults$hits) # There should be at least one hit there. Ask for
|
|
|
|
# advice in case this step fails.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
|
2019-01-08 07:11:25 +00:00
|
|
|
# = 3 Analysing results ===================================================
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-11-16 16:47:53 +00:00
|
|
|
(topHit <- BLASTresults$hits[[1]]) # Get the top hit
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# What is the RefSeq ID of the top hit?
|
|
|
|
topHit$accession
|
2017-09-12 20:09:20 +00:00
|
|
|
|
2017-10-23 16:37:09 +00:00
|
|
|
# If this is "NP_010227.1" you have confirmed the RBM of the MYSPE apses
|
|
|
|
# domain. If it is not, ask me for advice.
|
2017-09-12 20:09:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# [END]
|