add fetchMSAmotif() function
This commit is contained in:
		
							
								
								
									
										44
									
								
								.utilities.R
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								.utilities.R
									
									
									
									
									
								
							| @@ -133,6 +133,50 @@ waitTimer <- function(t, nIntervals = 50) { | |||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
fetchMSAmotif <- function(ali, mot) {
  # Retrieve the block of alignment columns from ali that spans the motif mot.
  # mot is searched in the gap-free sequences of ali; the first sequence that
  # contains it defines the column range, which is then cut from ali@unmasked.
  # Parameters:
  #    ali        MsaAAMultipleAlignment, MsaDNAMultipleAlignment or
  #               MsaRNAMultipleAlignment object
  #    mot  chr   a single, non-empty string that occurs in the gap-free form
  #               of at least one sequence of ali. It is handed to grep() and
  #               regexpr() as the pattern, so regular expression
  #               metacharacters are interpreted.
  # Value:  the result of subseq() applied to ali@unmasked for the columns
  #         from the first to the last residue of the match (an AAStringSet
  #         for an amino acid alignment).
  # Errors: stop() is called if ali or mot fail validation, or if mot does
  #         not match any sequence in ali.

  # inherits() gives a single TRUE/FALSE no matter how many elements
  # class(ali) has; comparing class() with != does not.
  msaClasses <- c("MsaAAMultipleAlignment",
                  "MsaDNAMultipleAlignment",
                  "MsaRNAMultipleAlignment")
  if (! inherits(ali, msaClasses)) {
    stop("ali has to be an msa multiple alignment object.")
  }

  if (! is.character(mot)) {
    stop("mot has to be a character object.")
  }

  # An empty pattern matches everywhere with match length 0 and would produce
  # an invalid column range; NA or several patterns are ambiguous.
  if (length(mot) != 1 || is.na(mot) || nchar(mot) == 0) {
    stop("mot has to be a single, non-empty string.")
  }

  aliSeq <- as.character(ali)       # aligned sequences, hyphens included
  rawSeq <- gsub("-", "", aliSeq)   # pure sequences, no hyphens

  idx <- grep(mot, rawSeq)[1]       # first sequence containing mot. If there
                                    # is no match, idx becomes NA
  if (is.na(idx)) {
    stop("mot is not a subsequence in ali.")
  }

  # Match range, in residue numbers of the gap-free sequence
  m <- regexpr(mot, rawSeq[idx])
  motifStart <- as.integer(m)
  motifEnd <- motifStart + attr(m, "match.length") - 1L

  # Running residue count along the aligned sequence: hyphens do not advance
  # the count, so the first column where the count equals k is the column
  # that holds residue k.
  resCount <- cumsum(unlist(strsplit(aliSeq[idx], "")) != "-")

  subseq(ali@unmasked,
         start = which(resCount == motifStart)[1],
         end   = which(resCount == motifEnd)[1])
}
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # ====== PDB ID selection ====================================================== | # ====== PDB ID selection ====================================================== | ||||||
|  |  | ||||||
| selectPDBrep <- function(n) { | selectPDBrep <- function(n) { | ||||||
|   | |||||||
| @@ -3,12 +3,13 @@ | |||||||
| # Purpose:  A Bioinformatics Course: | # Purpose:  A Bioinformatics Course: | ||||||
| #              R code accompanying the BIN-ALI-MSA unit. | #              R code accompanying the BIN-ALI-MSA unit. | ||||||
| # | # | ||||||
| # Version:  1.0 | # Version:  1.1 | ||||||
| # | # | ||||||
| # Date:     2017  10  23 | # Date:     2017  10 | ||||||
| # Author:   Boris Steipe (boris.steipe@utoronto.ca) | # Author:   Boris Steipe (boris.steipe@utoronto.ca) | ||||||
| # | # | ||||||
| # Versions: | # Versions: | ||||||
|  | #           1.1    Added fetchMSAmotif() | ||||||
| #           1.0    Fully refactored and rewritten for 2017 | #           1.0    Fully refactored and rewritten for 2017 | ||||||
| #           0.1    First code copied from 2016 material. | #           0.1    First code copied from 2016 material. | ||||||
| # | # | ||||||
| @@ -53,7 +54,7 @@ | |||||||
| # You need to reload your protein database, including changes that might | # You need to reload your protein database, including changes that might | ||||||
| # have been made to the reference files. If you have worked with the | # have been made to the reference files. If you have worked with the | ||||||
| # prerequisite units, you should have a script named "makeProteinDB.R" | # prerequisite units, you should have a script named "makeProteinDB.R" | ||||||
| # that will create the myDB object with aprotein and feature database. | # that will create the myDB object with a protein and feature database. | ||||||
| # Ask for advice if not. | # Ask for advice if not. | ||||||
| source("makeProteinDB.R") | source("makeProteinDB.R") | ||||||
|  |  | ||||||
| @@ -587,6 +588,12 @@ x <- cumsum(x) | |||||||
|  |  | ||||||
| (motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd)) | (motifAli <- subseq(msaM@unmasked, start = aliStart, end = aliEnd)) | ||||||
|  |  | ||||||
|  | # It is convenient to have this packaged as a function, so I have added | ||||||
|  | # fetchMSAmotif() to the .utilities.R script. Try it: | ||||||
|  |  | ||||||
|  | wing <- "HEKVQGGFGKYQGTWV" # the MBP1_SACCE APSES "wing" sequence | ||||||
|  | writeALN(fetchMSAmotif(msaM, wing)) | ||||||
|  |  | ||||||
|  |  | ||||||
| # ==   6.2  Plot a Sequence Logo  ============================================== | # ==   6.2  Plot a Sequence Logo  ============================================== | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user