Changed alignment library

This commit is contained in:
Harrison Deng 2023-04-20 12:24:26 -05:00
parent f22070e8c3
commit 04f730cacb
5 changed files with 46 additions and 34 deletions

View File

@ -1,33 +1,43 @@
from skbio.alignment import StripedSmithWaterman
from Bio.Align import PairwiseAligner, substitution_matrices
from exceptions import UnexpectedAlignmentResult
from datatypes import Annotation
from datatypes import AlignedSequence
def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
annotation_pairs = {}
align = StripedSmithWaterman(sequence)
for id, annot in annotations.items():
alignment = align(annot.sequence)
score, aligned_start, aligned_end = (
alignment.optimal_alignment_score,
alignment.query_begin,
alignment.query_end,
)
annotation_pairs[id] = Annotation(
id,
annot.sequence,
annot.name,
annot.description,
annot.start,
annot.end,
annot.score,
), Annotation(
id,
alignment.aligned_target_sequence,
annot.name,
annot.description,
aligned_start,
aligned_end,
score,
)
aligner = PairwiseAligner(one_alignment_only=True)
aligner.mode = "local"
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
aligner.extend_gap_score = -1
aligner.open_gap_score = -11
for id, query in queries.items():
try:
alignments = aligner.align(sequence, query.sequence)
except ValueError:
continue
if len(alignments) > 1:
raise UnexpectedAlignmentResult(
"More than one alignment resulted from a single query."
)
for alignment in alignments:
score, query_aligned = (alignment.score, alignment.aligned[0][0])
aligned_start, aligned_end = query_aligned
annotation_pairs[id] = AlignedSequence(
id,
query.sequence,
query.name,
query.description,
query.start,
query.end,
query.score,
), AlignedSequence(
id,
alignment.query,
query.name,
query.description,
aligned_start,
aligned_end,
score,
)
return annotation_pairs

View File

@ -1,7 +1,7 @@
import os
import argparse
from Bio import SeqIO
from aligner import generate_annotated_positions
from aligner import protein_align_many_to_one_ssw
from persistence import read_annotations_from_csv, save_alignments_to_csv
@ -78,7 +78,7 @@ def main():
)
with open(args.sequence, "r") as sequence_fd:
for sequence in SeqIO.parse(sequence_fd, "fasta"):
aligned_annotations = generate_annotated_positions(
aligned_annotations = protein_align_many_to_one_ssw(
str(sequence.seq), given_annotations
)
save_alignments_to_csv(

View File

@ -1,4 +1,4 @@
class Annotation:
class AlignedSequence:
def __init__(
self,
id: str,

2
bmlsa/exceptions.py Normal file
View File

@ -0,0 +1,2 @@
class UnexpectedAlignmentResult(Exception):
pass

View File

@ -1,5 +1,5 @@
import csv
from datatypes import Annotation
from datatypes import AlignedSequence
def read_annotations_from_csv(
@ -37,7 +37,7 @@ def read_annotations_from_csv(
start = row[start_ind] if start_header else None
end = row[end_ind] if end_header else None
sequence = row[sequence_ind]
annotations[id] = Annotation(
annotations[id] = AlignedSequence(
id,
sequence,
name,
@ -49,7 +49,7 @@ def read_annotations_from_csv(
def save_alignments_to_csv(
aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str
aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
):
with open(output_path, "w") as output_fd:
writer = csv.writer(output_fd)