bmlsa/bmlsa/aligner.py
2023-04-20 15:16:34 -05:00

44 lines
1.5 KiB
Python

from Bio.Align import PairwiseAligner, substitution_matrices
from exceptions import UnexpectedAlignmentResult
from datatypes import AlignedSequence
def protein_align_many_to_one_ssw(
    sequence: str, queries: dict[str, AlignedSequence]
) -> dict[str, tuple[AlignedSequence, AlignedSequence]]:
    """Locally align each query against ``sequence`` and collect the hits.

    Every query is aligned with a Biopython ``PairwiseAligner`` in local
    mode, scored with BLOSUM62, a gap-open score of -11 and a gap-extend
    score of -1.

    Args:
        sequence: The sequence every query is aligned against (the
            alignment target).
        queries: Query records keyed by identifier; each record's
            ``sequence`` attribute is what gets aligned.

    Returns:
        A dict keyed by query identifier. Each value is a 2-tuple: a copy
        of the original query record, and a record whose ``start``/``end``
        are the span of the alignment on ``sequence`` and whose ``score``
        is the alignment score. Queries that could not be aligned are
        absent from the result.

    Raises:
        UnexpectedAlignmentResult: If a single query yields more than one
            alignment.
    """
    annotation_pairs = {}

    aligner = PairwiseAligner()
    aligner.mode = "local"
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
    aligner.extend_gap_score = -1
    aligner.open_gap_score = -11

    for query_id, query in queries.items():
        try:
            alignments = aligner.align(sequence, query.sequence)
        except ValueError:
            # Deliberate best effort: a query the aligner rejects is skipped
            # rather than aborting the whole batch.
            # NOTE(review): presumably raised for empty sequences or letters
            # missing from the substitution matrix -- confirm.
            continue

        if len(alignments) > 1:
            raise UnexpectedAlignmentResult(
                "More than one alignment resulted from a single query."
            )

        # At most one alignment remains after the check above; the loop
        # also covers the "no alignment" case without extra branching.
        for alignment in alignments:
            # ``aligned[0]`` lists the (start, end) blocks on the target
            # (``sequence``); a gapped alignment has several blocks.
            target_blocks = alignment.aligned[0]
            if len(target_blocks) == 0:
                # No aligned block means there is no location to report.
                continue
            # Span the whole alignment: start of the first block to the end
            # of the last one (not just the first gap-free block).
            aligned_start = target_blocks[0][0]
            aligned_end = target_blocks[-1][1]

            original = AlignedSequence(
                query_id,
                query.sequence,
                query.name,
                query.description,
                query.start,
                query.end,
                query.score,
            )
            hit = AlignedSequence(
                query_id,
                alignment.query,
                query.name,
                query.description,
                aligned_start,
                aligned_end,
                alignment.score,
            )
            annotation_pairs[query_id] = (original, hit)

    return annotation_pairs