Changed alignment library

2023-04-20 12:24:26 -05:00 · 2023-04-20 12:24:26 -05:00 · 04f730cacb
commit 04f730cacb
parent f22070e8c3
5 changed files with 46 additions and 34 deletions
--- a/bmlsa/aligner.py
+++ b/bmlsa/aligner.py
@@ -1,33 +1,43 @@
-from skbio.alignment import StripedSmithWaterman
+from Bio.Align import PairwiseAligner, substitution_matrices
 from exceptions import UnexpectedAlignmentResult
-from datatypes import Annotation
+from datatypes import AlignedSequence
-def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
+def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
    annotation_pairs = {}
-    align = StripedSmithWaterman(sequence)
+    aligner = PairwiseAligner(one_alignment_only=True)
-    for id, annot in annotations.items():
+    aligner.mode = "local"
-        alignment = align(annot.sequence)
+    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
-        score, aligned_start, aligned_end = (
+    aligner.extend_gap_score = -1
-            alignment.optimal_alignment_score,
+    aligner.open_gap_score = -11
-            alignment.query_begin,
+    for id, query in queries.items():
-            alignment.query_end,
+        try:
-        )
+            alignments = aligner.align(sequence, query.sequence)
-        annotation_pairs[id] = Annotation(
+        except ValueError:
-            id,
+            continue
-            annot.sequence,
+        if len(alignments) > 1:
-            annot.name,
+            raise UnexpectedAlignmentResult(
-            annot.description,
+                "More than one alignment resulted from a single query."
-            annot.start,
+            )
-            annot.end,
+        for alignment in alignments:
-            annot.score,
+            score, query_aligned = (alignment.score, alignment.aligned[0][0])
-        ), Annotation(
+            aligned_start, aligned_end = query_aligned
-            id,
+            annotation_pairs[id] = AlignedSequence(
-            alignment.aligned_target_sequence,
+                id,
-            annot.name,
+                query.sequence,
-            annot.description,
+                query.name,
-            aligned_start,
+                query.description,
-            aligned_end,
+                query.start,
-            score,
+                query.end,
-        )
+                query.score,
+            ), AlignedSequence(
+                id,
+                alignment.query,
+                query.name,
+                query.description,
+                aligned_start,
+                aligned_end,
+                score,
+            )
    return annotation_pairs
--- a/bmlsa/cli.py
+++ b/bmlsa/cli.py
@@ -1,7 +1,7 @@
 import os
 import argparse
 from Bio import SeqIO
-from aligner import generate_annotated_positions
+from aligner import protein_align_many_to_one_ssw
 from persistence import read_annotations_from_csv, save_alignments_to_csv
@@ -78,7 +78,7 @@ def main():
    )
    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
-            aligned_annotations = generate_annotated_positions(
+            aligned_annotations = protein_align_many_to_one_ssw(
                str(sequence.seq), given_annotations
            )
            save_alignments_to_csv(
--- a/bmlsa/datatypes.py
+++ b/bmlsa/datatypes.py
@@ -1,4 +1,4 @@
-class Annotation:
+class AlignedSequence:
    def __init__(
        self,
        id: str,
--- a/bmlsa/exceptions.py
+++ b/bmlsa/exceptions.py
@@ -0,0 +1,2 @@
+class UnexpectedAlignmentResult(Exception):
+    pass
--- a/bmlsa/persistence.py
+++ b/bmlsa/persistence.py
@@ -1,5 +1,5 @@
 import csv
-from datatypes import Annotation
+from datatypes import AlignedSequence
 def read_annotations_from_csv(
@@ -37,7 +37,7 @@ def read_annotations_from_csv(
            start = row[start_ind] if start_header else None
            end = row[end_ind] if end_header else None
            sequence = row[sequence_ind]
-            annotations[id] = Annotation(
+            annotations[id] = AlignedSequence(
                id,
                sequence,
                name,
@@ -49,7 +49,7 @@ def read_annotations_from_csv(
 def save_alignments_to_csv(
-    aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str
+    aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
 ):
    with open(output_path, "w") as output_fd:
        writer = csv.writer(output_fd)
		`@ -0,0 +1,2 @@`
							`class UnexpectedAlignmentResult(Exception):`
							`pass`