Changed alignment library
This commit is contained in:
parent
f22070e8c3
commit
04f730cacb
@ -1,33 +1,43 @@
|
||||
from skbio.alignment import StripedSmithWaterman
|
||||
from Bio.Align import PairwiseAligner, substitution_matrices
|
||||
from exceptions import UnexpectedAlignmentResult
|
||||
|
||||
from datatypes import Annotation
|
||||
from datatypes import AlignedSequence
|
||||
|
||||
|
||||
def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
|
||||
def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
|
||||
annotation_pairs = {}
|
||||
align = StripedSmithWaterman(sequence)
|
||||
for id, annot in annotations.items():
|
||||
alignment = align(annot.sequence)
|
||||
score, aligned_start, aligned_end = (
|
||||
alignment.optimal_alignment_score,
|
||||
alignment.query_begin,
|
||||
alignment.query_end,
|
||||
)
|
||||
annotation_pairs[id] = Annotation(
|
||||
id,
|
||||
annot.sequence,
|
||||
annot.name,
|
||||
annot.description,
|
||||
annot.start,
|
||||
annot.end,
|
||||
annot.score,
|
||||
), Annotation(
|
||||
id,
|
||||
alignment.aligned_target_sequence,
|
||||
annot.name,
|
||||
annot.description,
|
||||
aligned_start,
|
||||
aligned_end,
|
||||
score,
|
||||
)
|
||||
aligner = PairwiseAligner(one_alignment_only=True)
|
||||
aligner.mode = "local"
|
||||
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
||||
aligner.extend_gap_score = -1
|
||||
aligner.open_gap_score = -11
|
||||
for id, query in queries.items():
|
||||
try:
|
||||
alignments = aligner.align(sequence, query.sequence)
|
||||
except ValueError:
|
||||
continue
|
||||
if len(alignments) > 1:
|
||||
raise UnexpectedAlignmentResult(
|
||||
"More than one alignment resulted from a single query."
|
||||
)
|
||||
for alignment in alignments:
|
||||
score, query_aligned = (alignment.score, alignment.aligned[0][0])
|
||||
aligned_start, aligned_end = query_aligned
|
||||
annotation_pairs[id] = AlignedSequence(
|
||||
id,
|
||||
query.sequence,
|
||||
query.name,
|
||||
query.description,
|
||||
query.start,
|
||||
query.end,
|
||||
query.score,
|
||||
), AlignedSequence(
|
||||
id,
|
||||
alignment.query,
|
||||
query.name,
|
||||
query.description,
|
||||
aligned_start,
|
||||
aligned_end,
|
||||
score,
|
||||
)
|
||||
return annotation_pairs
|
||||
|
@ -1,7 +1,7 @@
|
||||
import os
|
||||
import argparse
|
||||
from Bio import SeqIO
|
||||
from aligner import generate_annotated_positions
|
||||
from aligner import protein_align_many_to_one_ssw
|
||||
|
||||
from persistence import read_annotations_from_csv, save_alignments_to_csv
|
||||
|
||||
@ -78,7 +78,7 @@ def main():
|
||||
)
|
||||
with open(args.sequence, "r") as sequence_fd:
|
||||
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
||||
aligned_annotations = generate_annotated_positions(
|
||||
aligned_annotations = protein_align_many_to_one_ssw(
|
||||
str(sequence.seq), given_annotations
|
||||
)
|
||||
save_alignments_to_csv(
|
||||
|
@ -1,4 +1,4 @@
|
||||
class Annotation:
|
||||
class AlignedSequence:
|
||||
def __init__(
|
||||
self,
|
||||
id: str,
|
||||
|
2
bmlsa/exceptions.py
Normal file
2
bmlsa/exceptions.py
Normal file
@ -0,0 +1,2 @@
|
||||
class UnexpectedAlignmentResult(Exception):
|
||||
pass
|
@ -1,5 +1,5 @@
|
||||
import csv
|
||||
from datatypes import Annotation
|
||||
from datatypes import AlignedSequence
|
||||
|
||||
|
||||
def read_annotations_from_csv(
|
||||
@ -37,7 +37,7 @@ def read_annotations_from_csv(
|
||||
start = row[start_ind] if start_header else None
|
||||
end = row[end_ind] if end_header else None
|
||||
sequence = row[sequence_ind]
|
||||
annotations[id] = Annotation(
|
||||
annotations[id] = AlignedSequence(
|
||||
id,
|
||||
sequence,
|
||||
name,
|
||||
@ -49,7 +49,7 @@ def read_annotations_from_csv(
|
||||
|
||||
|
||||
def save_alignments_to_csv(
|
||||
aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str
|
||||
aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
|
||||
):
|
||||
with open(output_path, "w") as output_fd:
|
||||
writer = csv.writer(output_fd)
|
||||
|
Loading…
Reference in New Issue
Block a user