Changed alignment library
This commit is contained in:
parent
f22070e8c3
commit
04f730cacb
@ -1,31 +1,41 @@
|
|||||||
from skbio.alignment import StripedSmithWaterman
|
from Bio.Align import PairwiseAligner, substitution_matrices
|
||||||
|
from exceptions import UnexpectedAlignmentResult
|
||||||
|
|
||||||
from datatypes import Annotation
|
from datatypes import AlignedSequence
|
||||||
|
|
||||||
|
|
||||||
def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
|
def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
|
||||||
annotation_pairs = {}
|
annotation_pairs = {}
|
||||||
align = StripedSmithWaterman(sequence)
|
aligner = PairwiseAligner(one_alignment_only=True)
|
||||||
for id, annot in annotations.items():
|
aligner.mode = "local"
|
||||||
alignment = align(annot.sequence)
|
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
||||||
score, aligned_start, aligned_end = (
|
aligner.extend_gap_score = -1
|
||||||
alignment.optimal_alignment_score,
|
aligner.open_gap_score = -11
|
||||||
alignment.query_begin,
|
for id, query in queries.items():
|
||||||
alignment.query_end,
|
try:
|
||||||
|
alignments = aligner.align(sequence, query.sequence)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
if len(alignments) > 1:
|
||||||
|
raise UnexpectedAlignmentResult(
|
||||||
|
"More than one alignment resulted from a single query."
|
||||||
)
|
)
|
||||||
annotation_pairs[id] = Annotation(
|
for alignment in alignments:
|
||||||
|
score, query_aligned = (alignment.score, alignment.aligned[0][0])
|
||||||
|
aligned_start, aligned_end = query_aligned
|
||||||
|
annotation_pairs[id] = AlignedSequence(
|
||||||
id,
|
id,
|
||||||
annot.sequence,
|
query.sequence,
|
||||||
annot.name,
|
query.name,
|
||||||
annot.description,
|
query.description,
|
||||||
annot.start,
|
query.start,
|
||||||
annot.end,
|
query.end,
|
||||||
annot.score,
|
query.score,
|
||||||
), Annotation(
|
), AlignedSequence(
|
||||||
id,
|
id,
|
||||||
alignment.aligned_target_sequence,
|
alignment.query,
|
||||||
annot.name,
|
query.name,
|
||||||
annot.description,
|
query.description,
|
||||||
aligned_start,
|
aligned_start,
|
||||||
aligned_end,
|
aligned_end,
|
||||||
score,
|
score,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
from aligner import generate_annotated_positions
|
from aligner import protein_align_many_to_one_ssw
|
||||||
|
|
||||||
from persistence import read_annotations_from_csv, save_alignments_to_csv
|
from persistence import read_annotations_from_csv, save_alignments_to_csv
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ def main():
|
|||||||
)
|
)
|
||||||
with open(args.sequence, "r") as sequence_fd:
|
with open(args.sequence, "r") as sequence_fd:
|
||||||
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
||||||
aligned_annotations = generate_annotated_positions(
|
aligned_annotations = protein_align_many_to_one_ssw(
|
||||||
str(sequence.seq), given_annotations
|
str(sequence.seq), given_annotations
|
||||||
)
|
)
|
||||||
save_alignments_to_csv(
|
save_alignments_to_csv(
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
class Annotation:
|
class AlignedSequence:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
id: str,
|
id: str,
|
||||||
|
2
bmlsa/exceptions.py
Normal file
2
bmlsa/exceptions.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
class UnexpectedAlignmentResult(Exception):
|
||||||
|
pass
|
@ -1,5 +1,5 @@
|
|||||||
import csv
|
import csv
|
||||||
from datatypes import Annotation
|
from datatypes import AlignedSequence
|
||||||
|
|
||||||
|
|
||||||
def read_annotations_from_csv(
|
def read_annotations_from_csv(
|
||||||
@ -37,7 +37,7 @@ def read_annotations_from_csv(
|
|||||||
start = row[start_ind] if start_header else None
|
start = row[start_ind] if start_header else None
|
||||||
end = row[end_ind] if end_header else None
|
end = row[end_ind] if end_header else None
|
||||||
sequence = row[sequence_ind]
|
sequence = row[sequence_ind]
|
||||||
annotations[id] = Annotation(
|
annotations[id] = AlignedSequence(
|
||||||
id,
|
id,
|
||||||
sequence,
|
sequence,
|
||||||
name,
|
name,
|
||||||
@ -49,7 +49,7 @@ def read_annotations_from_csv(
|
|||||||
|
|
||||||
|
|
||||||
def save_alignments_to_csv(
|
def save_alignments_to_csv(
|
||||||
aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str
|
aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
|
||||||
):
|
):
|
||||||
with open(output_path, "w") as output_fd:
|
with open(output_path, "w") as output_fd:
|
||||||
writer = csv.writer(output_fd)
|
writer = csv.writer(output_fd)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user