Changed alignment library

This commit is contained in:
Harrison Deng 2023-04-20 12:24:26 -05:00
parent f22070e8c3
commit 04f730cacb
5 changed files with 46 additions and 34 deletions

View File

@ -1,33 +1,43 @@
from skbio.alignment import StripedSmithWaterman from Bio.Align import PairwiseAligner, substitution_matrices
from exceptions import UnexpectedAlignmentResult
from datatypes import Annotation from datatypes import AlignedSequence
def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]): def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
annotation_pairs = {} annotation_pairs = {}
align = StripedSmithWaterman(sequence) aligner = PairwiseAligner(one_alignment_only=True)
for id, annot in annotations.items(): aligner.mode = "local"
alignment = align(annot.sequence) aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
score, aligned_start, aligned_end = ( aligner.extend_gap_score = -1
alignment.optimal_alignment_score, aligner.open_gap_score = -11
alignment.query_begin, for id, query in queries.items():
alignment.query_end, try:
) alignments = aligner.align(sequence, query.sequence)
annotation_pairs[id] = Annotation( except ValueError:
id, continue
annot.sequence, if len(alignments) > 1:
annot.name, raise UnexpectedAlignmentResult(
annot.description, "More than one alignment resulted from a single query."
annot.start, )
annot.end, for alignment in alignments:
annot.score, score, query_aligned = (alignment.score, alignment.aligned[0][0])
), Annotation( aligned_start, aligned_end = query_aligned
id, annotation_pairs[id] = AlignedSequence(
alignment.aligned_target_sequence, id,
annot.name, query.sequence,
annot.description, query.name,
aligned_start, query.description,
aligned_end, query.start,
score, query.end,
) query.score,
), AlignedSequence(
id,
alignment.query,
query.name,
query.description,
aligned_start,
aligned_end,
score,
)
return annotation_pairs return annotation_pairs

View File

@ -1,7 +1,7 @@
import os import os
import argparse import argparse
from Bio import SeqIO from Bio import SeqIO
from aligner import generate_annotated_positions from aligner import protein_align_many_to_one_ssw
from persistence import read_annotations_from_csv, save_alignments_to_csv from persistence import read_annotations_from_csv, save_alignments_to_csv
@ -78,7 +78,7 @@ def main():
) )
with open(args.sequence, "r") as sequence_fd: with open(args.sequence, "r") as sequence_fd:
for sequence in SeqIO.parse(sequence_fd, "fasta"): for sequence in SeqIO.parse(sequence_fd, "fasta"):
aligned_annotations = generate_annotated_positions( aligned_annotations = protein_align_many_to_one_ssw(
str(sequence.seq), given_annotations str(sequence.seq), given_annotations
) )
save_alignments_to_csv( save_alignments_to_csv(

View File

@ -1,4 +1,4 @@
class Annotation: class AlignedSequence:
def __init__( def __init__(
self, self,
id: str, id: str,

2
bmlsa/exceptions.py Normal file
View File

@ -0,0 +1,2 @@
class UnexpectedAlignmentResult(Exception):
pass

View File

@ -1,5 +1,5 @@
import csv import csv
from datatypes import Annotation from datatypes import AlignedSequence
def read_annotations_from_csv( def read_annotations_from_csv(
@ -37,7 +37,7 @@ def read_annotations_from_csv(
start = row[start_ind] if start_header else None start = row[start_ind] if start_header else None
end = row[end_ind] if end_header else None end = row[end_ind] if end_header else None
sequence = row[sequence_ind] sequence = row[sequence_ind]
annotations[id] = Annotation( annotations[id] = AlignedSequence(
id, id,
sequence, sequence,
name, name,
@ -49,7 +49,7 @@ def read_annotations_from_csv(
def save_alignments_to_csv( def save_alignments_to_csv(
aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
): ):
with open(output_path, "w") as output_fd: with open(output_path, "w") as output_fd:
writer = csv.writer(output_fd) writer = csv.writer(output_fd)