Changed alignment library
This commit is contained in:
		@@ -1,33 +1,43 @@
 | 
			
		||||
from skbio.alignment import StripedSmithWaterman
 | 
			
		||||
from Bio.Align import PairwiseAligner, substitution_matrices
 | 
			
		||||
from exceptions import UnexpectedAlignmentResult
 | 
			
		||||
 | 
			
		||||
from datatypes import Annotation
 | 
			
		||||
from datatypes import AlignedSequence
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
 | 
			
		||||
def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
 | 
			
		||||
    annotation_pairs = {}
 | 
			
		||||
    align = StripedSmithWaterman(sequence)
 | 
			
		||||
    for id, annot in annotations.items():
 | 
			
		||||
        alignment = align(annot.sequence)
 | 
			
		||||
        score, aligned_start, aligned_end = (
 | 
			
		||||
            alignment.optimal_alignment_score,
 | 
			
		||||
            alignment.query_begin,
 | 
			
		||||
            alignment.query_end,
 | 
			
		||||
        )
 | 
			
		||||
        annotation_pairs[id] = Annotation(
 | 
			
		||||
            id,
 | 
			
		||||
            annot.sequence,
 | 
			
		||||
            annot.name,
 | 
			
		||||
            annot.description,
 | 
			
		||||
            annot.start,
 | 
			
		||||
            annot.end,
 | 
			
		||||
            annot.score,
 | 
			
		||||
        ), Annotation(
 | 
			
		||||
            id,
 | 
			
		||||
            alignment.aligned_target_sequence,
 | 
			
		||||
            annot.name,
 | 
			
		||||
            annot.description,
 | 
			
		||||
            aligned_start,
 | 
			
		||||
            aligned_end,
 | 
			
		||||
            score,
 | 
			
		||||
        )
 | 
			
		||||
    aligner = PairwiseAligner(one_alignment_only=True)
 | 
			
		||||
    aligner.mode = "local"
 | 
			
		||||
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
 | 
			
		||||
    aligner.extend_gap_score = -1
 | 
			
		||||
    aligner.open_gap_score = -11
 | 
			
		||||
    for id, query in queries.items():
 | 
			
		||||
        try:
 | 
			
		||||
            alignments = aligner.align(sequence, query.sequence)
 | 
			
		||||
        except ValueError:
 | 
			
		||||
            continue
 | 
			
		||||
        if len(alignments) > 1:
 | 
			
		||||
            raise UnexpectedAlignmentResult(
 | 
			
		||||
                "More than one alignment resulted from a single query."
 | 
			
		||||
            )
 | 
			
		||||
        for alignment in alignments:
 | 
			
		||||
            score, query_aligned = (alignment.score, alignment.aligned[0][0])
 | 
			
		||||
            aligned_start, aligned_end = query_aligned
 | 
			
		||||
            annotation_pairs[id] = AlignedSequence(
 | 
			
		||||
                id,
 | 
			
		||||
                query.sequence,
 | 
			
		||||
                query.name,
 | 
			
		||||
                query.description,
 | 
			
		||||
                query.start,
 | 
			
		||||
                query.end,
 | 
			
		||||
                query.score,
 | 
			
		||||
            ), AlignedSequence(
 | 
			
		||||
                id,
 | 
			
		||||
                alignment.query,
 | 
			
		||||
                query.name,
 | 
			
		||||
                query.description,
 | 
			
		||||
                aligned_start,
 | 
			
		||||
                aligned_end,
 | 
			
		||||
                score,
 | 
			
		||||
            )
 | 
			
		||||
    return annotation_pairs
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
import os
 | 
			
		||||
import argparse
 | 
			
		||||
from Bio import SeqIO
 | 
			
		||||
from aligner import generate_annotated_positions
 | 
			
		||||
from aligner import protein_align_many_to_one_ssw
 | 
			
		||||
 | 
			
		||||
from persistence import read_annotations_from_csv, save_alignments_to_csv
 | 
			
		||||
 | 
			
		||||
@@ -78,7 +78,7 @@ def main():
 | 
			
		||||
    )
 | 
			
		||||
    with open(args.sequence, "r") as sequence_fd:
 | 
			
		||||
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
 | 
			
		||||
            aligned_annotations = generate_annotated_positions(
 | 
			
		||||
            aligned_annotations = protein_align_many_to_one_ssw(
 | 
			
		||||
                str(sequence.seq), given_annotations
 | 
			
		||||
            )
 | 
			
		||||
            save_alignments_to_csv(
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
class Annotation:
 | 
			
		||||
class AlignedSequence:
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        id: str,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								bmlsa/exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								bmlsa/exceptions.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
			
		||||
class UnexpectedAlignmentResult(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
import csv
 | 
			
		||||
from datatypes import Annotation
 | 
			
		||||
from datatypes import AlignedSequence
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def read_annotations_from_csv(
 | 
			
		||||
@@ -37,7 +37,7 @@ def read_annotations_from_csv(
 | 
			
		||||
            start = row[start_ind] if start_header else None
 | 
			
		||||
            end = row[end_ind] if end_header else None
 | 
			
		||||
            sequence = row[sequence_ind]
 | 
			
		||||
            annotations[id] = Annotation(
 | 
			
		||||
            annotations[id] = AlignedSequence(
 | 
			
		||||
                id,
 | 
			
		||||
                sequence,
 | 
			
		||||
                name,
 | 
			
		||||
@@ -49,7 +49,7 @@ def read_annotations_from_csv(
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def save_alignments_to_csv(
 | 
			
		||||
    aligned_pairs: dict[str, tuple[Annotation, Annotation]], output_path: str
 | 
			
		||||
    aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
 | 
			
		||||
):
 | 
			
		||||
    with open(output_path, "w") as output_fd:
 | 
			
		||||
        writer = csv.writer(output_fd)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user