Fixed match metric difference between remote and local MLST profilers

This commit is contained in:
Harrison Deng 2025-02-06 17:12:31 +00:00
parent a27e09da31
commit 85946eb110
4 changed files with 10 additions and 10 deletions

View File

@ -8,7 +8,7 @@ from queue import Queue
from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment
class AsyncPairwiseAlignmentEngine(AbstractContextManager): class AsyncBiopythonPairwiseAlignmentEngine(AbstractContextManager):
def __enter__(self): def __enter__(self):
self._thread_pool = ThreadPoolExecutor(self._max_threads, thread_name_prefix="async-pairwise-alignment") self._thread_pool = ThreadPoolExecutor(self._max_threads, thread_name_prefix="async-pairwise-alignment")
return self return self
@ -46,7 +46,7 @@ class AsyncPairwiseAlignmentEngine(AbstractContextManager):
percent_identity=top_alignment_identities/top_alignment.length, percent_identity=top_alignment_identities/top_alignment.length,
mismatches=top_alignment_mismatches, mismatches=top_alignment_mismatches,
gaps=top_alignment_gaps, gaps=top_alignment_gaps,
score=top_alignment_score match_metric=top_alignment_score
)), associated_data )), associated_data
async def next_completed(self) -> Union[tuple[PairwiseAlignment, dict[str, Any]], None]: async def next_completed(self) -> Union[tuple[PairwiseAlignment, dict[str, Any]], None]:

View File

@ -11,7 +11,7 @@ from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequen
from aiohttp import ClientSession, ClientTimeout from aiohttp import ClientSession, ClientTimeout
from autobigs.engine.analysis.aligners import AsyncPairwiseAlignmentEngine from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
from autobigs.engine.reading import read_fasta from autobigs.engine.reading import read_fasta
from autobigs.engine.structures.alignment import PairwiseAlignment from autobigs.engine.structures.alignment import PairwiseAlignment
from autobigs.engine.structures.genomics import NamedString from autobigs.engine.structures.genomics import NamedString
@ -82,7 +82,7 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
percent_identity=float(partial_match["identity"]), percent_identity=float(partial_match["identity"]),
mismatches=int(partial_match["mismatches"]), mismatches=int(partial_match["mismatches"]),
gaps=int(partial_match["gaps"]), gaps=int(partial_match["gaps"]),
score=int(partial_match["score"]) match_metric=int(partial_match["bitscore"])
) )
yield Allele( yield Allele(
allele_locus=allele_loci, allele_locus=allele_loci,
@ -209,7 +209,7 @@ class LocalBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
async def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]: async def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
aligner = PairwiseAligner("blastn") aligner = PairwiseAligner("blastn")
aligner.mode = "local" aligner.mode = "local"
with AsyncPairwiseAlignmentEngine(aligner, max_threads=2) as aligner_engine: with AsyncBiopythonPairwiseAlignmentEngine(aligner, max_threads=4) as aligner_engine:
for query_sequence_string in query_sequence_strings: for query_sequence_string in query_sequence_strings:
for locus in self._loci: for locus in self._loci:
async for allele_variant in read_fasta(self.get_locus_cache_path(locus)): async for allele_variant in read_fasta(self.get_locus_cache_path(locus)):
@ -235,7 +235,7 @@ class LocalBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
else: else:
alignment_rankings[result_locus].add((alignment_result, variant_id)) alignment_rankings[result_locus].add((alignment_result, variant_id))
for final_locus, alignments in alignment_rankings.items(): for final_locus, alignments in alignment_rankings.items():
closest_alignment, closest_variant_id = sorted(alignments, key=lambda index: index[0].alignment_stats.score)[0] closest_alignment, closest_variant_id = sorted(alignments, key=lambda index: index[0].alignment_stats.match_metric)[0]
yield Allele(final_locus, closest_variant_id, closest_alignment.alignment_stats) yield Allele(final_locus, closest_variant_id, closest_alignment.alignment_stats)
async def determine_mlst_st(self, alleles): async def determine_mlst_st(self, alleles):

View File

@ -7,7 +7,7 @@ class AlignmentStats:
percent_identity: float percent_identity: float
mismatches: int mismatches: int
gaps: int gaps: int
score: int match_metric: int
@dataclass(frozen=True) @dataclass(frozen=True)
class PairwiseAlignment: class PairwiseAlignment:

View File

@ -2,7 +2,7 @@ from Bio import SeqIO
from Bio.Align import PairwiseAligner from Bio.Align import PairwiseAligner
from pytest import mark from pytest import mark
from pytest import fixture from pytest import fixture
from autobigs.engine.analysis.aligners import AsyncPairwiseAlignmentEngine from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
from autobigs.engine.structures.alignment import PairwiseAlignment from autobigs.engine.structures.alignment import PairwiseAlignment
@fixture @fixture
@ -17,11 +17,11 @@ def tohamaI_bpertussis_genome():
def dummy_engine(request): def dummy_engine(request):
aligner = PairwiseAligner("blastn") aligner = PairwiseAligner("blastn")
aligner.mode = "local" aligner.mode = "local"
with AsyncPairwiseAlignmentEngine(aligner, request.param) as engine: with AsyncBiopythonPairwiseAlignmentEngine(aligner, request.param) as engine:
yield engine yield engine
class TestAsyncPairwiseAlignmentEngine: class TestAsyncPairwiseAlignmentEngine:
async def test_single_alignment_no_errors(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncPairwiseAlignmentEngine): async def test_single_alignment_no_errors(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk) dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
async for alignment, additional_information in dummy_engine: async for alignment, additional_information in dummy_engine:
assert isinstance(alignment, PairwiseAlignment) assert isinstance(alignment, PairwiseAlignment)