Added unit tests for pubMLST MLST profiling
This commit is contained in:
parent
424beeb559
commit
645357ac58
@ -1,7 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from numbers import Number
|
from numbers import Number
|
||||||
from os import path
|
from os import path
|
||||||
from typing import AsyncGenerator, Collection, Sequence, Union
|
from typing import Any, AsyncGenerator, Collection, Sequence, Union
|
||||||
from automlst.engine.data.genomics import NamedString, SangerTraceData
|
from automlst.engine.data.genomics import NamedString, SangerTraceData
|
||||||
from Bio.SeqRecord import SeqRecord
|
from Bio.SeqRecord import SeqRecord
|
||||||
from Bio import SeqIO, Align
|
from Bio import SeqIO, Align
|
||||||
@ -104,12 +104,15 @@ async def read_abif(seq_path: str) -> SangerTraceData:
|
|||||||
)
|
)
|
||||||
return trace_data
|
return trace_data
|
||||||
|
|
||||||
|
|
||||||
def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
|
def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
|
||||||
aligner = Align.PairwiseAligner(scoring="blastn")
|
aligner = Align.PairwiseAligner(scoring="blastn")
|
||||||
aligner.mode = "local"
|
aligner.mode = "local"
|
||||||
alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[0] # take the best alignment
|
alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
|
||||||
|
0] # take the best alignment
|
||||||
return NamedString(alignment_result.sequences[0].id, alignment_result.sequences[0].seq), NamedString(alignment_result.sequences[1].id, alignment_result.sequences[1].seq)
|
return NamedString(alignment_result.sequences[0].id, alignment_result.sequences[0].seq), NamedString(alignment_result.sequences[1].id, alignment_result.sequences[1].seq)
|
||||||
|
|
||||||
|
|
||||||
async def reference_consensus_assembly(reference: NamedString, sanger_traces: Collection[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
|
async def reference_consensus_assembly(reference: NamedString, sanger_traces: Collection[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
|
||||||
for sanger_trace in sanger_traces:
|
for sanger_trace in sanger_traces:
|
||||||
yield (await asyncio.to_thread(_biopython_local_pairwise_alignment, reference, sanger_trace))[1]
|
yield (await asyncio.to_thread(_biopython_local_pairwise_alignment, reference, sanger_trace))[1]
|
@ -30,15 +30,11 @@ class InstitutPasteurProfiler(MLSTProfiler):
|
|||||||
alelle_id = allele["allele_id"]
|
alelle_id = allele["allele_id"]
|
||||||
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id)
|
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id)
|
||||||
|
|
||||||
async def fetch_mlst_st(self, schema_id: int, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
async def fetch_mlst_st(self, schema_id: int, alleles: AsyncIterable[Allele]) -> MLSTProfile:
|
||||||
uri_path = f"schemes/{schema_id}/designations"
|
uri_path = f"schemes/{schema_id}/designations"
|
||||||
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
||||||
if isinstance(alleles, AsyncIterable):
|
|
||||||
async for allele in alleles:
|
async for allele in alleles:
|
||||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
||||||
else:
|
|
||||||
for allele in alleles:
|
|
||||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
|
||||||
response = await self._http_client.post(uri_path, json={
|
response = await self._http_client.post(uri_path, json={
|
||||||
"designations": allele_request_dict
|
"designations": allele_request_dict
|
||||||
})
|
})
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from contextlib import AbstractAsyncContextManager
|
from contextlib import AbstractAsyncContextManager
|
||||||
from typing import AsyncGenerator, AsyncIterable, Generator, Iterable, Mapping, Union
|
from typing import Any, AsyncGenerator, AsyncIterable, Generator, Iterable, Mapping, Union
|
||||||
|
|
||||||
from aiohttp import ClientSession
|
from aiohttp import ClientSession
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ async def test_profiling_results_in_exact_matches_when_exact():
|
|||||||
assert len(targets_left) == 0
|
assert len(targets_left) == 0
|
||||||
|
|
||||||
async def test_profiling_results_in_correct_st():
|
async def test_profiling_results_in_correct_st():
|
||||||
|
async def dummy_allele_generator():
|
||||||
dummy_alleles = [
|
dummy_alleles = [
|
||||||
Allele("adk", "1"),
|
Allele("adk", "1"),
|
||||||
Allele("fumC", "1"),
|
Allele("fumC", "1"),
|
||||||
@ -25,8 +26,10 @@ async def test_profiling_results_in_correct_st():
|
|||||||
Allele("pepA", "1"),
|
Allele("pepA", "1"),
|
||||||
Allele("pgm", "1"),
|
Allele("pgm", "1"),
|
||||||
]
|
]
|
||||||
|
for dummy_allele in dummy_alleles:
|
||||||
|
yield dummy_allele
|
||||||
async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
|
async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
|
||||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(3, dummy_alleles)
|
mlst_st_data = await dummy_profiler.fetch_mlst_st(3, dummy_allele_generator())
|
||||||
assert mlst_st_data is not None
|
assert mlst_st_data is not None
|
||||||
assert isinstance(mlst_st_data, MLSTProfile)
|
assert isinstance(mlst_st_data, MLSTProfile)
|
||||||
assert mlst_st_data.clonal_complex == "ST-2 complex"
|
assert mlst_st_data.clonal_complex == "ST-2 complex"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
import asyncio
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
from automlst.engine.data.mlst import Allele, MLSTProfile
|
from automlst.engine.data.mlst import Allele, MLSTProfile
|
||||||
from automlst.engine.remote.databases.institutpasteur.mlst import InstitutPasteurProfiler
|
|
||||||
from automlst.engine.remote.databases.pubmlst.mlst import PubMLSTProfiler
|
from automlst.engine.remote.databases.pubmlst.mlst import PubMLSTProfiler
|
||||||
|
|
||||||
|
|
||||||
@ -24,6 +24,7 @@ async def test_profiling_results_in_exact_matches_when_exact():
|
|||||||
assert len(dummy_alleles) == 0
|
assert len(dummy_alleles) == 0
|
||||||
|
|
||||||
async def test_profiling_results_in_correct_st():
|
async def test_profiling_results_in_correct_st():
|
||||||
|
async def generate_dummy_targets():
|
||||||
dummy_alleles = [
|
dummy_alleles = [
|
||||||
Allele("adk", "1"),
|
Allele("adk", "1"),
|
||||||
Allele("atpG", "1"),
|
Allele("atpG", "1"),
|
||||||
@ -33,16 +34,18 @@ async def test_profiling_results_in_correct_st():
|
|||||||
Allele("pgi", "1"),
|
Allele("pgi", "1"),
|
||||||
Allele("recA", "5"),
|
Allele("recA", "5"),
|
||||||
]
|
]
|
||||||
async with InstitutPasteurProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
|
for dummy_allele in dummy_alleles:
|
||||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(1, dummy_alleles)
|
yield dummy_allele
|
||||||
|
async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
|
||||||
|
mlst_st_data = await dummy_profiler.fetch_mlst_st(1, generate_dummy_targets())
|
||||||
assert mlst_st_data is not None
|
assert mlst_st_data is not None
|
||||||
assert isinstance(mlst_st_data, MLSTProfile)
|
assert isinstance(mlst_st_data, MLSTProfile)
|
||||||
assert mlst_st_data.clonal_complex == "ST-3 complex"
|
assert mlst_st_data.clonal_complex == "ST-3 complex"
|
||||||
assert mlst_st_data.sequence_type == "3"
|
assert mlst_st_data.sequence_type == "3"
|
||||||
|
|
||||||
async def test_sequence_profiling_is_correct():
|
async def test_sequence_profiling_is_correct():
|
||||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||||
async with InstitutPasteurProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
|
async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
|
||||||
profile = await dummy_profiler.profile_string(1, sequence)
|
profile = await dummy_profiler.profile_string(1, sequence)
|
||||||
assert profile is not None
|
assert profile is not None
|
||||||
assert isinstance(profile, MLSTProfile)
|
assert isinstance(profile, MLSTProfile)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user