Added unit tests for pubMLST MLST profiling
This commit is contained in:
parent
424beeb559
commit
645357ac58
@ -1,7 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from numbers import Number
|
from numbers import Number
|
||||||
from os import path
|
from os import path
|
||||||
from typing import AsyncGenerator, Collection, Sequence, Union
|
from typing import Any, AsyncGenerator, Collection, Sequence, Union
|
||||||
from automlst.engine.data.genomics import NamedString, SangerTraceData
|
from automlst.engine.data.genomics import NamedString, SangerTraceData
|
||||||
from Bio.SeqRecord import SeqRecord
|
from Bio.SeqRecord import SeqRecord
|
||||||
from Bio import SeqIO, Align
|
from Bio import SeqIO, Align
|
||||||
@ -21,95 +21,98 @@ async def read_abif(seq_path: str) -> SangerTraceData:
|
|||||||
biopython_annotations = biopython_seq.annotations
|
biopython_annotations = biopython_seq.annotations
|
||||||
|
|
||||||
# Lot of type ignoring since Biopython did not define their typing.
|
# Lot of type ignoring since Biopython did not define their typing.
|
||||||
biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore
|
biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore
|
||||||
trace_data = SangerTraceData(
|
trace_data = SangerTraceData(
|
||||||
path.basename(seq_path),
|
path.basename(seq_path),
|
||||||
biopython_seq.seq,
|
biopython_seq.seq,
|
||||||
biopython_abif_raw.get("APFN2"), # type: ignore
|
biopython_abif_raw.get("APFN2"), # type: ignore
|
||||||
biopython_abif_raw.get("APrN1"), # type: ignore
|
biopython_abif_raw.get("APrN1"), # type: ignore
|
||||||
biopython_abif_raw.get("APrV1"), # type: ignore
|
biopython_abif_raw.get("APrV1"), # type: ignore
|
||||||
biopython_abif_raw.get("APrX1"), # type: ignore
|
biopython_abif_raw.get("APrX1"), # type: ignore
|
||||||
biopython_abif_raw.get("APXV1"), # type: ignore
|
biopython_abif_raw.get("APXV1"), # type: ignore
|
||||||
biopython_abif_raw.get("CMNT1"), # type: ignore
|
biopython_abif_raw.get("CMNT1"), # type: ignore
|
||||||
biopython_abif_raw.get("CpEP1"), # type: ignore
|
biopython_abif_raw.get("CpEP1"), # type: ignore
|
||||||
biopython_abif_raw.get("CTID1"), # type: ignore
|
biopython_abif_raw.get("CTID1"), # type: ignore
|
||||||
biopython_abif_raw.get("CTNM1"), # type: ignore
|
biopython_abif_raw.get("CTNM1"), # type: ignore
|
||||||
biopython_abif_raw.get("CTTL1"), # type: ignore
|
biopython_abif_raw.get("CTTL1"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA1"), # type: ignore
|
biopython_abif_raw.get("DATA1"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA2"), # type: ignore
|
biopython_abif_raw.get("DATA2"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA3"), # type: ignore
|
biopython_abif_raw.get("DATA3"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA4"), # type: ignore
|
biopython_abif_raw.get("DATA4"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA5"), # type: ignore
|
biopython_abif_raw.get("DATA5"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA6"), # type: ignore
|
biopython_abif_raw.get("DATA6"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA7"), # type: ignore
|
biopython_abif_raw.get("DATA7"), # type: ignore
|
||||||
biopython_abif_raw.get("DATA8"), # type: ignore
|
biopython_abif_raw.get("DATA8"), # type: ignore
|
||||||
biopython_abif_raw.get("DSam1"), # type: ignore
|
biopython_abif_raw.get("DSam1"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeN1"), # type: ignore
|
biopython_abif_raw.get("DyeN1"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeN2"), # type: ignore
|
biopython_abif_raw.get("DyeN2"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeN3"), # type: ignore
|
biopython_abif_raw.get("DyeN3"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeN4"), # type: ignore
|
biopython_abif_raw.get("DyeN4"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeW1"), # type: ignore
|
biopython_abif_raw.get("DyeW1"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeW2"), # type: ignore
|
biopython_abif_raw.get("DyeW2"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeW3"), # type: ignore
|
biopython_abif_raw.get("DyeW3"), # type: ignore
|
||||||
biopython_abif_raw.get("DyeW4"), # type: ignore
|
biopython_abif_raw.get("DyeW4"), # type: ignore
|
||||||
biopython_abif_raw.get("DySN1"), # type: ignore
|
biopython_abif_raw.get("DySN1"), # type: ignore
|
||||||
biopython_abif_raw.get("EPVt1"), # type: ignore
|
biopython_abif_raw.get("EPVt1"), # type: ignore
|
||||||
biopython_abif_raw.get("EVNT1"), # type: ignore
|
biopython_abif_raw.get("EVNT1"), # type: ignore
|
||||||
biopython_abif_raw.get("EVNT2"), # type: ignore
|
biopython_abif_raw.get("EVNT2"), # type: ignore
|
||||||
biopython_abif_raw.get("EVNT3"), # type: ignore
|
biopython_abif_raw.get("EVNT3"), # type: ignore
|
||||||
biopython_abif_raw.get("EVNT4"), # type: ignore
|
biopython_abif_raw.get("EVNT4"), # type: ignore
|
||||||
biopython_abif_raw.get("FWO_1"), # type: ignore
|
biopython_abif_raw.get("FWO_1"), # type: ignore
|
||||||
biopython_abif_raw.get("GTyp1"), # type: ignore
|
biopython_abif_raw.get("GTyp1"), # type: ignore
|
||||||
biopython_abif_raw.get("InSc1"), # type: ignore
|
biopython_abif_raw.get("InSc1"), # type: ignore
|
||||||
biopython_abif_raw.get("InVt1"), # type: ignore
|
biopython_abif_raw.get("InVt1"), # type: ignore
|
||||||
biopython_abif_raw.get("LANE1"), # type: ignore
|
biopython_abif_raw.get("LANE1"), # type: ignore
|
||||||
biopython_abif_raw.get("LIMS1"), # type: ignore
|
biopython_abif_raw.get("LIMS1"), # type: ignore
|
||||||
biopython_abif_raw.get("LNTD1"), # type: ignore
|
biopython_abif_raw.get("LNTD1"), # type: ignore
|
||||||
biopython_abif_raw.get("LsrP1"), # type: ignore
|
biopython_abif_raw.get("LsrP1"), # type: ignore
|
||||||
biopython_abif_raw.get("MCHN1"), # type: ignore
|
biopython_abif_raw.get("MCHN1"), # type: ignore
|
||||||
biopython_abif_raw.get("MODF1"), # type: ignore
|
biopython_abif_raw.get("MODF1"), # type: ignore
|
||||||
biopython_abif_raw.get("MODL1"), # type: ignore
|
biopython_abif_raw.get("MODL1"), # type: ignore
|
||||||
biopython_abif_raw.get("NAVG1"), # type: ignore
|
biopython_abif_raw.get("NAVG1"), # type: ignore
|
||||||
biopython_abif_raw.get("NLNE1"), # type: ignore
|
biopython_abif_raw.get("NLNE1"), # type: ignore
|
||||||
biopython_abif_raw.get("OfSc1"), # type: ignore
|
biopython_abif_raw.get("OfSc1"), # type: ignore
|
||||||
biopython_abif_raw.get("PDMF1"), # type: ignore
|
biopython_abif_raw.get("PDMF1"), # type: ignore
|
||||||
biopython_abif_raw.get("PXLB1"), # type: ignore
|
biopython_abif_raw.get("PXLB1"), # type: ignore
|
||||||
biopython_abif_raw.get("RGCm1"), # type: ignore
|
biopython_abif_raw.get("RGCm1"), # type: ignore
|
||||||
biopython_abif_raw.get("RGNm1"), # type: ignore
|
biopython_abif_raw.get("RGNm1"), # type: ignore
|
||||||
biopython_abif_raw.get("RMdV1"), # type: ignore
|
biopython_abif_raw.get("RMdV1"), # type: ignore
|
||||||
biopython_abif_raw.get("RMdX1"), # type: ignore
|
biopython_abif_raw.get("RMdX1"), # type: ignore
|
||||||
biopython_abif_raw.get("RMXV1"), # type: ignore
|
biopython_abif_raw.get("RMXV1"), # type: ignore
|
||||||
biopython_abif_raw.get("RPrN1"), # type: ignore
|
biopython_abif_raw.get("RPrN1"), # type: ignore
|
||||||
biopython_abif_raw.get("RPrV1"), # type: ignore
|
biopython_abif_raw.get("RPrV1"), # type: ignore
|
||||||
biopython_abif_raw.get("RUND1"), # type: ignore
|
biopython_abif_raw.get("RUND1"), # type: ignore
|
||||||
biopython_abif_raw.get("RUND2"), # type: ignore
|
biopython_abif_raw.get("RUND2"), # type: ignore
|
||||||
biopython_abif_raw.get("RUND3"), # type: ignore
|
biopython_abif_raw.get("RUND3"), # type: ignore
|
||||||
biopython_abif_raw.get("RUND4"), # type: ignore
|
biopython_abif_raw.get("RUND4"), # type: ignore
|
||||||
biopython_abif_raw.get("RunN1"), # type: ignore
|
biopython_abif_raw.get("RunN1"), # type: ignore
|
||||||
biopython_abif_raw.get("RUNT1"), # type: ignore
|
biopython_abif_raw.get("RUNT1"), # type: ignore
|
||||||
biopython_abif_raw.get("RUNT2"), # type: ignore
|
biopython_abif_raw.get("RUNT2"), # type: ignore
|
||||||
biopython_abif_raw.get("RUNT3"), # type: ignore
|
biopython_abif_raw.get("RUNT3"), # type: ignore
|
||||||
biopython_abif_raw.get("RUNT4"), # type: ignore
|
biopython_abif_raw.get("RUNT4"), # type: ignore
|
||||||
biopython_abif_raw.get("Satd"), # type: ignore
|
biopython_abif_raw.get("Satd"), # type: ignore
|
||||||
biopython_abif_raw.get("Scal1"), # type: ignore
|
biopython_abif_raw.get("Scal1"), # type: ignore
|
||||||
biopython_abif_raw.get("SCAN1"), # type: ignore
|
biopython_abif_raw.get("SCAN1"), # type: ignore
|
||||||
biopython_abif_raw.get("SMED1"), # type: ignore
|
biopython_abif_raw.get("SMED1"), # type: ignore
|
||||||
biopython_abif_raw.get("SMLt"), # type: ignore
|
biopython_abif_raw.get("SMLt"), # type: ignore
|
||||||
biopython_abif_raw.get("SMPL1"), # type: ignore
|
biopython_abif_raw.get("SMPL1"), # type: ignore
|
||||||
biopython_abif_raw.get("SVER1"), # type: ignore
|
biopython_abif_raw.get("SVER1"), # type: ignore
|
||||||
biopython_abif_raw.get("SVER3"), # type: ignore
|
biopython_abif_raw.get("SVER3"), # type: ignore
|
||||||
biopython_abif_raw.get("Tmpr1"), # type: ignore
|
biopython_abif_raw.get("Tmpr1"), # type: ignore
|
||||||
biopython_abif_raw.get("TUBE"), # type: ignore
|
biopython_abif_raw.get("TUBE"), # type: ignore
|
||||||
biopython_abif_raw.get("User") # type: ignore
|
biopython_abif_raw.get("User") # type: ignore
|
||||||
)
|
)
|
||||||
return trace_data
|
return trace_data
|
||||||
|
|
||||||
|
|
||||||
def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
    """Locally align ``query`` against ``reference`` using blastn scoring.

    Args:
        reference: Named sequence passed to the aligner as the target.
        query: Named sequence passed to the aligner as the query.

    Returns:
        A pair of ``NamedString`` objects built from the ``id`` and ``seq``
        attributes of the first and second entries of the chosen alignment's
        ``sequences``.

    Raises:
        IndexError: If the aligner produces no alignment to select from.
    """
    aligner = Align.PairwiseAligner(scoring="blastn")
    aligner.mode = "local"
    alignments = aligner.align(reference.sequence, query.sequence)
    # Take the first alignment directly rather than sorting the whole result:
    # sorting forces every co-optimal alignment to be generated (there can be
    # a very large number of them), yet only ``.sequences`` is read below.
    # NOTE(review): relies on Biopython yielding only equally-scored optimal
    # alignments whose ``.sequences`` are the inputs -- confirm against the
    # pinned Biopython version.
    best_alignment = alignments[0]
    aligned_reference = best_alignment.sequences[0]
    aligned_query = best_alignment.sequences[1]
    return (
        NamedString(aligned_reference.id, aligned_reference.seq),
        NamedString(aligned_query.id, aligned_query.seq),
    )
|
||||||
|
|
||||||
|
|
||||||
async def reference_consensus_assembly(reference: NamedString, sanger_traces: Collection[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
    """Yield the query-side alignment result for each Sanger trace.

    Each trace is aligned against ``reference`` in a worker thread via
    ``asyncio.to_thread``, one trace at a time and in the order given.
    Only the second element of each alignment pair is yielded.

    Args:
        reference: Sequence every trace is aligned against.
        sanger_traces: Traces to align.
    """
    for trace in sanger_traces:
        _, aligned_trace = await asyncio.to_thread(
            _biopython_local_pairwise_alignment, reference, trace
        )
        yield aligned_trace
|
@ -30,15 +30,11 @@ class InstitutPasteurProfiler(MLSTProfiler):
|
|||||||
alelle_id = allele["allele_id"]
|
alelle_id = allele["allele_id"]
|
||||||
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id)
|
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id)
|
||||||
|
|
||||||
async def fetch_mlst_st(self, schema_id: int, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
async def fetch_mlst_st(self, schema_id: int, alleles: AsyncIterable[Allele]) -> MLSTProfile:
|
||||||
uri_path = f"schemes/{schema_id}/designations"
|
uri_path = f"schemes/{schema_id}/designations"
|
||||||
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
||||||
if isinstance(alleles, AsyncIterable):
|
async for allele in alleles:
|
||||||
async for allele in alleles:
|
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
||||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
|
||||||
else:
|
|
||||||
for allele in alleles:
|
|
||||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
|
||||||
response = await self._http_client.post(uri_path, json={
|
response = await self._http_client.post(uri_path, json={
|
||||||
"designations": allele_request_dict
|
"designations": allele_request_dict
|
||||||
})
|
})
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from contextlib import AbstractAsyncContextManager
|
from contextlib import AbstractAsyncContextManager
|
||||||
from typing import AsyncGenerator, AsyncIterable, Generator, Iterable, Mapping, Union
|
from typing import Any, AsyncGenerator, AsyncIterable, Generator, Iterable, Mapping, Union
|
||||||
|
|
||||||
from aiohttp import ClientSession
|
from aiohttp import ClientSession
|
||||||
|
|
||||||
|
@ -16,7 +16,8 @@ async def test_profiling_results_in_exact_matches_when_exact():
|
|||||||
assert len(targets_left) == 0
|
assert len(targets_left) == 0
|
||||||
|
|
||||||
async def test_profiling_results_in_correct_st():
|
async def test_profiling_results_in_correct_st():
|
||||||
dummy_alleles = [
|
async def dummy_allele_generator():
|
||||||
|
dummy_alleles = [
|
||||||
Allele("adk", "1"),
|
Allele("adk", "1"),
|
||||||
Allele("fumC", "1"),
|
Allele("fumC", "1"),
|
||||||
Allele("glyA", "1"),
|
Allele("glyA", "1"),
|
||||||
@ -24,9 +25,11 @@ async def test_profiling_results_in_correct_st():
|
|||||||
Allele("icd", "1"),
|
Allele("icd", "1"),
|
||||||
Allele("pepA", "1"),
|
Allele("pepA", "1"),
|
||||||
Allele("pgm", "1"),
|
Allele("pgm", "1"),
|
||||||
]
|
]
|
||||||
|
for dummy_allele in dummy_alleles:
|
||||||
|
yield dummy_allele
|
||||||
async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
|
async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
|
||||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(3, dummy_alleles)
|
mlst_st_data = await dummy_profiler.fetch_mlst_st(3, dummy_allele_generator())
|
||||||
assert mlst_st_data is not None
|
assert mlst_st_data is not None
|
||||||
assert isinstance(mlst_st_data, MLSTProfile)
|
assert isinstance(mlst_st_data, MLSTProfile)
|
||||||
assert mlst_st_data.clonal_complex == "ST-2 complex"
|
assert mlst_st_data.clonal_complex == "ST-2 complex"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
import asyncio
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
from automlst.engine.data.mlst import Allele, MLSTProfile
|
from automlst.engine.data.mlst import Allele, MLSTProfile
|
||||||
from automlst.engine.remote.databases.institutpasteur.mlst import InstitutPasteurProfiler
|
|
||||||
from automlst.engine.remote.databases.pubmlst.mlst import PubMLSTProfiler
|
from automlst.engine.remote.databases.pubmlst.mlst import PubMLSTProfiler
|
||||||
|
|
||||||
|
|
||||||
@ -24,25 +24,28 @@ async def test_profiling_results_in_exact_matches_when_exact():
|
|||||||
assert len(dummy_alleles) == 0
|
assert len(dummy_alleles) == 0
|
||||||
|
|
||||||
async def test_profiling_results_in_correct_st():
    """Check that a fixed allele set resolves to ST 3 in the "ST-3 complex".

    The alleles are fed to ``fetch_mlst_st`` through an async generator,
    against scheme 1 of the ``pubmlst_hinfluenzae_seqdef`` database.
    """
    locus_variants = (
        ("adk", "1"),
        ("atpG", "1"),
        ("frdB", "1"),
        ("fucK", "1"),
        ("mdh", "1"),
        ("pgi", "1"),
        ("recA", "5"),
    )

    async def generate_dummy_targets():
        # Emit the alleles in the fixed order listed above.
        for locus, variant in locus_variants:
            yield Allele(locus, variant)

    async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
        mlst_st_data = await dummy_profiler.fetch_mlst_st(1, generate_dummy_targets())
        assert mlst_st_data is not None
        assert isinstance(mlst_st_data, MLSTProfile)
        assert mlst_st_data.clonal_complex == "ST-3 complex"
        assert mlst_st_data.sequence_type == "3"
|
||||||
|
|
||||||
async def test_sequence_profiling_is_correct():
    """Check that profiling a FASTA sequence string yields an ``MLSTProfile``.

    The sequence is read from ``tests/resources/FDAARGOS_1560.fasta`` and
    profiled against scheme 1 of the ``pubmlst_hinfluenzae_seqdef`` database.
    """
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as dummy_profiler:
        profile = await dummy_profiler.profile_string(1, sequence)
        assert profile is not None
        assert isinstance(profile, MLSTProfile)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user