Compare commits
20 Commits
Author | SHA1 | Date | |
---|---|---|---|
bfc286e6b0 | |||
a88225fcff | |||
c18d817cd9 | |||
f462e6d5e0 | |||
f75707e4fe | |||
341ca933a3 | |||
3e3898334f | |||
ba1f0aa318 | |||
6d0157581f | |||
4bcbfa0c6a | |||
ca0f9673b0 | |||
5048fa8057 | |||
39125c848e | |||
744a6c2009 | |||
1773bb9dcb | |||
1372141b57 | |||
677c5e1aa8 | |||
53e74af20a | |||
ade2f3b845 | |||
c2c6d0b016 |
4
.vscode/launch.json
vendored
4
.vscode/launch.json
vendored
@@ -6,10 +6,10 @@
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "automlst info -lschema pubmlst_bordetella_seqdef",
|
||||
"name": "autobigs info -lschema pubmlst_bordetella_seqdef",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/src/automlst/cli/program.py",
|
||||
"program": "${workspaceFolder}/src/autobigs/cli/program.py",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"info",
|
||||
|
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@@ -33,10 +33,10 @@ pipeline {
|
||||
parallel {
|
||||
stage ("git.reslate.systems") {
|
||||
environment {
|
||||
TOKEN = credentials('git.reslate.systems')
|
||||
CREDS = credentials('username-password-rs-git')
|
||||
}
|
||||
steps {
|
||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
}
|
||||
}
|
||||
stage ("pypi.org") {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# autoMLST.Engine
|
||||
# autoBIGS.Engine
|
||||
|
||||
A python library implementing common BIGSdb MLST schemes and databases. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
|
||||
|
||||
@@ -18,8 +18,8 @@ Furthermore, this library is highly asynchronous where any potentially blocking
|
||||
|
||||
This library can be installed through pip. Learn how to [setup and install pip first](https://pip.pypa.io/en/stable/installation/).
|
||||
|
||||
Then, it's as easy as running `pip install automlst-engine` in any terminal that has pip in it's path (any terminal where `pip --version` returns a valid version and install path).
|
||||
Then, it's as easy as running `pip install autobigs-engine` in any terminal that has pip in it's path (any terminal where `pip --version` returns a valid version and install path).
|
||||
|
||||
### CLI usage
|
||||
|
||||
This is a independent python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this libraries functions. Alternatively, you may use `automlst-cli`, a `Python` package that implements a CLI for calling this library.
|
||||
This is a independent python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this libraries functions. Alternatively, you may use `autobigs-cli`, a `Python` package that implements a CLI for calling this library.
|
@@ -3,17 +3,22 @@ requires = ["setuptools>=64", "setuptools_scm>=8"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "automlst.engine"
|
||||
name = "autoBIGS.engine"
|
||||
dynamic = ["version"]
|
||||
readme = "README.md"
|
||||
|
||||
dependencies = [
|
||||
"biopython",
|
||||
"aiohttp[speedups]",
|
||||
"biopython==1.85",
|
||||
"aiohttp[speedups]==3.11.*",
|
||||
]
|
||||
requires-python = ">=3.11"
|
||||
requires-python = ">=3.12"
|
||||
description = "A library to rapidly fetch fetch MLST profiles given sequences for various diseases."
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/RealYHD/autoBIGS.engine"
|
||||
Source = "https://github.com/RealYHD/autoBIGS.engine"
|
||||
Issues = "https://github.com/RealYHD/autoBIGS.engine/issues"
|
||||
|
||||
[tool.setuptools_scm]
|
||||
|
||||
[tool.pyright]
|
||||
|
@@ -1,5 +1,5 @@
|
||||
aiohttp[speedups]
|
||||
biopython
|
||||
aiohttp[speedups]==3.11.*
|
||||
biopython==1.85
|
||||
pytest
|
||||
pytest-asyncio
|
||||
build
|
||||
|
@@ -2,7 +2,7 @@ import csv
|
||||
from os import PathLike
|
||||
from typing import AsyncIterable, Mapping, Sequence, Union
|
||||
|
||||
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
|
||||
|
||||
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
|
||||
@@ -28,14 +28,14 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
|
||||
failed.append(name)
|
||||
continue
|
||||
if writer is None:
|
||||
header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
|
||||
header = ["id", "st", "clonal-complex", *sorted(mlst_profile.alleles.keys())]
|
||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||
writer.writeheader()
|
||||
row_dictionary = {
|
||||
"st": mlst_profile.sequence_type,
|
||||
"clonal-complex": mlst_profile.clonal_complex,
|
||||
"id": name,
|
||||
**dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
|
||||
**mlst_profile.alleles
|
||||
}
|
||||
writer.writerow(rowdict=row_dictionary)
|
||||
return failed
|
@@ -3,7 +3,7 @@ from io import TextIOWrapper
|
||||
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
|
||||
from automlst.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
|
||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
|
||||
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
@@ -1,16 +1,43 @@
|
||||
from abc import abstractmethod
|
||||
from collections import defaultdict
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
from numbers import Number
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, Iterable, Mapping, Sequence, Union
|
||||
import csv
|
||||
from os import path
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
|
||||
from automlst.engine.data.structures.genomics import NamedString
|
||||
from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
|
||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
||||
from autobigs.engine.data.local.fasta import read_fasta
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.mlst import Allele, NamedMLSTProfile, PartialAllelicMatchProfile, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
||||
|
||||
from Bio.Align import PairwiseAligner
|
||||
|
||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
|
||||
@abstractmethod
|
||||
def fetch_mlst_allele_variants(self, sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def fetch_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def profile_string(self, sequence_strings: Iterable[str]) -> MLSTProfile:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def profile_multiple_strings(self, named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def close(self):
|
||||
pass
|
||||
|
||||
class OnlineBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
|
||||
def __init__(self, database_api: str, database_name: str, schema_id: int):
|
||||
self._database_name = database_name
|
||||
self._schema_id = schema_id
|
||||
@@ -20,85 +47,86 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def fetch_mlst_allele_variants(self, sequence_string: str, exact: bool) -> AsyncGenerator[Allele, Any]:
|
||||
async def fetch_mlst_allele_variants(self, sequence_strings: Union[Iterable[str], str]) -> AsyncGenerator[Allele, Any]:
|
||||
# See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
|
||||
uri_path = "sequence"
|
||||
response = await self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string,
|
||||
"partial_matches": not exact
|
||||
})
|
||||
sequence_response: dict = await response.json()
|
||||
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
elif "partial_matches" in sequence_response:
|
||||
if exact:
|
||||
raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = PartialAllelicMatchProfile(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
bitscore=float(partial_match["bitscore"]),
|
||||
gaps=int(partial_match["gaps"])
|
||||
)
|
||||
yield Allele(
|
||||
allele_loci=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
|
||||
if isinstance(sequence_strings, str):
|
||||
sequence_strings = [sequence_strings]
|
||||
|
||||
for sequence_string in sequence_strings:
|
||||
async with self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string,
|
||||
"partial_matches": True
|
||||
}) as response:
|
||||
sequence_response: dict = await response.json()
|
||||
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
yield Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
elif "partial_matches" in sequence_response:
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = PartialAllelicMatchProfile(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
gaps=int(partial_match["gaps"])
|
||||
)
|
||||
yield Allele(
|
||||
allele_locus=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
|
||||
|
||||
async def fetch_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
||||
uri_path = "designations"
|
||||
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
||||
if isinstance(alleles, AsyncIterable):
|
||||
async for allele in alleles:
|
||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
||||
allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
|
||||
else:
|
||||
for allele in alleles:
|
||||
allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
|
||||
allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
|
||||
request_json = {
|
||||
"designations": allele_request_dict
|
||||
}
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json: dict = await response.json()
|
||||
allele_map: dict[str, list[Allele]] = defaultdict(list)
|
||||
allele_map: dict[str, Allele] = {}
|
||||
response_json.setdefault("fields", dict())
|
||||
schema_fields_returned: dict[str, str] = response_json["fields"]
|
||||
schema_fields_returned.setdefault("ST", "unknown")
|
||||
schema_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
schema_exact_matches: dict = response_json["exact_matches"]
|
||||
for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
|
||||
for exact_match_allele in exact_match_alleles:
|
||||
allele_map[exact_match_loci].append(Allele(exact_match_loci, exact_match_allele["allele_id"], None))
|
||||
for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
|
||||
if len(exact_match_alleles) > 1:
|
||||
raise ValueError(f"Unexpected number of alleles returned for exact match (Expected 1, retrieved {len(exact_match_alleles)})")
|
||||
allele_map[exact_match_locus] = Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None)
|
||||
if len(allele_map) == 0:
|
||||
raise ValueError("Passed in no alleles.")
|
||||
return MLSTProfile(dict(allele_map), schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
|
||||
|
||||
async def profile_string(self, string: str, exact: bool = False) -> MLSTProfile:
|
||||
alleles = self.fetch_mlst_allele_variants(string, exact)
|
||||
async def profile_string(self, sequence_strings: Iterable[str]) -> MLSTProfile:
|
||||
alleles = self.fetch_mlst_allele_variants(sequence_strings)
|
||||
return await self.fetch_mlst_st(alleles)
|
||||
|
||||
|
||||
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], exact: bool = False, stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
|
||||
async for named_string in namedStrings:
|
||||
try:
|
||||
yield (named_string.name, await self.profile_string(named_string.sequence, exact))
|
||||
except NoBIGSdbMatchesException as e:
|
||||
if stop_on_fail:
|
||||
raise e
|
||||
yield (named_string.name, None)
|
||||
async def profile_multiple_strings(self, named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
|
||||
async for named_strings in named_string_groups:
|
||||
for named_string in named_strings:
|
||||
try:
|
||||
yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence])))
|
||||
except NoBIGSdbMatchesException as e:
|
||||
if stop_on_fail:
|
||||
raise e
|
||||
yield NamedMLSTProfile(named_string.name, None)
|
||||
|
||||
async def close(self):
|
||||
await self._http_client.close()
|
||||
@@ -155,8 +183,8 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
self._seqdefdb_schemas[seqdef_db_name] = schema_descriptions
|
||||
return self._seqdefdb_schemas[seqdef_db_name] # type: ignore
|
||||
|
||||
async def build_profiler_from_seqdefdb(self, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
|
||||
return BIGSdbMLSTProfiler(await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
|
||||
async def build_profiler_from_seqdefdb(self, dbseqdef_name: str, schema_id: int) -> OnlineBIGSdbMLSTProfiler:
|
||||
return OnlineBIGSdbMLSTProfiler(await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
|
||||
|
||||
async def close(self):
|
||||
await self._http_client.close()
|
@@ -5,17 +5,21 @@ from typing import Mapping, Sequence, Union
|
||||
class PartialAllelicMatchProfile:
|
||||
percent_identity: float
|
||||
mismatches: int
|
||||
bitscore: float
|
||||
gaps: int
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Allele:
|
||||
allele_loci: str
|
||||
allele_locus: str
|
||||
allele_variant: str
|
||||
partial_match_profile: Union[None, PartialAllelicMatchProfile]
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MLSTProfile:
|
||||
alleles: Mapping[str, Sequence[Allele]]
|
||||
alleles: Mapping[str, Allele]
|
||||
sequence_type: str
|
||||
clonal_complex: str
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NamedMLSTProfile:
|
||||
name: str
|
||||
mlst_profile: Union[None, MLSTProfile]
|
44
tests/autobigs/engine/data/local/test_csv.py
Normal file
44
tests/autobigs/engine/data/local/test_csv.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from typing import AsyncIterable, Iterable
|
||||
from autobigs.engine.data.local.csv import dict_loci_alleles_variants_from_loci, write_mlst_profiles_as_csv
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
import tempfile
|
||||
from csv import reader
|
||||
from os import path
|
||||
|
||||
async def iterable_to_asynciterable(iterable: Iterable):
|
||||
for iterated in iterable:
|
||||
yield iterated
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, str)
|
||||
assert variant == "1"
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None), Allele("adk", "2", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, list)
|
||||
assert len(variant) == 2
|
||||
|
||||
async def test_column_order_is_same_as_expected_file():
|
||||
dummy_profiles = [("test_1", MLSTProfile({
|
||||
"A": Allele("A", "1", None),
|
||||
"D": Allele("D", "1", None),
|
||||
"B": Allele("B", "1", None),
|
||||
"C": Allele("C", "1", None)
|
||||
}, "mysterious", "very mysterious"))]
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
output_path = path.join(temp_dir, "out.csv")
|
||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
||||
with open(output_path) as csv_handle:
|
||||
csv_reader = reader(csv_handle)
|
||||
lines = list(csv_reader)
|
||||
target_columns = lines[4:]
|
||||
assert target_columns == sorted(target_columns)
|
@@ -1,4 +1,4 @@
|
||||
from automlst.engine.data.local.fasta import read_fasta
|
||||
from autobigs.engine.data.local.fasta import read_fasta
|
||||
|
||||
|
||||
async def test_fasta_reader_not_none():
|
@@ -3,10 +3,10 @@ import re
|
||||
from typing import Collection, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
import pytest
|
||||
from automlst.engine.data.structures.genomics import NamedString
|
||||
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex, OnlineBIGSdbMLSTProfiler
|
||||
|
||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
||||
rand = random.Random(gene)
|
||||
@@ -20,19 +20,19 @@ def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet:
|
||||
|
||||
async def test_institutpasteur_profiling_results_in_exact_matches_when_exact():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
targets_left = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
|
||||
async for exact_match in dummy_profiler.fetch_mlst_allele_variants(sequence_string=sequence, exact=True):
|
||||
async for exact_match in dummy_profiler.fetch_mlst_allele_variants(sequence_strings=[sequence]):
|
||||
assert isinstance(exact_match, Allele)
|
||||
assert exact_match.allele_variant == '1' # All of Tohama I has allele id I
|
||||
targets_left.remove(exact_match.allele_loci)
|
||||
targets_left.remove(exact_match.allele_locus)
|
||||
|
||||
assert len(targets_left) == 0
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_non_exact_returns_non_exact():
|
||||
sequences = list(SeqIO.parse("tests/resources/tohama_I_bpertussis_coding.fasta", "fasta"))
|
||||
mlst_targets = {"adk", "fumc", "glya", "tyrb", "icd", "pepa", "pgm"}
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
|
||||
for sequence in sequences:
|
||||
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", sequence.description)
|
||||
if match is None:
|
||||
@@ -41,7 +41,7 @@ async def test_institutpasteur_sequence_profiling_non_exact_returns_non_exact():
|
||||
if gene.lower() not in mlst_targets:
|
||||
continue
|
||||
scrambled = gene_scrambler(str(sequence.seq), 0.125)
|
||||
async for partial_match in profiler.fetch_mlst_allele_variants(scrambled, False):
|
||||
async for partial_match in profiler.fetch_mlst_allele_variants(scrambled):
|
||||
assert partial_match.partial_match_profile is not None
|
||||
mlst_targets.remove(gene.lower())
|
||||
|
||||
@@ -60,7 +60,7 @@ async def test_institutpasteur_profiling_results_in_correct_mlst_st():
|
||||
]
|
||||
for dummy_allele in dummy_alleles:
|
||||
yield dummy_allele
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(dummy_allele_generator())
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
@@ -77,7 +77,7 @@ async def test_institutpasteur_profiling_non_exact_results_in_list_of_mlsts():
|
||||
Allele("pepA", "1", None),
|
||||
Allele("pgm", "5", None),
|
||||
]
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
mlst_profile = await dummy_profiler.fetch_mlst_st(dummy_alleles)
|
||||
assert mlst_profile.clonal_complex == "unknown"
|
||||
assert mlst_profile.sequence_type == "unknown"
|
||||
@@ -85,7 +85,7 @@ async def test_institutpasteur_profiling_non_exact_results_in_list_of_mlsts():
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_is_correct():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
profile = await dummy_profiler.profile_string(sequence)
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
@@ -104,8 +104,8 @@ async def test_pubmlst_profiling_results_in_exact_matches_when_exact():
|
||||
Allele("recA", "5", None),
|
||||
}
|
||||
sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
exact_matches = dummy_profiler.fetch_mlst_allele_variants(sequence_string=sequence, exact=True)
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
exact_matches = dummy_profiler.fetch_mlst_allele_variants(sequence_strings=sequence)
|
||||
async for exact_match in exact_matches:
|
||||
assert isinstance(exact_match, Allele)
|
||||
dummy_alleles.remove(exact_match)
|
||||
@@ -125,7 +125,7 @@ async def test_pubmlst_profiling_results_in_correct_st():
|
||||
]
|
||||
for dummy_allele in dummy_alleles:
|
||||
yield dummy_allele
|
||||
async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(generate_dummy_targets())
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
@@ -134,7 +134,7 @@ async def test_pubmlst_profiling_results_in_correct_st():
|
||||
|
||||
async def test_pubmlst_sequence_profiling_is_correct():
|
||||
sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
profile = await dummy_profiler.profile_string(sequence)
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
@@ -167,9 +167,10 @@ async def test_bigsdb_profile_multiple_strings_same_string_twice():
|
||||
dummy_sequences = [NamedString("seq1", sequence), NamedString("seq2", sequence)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield dummy_sequence
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences()):
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences()):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
@@ -180,14 +181,17 @@ async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop():
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield dummy_sequence
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), True):
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), True):
|
||||
name, profile = name_profile.name, name_profile.mlst_profile
|
||||
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
if name == "should_fail":
|
||||
assert profile is None
|
||||
assert profile.clonal_complex == "unknown"
|
||||
assert profile.sequence_type == "unknown"
|
||||
else:
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
@@ -196,9 +200,10 @@ async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop():
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield dummy_sequence
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), False):
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), False):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
if name == "should_fail":
|
||||
assert profile is not None
|
||||
assert profile.clonal_complex == "unknown"
|
||||
@@ -210,24 +215,6 @@ async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop():
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_fail_second_stop():
|
||||
valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
invalid_seq = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", invalid_seq), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield dummy_sequence
|
||||
async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
with pytest.raises(NoBIGSdbMatchesException):
|
||||
async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), exact=True, stop_on_fail=True):
|
||||
if name == "should_fail":
|
||||
pytest.fail("Exception should have been thrown, no exception was thrown.")
|
||||
else:
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_index_get_schemas_for_bordetella():
|
||||
async with BIGSdbIndex() as index:
|
||||
schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
@@ -1,21 +0,0 @@
|
||||
from automlst.engine.data.local.csv import dict_loci_alleles_variants_from_loci
|
||||
from automlst.engine.data.structures.mlst import Allele
|
||||
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, str)
|
||||
assert variant == "1"
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None), Allele("adk", "2", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, list)
|
||||
assert len(variant) == 2
|
Reference in New Issue
Block a user