Compare commits

..

3 Commits

6 changed files with 35 additions and 10 deletions
.vscode
src/autobigs/engine
tests/autobigs/engine

@ -1,5 +1,6 @@
{
"recommendations": [
"piotrpalarz.vscode-gitignore-generator"
"piotrpalarz.vscode-gitignore-generator",
"gruntfuggly.todo-tree"
]
}

@ -15,7 +15,7 @@ from autobigs.engine.reading import read_fasta
from autobigs.engine.structures.alignment import PairwiseAlignment
from autobigs.engine.structures.genomics import NamedString
from autobigs.engine.structures.mlst import Allele, NamedMLSTProfile, AlignmentStats, MLSTProfile
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
from autobigs.engine.exceptions.database import BIGSdbResponseNotOkay, NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
from Bio.Align import PairwiseAligner
@ -99,7 +99,10 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
)
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
else:
if response.status == 200:
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
else:
raise BIGSdbResponseNotOkay(sequence_response)
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
last_error = e
success = False
@ -252,6 +255,16 @@ class BIGSdbIndex(AbstractAsyncContextManager):
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
    """Build an MLST profiler for one scheme of a sequence-definition database.

    The API base for ``dbseqdef_name`` is resolved through
    ``get_bigsdb_api_from_seqdefdb`` and forwarded, together with the
    remaining arguments, to ``get_BIGSdb_MLST_profiler``.
    """
    bigsdb_api = await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)
    profiler = get_BIGSdb_MLST_profiler(local, bigsdb_api, dbseqdef_name, scheme_id)
    return profiler
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
    """Return the locus names that make up a scheme.

    Requests ``{api}/db/{dbseqdef_name}/schemes/{scheme_id}``, where ``api``
    is whatever ``get_bigsdb_api_from_seqdefdb`` returns for the database,
    and reduces every entry of the response's ``"loci"`` list to its final
    path segment.

    Args:
        dbseqdef_name: Name of the sequence-definition database.
        scheme_id: Identifier of the scheme within that database.

    Returns:
        The locus names, in the order the response lists them.

    Raises:
        KeyError: If the decoded response has no ``"loci"`` entry.
    """
    api_base = await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)
    scheme_uri = f"{api_base}/db/{dbseqdef_name}/schemes/{scheme_id}"
    async with self._http_client.get(scheme_uri) as response:
        scheme_json = await response.json()
        # Entries are URIs, not filesystem paths: split on "/" directly and
        # ignore a trailing slash so it cannot produce an empty name.
        return [
            locus_uri.rstrip("/").rsplit("/", 1)[-1]
            for locus_uri in scheme_json["loci"]
        ]
# Shut down this object's HTTP client by awaiting the close() of
# self._http_client. Nothing is returned.
async def close(self):
await self._http_client.close()

@ -3,6 +3,8 @@ from typing import Union
# Common base class for the BIGSdb database/API errors declared in this
# module (BIGSdbResponseNotOkay and NoBIGSdbMatchesException derive from it),
# so callers can catch the whole family with one except clause.
class BIGSDbDatabaseAPIException(Exception):
pass
# Signals a BIGSdb response whose HTTP status was not 200. Adds no state or
# behaviour of its own beyond BIGSDbDatabaseAPIException.
# NOTE(review): the visible raise site in the remote profiler passes the
# response object as the only argument - confirm that is the intended payload.
class BIGSdbResponseNotOkay(BIGSDbDatabaseAPIException):
pass
class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
def __init__(self, database_name: str, database_scheme_id: int, query_name: Union[None, str], *args):

@ -1,7 +1,7 @@
from collections import defaultdict
import csv
from os import PathLike
from typing import AsyncIterable, Collection, Mapping, Sequence, Union
from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
result[locus] = tuple(result[locus]) # type: ignore
return dict(result)
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
failed = list()
with open(handle, "w", newline='') as filehandle:
header = None
@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named
continue
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
if writer is None:
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
header = ["id", "st", "clonal-complex", *sorted(allele_names)]
writer = csv.DictWriter(filehandle, fieldnames=header)
writer.writeheader()
row_dictionary = {

@ -222,3 +222,12 @@ class TestBIGSdbIndex:
assert isinstance(profile, MLSTProfile)
assert profile.clonal_complex == "ST-2 complex"
assert profile.sequence_type == "1"
# Verifies that BIGSdbIndex.get_scheme_loci reports the expected locus names
# for a given sequence-definition database and scheme id.
# NOTE(review): BIGSdbIndex() is built with no arguments, so this presumably
# talks to the live BIGSdb service - confirm before running offline.
@pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
])
async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
async with BIGSdbIndex() as bigsdb_index:
loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
# Compared as sets: the order of the returned loci is not part of the check.
assert set(loci) == set(expected)

@ -27,7 +27,7 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle:
csv_reader = reader(csv_handle)
lines = list(csv_reader)
@ -38,7 +38,7 @@ async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_a
dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle:
csv_reader = reader(csv_handle)
lines = list(csv_reader)
@ -63,7 +63,7 @@ async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_p
dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle:
csv_reader = reader(csv_handle)
lines = list(csv_reader)