Compare commits

...

3 Commits

Author SHA1 Message Date
7384895578 Writing now uses named MLST profile
All checks were successful
automlst.engine/pipeline/head This commit looks good
automlst.engine/pipeline/tag This commit looks good
2025-02-18 16:03:17 +00:00
5a03c7e8d8 Multiple string profiling now respects grouped queries (for non-WGS)
All checks were successful
automlst.engine/pipeline/head This commit looks good
2025-02-18 15:34:18 +00:00
ddf9cde175 Added a license text to pyproject.toml 2025-02-14 20:47:06 +00:00
4 changed files with 23 additions and 16 deletions

View File

@ -13,6 +13,7 @@ dependencies = [
] ]
requires-python = ">=3.12" requires-python = ">=3.12"
description = "A library to rapidly fetch MLST profiles given sequences for various diseases." description = "A library to rapidly fetch MLST profiles given sequences for various diseases."
license = {text = "GPL-3.0-or-later"}
[project.urls] [project.urls]
Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine" Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"

View File

@ -124,13 +124,17 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]: async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
async for named_strings in query_named_string_groups: async for named_strings in query_named_string_groups:
names: list[str] = list()
sequences: list[str] = list()
for named_string in named_strings: for named_string in named_strings:
names.append(named_string.name)
sequences.append(named_string.sequence)
try: try:
yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence]))) yield NamedMLSTProfile("-".join(names), (await self.profile_string(sequences)))
except NoBIGSdbMatchesException as e: except NoBIGSdbMatchesException as e:
if stop_on_fail: if stop_on_fail:
raise e raise e
yield NamedMLSTProfile(named_string.name, None) yield NamedMLSTProfile("-".join(names), None)
async def close(self): async def close(self):
await self._http_client.close() await self._http_client.close()

View File

@ -3,7 +3,7 @@ import csv
from os import PathLike from os import PathLike
from typing import AsyncIterable, Collection, Mapping, Sequence, Union from typing import AsyncIterable, Collection, Mapping, Sequence, Union
from autobigs.engine.structures.mlst import Allele, MLSTProfile from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]: def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
@ -17,12 +17,14 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
result[locus] = tuple(result[locus]) # type: ignore result[locus] = tuple(result[locus]) # type: ignore
return dict(result) return dict(result)
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]: async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
failed = list() failed = list()
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
header = None header = None
writer: Union[csv.DictWriter, None] = None writer: Union[csv.DictWriter, None] = None
async for name, mlst_profile in mlst_profiles_iterable: async for named_mlst_profile in mlst_profiles_iterable:
name = named_mlst_profile.name
mlst_profile = named_mlst_profile.mlst_profile
if mlst_profile is None: if mlst_profile is None:
failed.append(name) failed.append(name)
continue continue

View File

@ -3,7 +3,7 @@ from typing import AsyncIterable, Iterable
import pytest import pytest
from autobigs.engine.structures.alignment import AlignmentStats from autobigs.engine.structures.alignment import AlignmentStats
from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
from autobigs.engine.structures.mlst import Allele, MLSTProfile from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
import tempfile import tempfile
from csv import reader from csv import reader
from os import path from os import path
@ -11,20 +11,20 @@ from os import path
@pytest.fixture @pytest.fixture
def dummy_alphabet_mlst_profile(): def dummy_alphabet_mlst_profile():
return MLSTProfile(( return NamedMLSTProfile("name", MLSTProfile((
Allele("A", "1", None), Allele("A", "1", None),
Allele("D", "1", None), Allele("D", "1", None),
Allele("B", "1", None), Allele("B", "1", None),
Allele("C", "1", None), Allele("C", "1", None),
Allele("C", "2", AlignmentStats(90, 10, 0, 90)) Allele("C", "2", AlignmentStats(90, 10, 0, 90))
), "mysterious", "very mysterious") ), "mysterious", "very mysterious"))
async def iterable_to_asynciterable(iterable: Iterable): async def iterable_to_asynciterable(iterable: Iterable):
for iterated in iterable: for iterated in iterable:
yield iterated yield iterated
async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile): async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile):
dummy_profiles = [("test_1", dummy_alphabet_mlst_profile)] dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv") output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
@ -34,8 +34,8 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
target_columns = lines[4:] target_columns = lines[4:]
assert target_columns == sorted(target_columns) assert target_columns == sorted(target_columns)
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: MLSTProfile): async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.alleles) mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore
expected_mapping = { expected_mapping = {
"A": "1", "A": "1",
"B": "1", "B": "1",