From f8d92a4aade23a6ed276380ac3846da63f6f4c01 Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Fri, 14 Mar 2025 14:26:43 +0000 Subject: [PATCH] Added predetermined headers feature --- .vscode/extensions.json | 3 ++- src/autobigs/engine/analysis/bigsdb.py | 10 ++++++++++ src/autobigs/engine/writing.py | 6 +++--- tests/autobigs/engine/analysis/test_bigsdb.py | 9 +++++++++ tests/autobigs/engine/test_writing.py | 6 +++--- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 1bddac4..5951a02 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,5 +1,6 @@ { "recommendations": [ - "piotrpalarz.vscode-gitignore-generator" + "piotrpalarz.vscode-gitignore-generator", + "gruntfuggly.todo-tree" ] } \ No newline at end of file diff --git a/src/autobigs/engine/analysis/bigsdb.py b/src/autobigs/engine/analysis/bigsdb.py index 42e413f..99776a2 100644 --- a/src/autobigs/engine/analysis/bigsdb.py +++ b/src/autobigs/engine/analysis/bigsdb.py @@ -251,6 +251,16 @@ class BIGSdbIndex(AbstractAsyncContextManager): async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler: return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id) + + async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]: + uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}" + async with self._http_client.get(uri_path) as response: + response_json = await response.json() + loci = response_json["loci"] + results = [] + for locus in loci: + results.append(path.basename(locus)) + return results async def close(self): await self._http_client.close() diff --git a/src/autobigs/engine/writing.py b/src/autobigs/engine/writing.py index 1d19ec6..83efdce 100644 --- a/src/autobigs/engine/writing.py +++ b/src/autobigs/engine/writing.py @@ -1,7 +1,7 @@ from collections import defaultdict import csv from os import PathLike -from typing import AsyncIterable, Collection, Mapping, Sequence, Union +from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile @@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque result[locus] = tuple(result[locus]) # type: ignore return dict(result) -async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]: +async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]: failed = list() with open(handle, "w", newline='') as filehandle: header = None @@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named continue allele_mapping = alleles_to_text_map(mlst_profile.alleles) if writer is None: - header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())] + header = ["id", "st", "clonal-complex", *sorted(allele_names)] writer = csv.DictWriter(filehandle, fieldnames=header) writer.writeheader() row_dictionary = { diff --git a/tests/autobigs/engine/analysis/test_bigsdb.py b/tests/autobigs/engine/analysis/test_bigsdb.py index ffea753..000e7ea 100644 --- a/tests/autobigs/engine/analysis/test_bigsdb.py +++ b/tests/autobigs/engine/analysis/test_bigsdb.py @@ -222,3 +222,12 @@ class TestBIGSdbIndex: assert isinstance(profile, MLSTProfile) assert profile.clonal_complex == "ST-2 complex" assert profile.sequence_type == "1" + + + @pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [ + ("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"]) + ]) + async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected): + async with BIGSdbIndex() as bigsdb_index: + loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id) + assert set(loci) == set(expected) \ No newline at end of file diff --git a/tests/autobigs/engine/test_writing.py b/tests/autobigs/engine/test_writing.py index bd092f6..e6838fe 100644 --- a/tests/autobigs/engine/test_writing.py +++ b/tests/autobigs/engine/test_writing.py @@ -27,7 +27,7 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile dummy_profiles = [dummy_alphabet_mlst_profile] with tempfile.TemporaryDirectory() as temp_dir: output_path = path.join(temp_dir, "out.csv") - await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) + await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"]) with open(output_path) as csv_handle: csv_reader = reader(csv_handle) lines = list(csv_reader) @@ -38,7 +38,7 @@ async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_a dummy_profiles = [dummy_alphabet_mlst_profile] with tempfile.TemporaryDirectory() as temp_dir: output_path = path.join(temp_dir, "out.csv") - await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) + await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"]) with open(output_path) as csv_handle: csv_reader = reader(csv_handle) lines = list(csv_reader) @@ -63,7 +63,7 @@ async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_p dummy_profiles = [dummy_alphabet_mlst_profile] with tempfile.TemporaryDirectory() as temp_dir: output_path = path.join(temp_dir, "out.csv") - await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) + await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"]) with open(output_path) as csv_handle: csv_reader = reader(csv_handle) lines = list(csv_reader)