Added predetermined headers feature
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good

This commit is contained in:
Harrison Deng 2025-03-14 14:26:43 +00:00
parent 34bf02c75a
commit f8d92a4aad
5 changed files with 27 additions and 7 deletions

View File

@ -1,5 +1,6 @@
{ {
"recommendations": [ "recommendations": [
"piotrpalarz.vscode-gitignore-generator" "piotrpalarz.vscode-gitignore-generator",
"gruntfuggly.todo-tree"
] ]
} }

View File

@ -251,6 +251,16 @@ class BIGSdbIndex(AbstractAsyncContextManager):
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler: async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id) return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}"
async with self._http_client.get(uri_path) as response:
response_json = await response.json()
loci = response_json["loci"]
results = []
for locus in loci:
results.append(path.basename(locus))
return results
async def close(self): async def close(self):
await self._http_client.close() await self._http_client.close()

View File

@ -1,7 +1,7 @@
from collections import defaultdict from collections import defaultdict
import csv import csv
from os import PathLike from os import PathLike
from typing import AsyncIterable, Collection, Mapping, Sequence, Union from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
result[locus] = tuple(result[locus]) # type: ignore result[locus] = tuple(result[locus]) # type: ignore
return dict(result) return dict(result)
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]: async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
failed = list() failed = list()
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
header = None header = None
@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named
continue continue
allele_mapping = alleles_to_text_map(mlst_profile.alleles) allele_mapping = alleles_to_text_map(mlst_profile.alleles)
if writer is None: if writer is None:
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())] header = ["id", "st", "clonal-complex", *sorted(allele_names)]
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)
writer.writeheader() writer.writeheader()
row_dictionary = { row_dictionary = {

View File

@ -222,3 +222,12 @@ class TestBIGSdbIndex:
assert isinstance(profile, MLSTProfile) assert isinstance(profile, MLSTProfile)
assert profile.clonal_complex == "ST-2 complex" assert profile.clonal_complex == "ST-2 complex"
assert profile.sequence_type == "1" assert profile.sequence_type == "1"
@pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
])
async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
async with BIGSdbIndex() as bigsdb_index:
loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
assert set(loci) == set(expected)

View File

@ -27,7 +27,7 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
dummy_profiles = [dummy_alphabet_mlst_profile] dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv") output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle: with open(output_path) as csv_handle:
csv_reader = reader(csv_handle) csv_reader = reader(csv_handle)
lines = list(csv_reader) lines = list(csv_reader)
@ -38,7 +38,7 @@ async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_a
dummy_profiles = [dummy_alphabet_mlst_profile] dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv") output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle: with open(output_path) as csv_handle:
csv_reader = reader(csv_handle) csv_reader = reader(csv_handle)
lines = list(csv_reader) lines = list(csv_reader)
@ -63,7 +63,7 @@ async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_p
dummy_profiles = [dummy_alphabet_mlst_profile] dummy_profiles = [dummy_alphabet_mlst_profile]
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
output_path = path.join(temp_dir, "out.csv") output_path = path.join(temp_dir, "out.csv")
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path) await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
with open(output_path) as csv_handle: with open(output_path) as csv_handle:
csv_reader = reader(csv_handle) csv_reader = reader(csv_handle)
lines = list(csv_reader) lines = list(csv_reader)