From 9b8e448512a499d102176719a48451d585adc50e Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Thu, 9 Jan 2025 07:39:18 +0000 Subject: [PATCH] Completed updating CLI to be more organized with better error messages --- .devcontainer/devcontainer.json | 9 ++- .vscode/launch.json | 8 ++- src/automlst/cli/exactmatch.py | 48 ++++++++++++++++ src/automlst/cli/info.py | 47 ++++++++-------- src/automlst/cli/meta.py | 2 + src/automlst/cli/profile.py | 55 ------------------- src/automlst/cli/program.py | 7 ++- src/automlst/engine/local/csv.py | 2 +- .../engine/remote/databases/bigsdb.py | 16 ++++-- 9 files changed, 107 insertions(+), 87 deletions(-) create mode 100644 src/automlst/cli/exactmatch.py create mode 100644 src/automlst/cli/meta.py delete mode 100644 src/automlst/cli/profile.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ce3acb4..c58d0e4 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -12,7 +12,14 @@ // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "pip3 install --user -r requirements.txt" + "postCreateCommand": "pip3 install --user -r requirements.txt", + "customizations": { + "vscode": { + "extensions": [ + "mechatroner.rainbow-csv" + ] + } + } // Configure tool-specific properties. 
// "customizations": {}, diff --git a/.vscode/launch.json b/.vscode/launch.json index 15a17f7..308d5b0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,12 +6,16 @@ "configurations": [ { - "name": "CLI blank", + "name": "automlst info -lschema pubmlst_bordetella_seqdef", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/src/automlst/cli/program.py", "console": "integratedTerminal", - "args": [], + "args": [ + "info", + "-lschemas", + "pubmlst_bordetella_seqdef" + ], "cwd": "${workspaceFolder}/src", "env": { "PYTHONPATH": "${workspaceFolder}/src" diff --git a/src/automlst/cli/exactmatch.py b/src/automlst/cli/exactmatch.py new file mode 100644 index 0000000..304c75f --- /dev/null +++ b/src/automlst/cli/exactmatch.py @@ -0,0 +1,48 @@ + +from argparse import ArgumentParser +import asyncio +import datetime +from automlst.engine.local.csv import write_mlst_profiles_as_csv +from automlst.engine.local.fasta import read_multiple_fastas +from automlst.engine.remote.databases.bigsdb import BIGSdbIndex + + +def setup_parser(parser: ArgumentParser): + parser.description = "Returns MLST exact profile matches." + parser.add_argument( + "fastas", + nargs="+", + action='extend', + default=[], + type=str, + help="The FASTA files to process. Multiple can be listed." + ) + + parser.add_argument( + "seqdefdb", + help="The BIGSdb seqdef database to use for typing." + ) + + parser.add_argument( + "schema", + type=int, + help="The BIGSdb seqdef database schema ID (integer) to use for typing." + ) + + parser.add_argument( + "out", + default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}', + help="The output CSV name (.csv will be appended)." 
+ ) + parser.set_defaults(func=run_asynchronously) + +async def run(args): + async with BIGSdbIndex() as bigsdb_index: + gen_strings = read_multiple_fastas(args.fastas) + async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler: + mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings) + await write_mlst_profiles_as_csv(mlst_profiles, args.out) + +def run_asynchronously(args): + asyncio.run(run(args)) + diff --git a/src/automlst/cli/info.py b/src/automlst/cli/info.py index 26cd65f..20a4e9b 100644 --- a/src/automlst/cli/info.py +++ b/src/automlst/cli/info.py @@ -1,36 +1,38 @@ +from argparse import ArgumentParser import asyncio -from automlst.cli import program from automlst.engine.remote.databases.bigsdb import BIGSdbIndex -parser = program.subparsers.add_parser(__name__) +def setup_parser(parser: ArgumentParser): + parser.description = "Fetches the latest BIGSdb MLST database definitions." + parser.usage = "test" + parser.add_argument( + "--retrieve-bigsdbs", "-l", + action="store_true", + dest="list_dbs", + required=False, + default=False, + help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)." + ) -parser.add_argument( - "--retrieve-bigsdbs", "-l", - action="store_true", - dest="list_dbs", - required=False, - default=False, - type=bool, - help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)." -) + parser.add_argument( + "--retrieve-bigsdb-schemas", "-lschemas", + nargs="+", + action="extend", + dest="list_bigsdb_schemas", + required=False, + default=[], + type=str, + help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given." 
+ ) -parser.add_argument( - "--retrieve-bigsdb-schemas", "-lschemas", - nargs="+", - action="extend", - dest="list_bigsdb_schemas", - required=False, - default=[], - type=str, - help="Lists the known schema IDs for a given BIGSdb sequence definition database name" -) + parser.set_defaults(func=run_asynchronously) async def run(args): async with BIGSdbIndex() as bigsdb_index: if args.list_dbs: known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False) - print(", ".join(known_seqdef_dbs.keys())) + print("\n".join(known_seqdef_dbs.keys())) for bigsdb_schema_name in args.list_bigsdb_schemas: schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name) @@ -40,4 +42,3 @@ async def run(args): def run_asynchronously(args): asyncio.run(run(args)) -parser.set_defaults(func=run_asynchronously) diff --git a/src/automlst/cli/meta.py b/src/automlst/cli/meta.py new file mode 100644 index 0000000..d45096d --- /dev/null +++ b/src/automlst/cli/meta.py @@ -0,0 +1,2 @@ +def get_module_base_name(name): + return name.split(".")[-1] diff --git a/src/automlst/cli/profile.py b/src/automlst/cli/profile.py deleted file mode 100644 index 1abc966..0000000 --- a/src/automlst/cli/profile.py +++ /dev/null @@ -1,55 +0,0 @@ - -import asyncio -import datetime -from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence, Union -from automlst.cli import program -from automlst.engine.data.genomics import NamedString -from automlst.engine.data.mlst import MLSTProfile -from automlst.engine.local.abif import read_abif, reference_consensus_assembly -from automlst.engine.local.csv import write_mlst_profiles_as_csv -from automlst.engine.local.fasta import read_fasta, read_multiple_fastas -from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BigSDBMLSTProfiler - - -parser = program.subparsers.add_parser(__name__) - -parser.add_argument( - "--fasta", "-fa", "-fst", - nargs="+", - action='extend', - dest="fastas", - required=False, - default=[], - type=str, - 
help="The FASTA files to process. Multiple can be listed." -) - -parser.add_argument( - "seqdefdb", - help="The BIGSdb seqdef database to use for typing." -) - -parser.add_argument( - "schema", - type=int, - help="The BIGSdb seqdef database schema ID (integer) to use for typing." -) - -parser.add_argument( - "out", - default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}', - help="The output CSV name (.csv will be appended)." -) - - -async def run(args): - async with BIGSdbIndex() as bigsdb_index: - gen_strings = read_multiple_fastas(args.fastas) - async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler: - mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings) - await write_mlst_profiles_as_csv(mlst_profiles, args.out) - -def run_asynchronously(args): - asyncio.run(run(args)) - -parser.set_defaults(func=run_asynchronously) diff --git a/src/automlst/cli/program.py b/src/automlst/cli/program.py index e3e7a5e..0357144 100644 --- a/src/automlst/cli/program.py +++ b/src/automlst/cli/program.py @@ -4,6 +4,8 @@ import datetime from os import path import os +from automlst.cli import exactmatch, info +from automlst.cli.meta import get_module_base_name from automlst.engine.data.genomics import NamedString from automlst.engine.local.abif import read_abif from automlst.engine.local.csv import write_mlst_profiles_as_csv @@ -13,10 +15,13 @@ from automlst.engine.remote.databases.bigsdb import BIGSdbIndex root_parser = argparse.ArgumentParser() subparsers = root_parser.add_subparsers(required=True) +info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__))) +exactmatch.setup_parser(subparsers.add_parser(get_module_base_name(exactmatch.__name__))) + + def run(): args = root_parser.parse_args() args.func(args) - if __name__ == "__main__": run() \ No newline at end of file diff --git a/src/automlst/engine/local/csv.py b/src/automlst/engine/local/csv.py index 2d88304..2340b6a 100644 --- 
a/src/automlst/engine/local/csv.py +++ b/src/automlst/engine/local/csv.py @@ -21,7 +21,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple writer: Union[csv.DictWriter, None] = None async for name, mlst_profile in mlst_profiles_iterable: if writer is None: - header = ["st", "clonal-complex", "id", *mlst_profile.alleles.keys()] + header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()] writer = csv.DictWriter(filehandle, fieldnames=header) writer.writeheader() row_dictionary = { diff --git a/src/automlst/engine/remote/databases/bigsdb.py b/src/automlst/engine/remote/databases/bigsdb.py index c2db02a..d523b22 100644 --- a/src/automlst/engine/remote/databases/bigsdb.py +++ b/src/automlst/engine/remote/databases/bigsdb.py @@ -10,7 +10,9 @@ from automlst.engine.data.mlst import Allele, MLSTProfile class BigSDBMLSTProfiler(AbstractAsyncContextManager): def __init__(self, database_api: str, database_name: str, schema_id: int): - self._base_url = f"{database_api}/db/{database_name}/schemes/{schema_id}/" + self._database_name = database_name + self._schema_id = schema_id + self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/" self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000)) async def __aenter__(self): @@ -26,6 +28,9 @@ class BigSDBMLSTProfiler(AbstractAsyncContextManager): if "exact_matches" not in sequence_response: # TODO throw exception for not finding matches. 
pass + + if "exact_matches" not in sequence_response: + raise ValueError(f"Unable to find exact matches in \"{self._database_name}\" under schema ID \"{self._schema_id}\".") exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"] for allele_loci, alleles in exact_matches.items(): for allele in alleles: @@ -100,12 +105,15 @@ class BIGSdbIndex(AbstractAsyncContextManager): return self._known_seqdef_dbs_origin async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str: - return (await self.get_known_seqdef_dbs())[seqdef_db_name] + known_databases = await self.get_known_seqdef_dbs() + if seqdef_db_name not in known_databases: + raise ValueError(f"The database \"{seqdef_db_name}\" could not be found.") + return known_databases[seqdef_db_name] async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]: - if self._seqdefdb_schemas[seqdef_db_name] is not None and not force: + if seqdef_db_name in self._seqdefdb_schemas and not force: return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional - uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/{seqdef_db_name}/schemes" + uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes" async with self._http_client.get(uri_path) as response: response_json = await response.json() schema_descriptions: Mapping[str, int] = dict()