Completed updating CLI to be more organized with better error messages
parent 42d0f56b18
commit 9b8e448512
@@ -12,7 +12,14 @@
 	// "forwardPorts": [],
 
 	// Use 'postCreateCommand' to run commands after the container is created.
-	"postCreateCommand": "pip3 install --user -r requirements.txt"
+	"postCreateCommand": "pip3 install --user -r requirements.txt",
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"mechatroner.rainbow-csv"
+			]
+		}
+	}
 
 	// Configure tool-specific properties.
 	// "customizations": {},
8  .vscode/launch.json  (vendored)
@@ -6,12 +6,16 @@
     "configurations": [
 
         {
-            "name": "CLI blank",
+            "name": "automlst info -lschema pubmlst_bordetella_seqdef",
             "type": "debugpy",
             "request": "launch",
             "program": "${workspaceFolder}/src/automlst/cli/program.py",
             "console": "integratedTerminal",
-            "args": [],
+            "args": [
+                "info",
+                "-lschemas",
+                "pubmlst_bordetella_seqdef"
+            ],
             "cwd": "${workspaceFolder}/src",
             "env": {
                 "PYTHONPATH": "${workspaceFolder}/src"
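For reference, the updated launch configuration is roughly equivalent to running the CLI by hand as follows; the working directory and PYTHONPATH are taken from the settings above:

    $ cd src
    $ PYTHONPATH="$PWD" python3 automlst/cli/program.py info -lschemas pubmlst_bordetella_seqdef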
48  src/automlst/cli/exactmatch.py  Normal file
@@ -0,0 +1,48 @@
+from argparse import ArgumentParser
+import asyncio
+import datetime
+from automlst.engine.local.csv import write_mlst_profiles_as_csv
+from automlst.engine.local.fasta import read_multiple_fastas
+from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
+
+
+def setup_parser(parser: ArgumentParser):
+    parser.description = "Returns MLST exact profile matches."
+    parser.add_argument(
+        "fastas",
+        nargs="+",
+        action='extend',
+        default=[],
+        type=str,
+        help="The FASTA files to process. Multiple can be listed."
+    )
+
+    parser.add_argument(
+        "seqdefdb",
+        help="The BIGSdb seqdef database to use for typing."
+    )
+
+    parser.add_argument(
+        "schema",
+        type=int,
+        help="The BIGSdb seqdef database schema ID (integer) to use for typing."
+    )
+
+    parser.add_argument(
+        "out",
+        default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}',
+        help="The output CSV name (.csv will be appended)."
+    )
+    parser.set_defaults(func=run_asynchronously)
+
+
+async def run(args):
+    async with BIGSdbIndex() as bigsdb_index:
+        gen_strings = read_multiple_fastas(args.fastas)
+        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
+            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
+            await write_mlst_profiles_as_csv(mlst_profiles, args.out)
+
+
+def run_asynchronously(args):
+    asyncio.run(run(args))
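Assuming the same invocation style as the launch configuration above, the new exactmatch subcommand would be run roughly like this; the FASTA file name, the schema ID 3, and the output name are placeholders, and the positional order follows the definitions above (FASTA files, seqdef database, schema ID, output name):

    $ PYTHONPATH="$PWD" python3 automlst/cli/program.py exactmatch \
        isolate1.fasta pubmlst_bordetella_seqdef 3 results

This should write the exact-match MLST profiles to results.csv, per the help text for the out argument.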
@@ -1,36 +1,38 @@
+from argparse import ArgumentParser
 import asyncio
-from automlst.cli import program
 from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
 
 
-parser = program.subparsers.add_parser(__name__)
+def setup_parser(parser: ArgumentParser):
+    parser.description = "Fetches the latest BIGSdb MLST database definitions."
+    parser.usage = "test"
+    parser.add_argument(
+        "--retrieve-bigsdbs", "-l",
+        action="store_true",
+        dest="list_dbs",
+        required=False,
+        default=False,
+        help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)."
+    )
 
-parser.add_argument(
-    "--retrieve-bigsdbs", "-l",
-    action="store_true",
-    dest="list_dbs",
-    required=False,
-    default=False,
-    type=bool,
-    help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)."
-)
+    parser.add_argument(
+        "--retrieve-bigsdb-schemas", "-lschemas",
+        nargs="+",
+        action="extend",
+        dest="list_bigsdb_schemas",
+        required=False,
+        default=[],
+        type=str,
+        help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
+    )
 
-parser.add_argument(
-    "--retrieve-bigsdb-schemas", "-lschemas",
-    nargs="+",
-    action="extend",
-    dest="list_bigsdb_schemas",
-    required=False,
-    default=[],
-    type=str,
-    help="Lists the known schema IDs for a given BIGSdb sequence definition database name"
-)
+    parser.set_defaults(func=run_asynchronously)
 
 
 async def run(args):
     async with BIGSdbIndex() as bigsdb_index:
         if args.list_dbs:
             known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
-            print(", ".join(known_seqdef_dbs.keys()))
+            print("\n".join(known_seqdef_dbs.keys()))
 
         for bigsdb_schema_name in args.list_bigsdb_schemas:
             schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
@@ -40,4 +42,3 @@ async def run(args):
 def run_asynchronously(args):
     asyncio.run(run(args))
 
-parser.set_defaults(func=run_asynchronously)
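This hunk appears to belong to the info subcommand module (it is wired up as info in program.py further down). With the reworked flags, typical invocations would look roughly like the following; the database name matches the one used in the launch configuration, and since the print now joins with "\n", each database name is expected on its own line:

    $ PYTHONPATH="$PWD" python3 automlst/cli/program.py info -l
    $ PYTHONPATH="$PWD" python3 automlst/cli/program.py info -lschemas pubmlst_bordetella_seqdef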
2  src/automlst/cli/meta.py  Normal file
@@ -0,0 +1,2 @@
+def get_module_base_name(name):
+    return name.split(".")[-1]
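A quick sketch of what this helper does with the dotted module names passed to it in program.py:

    get_module_base_name("automlst.cli.info")        # -> "info"
    get_module_base_name("automlst.cli.exactmatch")  # -> "exactmatch"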
@@ -1,55 +0,0 @@
-import asyncio
-import datetime
-from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence, Union
-from automlst.cli import program
-from automlst.engine.data.genomics import NamedString
-from automlst.engine.data.mlst import MLSTProfile
-from automlst.engine.local.abif import read_abif, reference_consensus_assembly
-from automlst.engine.local.csv import write_mlst_profiles_as_csv
-from automlst.engine.local.fasta import read_fasta, read_multiple_fastas
-from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BigSDBMLSTProfiler
-
-
-parser = program.subparsers.add_parser(__name__)
-
-parser.add_argument(
-    "--fasta", "-fa", "-fst",
-    nargs="+",
-    action='extend',
-    dest="fastas",
-    required=False,
-    default=[],
-    type=str,
-    help="The FASTA files to process. Multiple can be listed."
-)
-
-parser.add_argument(
-    "seqdefdb",
-    help="The BIGSdb seqdef database to use for typing."
-)
-
-parser.add_argument(
-    "schema",
-    type=int,
-    help="The BIGSdb seqdef database schema ID (integer) to use for typing."
-)
-
-parser.add_argument(
-    "out",
-    default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}',
-    help="The output CSV name (.csv will be appended)."
-)
-
-
-async def run(args):
-    async with BIGSdbIndex() as bigsdb_index:
-        gen_strings = read_multiple_fastas(args.fastas)
-        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
-            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
-            await write_mlst_profiles_as_csv(mlst_profiles, args.out)
-
-
-def run_asynchronously(args):
-    asyncio.run(run(args))
-
-parser.set_defaults(func=run_asynchronously)
@@ -4,6 +4,8 @@ import datetime
 from os import path
 import os
 
+from automlst.cli import exactmatch, info
+from automlst.cli.meta import get_module_base_name
 from automlst.engine.data.genomics import NamedString
 from automlst.engine.local.abif import read_abif
 from automlst.engine.local.csv import write_mlst_profiles_as_csv
@@ -13,10 +15,13 @@ from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
 root_parser = argparse.ArgumentParser()
 subparsers = root_parser.add_subparsers(required=True)
 
+info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
+exactmatch.setup_parser(subparsers.add_parser(get_module_base_name(exactmatch.__name__)))
+
 
 def run():
     args = root_parser.parse_args()
     args.func(args)
 
 
 if __name__ == "__main__":
     run()
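The registration above suggests the pattern for wiring in further subcommands; a minimal sketch, assuming a hypothetical module automlst/cli/newcmd.py that is not part of this commit:

    # automlst/cli/newcmd.py (hypothetical)
    from argparse import ArgumentParser

    def setup_parser(parser: ArgumentParser):
        parser.description = "Describe the new subcommand here."
        parser.set_defaults(func=run)

    def run(args):
        print("newcmd invoked")

    # program.py would then register it alongside the others:
    # newcmd.setup_parser(subparsers.add_parser(get_module_base_name(newcmd.__name__)))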
@@ -21,7 +21,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
     writer: Union[csv.DictWriter, None] = None
     async for name, mlst_profile in mlst_profiles_iterable:
         if writer is None:
-            header = ["st", "clonal-complex", "id", *mlst_profile.alleles.keys()]
+            header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
             writer = csv.DictWriter(filehandle, fieldnames=header)
             writer.writeheader()
             row_dictionary = {
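The reordered header puts the isolate id first, then st and clonal-complex, then one column per locus. A minimal sketch of the resulting layout, assuming two hypothetical loci adk and fumC:

    import csv, io

    header = ["id", "st", "clonal-complex", "adk", "fumC"]
    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=header)
    writer.writeheader()
    writer.writerow({"id": "isolate1", "st": "1", "clonal-complex": "ST-1 complex", "adk": "1", "fumC": "3"})
    print(buffer.getvalue())
    # id,st,clonal-complex,adk,fumC
    # isolate1,1,ST-1 complex,1,3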
@@ -10,7 +10,9 @@ from automlst.engine.data.mlst import Allele, MLSTProfile
 class BigSDBMLSTProfiler(AbstractAsyncContextManager):
 
     def __init__(self, database_api: str, database_name: str, schema_id: int):
-        self._base_url = f"{database_api}/db/{database_name}/schemes/{schema_id}/"
+        self._database_name = database_name
+        self._schema_id = schema_id
+        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
         self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))
 
     async def __aenter__(self):
@@ -26,6 +28,9 @@ class BigSDBMLSTProfiler(AbstractAsyncContextManager):
         if "exact_matches" not in sequence_response:
             # TODO throw exception for not finding matches.
             pass
+
+        if "exact_matches" not in sequence_response:
+            raise ValueError(f"Unable to find exact matches in \"{self._database_name}\" under schema ID \"{self._schema_id}\".")
         exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
         for allele_loci, alleles in exact_matches.items():
             for allele in alleles:
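With the new guard, a profiling call that finds no exact matches should now surface a ValueError instead of silently passing. A sketch of how a caller such as exactmatch.py could handle it, assuming the error propagates out of the profiling call; the variable names are taken from its run() coroutine:

    try:
        mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
        await write_mlst_profiles_as_csv(mlst_profiles, args.out)
    except ValueError as error:
        # e.g. no exact matches in the chosen seqdef database/schema
        print(error)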
@@ -100,12 +105,15 @@ class BIGSdbIndex(AbstractAsyncContextManager):
         return self._known_seqdef_dbs_origin
 
     async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str:
-        return (await self.get_known_seqdef_dbs())[seqdef_db_name]
+        known_databases = await self.get_known_seqdef_dbs()
+        if seqdef_db_name not in known_databases:
+            raise ValueError(f"The database \"{seqdef_db_name}\" could not be found.")
+        return known_databases[seqdef_db_name]
 
     async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
-        if self._seqdefdb_schemas[seqdef_db_name] is not None and not force:
+        if seqdef_db_name in self._seqdefdb_schemas and not force:
             return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
-        uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/{seqdef_db_name}/schemes"
+        uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
         async with self._http_client.get(uri_path) as response:
             response_json = await response.json()
             schema_descriptions: Mapping[str, int] = dict()
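A rough sketch of how the BIGSdbIndex lookups behave after this change; the database name is the one used elsewhere in this commit, and an unknown name now raises the ValueError introduced above rather than a bare KeyError:

    import asyncio
    from automlst.engine.remote.databases.bigsdb import BIGSdbIndex

    async def main():
        async with BIGSdbIndex() as bigsdb_index:
            api = await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")
            schemas = await bigsdb_index.get_schemas_for_seqdefdb("pubmlst_bordetella_seqdef")
            for description, schema_id in schemas.items():
                print(description, schema_id)

    asyncio.run(main())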