Initial commit transfering files over from original automlst.engine project

This commit is contained in:
2025-01-10 21:15:59 +00:00
commit 3b2cf916fd
11 changed files with 488 additions and 0 deletions

View File

42
src/automlst/cli/info.py Normal file
View File

@@ -0,0 +1,42 @@
from argparse import ArgumentParser
import asyncio
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
def setup_parser(parser: ArgumentParser):
parser.description = "Fetches the latest BIGSdb MLST database definitions."
parser.add_argument(
"--retrieve-bigsdbs", "-l",
action="store_true",
dest="list_dbs",
required=False,
default=False,
help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)."
)
parser.add_argument(
"--retrieve-bigsdb-schemas", "-lschemas",
nargs="+",
action="extend",
dest="list_bigsdb_schemas",
required=False,
default=[],
type=str,
help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
)
parser.set_defaults(func=run_asynchronously)
async def run(args):
async with BIGSdbIndex() as bigsdb_index:
if args.list_dbs:
known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
print("\n".join(known_seqdef_dbs.keys()))
for bigsdb_schema_name in args.list_bigsdb_schemas:
schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
for schema_desc, schema_id in schemas.items():
print(f"{schema_desc}: {schema_id}")
def run_asynchronously(args):
asyncio.run(run(args))

2
src/automlst/cli/meta.py Normal file
View File

@@ -0,0 +1,2 @@
def get_module_base_name(name):
return name.split(".")[-1]

View File

@@ -0,0 +1,27 @@
import argparse
import asyncio
import datetime
from os import path
import os
from automlst.cli import info, st
from automlst.cli.meta import get_module_base_name
from automlst.engine.data.genomics import NamedString
from automlst.engine.local.abif import read_abif
from automlst.engine.local.csv import write_mlst_profiles_as_csv
from automlst.engine.local.fasta import read_fasta
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
root_parser = argparse.ArgumentParser()
subparsers = root_parser.add_subparsers(required=True)
info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
def run():
args = root_parser.parse_args()
args.func(args)
if __name__ == "__main__":
run()

69
src/automlst/cli/st.py Normal file
View File

@@ -0,0 +1,69 @@
from argparse import ArgumentParser
import asyncio
import datetime
from automlst.engine.local.csv import write_mlst_profiles_as_csv
from automlst.engine.local.fasta import read_multiple_fastas
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
def setup_parser(parser: ArgumentParser):
parser.description = "Returns MLST exact profile matches."
parser.add_argument(
"fastas",
nargs="+",
action='extend',
default=[],
type=str,
help="The FASTA files to process. Multiple can be listed."
)
parser.add_argument(
"seqdefdb",
help="The BIGSdb seqdef database to use for typing."
)
parser.add_argument(
"schema",
type=int,
help="The BIGSdb seqdef database schema ID (integer) to use for typing."
)
parser.add_argument(
"out",
default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}',
help="The output CSV name (.csv will be appended)."
)
parser.add_argument(
"--exact", "-ex",
action="store_true",
dest="exact",
required=False,
default=False,
help="Should run exact matching rather than returning all similar ones"
)
parser.add_argument(
"--stop-on-fail", "-sof",
action="store_true",
dest="stop_on_fail",
required=False,
default=False,
help="Should the algorithm stop in the case there are no matches (or partial matches when expecting exact matches)."
)
parser.set_defaults(func=run_asynchronously)
async def run(args):
async with BIGSdbIndex() as bigsdb_index:
gen_strings = read_multiple_fastas(args.fastas)
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
if len(failed) > 0:
print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
def run_asynchronously(args):
asyncio.run(run(args))