Initial commit transferring files over from original automlst.engine project

2025-01-10 21:15:59 +00:00
commit 3b2cf916fd
11 changed files with 488 additions and 0 deletions
--- a/src/automlst/cli/init.py
+++ b/src/automlst/cli/init.py
--- a/src/automlst/cli/info.py
+++ b/src/automlst/cli/info.py
@@ -0,0 +1,42 @@
+from argparse import ArgumentParser
+import asyncio
+from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
+
+def setup_parser(parser: ArgumentParser):
+    """Configure the argument parser for the BIGSdb information subcommand.
+
+    Sets the description, registers the database-listing flag and the
+    schema-listing option, and binds ``run_asynchronously`` as the ``func``
+    default so the caller can dispatch to this subcommand.
+
+    Args:
+        parser: The parser to configure in place.
+    """
+    parser.description = "Fetches the latest BIGSdb MLST database definitions."
+
+    # Boolean switch; stored on the parsed namespace as ``list_dbs``.
+    list_dbs_options = {
+        "action": "store_true",
+        "dest": "list_dbs",
+        "required": False,
+        "default": False,
+        "help": "Lists all known BIGSdb MLST databases (fetched from known APIs and cached).",
+    }
+    parser.add_argument("--retrieve-bigsdbs", "-l", **list_dbs_options)
+
+    # Takes one or more database names; values are collected into a single
+    # list stored on the parsed namespace as ``list_bigsdb_schemas``.
+    list_schemas_options = {
+        "nargs": "+",
+        "action": "extend",
+        "dest": "list_bigsdb_schemas",
+        "required": False,
+        "default": [],
+        "type": str,
+        "help": "Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given.",
+    }
+    parser.add_argument(
+        "--retrieve-bigsdb-schemas", "-lschemas", **list_schemas_options
+    )
+
+    parser.set_defaults(func=run_asynchronously)
+
+async def run(args):
+    """Print the BIGSdb information requested on the command line.
+
+    When ``args.list_dbs`` is set, prints the keys of the known seqdef
+    database mapping, one per line. Then, for every name in
+    ``args.list_bigsdb_schemas``, prints each entry of that database's
+    schema mapping as ``"<key>: <value>"``.
+
+    Args:
+        args: Parsed arguments providing ``list_dbs`` and
+            ``list_bigsdb_schemas``.
+    """
+    async with BIGSdbIndex() as index:
+        if args.list_dbs:
+            # force=False is passed through unchanged to the index lookup.
+            seqdef_dbs = await index.get_known_seqdef_dbs(force=False)
+            db_names = seqdef_dbs.keys()
+            print("\n".join(db_names))
+
+        for seqdef_db_name in args.list_bigsdb_schemas:
+            schema_map = await index.get_schemas_for_seqdefdb(seqdef_db_name)
+            for description, identifier in schema_map.items():
+                print(f"{description}: {identifier}")
+
+def run_asynchronously(args):
+    """Synchronous entry point: drive the ``run`` coroutine to completion.
+
+    Args:
+        args: Parsed arguments, forwarded unchanged to ``run``.
+    """
+    coroutine = run(args)
+    asyncio.run(coroutine)
+
--- a/src/automlst/cli/meta.py
+++ b/src/automlst/cli/meta.py
@@ -0,0 +1,2 @@
+def get_module_base_name(name):
+    """Return the last dot-separated component of a dotted module name.
+
+    A name without any dot is returned unchanged.
+
+    Args:
+        name: A dotted name such as a module's ``__name__``.
+    """
+    _, _, base_name = name.rpartition(".")
+    return base_name
--- a/src/automlst/cli/program.py
+++ b/src/automlst/cli/program.py
@@ -0,0 +1,27 @@
+import argparse
+import asyncio
+import datetime
+from os import path
+import os
+
+from automlst.cli import info, st
+from automlst.cli.meta import get_module_base_name
+from automlst.engine.data.genomics import NamedString
+from automlst.engine.local.abif import read_abif
+from automlst.engine.local.csv import write_mlst_profiles_as_csv
+from automlst.engine.local.fasta import read_fasta
+from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
+
+# Root command-line parser; choosing a subcommand is mandatory (required=True).
+root_parser = argparse.ArgumentParser()
+subparsers = root_parser.add_subparsers(required=True)
+
+# Register one subcommand per CLI module. Each subcommand is named after the
+# last dotted component of the module's __name__, and the module's own
+# setup_parser() configures the newly created subparser.
+info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
+st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
+
+
+def run():
+    """Parse the command line and dispatch to the selected subcommand.
+
+    The handler is read from the ``func`` attribute of the parsed namespace
+    and called with that namespace as its only argument.
+    """
+    parsed = root_parser.parse_args()
+    handler = parsed.func
+    handler(parsed)
+
+# Run the CLI when this module is executed directly as a script.
+if __name__ == "__main__":
+    run()
--- a/src/automlst/cli/st.py
+++ b/src/automlst/cli/st.py
@@ -0,0 +1,69 @@
+
+from argparse import ArgumentParser
+import asyncio
+import datetime
+from automlst.engine.local.csv import write_mlst_profiles_as_csv
+from automlst.engine.local.fasta import read_multiple_fastas
+from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
+
+
+def setup_parser(parser: ArgumentParser):
+    """Configure the argument parser for the MLST sequence-typing subcommand.
+
+    Registers four positionals (FASTA paths, seqdef database name, integer
+    schema ID, output name) and two boolean flags, then binds
+    ``run_asynchronously`` as the ``func`` default for dispatch.
+
+    Args:
+        parser: The parser to configure in place.
+    """
+    parser.description = "Returns MLST exact profile matches."
+
+    # One or more FASTA paths, collected into a single list.
+    parser.add_argument(
+        "fastas",
+        nargs="+",
+        action="extend",
+        default=[],
+        type=str,
+        help="The FASTA files to process. Multiple can be listed.",
+    )
+    parser.add_argument(
+        "seqdefdb", help="The BIGSdb seqdef database to use for typing."
+    )
+    parser.add_argument(
+        "schema",
+        type=int,
+        help="The BIGSdb seqdef database schema ID (integer) to use for typing.",
+    )
+
+    # The timestamp is computed once, at the time the parser is configured.
+    # NOTE(review): "out" is a positional without nargs, which argparse
+    # treats as required, so this default looks unreachable - confirm.
+    timestamp = datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")
+    parser.add_argument(
+        "out",
+        default=f"./{timestamp}",
+        help="The output CSV name (.csv will be appended).",
+    )
+
+    # Both switches are optional store_true flags that default to False.
+    switch_options = {"action": "store_true", "required": False, "default": False}
+    parser.add_argument(
+        "--exact", "-ex",
+        dest="exact",
+        help="Should run exact matching rather than returning all similar ones",
+        **switch_options,
+    )
+    parser.add_argument(
+        "--stop-on-fail", "-sof",
+        dest="stop_on_fail",
+        help="Should the algorithm stop in the case there are no matches (or partial matches when expecting exact matches).",
+        **switch_options,
+    )
+
+    parser.set_defaults(func=run_asynchronously)
+
+async def run(args):
+    """Profile the given FASTA inputs and write the results as CSV.
+
+    Passes ``args.fastas`` to ``read_multiple_fastas``, builds a profiler for
+    ``args.seqdefdb`` / ``args.schema`` from a ``BIGSdbIndex``, profiles the
+    inputs (honouring ``args.exact``) and hands the result to
+    ``write_mlst_profiles_as_csv`` together with ``args.out``. Any IDs
+    returned by the writer are reported on stdout as failures, followed by a
+    completion message.
+
+    Args:
+        args: Parsed arguments providing ``fastas``, ``seqdefdb``, ``schema``,
+            ``out`` and ``exact``.
+    """
+    async with BIGSdbIndex() as bigsdb_index:
+        gen_strings = read_multiple_fastas(args.fastas)
+        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
+            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
+            failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
+            if len(failed) > 0:
+                # Join outside the f-string: reusing the enclosing quote
+                # character and using a backslash inside a replacement field
+                # are only valid syntax from Python 3.12 (PEP 701) onwards.
+                failed_listing = "\n".join(failed)
+                print(f"A total of {len(failed)} IDs failed:\n{failed_listing}")
+            print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
+
+def run_asynchronously(args):
+    """Synchronous entry point: drive the ``run`` coroutine to completion.
+
+    Args:
+        args: Parsed arguments, forwarded unchanged to ``run``.
+    """
+    coroutine = run(args)
+    asyncio.run(coroutine)
+