Prepared software for more analysis options

2025-01-03 20:42:30 +00:00
parent 580e96c1c6
commit 0a37bb5176
4 changed files with 27 additions and 14 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -9,15 +9,14 @@
            "name": "Python Debugger: Current File with Arguments",
            "type": "debugpy",
            "request": "launch",
-            "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/program.py",
+            "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",
            "console": "integratedTerminal",
            "args": [
                "-fa",
                "${workspaceFolder}/tests/resources/tohama_I_bpertussis.fasta",
                "-ipdbmlst",
                "pubmlst_bordetella_seqdef",
-                "-csv",
-                "${workspaceFolder}/output.csv"
+                "${workspaceFolder}/output"
            ],
            "cwd": "${workspaceFolder}/src",
            "env": {
--- a/output/MLST_20250103204147.csv
+++ b/output/MLST_20250103204147.csv
@@ -0,0 +1,2 @@
+st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
+1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,8 +13,8 @@ requires-python = ">=3.11"
 description = "A tool to rapidly fetch MLST profiles given sequences for various diseases."

 [project.scripts]
-nsbdiagnosistoolkit = "nsbdiagnosistoolkit.cli.program:cli"
-nsbdiagtk = "nsbdiagnosistoolkit.cli.program:cli"
+nsbdiagnosistoolkit = "nsbdiagnosistoolkit.cli.root:cli"
+nsbdiagtk = "nsbdiagnosistoolkit.cli.root:cli"

 [tool.pyright]
 extraPaths = ["src"]
--- a/src/nsbdiagnosistoolkit/cli/program.py
+++ b/src/nsbdiagnosistoolkit/cli/program.py
@@ -1,6 +1,8 @@
 import argparse
 import asyncio
+import datetime
 from os import path
+import os

 from nsbdiagnosistoolkit.cli import aggregator
 from nsbdiagnosistoolkit.engine.data.genomics import NamedString
@@ -10,6 +12,14 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta


 parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--run-name", "-name",
+    dest="run_name",
+    required=False,
+    default=datetime.datetime.now().strftime(r"%Y%m%d%H%M%S"),
+    type=str,
+    help="The name of the run. Will use a date and time string if not provided."
+)
 parser.add_argument(
    "--fasta", "-fa", "-fst",
    nargs="+",
@@ -33,25 +43,27 @@ parser.add_argument(
    "--institut-pasteur-mlst",
    "-ipdbmlst",
    dest="institut_pasteur_db",
+    required=False,
+    default=None,
    type=str,
    help="The Institut Pasteur MLST database to use."
 )
 parser.add_argument(
-    "-csv",
-    dest="csv_path",
-    required=False,
-    default=None,
-    help="The destination to place the CSV output."
+    "out",
+    default="./.",
+    help="The output folder. Files will be named by the provided (or default) run name."
 )


 def cli():
    args = parser.parse_args()
    gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs)
+    os.makedirs(args.out)
+    if args.institut_pasteur_db is not None:
        mlst_profiles = aggregator.profile_all_genetic_strings(
            gen_strings, args.institut_pasteur_db)
        asyncio.run(write_mlst_profiles_as_csv(
-        asyncio.run(mlst_profiles), str(args.csv_path)))
+            asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv"))))


 if __name__ == "__main__":