Prepared software for more analysis options
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good

This commit is contained in:
Harrison Deng 2025-01-03 20:42:30 +00:00
parent 580e96c1c6
commit 0a37bb5176
4 changed files with 27 additions and 14 deletions

5
.vscode/launch.json vendored
View File

@ -9,15 +9,14 @@
"name": "Python Debugger: Current File with Arguments", "name": "Python Debugger: Current File with Arguments",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/program.py", "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",
"console": "integratedTerminal", "console": "integratedTerminal",
"args": [ "args": [
"-fa", "-fa",
"${workspaceFolder}/tests/resources/tohama_I_bpertussis.fasta", "${workspaceFolder}/tests/resources/tohama_I_bpertussis.fasta",
"-ipdbmlst", "-ipdbmlst",
"pubmlst_bordetella_seqdef", "pubmlst_bordetella_seqdef",
"-csv", "${workspaceFolder}/output"
"${workspaceFolder}/output.csv"
], ],
"cwd": "${workspaceFolder}/src", "cwd": "${workspaceFolder}/src",
"env": { "env": {

View File

@ -0,0 +1,2 @@
st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']
1 st clonal-complex fumC pepA icd adk pgm tyrB glyA
2 1 ST-2 complex ['1'] ['1'] ['1'] ['1'] ['1'] ['1'] ['1']

View File

@ -13,8 +13,8 @@ requires-python = ">=3.11"
description = "A tool to rapidly fetch fetch MLST profiles given sequences for various diseases." description = "A tool to rapidly fetch fetch MLST profiles given sequences for various diseases."
[project.scripts] [project.scripts]
nsbdiagnosistoolkit = "nsbdiagnosistoolkit.cli.program:cli" nsbdiagnosistoolkit = "nsbdiagnosistoolkit.cli.root:cli"
nsbdiagtk = "nsbdiagnosistoolkit.cli.program:cli" nsbdiagtk = "nsbdiagnosistoolkit.cli.root:cli"
[tool.pyright] [tool.pyright]
extraPaths = ["src"] extraPaths = ["src"]

View File

@ -1,6 +1,8 @@
import argparse import argparse
import asyncio import asyncio
import datetime
from os import path from os import path
import os
from nsbdiagnosistoolkit.cli import aggregator from nsbdiagnosistoolkit.cli import aggregator
from nsbdiagnosistoolkit.engine.data.genomics import NamedString from nsbdiagnosistoolkit.engine.data.genomics import NamedString
@ -10,6 +12,14 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument(
"--run-name", "-name",
dest="run_name",
required=False,
default=datetime.datetime.now().strftime(r"%Y%m%d%H%M%S"),
type=str,
help="The name of the run. Will use a date and time string if not provided."
)
parser.add_argument( parser.add_argument(
"--fasta", "-fa", "-fst", "--fasta", "-fa", "-fst",
nargs="+", nargs="+",
@ -33,25 +43,27 @@ parser.add_argument(
"--institut-pasteur-mlst", "--institut-pasteur-mlst",
"-ipdbmlst", "-ipdbmlst",
dest="institut_pasteur_db", dest="institut_pasteur_db",
required=False,
default=None,
type=str, type=str,
help="The Institut Pasteur MLST database to use." help="The Institut Pasteur MLST database to use."
) )
parser.add_argument( parser.add_argument(
"-csv", "out",
dest="csv_path", default="./.",
required=False, help="The output folder. Files will be named by the provided (or default) run name."
default=None,
help="The destination to place the CSV output."
) )
def cli(): def cli():
args = parser.parse_args() args = parser.parse_args()
gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs) gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs)
os.makedirs(args.out)
if args.institut_pasteur_db is not None:
mlst_profiles = aggregator.profile_all_genetic_strings( mlst_profiles = aggregator.profile_all_genetic_strings(
gen_strings, args.institut_pasteur_db) gen_strings, args.institut_pasteur_db)
asyncio.run(write_mlst_profiles_as_csv( asyncio.run(write_mlst_profiles_as_csv(
asyncio.run(mlst_profiles), str(args.csv_path))) asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv"))))
if __name__ == "__main__": if __name__ == "__main__":