71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
|
|
from argparse import ArgumentParser, Namespace
|
|
import asyncio
|
|
import datetime
|
|
from autobigs.engine.data.local.csv import write_mlst_profiles_as_csv
|
|
from autobigs.engine.data.local.fasta import read_multiple_fastas
|
|
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
|
|
|
|
|
|
def setup_parser(parser: ArgumentParser):
    """Attach the MLST typing command-line interface to ``parser``.

    Registers, in order, the positionals ``fastas`` (one or more values),
    ``seqdefdb``, ``schema`` (converted with ``int``) and ``out``, followed
    by the boolean flags ``--exact``/``-ex`` and ``--stop-on-fail``/``-sof``.
    The parser default ``run`` is set to ``run_asynchronously``.

    Returns:
        The same ``parser`` object that was passed in.
    """
    register = parser.add_argument

    # The timestamp is taken once, while the parser is being configured.
    # NOTE(review): "out" is a positional without ``nargs``, so this default
    # looks unreachable from the command line -- confirm before relying on it.
    timestamped_out = "./" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    parser.description = "Returns MLST exact profile matches."

    # Positional arguments: their registration order is the order expected
    # on the command line.
    register(
        "fastas",
        type=str,
        nargs="+",
        action="extend",
        default=[],
        help="The FASTA files to process. Multiple can be listed.",
    )
    register(
        "seqdefdb",
        help="The BIGSdb seqdef database to use for typing.",
    )
    register(
        "schema",
        type=int,
        help="The BIGSdb seqdef database schema ID (integer) to use for typing.",
    )
    register(
        "out",
        default=timestamped_out,
        help="The output CSV name (.csv will be appended).",
    )

    # Optional boolean switches; both are False unless given.
    register(
        "--exact", "-ex",
        dest="exact",
        action="store_true",
        default=False,
        required=False,
        help="Should run exact matching rather than returning all similar ones",
    )
    register(
        "--stop-on-fail", "-sof",
        dest="stop_on_fail",
        action="store_true",
        default=False,
        required=False,
        help="Should the algorithm stop in the case there are no matches (or partial matches when expecting exact matches).",
    )

    # Callable stored on the parsed namespace as ``args.run``.
    parser.set_defaults(run=run_asynchronously)
    return parser
|
|
|
|
async def run(args: Namespace):
    """Profile the given FASTA files and write the results to a CSV.

    Opens a ``BIGSdbIndex``, builds a profiler for ``args.seqdefdb`` /
    ``args.schema``, feeds it the strings produced by
    ``read_multiple_fastas(args.fastas)`` and hands the resulting profiles to
    ``write_mlst_profiles_as_csv`` together with ``args.out``. Any IDs that
    call reports back as failed are printed, followed by a completion message.

    Args:
        args: Parsed command-line namespace; reads ``fastas``, ``seqdefdb``,
            ``schema``, ``exact`` and ``out``.
    """
    # NOTE(review): ``args.stop_on_fail`` is defined by the parser but is not
    # read anywhere in this function -- confirm whether it should be forwarded.
    async with BIGSdbIndex() as bigsdb_index:
        gen_strings = read_multiple_fastas(args.fastas)
        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
            # Consume the profiles while the profiler context is still open.
            failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
            failed_count = len(failed)
            if failed_count > 0:
                # Join outside the f-string: a quoted "\n" inside a replacement
                # field is a SyntaxError on Python versions before 3.12.
                failed_listing = "\n".join(failed)
                print(f"A total of {failed_count} IDs failed:\n{failed_listing}")
            print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
|
|
|
|
def run_asynchronously(args):
    """Synchronous entry point: drive the ``run`` coroutine to completion.

    Args:
        args: Parsed command-line namespace, passed straight to ``run``.
    """
    pending = run(args)
    asyncio.run(pending)
|
|
|