from argparse import ArgumentParser, Namespace import asyncio import datetime from autobigs.engine.data.local.csv import write_mlst_profiles_as_csv from autobigs.engine.data.local.fasta import read_multiple_fastas from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex def setup_parser(parser: ArgumentParser): parser.description = "Returns MLST exact profile matches." parser.add_argument( "fastas", nargs="+", action='extend', default=[], type=str, help="The FASTA files to process. Multiple can be listed." ) parser.add_argument( "seqdefdb", help="The BIGSdb seqdef database to use for typing." ) parser.add_argument( "schema", type=int, help="The BIGSdb seqdef database schema ID (integer) to use for typing." ) parser.add_argument( "out", default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}', help="The output CSV name (.csv will be appended)." ) parser.add_argument( "--exact", "-ex", action="store_true", dest="exact", required=False, default=False, help="Should run exact matching rather than returning all similar ones" ) parser.add_argument( "--stop-on-fail", "-sof", action="store_true", dest="stop_on_fail", required=False, default=False, help="Should the algorithm stop in the case there are no matches (or partial matches when expecting exact matches)." ) parser.set_defaults(run=run_asynchronously) return parser async def run(args: Namespace): async with BIGSdbIndex() as bigsdb_index: gen_strings = read_multiple_fastas(args.fastas) async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler: mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact) failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out) if len(failed) > 0: print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}") print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.") def run_asynchronously(args): asyncio.run(run(args))