From 319edf36afa24e2198329b75d32d01b5a7856f81 Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Wed, 19 Feb 2025 15:01:57 +0000 Subject: [PATCH] Added option to output database and schemas lists to CSV --- src/autobigs/cli/info.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/autobigs/cli/info.py b/src/autobigs/cli/info.py index c653c89..9f4404d 100644 --- a/src/autobigs/cli/info.py +++ b/src/autobigs/cli/info.py @@ -1,5 +1,7 @@ from argparse import ArgumentParser, Namespace import asyncio +import csv +from os import path from autobigs.engine.analysis.bigsdb import BIGSdbIndex def setup_parser(parser: ArgumentParser): @@ -24,6 +26,14 @@ def setup_parser(parser: ArgumentParser): help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given." ) + parser.add_argument( + "--csv-prefix", "-o", + dest="csv_output", + required=False, + default=None, + help="Output list as CSV at a given path. A suffix is added depending on the action taken." + ) + parser.set_defaults(run=run_asynchronously) return parser @@ -31,15 +41,29 @@ async def run(args: Namespace): async with BIGSdbIndex() as bigsdb_index: if args.list_dbs: known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False) - print("The following are all known BIGS database names (sorted alphabetically):") - print("\n".join(sorted(known_seqdef_dbs.keys()))) + sorted_seqdef_dbs = [(name, source) for name, source in sorted(known_seqdef_dbs.items())] + print("The following are all known BIGS database names, and their source (sorted alphabetically):") + print("\n".join(["{0}: {1}".format(name, source) for name, source in sorted_seqdef_dbs])) + if args.csv_output: + dbs_csv_path = path.splitext(args.csv_output)[0] + "_" + "dbs.csv" + with open(dbs_csv_path, "w") as csv_out_handle: + writer = csv.writer(csv_out_handle) + writer.writerow(("BIGSdb Names", "Source")) + writer.writerows(sorted_seqdef_dbs) + print("\nDatabase output written to {0}".format(dbs_csv_path)) for bigsdb_schema_name in args.list_bigsdb_schemas: schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name) + sorted_schemas = [(name, id) for name, id in sorted(schemas.items())] print("The following are the known schemas for \"{0}\", and their associated IDs:".format(bigsdb_schema_name)) - for schema_desc, schema_id in schemas.items(): - print(f"{schema_desc}: {schema_id}") - + print("\n".join(["{0}: {1}".format(name, id) for name, id in sorted_schemas])) + if args.csv_output: + schema_csv_path = path.splitext(args.csv_output)[0] + "_" + "schemas.csv" + with open(schema_csv_path, "w") as csv_out_handle: + writer = csv.writer(csv_out_handle) + writer.writerow(("Name", "ID")) + writer.writerows(sorted_schemas) + print("\nSchema list output written to {0}".format(schema_csv_path)) if not (args.list_dbs or len(args.list_bigsdb_schemas) > 0): print("Nothing to do. Try specifying \"-l\" for a list of known databases, or \"-h\" for more information.")