From 8b379198ec1e080e068e6533a94f33c960b774e7 Mon Sep 17 00:00:00 2001 From: Harrison Date: Tue, 11 Apr 2023 16:13:07 -0500 Subject: [PATCH] Prepping for implementation of quick and dirty GUI --- .vscode/launch.json | 9 ++-- environment.yml | 2 + setup.cfg | 3 +- splitmsa/splitmsa.py | 125 ++++++++++++++++++++++--------------------- 4 files changed, 73 insertions(+), 66 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 84e835c..05aaa4a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,15 +8,16 @@ "name": "Splitter Single Gene with Translation", "type": "python", "request": "launch", - "program": "${workspaceFolder}/msa_splitter.py", + "program": "${workspaceFolder}/splitmsa/splitmsa.py", "args": [ "${workspaceFolder}/tests/resources/test_msa-shortened.fa", - "--gene-list", "${workspaceFolder}/tests/resources/gene_list.csv", + "--gene-list", + "${workspaceFolder}/tests/resources/gene_list.csv", "-C", - "-E", "DEBUG", + "-E", + "DEBUG", "--do-translate", "--gen-cut-stop-codon" - ], "console": "integratedTerminal", "justMyCode": true diff --git a/environment.yml b/environment.yml index d8c8bfe..5a5d9a8 100644 --- a/environment.yml +++ b/environment.yml @@ -6,3 +6,5 @@ dependencies: - pytest=7.2.2 - twine=4.0.2 - biopython=1.81 + - gooey=1.0.8.1 + - python=3.9 diff --git a/setup.cfg b/setup.cfg index 3f5f3de..5cff0a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,8 @@ version = 0.0.1 [options] packages = splitmsa install_requires = - Bio + gooey + Bio; python_version == "3.9" [options.entry_points] console_scripts = diff --git a/splitmsa/splitmsa.py b/splitmsa/splitmsa.py index af66656..30bb18e 100755 --- a/splitmsa/splitmsa.py +++ b/splitmsa/splitmsa.py @@ -14,6 +14,7 @@ import os from Bio import SeqIO, SeqRecord from Bio.Seq import Seq import csv +from gooey import Gooey def read_genes_from_csv(batch_genes_csv_path: str): @@ -247,6 +248,68 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str): writer.writerows(problems) +def run(args): + msa_records = list(read_msa_file(args.input)) + info(f"MSA records read complete. Found {len(msa_records)} records.") + genes = [] + if args.gene_list: + genes = read_genes_from_csv(args.gene_list) + info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.") + else: + if args.gene_name and args.start and args.end: + genes.append([args.gene_name, args.start, args.end]) + info( + f"Extracting {args.gene_name} starting at {args.start} to " + f"{args.end}." + ) + else: + raise Exception( + "Need either a gene list by --gene-list or a start and end " + "via --start, and --end respectively." + ) + for gene_name, start, end in genes: + info(f"Started on gene {gene_name} ({start} - {end})") + ( + nt_sequence_records, + nt_no_stop_sequence_records, + aa_sequence_records, + aa_no_stop_sequence_records, + problems, + ) = trim( + start, + end, + args.gen_cut_stop_codon, + args.do_translate, + msa_records, + correction_range=args.correction_range, + ) + if len(problems) > 0: + warning( + f"There were {len(problems)} problems " f"during trimming {gene_name}!" + ) + if args.catalogue_problems: + output_as_csv( + gene_name, + problems, + os.path.join(args.output_dir, f"{gene_name} - problems.csv"), + ) + write_to_file( + args.output_dir, + gene_name, + start, + end, + args.full_suffix, + args.ns_suffix, + args.aa_suffix, + nt_sequence_records, + nt_no_stop_sequence_records, + aa_sequence_records, + aa_no_stop_sequence_records, + ) + info(f"Completed gene {gene_name} ({start} - {end})") + + +@Gooey def main(): parser = argparse.ArgumentParser( prog="splitmsa", @@ -391,69 +454,9 @@ def main(): ) args = parser.parse_args() - logging.basicConfig(level=args.log_level.upper()) - - msa_records = list(read_msa_file(args.input)) - info(f"MSA records read complete. Found {len(msa_records)} records.") - genes = [] - if args.gene_list: - genes = read_genes_from_csv(args.gene_list) - info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.") - else: - if args.gene_name and args.start and args.end: - genes.append([args.gene_name, args.start, args.end]) - info( - f"Extracting {args.gene_name} starting at {args.start} to " - f"{args.end}." - ) - else: - raise Exception( - "Need either a gene list by --gene-list or a start and end " - "via --start, and --end respectively." - ) - for gene_name, start, end in genes: - info(f"Started on gene {gene_name} ({start} - {end})") - ( - nt_sequence_records, - nt_no_stop_sequence_records, - aa_sequence_records, - aa_no_stop_sequence_records, - problems, - ) = trim( - start, - end, - args.gen_cut_stop_codon, - args.do_translate, - msa_records, - correction_range=args.correction_range, - ) - if len(problems) > 0: - warning( - f"There were {len(problems)} problems " f"during trimming {gene_name}!" - ) - if args.catalogue_problems: - output_as_csv( - gene_name, - problems, - os.path.join(args.output_dir, f"{gene_name} - problems.csv"), - ) - write_to_file( - args.output_dir, - gene_name, - start, - end, - args.full_suffix, - args.ns_suffix, - args.aa_suffix, - nt_sequence_records, - nt_no_stop_sequence_records, - aa_sequence_records, - aa_no_stop_sequence_records, - ) - info(f"Completed gene {gene_name} ({start} - {end})") + run(args) if __name__ == "__main__": - main()