Prepping for implementation of quick and dirty GUI
	
		
			
	
		
	
	
		
	
		
			All checks were successful
		
		
	
	
		
			
				
	
				ydeng/splitmsa/pipeline/head This commit looks good
				
			
		
		
	
	
				
					
				
			
		
			All checks were successful
		
		
	
	ydeng/splitmsa/pipeline/head This commit looks good
				
			This commit is contained in:
		
							
								
								
									
										9
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							@@ -8,15 +8,16 @@
 | 
				
			|||||||
            "name": "Splitter Single Gene with Translation",
 | 
					            "name": "Splitter Single Gene with Translation",
 | 
				
			||||||
            "type": "python",
 | 
					            "type": "python",
 | 
				
			||||||
            "request": "launch",
 | 
					            "request": "launch",
 | 
				
			||||||
            "program": "${workspaceFolder}/msa_splitter.py",
 | 
					            "program": "${workspaceFolder}/splitmsa/splitmsa.py",
 | 
				
			||||||
            "args": [
 | 
					            "args": [
 | 
				
			||||||
                "${workspaceFolder}/tests/resources/test_msa-shortened.fa",
 | 
					                "${workspaceFolder}/tests/resources/test_msa-shortened.fa",
 | 
				
			||||||
                "--gene-list", "${workspaceFolder}/tests/resources/gene_list.csv",
 | 
					                "--gene-list",
 | 
				
			||||||
 | 
					                "${workspaceFolder}/tests/resources/gene_list.csv",
 | 
				
			||||||
                "-C",
 | 
					                "-C",
 | 
				
			||||||
                "-E", "DEBUG",
 | 
					                "-E",
 | 
				
			||||||
 | 
					                "DEBUG",
 | 
				
			||||||
                "--do-translate",
 | 
					                "--do-translate",
 | 
				
			||||||
                "--gen-cut-stop-codon"
 | 
					                "--gen-cut-stop-codon"
 | 
				
			||||||
 | 
					 | 
				
			||||||
            ],
 | 
					            ],
 | 
				
			||||||
            "console": "integratedTerminal",
 | 
					            "console": "integratedTerminal",
 | 
				
			||||||
            "justMyCode": true
 | 
					            "justMyCode": true
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,3 +6,5 @@ dependencies:
 | 
				
			|||||||
  - pytest=7.2.2
 | 
					  - pytest=7.2.2
 | 
				
			||||||
  - twine=4.0.2
 | 
					  - twine=4.0.2
 | 
				
			||||||
  - biopython=1.81
 | 
					  - biopython=1.81
 | 
				
			||||||
 | 
					  - gooey=1.0.8.1
 | 
				
			||||||
 | 
					  - python=3.9
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,7 +5,8 @@ version = 0.0.1
 | 
				
			|||||||
[options]
 | 
					[options]
 | 
				
			||||||
packages = splitmsa
 | 
					packages = splitmsa
 | 
				
			||||||
install_requires =
 | 
					install_requires =
 | 
				
			||||||
    Bio
 | 
					    gooey
 | 
				
			||||||
 | 
					    Bio; python_version == "3.9"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[options.entry_points]
 | 
					[options.entry_points]
 | 
				
			||||||
console_scripts =
 | 
					console_scripts =
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -14,6 +14,7 @@ import os
 | 
				
			|||||||
from Bio import SeqIO, SeqRecord
 | 
					from Bio import SeqIO, SeqRecord
 | 
				
			||||||
from Bio.Seq import Seq
 | 
					from Bio.Seq import Seq
 | 
				
			||||||
import csv
 | 
					import csv
 | 
				
			||||||
 | 
					from gooey import Gooey
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def read_genes_from_csv(batch_genes_csv_path: str):
 | 
					def read_genes_from_csv(batch_genes_csv_path: str):
 | 
				
			||||||
@@ -247,6 +248,68 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str):
 | 
				
			|||||||
        writer.writerows(problems)
 | 
					        writer.writerows(problems)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run(args):
 | 
				
			||||||
 | 
					    msa_records = list(read_msa_file(args.input))
 | 
				
			||||||
 | 
					    info(f"MSA records read complete. Found {len(msa_records)} records.")
 | 
				
			||||||
 | 
					    genes = []
 | 
				
			||||||
 | 
					    if args.gene_list:
 | 
				
			||||||
 | 
					        genes = read_genes_from_csv(args.gene_list)
 | 
				
			||||||
 | 
					        info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        if args.gene_name and args.start and args.end:
 | 
				
			||||||
 | 
					            genes.append([args.gene_name, args.start, args.end])
 | 
				
			||||||
 | 
					            info(
 | 
				
			||||||
 | 
					                f"Extracting {args.gene_name} starting at {args.start} to "
 | 
				
			||||||
 | 
					                f"{args.end}."
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            raise Exception(
 | 
				
			||||||
 | 
					                "Need either a gene list by --gene-list or a start and end "
 | 
				
			||||||
 | 
					                "via --start, and --end respectively."
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					    for gene_name, start, end in genes:
 | 
				
			||||||
 | 
					        info(f"Started on gene {gene_name} ({start} - {end})")
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            nt_sequence_records,
 | 
				
			||||||
 | 
					            nt_no_stop_sequence_records,
 | 
				
			||||||
 | 
					            aa_sequence_records,
 | 
				
			||||||
 | 
					            aa_no_stop_sequence_records,
 | 
				
			||||||
 | 
					            problems,
 | 
				
			||||||
 | 
					        ) = trim(
 | 
				
			||||||
 | 
					            start,
 | 
				
			||||||
 | 
					            end,
 | 
				
			||||||
 | 
					            args.gen_cut_stop_codon,
 | 
				
			||||||
 | 
					            args.do_translate,
 | 
				
			||||||
 | 
					            msa_records,
 | 
				
			||||||
 | 
					            correction_range=args.correction_range,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        if len(problems) > 0:
 | 
				
			||||||
 | 
					            warning(
 | 
				
			||||||
 | 
					                f"There were {len(problems)} problems " f"during trimming {gene_name}!"
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        if args.catalogue_problems:
 | 
				
			||||||
 | 
					            output_as_csv(
 | 
				
			||||||
 | 
					                gene_name,
 | 
				
			||||||
 | 
					                problems,
 | 
				
			||||||
 | 
					                os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        write_to_file(
 | 
				
			||||||
 | 
					            args.output_dir,
 | 
				
			||||||
 | 
					            gene_name,
 | 
				
			||||||
 | 
					            start,
 | 
				
			||||||
 | 
					            end,
 | 
				
			||||||
 | 
					            args.full_suffix,
 | 
				
			||||||
 | 
					            args.ns_suffix,
 | 
				
			||||||
 | 
					            args.aa_suffix,
 | 
				
			||||||
 | 
					            nt_sequence_records,
 | 
				
			||||||
 | 
					            nt_no_stop_sequence_records,
 | 
				
			||||||
 | 
					            aa_sequence_records,
 | 
				
			||||||
 | 
					            aa_no_stop_sequence_records,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        info(f"Completed gene {gene_name} ({start} - {end})")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@Gooey
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    parser = argparse.ArgumentParser(
 | 
					    parser = argparse.ArgumentParser(
 | 
				
			||||||
        prog="splitmsa",
 | 
					        prog="splitmsa",
 | 
				
			||||||
@@ -391,69 +454,9 @@ def main():
 | 
				
			|||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    args = parser.parse_args()
 | 
					    args = parser.parse_args()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    logging.basicConfig(level=args.log_level.upper())
 | 
					    logging.basicConfig(level=args.log_level.upper())
 | 
				
			||||||
 | 
					    run(args)
 | 
				
			||||||
    msa_records = list(read_msa_file(args.input))
 | 
					 | 
				
			||||||
    info(f"MSA records read complete. Found {len(msa_records)} records.")
 | 
					 | 
				
			||||||
    genes = []
 | 
					 | 
				
			||||||
    if args.gene_list:
 | 
					 | 
				
			||||||
        genes = read_genes_from_csv(args.gene_list)
 | 
					 | 
				
			||||||
        info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        if args.gene_name and args.start and args.end:
 | 
					 | 
				
			||||||
            genes.append([args.gene_name, args.start, args.end])
 | 
					 | 
				
			||||||
            info(
 | 
					 | 
				
			||||||
                f"Extracting {args.gene_name} starting at {args.start} to "
 | 
					 | 
				
			||||||
                f"{args.end}."
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            raise Exception(
 | 
					 | 
				
			||||||
                "Need either a gene list by --gene-list or a start and end "
 | 
					 | 
				
			||||||
                "via --start, and --end respectively."
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
    for gene_name, start, end in genes:
 | 
					 | 
				
			||||||
        info(f"Started on gene {gene_name} ({start} - {end})")
 | 
					 | 
				
			||||||
        (
 | 
					 | 
				
			||||||
            nt_sequence_records,
 | 
					 | 
				
			||||||
            nt_no_stop_sequence_records,
 | 
					 | 
				
			||||||
            aa_sequence_records,
 | 
					 | 
				
			||||||
            aa_no_stop_sequence_records,
 | 
					 | 
				
			||||||
            problems,
 | 
					 | 
				
			||||||
        ) = trim(
 | 
					 | 
				
			||||||
            start,
 | 
					 | 
				
			||||||
            end,
 | 
					 | 
				
			||||||
            args.gen_cut_stop_codon,
 | 
					 | 
				
			||||||
            args.do_translate,
 | 
					 | 
				
			||||||
            msa_records,
 | 
					 | 
				
			||||||
            correction_range=args.correction_range,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        if len(problems) > 0:
 | 
					 | 
				
			||||||
            warning(
 | 
					 | 
				
			||||||
                f"There were {len(problems)} problems " f"during trimming {gene_name}!"
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        if args.catalogue_problems:
 | 
					 | 
				
			||||||
            output_as_csv(
 | 
					 | 
				
			||||||
                gene_name,
 | 
					 | 
				
			||||||
                problems,
 | 
					 | 
				
			||||||
                os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        write_to_file(
 | 
					 | 
				
			||||||
            args.output_dir,
 | 
					 | 
				
			||||||
            gene_name,
 | 
					 | 
				
			||||||
            start,
 | 
					 | 
				
			||||||
            end,
 | 
					 | 
				
			||||||
            args.full_suffix,
 | 
					 | 
				
			||||||
            args.ns_suffix,
 | 
					 | 
				
			||||||
            args.aa_suffix,
 | 
					 | 
				
			||||||
            nt_sequence_records,
 | 
					 | 
				
			||||||
            nt_no_stop_sequence_records,
 | 
					 | 
				
			||||||
            aa_sequence_records,
 | 
					 | 
				
			||||||
            aa_no_stop_sequence_records,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        info(f"Completed gene {gene_name} ({start} - {end})")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
 | 
					 | 
				
			||||||
    main()
 | 
					    main()
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user