Compare commits
No commits in common. "2c38d7d1729d9b00e71cd247d84d2db0db3225fc" and "43758c45f8eed9a8172c528ddccc7f3147195a8a" have entirely different histories.
2c38d7d172
...
43758c45f8
9
.vscode/launch.json
vendored
9
.vscode/launch.json
vendored
@ -8,16 +8,15 @@
|
||||
"name": "Splitter Single Gene with Translation",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/splitmsa/splitmsa.py",
|
||||
"program": "${workspaceFolder}/msa_splitter.py",
|
||||
"args": [
|
||||
"${workspaceFolder}/tests/resources/test_msa-shortened.fa",
|
||||
"--gene-list",
|
||||
"${workspaceFolder}/tests/resources/gene_list.csv",
|
||||
"--gene-list", "${workspaceFolder}/tests/resources/gene_list.csv",
|
||||
"-C",
|
||||
"-E",
|
||||
"DEBUG",
|
||||
"-E", "DEBUG",
|
||||
"--do-translate",
|
||||
"--gen-cut-stop-codon"
|
||||
|
||||
],
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true
|
||||
|
5
Jenkinsfile
vendored
5
Jenkinsfile
vendored
@ -12,11 +12,6 @@ pipeline {
|
||||
sh "python -m build"
|
||||
}
|
||||
}
|
||||
stage("test") {
|
||||
steps {
|
||||
sh "pip install dist/*.whl"
|
||||
}
|
||||
}
|
||||
stage("publish") {
|
||||
when {
|
||||
branch '**/master'
|
||||
|
@ -6,4 +6,3 @@ dependencies:
|
||||
- pytest=7.2.2
|
||||
- twine=4.0.2
|
||||
- biopython=1.81
|
||||
- python=3.9
|
||||
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
Bio==1.5.6
|
@ -1,11 +1,11 @@
|
||||
[metadata]
|
||||
name = splitmsa
|
||||
version = 0.0.2
|
||||
version = 0.0.1
|
||||
|
||||
[options]
|
||||
packages = splitmsa
|
||||
install_requires =
|
||||
biopython ==1.81; python_version == "3.9"
|
||||
Bio
|
||||
|
||||
[options.entry_points]
|
||||
console_scripts =
|
||||
|
@ -247,67 +247,6 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str):
|
||||
writer.writerows(problems)
|
||||
|
||||
|
||||
def run(args):
|
||||
msa_records = list(read_msa_file(args.input))
|
||||
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
||||
genes = []
|
||||
if args.gene_list:
|
||||
genes = read_genes_from_csv(args.gene_list)
|
||||
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
||||
else:
|
||||
if args.gene_name and args.start and args.end:
|
||||
genes.append([args.gene_name, args.start, args.end])
|
||||
info(
|
||||
f"Extracting {args.gene_name} starting at {args.start} to "
|
||||
f"{args.end}."
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"Need either a gene list by --gene-list or a start and end "
|
||||
"via --start, and --end respectively."
|
||||
)
|
||||
for gene_name, start, end in genes:
|
||||
info(f"Started on gene {gene_name} ({start} - {end})")
|
||||
(
|
||||
nt_sequence_records,
|
||||
nt_no_stop_sequence_records,
|
||||
aa_sequence_records,
|
||||
aa_no_stop_sequence_records,
|
||||
problems,
|
||||
) = trim(
|
||||
start,
|
||||
end,
|
||||
args.gen_cut_stop_codon,
|
||||
args.do_translate,
|
||||
msa_records,
|
||||
correction_range=args.correction_range,
|
||||
)
|
||||
if len(problems) > 0:
|
||||
warning(
|
||||
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
||||
)
|
||||
if args.catalogue_problems:
|
||||
output_as_csv(
|
||||
gene_name,
|
||||
problems,
|
||||
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
||||
)
|
||||
write_to_file(
|
||||
args.output_dir,
|
||||
gene_name,
|
||||
start,
|
||||
end,
|
||||
args.full_suffix,
|
||||
args.ns_suffix,
|
||||
args.aa_suffix,
|
||||
nt_sequence_records,
|
||||
nt_no_stop_sequence_records,
|
||||
aa_sequence_records,
|
||||
aa_no_stop_sequence_records,
|
||||
)
|
||||
info(f"Completed gene {gene_name} ({start} - {end})")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="splitmsa",
|
||||
@ -452,9 +391,69 @@ def main():
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(level=args.log_level.upper())
|
||||
run(args)
|
||||
|
||||
msa_records = list(read_msa_file(args.input))
|
||||
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
||||
genes = []
|
||||
if args.gene_list:
|
||||
genes = read_genes_from_csv(args.gene_list)
|
||||
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
||||
else:
|
||||
if args.gene_name and args.start and args.end:
|
||||
genes.append([args.gene_name, args.start, args.end])
|
||||
info(
|
||||
f"Extracting {args.gene_name} starting at {args.start} to "
|
||||
f"{args.end}."
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"Need either a gene list by --gene-list or a start and end "
|
||||
"via --start, and --end respectively."
|
||||
)
|
||||
for gene_name, start, end in genes:
|
||||
info(f"Started on gene {gene_name} ({start} - {end})")
|
||||
(
|
||||
nt_sequence_records,
|
||||
nt_no_stop_sequence_records,
|
||||
aa_sequence_records,
|
||||
aa_no_stop_sequence_records,
|
||||
problems,
|
||||
) = trim(
|
||||
start,
|
||||
end,
|
||||
args.gen_cut_stop_codon,
|
||||
args.do_translate,
|
||||
msa_records,
|
||||
correction_range=args.correction_range,
|
||||
)
|
||||
if len(problems) > 0:
|
||||
warning(
|
||||
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
||||
)
|
||||
if args.catalogue_problems:
|
||||
output_as_csv(
|
||||
gene_name,
|
||||
problems,
|
||||
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
||||
)
|
||||
write_to_file(
|
||||
args.output_dir,
|
||||
gene_name,
|
||||
start,
|
||||
end,
|
||||
args.full_suffix,
|
||||
args.ns_suffix,
|
||||
args.aa_suffix,
|
||||
nt_sequence_records,
|
||||
nt_no_stop_sequence_records,
|
||||
aa_sequence_records,
|
||||
aa_no_stop_sequence_records,
|
||||
)
|
||||
info(f"Completed gene {gene_name} ({start} - {end})")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user