Compare commits
15 Commits
43758c45f8
...
master
Author | SHA1 | Date | |
---|---|---|---|
f08e4c7d35 | |||
2f49699a23 | |||
9d97ee6244 | |||
bfeec68756 | |||
ba4a532784 | |||
b047b6f8fc | |||
d1b3993011 | |||
ad8fe00479 | |||
df132814c8 | |||
9b56853e36 | |||
0cc3539280 | |||
2c38d7d172 | |||
3ca07feade | |||
cf9df14fce | |||
8b379198ec |
11
.devcontainer/Dockerfile
Normal file
11
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
FROM mcr.microsoft.com/devcontainers/anaconda:1-3
|
||||||
|
|
||||||
|
# Copy environment.yml (if found) to a temp location so we can update the environment. Also
|
||||||
|
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
|
||||||
|
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
|
||||||
|
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
|
||||||
|
&& rm -rf /tmp/conda-tmp
|
||||||
|
|
||||||
|
# [Optional] Uncomment this section to install additional OS packages.
|
||||||
|
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||||
|
# && apt-get -y install --no-install-recommends <your-package-list-here>
|
35
.devcontainer/devcontainer.json
Normal file
35
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||||
|
// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
|
||||||
|
{
|
||||||
|
"name": "Anaconda (Python 3)",
|
||||||
|
"build": {
|
||||||
|
"context": "..",
|
||||||
|
"dockerfile": "Dockerfile"
|
||||||
|
},
|
||||||
|
"customizations": {
|
||||||
|
"vscode": {
|
||||||
|
"extensions": [
|
||||||
|
"ms-python.debugpy",
|
||||||
|
"ms-python.python",
|
||||||
|
"ms-python.vscode-pylance",
|
||||||
|
"ms-python.black-formatter",
|
||||||
|
"ms-python.flake8"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||||
|
// "features": {},
|
||||||
|
|
||||||
|
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||||
|
// "forwardPorts": [],
|
||||||
|
|
||||||
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
|
// "postCreateCommand": "python --version",
|
||||||
|
|
||||||
|
// Configure tool-specific properties.
|
||||||
|
// "customizations": {},
|
||||||
|
|
||||||
|
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||||
|
// "remoteUser": "root"
|
||||||
|
}
|
3
.devcontainer/noop.txt
Normal file
3
.devcontainer/noop.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
This file is copied into the container along with environment.yml* from the parent
|
||||||
|
folder. This file is included to prevent the Dockerfile COPY instruction from
|
||||||
|
failing if no environment.yml is found.
|
9
.vscode/launch.json
vendored
9
.vscode/launch.json
vendored
@@ -8,15 +8,16 @@
|
|||||||
"name": "Splitter Single Gene with Translation",
|
"name": "Splitter Single Gene with Translation",
|
||||||
"type": "python",
|
"type": "python",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/msa_splitter.py",
|
"program": "${workspaceFolder}/splitmsa/splitmsa.py",
|
||||||
"args": [
|
"args": [
|
||||||
"${workspaceFolder}/tests/resources/test_msa-shortened.fa",
|
"${workspaceFolder}/tests/resources/test_msa-shortened.fa",
|
||||||
"--gene-list", "${workspaceFolder}/tests/resources/gene_list.csv",
|
"--gene-list",
|
||||||
|
"${workspaceFolder}/tests/resources/gene_list.csv",
|
||||||
"-C",
|
"-C",
|
||||||
"-E", "DEBUG",
|
"-E",
|
||||||
|
"DEBUG",
|
||||||
"--do-translate",
|
"--do-translate",
|
||||||
"--gen-cut-stop-codon"
|
"--gen-cut-stop-codon"
|
||||||
|
|
||||||
],
|
],
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": true
|
"justMyCode": true
|
||||||
|
26
Jenkinsfile
vendored
26
Jenkinsfile
vendored
@@ -1,15 +1,31 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
|
kubernetes {
|
||||||
|
cloud 'Reslate Systems'
|
||||||
|
defaultContainer 'conda'
|
||||||
|
}
|
||||||
|
}
|
||||||
stages {
|
stages {
|
||||||
stage("install") {
|
stage("install") {
|
||||||
steps {
|
steps {
|
||||||
sh 'mamba env update --file environment.yml'
|
sh 'conda update conda -y -q'
|
||||||
sh 'echo "mamba activate splitmsa" >> ~/.bashrc'
|
sh 'conda env update -n base --file environment.yml'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
sh "python -m build"
|
sh "conda run -n base python -m build"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("test") {
|
||||||
|
steps {
|
||||||
|
sh "conda run -n base pip install dist/*.whl"
|
||||||
|
sh "conda run -n base splitmsa -h"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("archive") {
|
||||||
|
steps {
|
||||||
|
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("publish") {
|
stage("publish") {
|
||||||
@@ -18,7 +34,7 @@ pipeline {
|
|||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
||||||
sh "python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*"
|
sh returnStatus: true, script: 'conda run -n base python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -4,5 +4,6 @@ channels:
|
|||||||
dependencies:
|
dependencies:
|
||||||
- build=0.7.0
|
- build=0.7.0
|
||||||
- pytest=7.2.2
|
- pytest=7.2.2
|
||||||
- twine=4.0.2
|
- twine
|
||||||
- biopython=1.81
|
- biopython=1.81
|
||||||
|
- python=3.9
|
||||||
|
@@ -1 +0,0 @@
|
|||||||
Bio==1.5.6
|
|
@@ -1,12 +1,12 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = splitmsa
|
name = splitmsa
|
||||||
version = 0.0.1
|
version = 0.0.4
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
packages = splitmsa
|
packages = splitmsa
|
||||||
install_requires =
|
install_requires =
|
||||||
Bio
|
biopython ==1.81; python_version == "3.9"
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
splitmsa = splitmsa.splitmsa:main
|
splitmsa = splitmsa.splitmsa:main
|
||||||
|
0
splitmsa/__init__.py
Normal file
0
splitmsa/__init__.py
Normal file
@@ -247,6 +247,67 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str):
|
|||||||
writer.writerows(problems)
|
writer.writerows(problems)
|
||||||
|
|
||||||
|
|
||||||
|
def run(args):
|
||||||
|
msa_records = list(read_msa_file(args.input))
|
||||||
|
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
||||||
|
genes = []
|
||||||
|
if args.gene_list:
|
||||||
|
genes = read_genes_from_csv(args.gene_list)
|
||||||
|
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
||||||
|
else:
|
||||||
|
if args.gene_name and args.start and args.end:
|
||||||
|
genes.append([args.gene_name, args.start, args.end])
|
||||||
|
info(
|
||||||
|
f"Extracting {args.gene_name} starting at {args.start} to "
|
||||||
|
f"{args.end}."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
"Need either a gene list by --gene-list or a start and end "
|
||||||
|
"via --start, and --end respectively."
|
||||||
|
)
|
||||||
|
for gene_name, start, end in genes:
|
||||||
|
info(f"Started on gene {gene_name} ({start} - {end})")
|
||||||
|
(
|
||||||
|
nt_sequence_records,
|
||||||
|
nt_no_stop_sequence_records,
|
||||||
|
aa_sequence_records,
|
||||||
|
aa_no_stop_sequence_records,
|
||||||
|
problems,
|
||||||
|
) = trim(
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
args.gen_cut_stop_codon,
|
||||||
|
args.do_translate,
|
||||||
|
msa_records,
|
||||||
|
correction_range=args.correction_range,
|
||||||
|
)
|
||||||
|
if len(problems) > 0:
|
||||||
|
warning(
|
||||||
|
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
||||||
|
)
|
||||||
|
if args.catalogue_problems:
|
||||||
|
output_as_csv(
|
||||||
|
gene_name,
|
||||||
|
problems,
|
||||||
|
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
||||||
|
)
|
||||||
|
write_to_file(
|
||||||
|
args.output_dir,
|
||||||
|
gene_name,
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
args.full_suffix,
|
||||||
|
args.ns_suffix,
|
||||||
|
args.aa_suffix,
|
||||||
|
nt_sequence_records,
|
||||||
|
nt_no_stop_sequence_records,
|
||||||
|
aa_sequence_records,
|
||||||
|
aa_no_stop_sequence_records,
|
||||||
|
)
|
||||||
|
info(f"Completed gene {gene_name} ({start} - {end})")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="splitmsa",
|
prog="splitmsa",
|
||||||
@@ -391,69 +452,9 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
logging.basicConfig(level=args.log_level.upper())
|
logging.basicConfig(level=args.log_level.upper())
|
||||||
|
run(args)
|
||||||
msa_records = list(read_msa_file(args.input))
|
|
||||||
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
|
||||||
genes = []
|
|
||||||
if args.gene_list:
|
|
||||||
genes = read_genes_from_csv(args.gene_list)
|
|
||||||
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
|
||||||
else:
|
|
||||||
if args.gene_name and args.start and args.end:
|
|
||||||
genes.append([args.gene_name, args.start, args.end])
|
|
||||||
info(
|
|
||||||
f"Extracting {args.gene_name} starting at {args.start} to "
|
|
||||||
f"{args.end}."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
"Need either a gene list by --gene-list or a start and end "
|
|
||||||
"via --start, and --end respectively."
|
|
||||||
)
|
|
||||||
for gene_name, start, end in genes:
|
|
||||||
info(f"Started on gene {gene_name} ({start} - {end})")
|
|
||||||
(
|
|
||||||
nt_sequence_records,
|
|
||||||
nt_no_stop_sequence_records,
|
|
||||||
aa_sequence_records,
|
|
||||||
aa_no_stop_sequence_records,
|
|
||||||
problems,
|
|
||||||
) = trim(
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
args.gen_cut_stop_codon,
|
|
||||||
args.do_translate,
|
|
||||||
msa_records,
|
|
||||||
correction_range=args.correction_range,
|
|
||||||
)
|
|
||||||
if len(problems) > 0:
|
|
||||||
warning(
|
|
||||||
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
|
||||||
)
|
|
||||||
if args.catalogue_problems:
|
|
||||||
output_as_csv(
|
|
||||||
gene_name,
|
|
||||||
problems,
|
|
||||||
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
|
||||||
)
|
|
||||||
write_to_file(
|
|
||||||
args.output_dir,
|
|
||||||
gene_name,
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
args.full_suffix,
|
|
||||||
args.ns_suffix,
|
|
||||||
args.aa_suffix,
|
|
||||||
nt_sequence_records,
|
|
||||||
nt_no_stop_sequence_records,
|
|
||||||
aa_sequence_records,
|
|
||||||
aa_no_stop_sequence_records,
|
|
||||||
)
|
|
||||||
info(f"Completed gene {gene_name} ({start} - {end})")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user