Renamed project to "SplitMSA" and added pipeline file
Some checks failed
ydeng/splitmsa/pipeline/head There was a failure building this commit
Some checks failed
ydeng/splitmsa/pipeline/head There was a failure building this commit
This commit is contained in:
parent
419adcd098
commit
eccb08153e
26
Jenkinsfile
vendored
Normal file
26
Jenkinsfile
vendored
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
pipeline {
|
||||||
|
agent any
|
||||||
|
stages {
|
||||||
|
stage("install") {
|
||||||
|
steps {
|
||||||
|
sh 'conda env update --file environment.yml'
|
||||||
|
sh 'echo "conda activate splitmsa" >> ~/.bashrc'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("build") {
|
||||||
|
steps {
|
||||||
|
sh "python -m build"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("publish") {
    // Run this stage only for branches whose name matches '**/master'.
    when {
        branch '**/master'
    }
    steps {
        // Bind the registry username/password to the USER and PASS
        // environment variables for the duration of the enclosed steps.
        withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
            // Single-quoted Groovy string on purpose: ${USER} and ${PASS} are
            // expanded by the shell from the environment, not interpolated by
            // Groovy, so the secret is never embedded in the command text
            // that Jenkins itself handles.
            sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/${USER}/pypi" -u "${USER}" -p "${PASS}" --non-interactive --disable-progress-bar --verbose dist/*'
        }
    }
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
# MSA Splitter
|
# SplitMSA
|
||||||
|
|
||||||
Simple FASTA file splitter. Capable of batch trimming a large number of sequences in the form of an MSA in a FASTA file.
|
Simple FASTA file splitter. Capable of batch trimming a large number of sequences in the form of an MSA in a FASTA file.
|
||||||
|
|
||||||
|
@ -1,46 +0,0 @@
|
|||||||
name: /home/ydeng/msa-splitter/envs
|
|
||||||
channels:
|
|
||||||
- conda-forge
|
|
||||||
dependencies:
|
|
||||||
- _libgcc_mutex=0.1=conda_forge
|
|
||||||
- _openmp_mutex=4.5=2_gnu
|
|
||||||
- biopython=1.81=py311h2582759_0
|
|
||||||
- black=23.3.0=py311h38be061_0
|
|
||||||
- bzip2=1.0.8=h7f98852_4
|
|
||||||
- ca-certificates=2022.12.7=ha878542_0
|
|
||||||
- click=8.1.3=unix_pyhd8ed1ab_2
|
|
||||||
- ld_impl_linux-64=2.40=h41732ed_0
|
|
||||||
- libblas=3.9.0=16_linux64_openblas
|
|
||||||
- libcblas=3.9.0=16_linux64_openblas
|
|
||||||
- libexpat=2.5.0=hcb278e6_1
|
|
||||||
- libffi=3.4.2=h7f98852_5
|
|
||||||
- libgcc-ng=12.2.0=h65d4601_19
|
|
||||||
- libgfortran-ng=12.2.0=h69a702a_19
|
|
||||||
- libgfortran5=12.2.0=h337968e_19
|
|
||||||
- libgomp=12.2.0=h65d4601_19
|
|
||||||
- liblapack=3.9.0=16_linux64_openblas
|
|
||||||
- libnsl=2.0.0=h7f98852_0
|
|
||||||
- libopenblas=0.3.21=pthreads_h78a6416_3
|
|
||||||
- libsqlite=3.40.0=h753d276_0
|
|
||||||
- libstdcxx-ng=12.2.0=h46fd767_19
|
|
||||||
- libuuid=2.38.1=h0b41bf4_0
|
|
||||||
- libzlib=1.2.13=h166bdaf_4
|
|
||||||
- mypy_extensions=1.0.0=pyha770c72_0
|
|
||||||
- ncurses=6.3=h27087fc_1
|
|
||||||
- numpy=1.24.2=py311h8e6699e_0
|
|
||||||
- openssl=3.1.0=h0b41bf4_0
|
|
||||||
- packaging=23.0=pyhd8ed1ab_0
|
|
||||||
- pathspec=0.11.1=pyhd8ed1ab_0
|
|
||||||
- pip=23.0.1=pyhd8ed1ab_0
|
|
||||||
- platformdirs=3.2.0=pyhd8ed1ab_0
|
|
||||||
- python=3.11.1=h2755cc3_0_cpython
|
|
||||||
- python_abi=3.11=3_cp311
|
|
||||||
- readline=8.2=h8228510_1
|
|
||||||
- setuptools=67.6.1=pyhd8ed1ab_0
|
|
||||||
- tk=8.6.12=h27826a3_0
|
|
||||||
- typing-extensions=4.5.0=hd8ed1ab_0
|
|
||||||
- typing_extensions=4.5.0=pyha770c72_0
|
|
||||||
- tzdata=2023c=h71feb2d_0
|
|
||||||
- wheel=0.40.0=pyhd8ed1ab_0
|
|
||||||
- xz=5.2.6=h166bdaf_0
|
|
||||||
prefix: /home/ydeng/msa-splitter/envs
|
|
5
environment.yml
Normal file
5
environment.yml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
name: splitmsa
|
||||||
|
channels:
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- biopython=1.81
|
3
pyproject.toml
Normal file
3
pyproject.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
[build-system]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
requires = ["setuptools", "wheel"]
|
12
setup.cfg
Normal file
12
setup.cfg
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
[metadata]
|
||||||
|
name = splitmsa
|
||||||
|
version = 0.0.1
|
||||||
|
|
||||||
|
[options]
packages = splitmsa
# The "Bio" import package is distributed on PyPI as "biopython";
# requiring "Bio" would resolve to a different, unrelated project.
install_requires =
    biopython
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
splitmsa = splitmsa.splitmsa:main
|
@ -203,7 +203,7 @@ def trim(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if perform_translation and not skip_translation:
|
if perform_translation and not skip_translation:
|
||||||
if '-' in nt_sequence:
|
if "-" in nt_sequence:
|
||||||
sequence_with_ambiguity = []
|
sequence_with_ambiguity = []
|
||||||
for codon_in_sequence in range(0, len(nt_sequence), 3):
|
for codon_in_sequence in range(0, len(nt_sequence), 3):
|
||||||
codon = nt_sequence[codon_in_sequence : codon_in_sequence + 3]
|
codon = nt_sequence[codon_in_sequence : codon_in_sequence + 3]
|
||||||
@ -247,72 +247,9 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str):
|
|||||||
writer.writerows(problems)
|
writer.writerows(problems)
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main():
|
||||||
logging.basicConfig(level=args.log_level.upper())
|
|
||||||
|
|
||||||
msa_records = list(read_msa_file(args.input))
|
|
||||||
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
|
||||||
genes = []
|
|
||||||
if args.gene_list:
|
|
||||||
genes = read_genes_from_csv(args.gene_list)
|
|
||||||
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
|
||||||
else:
|
|
||||||
if args.gene_name and args.start and args.end:
|
|
||||||
genes.append([args.gene_name, args.start, args.end])
|
|
||||||
info(
|
|
||||||
f"Extracting {args.gene_name} starting at {args.start} to "
|
|
||||||
f"{args.end}."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
"Need either a gene list by --gene-list or a start and end "
|
|
||||||
"via --start, and --end respectively."
|
|
||||||
)
|
|
||||||
for gene_name, start, end in genes:
|
|
||||||
info(f"Started on gene {gene_name} ({start} - {end})")
|
|
||||||
(
|
|
||||||
nt_sequence_records,
|
|
||||||
nt_no_stop_sequence_records,
|
|
||||||
aa_sequence_records,
|
|
||||||
aa_no_stop_sequence_records,
|
|
||||||
problems,
|
|
||||||
) = trim(
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
args.gen_cut_stop_codon,
|
|
||||||
args.do_translate,
|
|
||||||
msa_records,
|
|
||||||
correction_range=args.correction_range,
|
|
||||||
)
|
|
||||||
if len(problems) > 0:
|
|
||||||
warning(
|
|
||||||
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
|
||||||
)
|
|
||||||
if args.catalogue_problems:
|
|
||||||
output_as_csv(
|
|
||||||
gene_name,
|
|
||||||
problems,
|
|
||||||
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
|
||||||
)
|
|
||||||
write_to_file(
|
|
||||||
args.output_dir,
|
|
||||||
gene_name,
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
args.full_suffix,
|
|
||||||
args.ns_suffix,
|
|
||||||
args.aa_suffix,
|
|
||||||
nt_sequence_records,
|
|
||||||
nt_no_stop_sequence_records,
|
|
||||||
aa_sequence_records,
|
|
||||||
aa_no_stop_sequence_records,
|
|
||||||
)
|
|
||||||
info(f"Completed gene {gene_name} ({start} - {end})")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="msa_splitter",
|
prog="splitmsa",
|
||||||
description="""
|
description="""
|
||||||
The MSA splitter is a simple program that takes in two positions
|
The MSA splitter is a simple program that takes in two positions
|
||||||
and a MSA file and produces two separate FASTA files
|
and a MSA file and produces two separate FASTA files
|
||||||
@ -453,4 +390,70 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
|
|
||||||
main(parser.parse_args())
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(level=args.log_level.upper())
|
||||||
|
|
||||||
|
msa_records = list(read_msa_file(args.input))
|
||||||
|
info(f"MSA records read complete. Found {len(msa_records)} records.")
|
||||||
|
genes = []
|
||||||
|
if args.gene_list:
|
||||||
|
genes = read_genes_from_csv(args.gene_list)
|
||||||
|
info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
|
||||||
|
else:
|
||||||
|
if args.gene_name and args.start and args.end:
|
||||||
|
genes.append([args.gene_name, args.start, args.end])
|
||||||
|
info(
|
||||||
|
f"Extracting {args.gene_name} starting at {args.start} to "
|
||||||
|
f"{args.end}."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
"Need either a gene list by --gene-list or a start and end "
|
||||||
|
"via --start, and --end respectively."
|
||||||
|
)
|
||||||
|
for gene_name, start, end in genes:
|
||||||
|
info(f"Started on gene {gene_name} ({start} - {end})")
|
||||||
|
(
|
||||||
|
nt_sequence_records,
|
||||||
|
nt_no_stop_sequence_records,
|
||||||
|
aa_sequence_records,
|
||||||
|
aa_no_stop_sequence_records,
|
||||||
|
problems,
|
||||||
|
) = trim(
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
args.gen_cut_stop_codon,
|
||||||
|
args.do_translate,
|
||||||
|
msa_records,
|
||||||
|
correction_range=args.correction_range,
|
||||||
|
)
|
||||||
|
if len(problems) > 0:
|
||||||
|
warning(
|
||||||
|
f"There were {len(problems)} problems " f"during trimming {gene_name}!"
|
||||||
|
)
|
||||||
|
if args.catalogue_problems:
|
||||||
|
output_as_csv(
|
||||||
|
gene_name,
|
||||||
|
problems,
|
||||||
|
os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
|
||||||
|
)
|
||||||
|
write_to_file(
|
||||||
|
args.output_dir,
|
||||||
|
gene_name,
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
args.full_suffix,
|
||||||
|
args.ns_suffix,
|
||||||
|
args.aa_suffix,
|
||||||
|
nt_sequence_records,
|
||||||
|
nt_no_stop_sequence_records,
|
||||||
|
aa_sequence_records,
|
||||||
|
aa_no_stop_sequence_records,
|
||||||
|
)
|
||||||
|
info(f"Completed gene {gene_name} ({start} - {end})")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
main()
|
Loading…
Reference in New Issue
Block a user