Renamed project to "SplitMSA" and added pipeline file

2023-04-11 12:33:52 -05:00 · 2023-04-11 12:33:52 -05:00 · eccb08153e
commit eccb08153e
parent 419adcd098
8 changed files with 120 additions and 114 deletions
--- a/26
+++ b/26
@ -0,0 +1,26 @@
 // Jenkins declarative pipeline: updates the conda environment, builds the
 // distribution, and uploads it to the package registry for master branches.
 pipeline {
    agent any
    stages {
        stage("install") {
            steps {
                sh 'conda env update --file environment.yml'
                // Append the activation line only when it is not already present,
                // so repeated builds do not keep growing ~/.bashrc.
                // NOTE(review): this only helps if later shells actually read ~/.bashrc — confirm.
                sh 'grep -qxF "conda activate splitmsa" ~/.bashrc 2>/dev/null || echo "conda activate splitmsa" >> ~/.bashrc'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("publish") {
            // Only publish from branches matching '**/master'.
            when {
                branch '**/master'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // Single-quoted Groovy string on purpose: $USER and $PASS are expanded by
                    // the shell from the environment that withCredentials provides, instead of
                    // being interpolated by Groovy into the command text (which exposes secrets).
                    sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/$USER/pypi" -u "$USER" -p "$PASS" --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
 }
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# MSA Splitter
+# SplitMSA
 Simple FASTA file splitter. Capable of batch trimming a large number of sequences in the form of an MSA in a FASTA file.
--- a/environment.yaml
+++ b/environment.yaml
@ -1,46 +0,0 @@
 name: /home/ydeng/msa-splitter/envs
 channels:
  - conda-forge
 dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - biopython=1.81=py311h2582759_0
  - black=23.3.0=py311h38be061_0
  - bzip2=1.0.8=h7f98852_4
  - ca-certificates=2022.12.7=ha878542_0
  - click=8.1.3=unix_pyhd8ed1ab_2
  - ld_impl_linux-64=2.40=h41732ed_0
  - libblas=3.9.0=16_linux64_openblas
  - libcblas=3.9.0=16_linux64_openblas
  - libexpat=2.5.0=hcb278e6_1
  - libffi=3.4.2=h7f98852_5
  - libgcc-ng=12.2.0=h65d4601_19
  - libgfortran-ng=12.2.0=h69a702a_19
  - libgfortran5=12.2.0=h337968e_19
  - libgomp=12.2.0=h65d4601_19
  - liblapack=3.9.0=16_linux64_openblas
  - libnsl=2.0.0=h7f98852_0
  - libopenblas=0.3.21=pthreads_h78a6416_3
  - libsqlite=3.40.0=h753d276_0
  - libstdcxx-ng=12.2.0=h46fd767_19
  - libuuid=2.38.1=h0b41bf4_0
  - libzlib=1.2.13=h166bdaf_4
  - mypy_extensions=1.0.0=pyha770c72_0
  - ncurses=6.3=h27087fc_1
  - numpy=1.24.2=py311h8e6699e_0
  - openssl=3.1.0=h0b41bf4_0
  - packaging=23.0=pyhd8ed1ab_0
  - pathspec=0.11.1=pyhd8ed1ab_0
  - pip=23.0.1=pyhd8ed1ab_0
  - platformdirs=3.2.0=pyhd8ed1ab_0
  - python=3.11.1=h2755cc3_0_cpython
  - python_abi=3.11=3_cp311
  - readline=8.2=h8228510_1
  - setuptools=67.6.1=pyhd8ed1ab_0
  - tk=8.6.12=h27826a3_0
  - typing-extensions=4.5.0=hd8ed1ab_0
  - typing_extensions=4.5.0=pyha770c72_0
  - tzdata=2023c=h71feb2d_0
  - wheel=0.40.0=pyhd8ed1ab_0
  - xz=5.2.6=h166bdaf_0
 prefix: /home/ydeng/msa-splitter/envs
--- a/environment.yml
+++ b/environment.yml
@ -0,0 +1,5 @@
 # Conda environment definition for the "splitmsa" environment.
 # Packages are resolved from conda-forge; Biopython 1.81 is the only listed dependency.
 name: splitmsa
 channels:
  - conda-forge
 dependencies:
  - biopython=1.81
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,3 @@
 # Build-system declaration: the package is built through the setuptools
 # backend, with setuptools and wheel required at build time.
 [build-system]
 build-backend = "setuptools.build_meta"
 requires = ["setuptools", "wheel"]
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,12 @@
 # Declarative setuptools configuration for the splitmsa package.
 [metadata]
 name = splitmsa
 version = 0.0.1
 [options]
 packages = splitmsa
 # "Bio" is only the import name; the distribution on PyPI is "biopython"
 # (the same package that environment.yml pins for conda).
 install_requires =
    biopython
 # Installs a "splitmsa" command that calls main() in splitmsa/splitmsa.py.
 [options.entry_points]
 console_scripts =
    splitmsa = splitmsa.splitmsa:main
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,3 @@
 # Minimal setuptools shim: setup() is called without arguments, so all
 # project metadata has to come from configuration files rather than this script.
 from setuptools import setup
 setup()
--- a/splitmsa/splitmsa.py
+++ b/splitmsa/splitmsa.py
@ -203,7 +203,7 @@ def trim(
            )
        if perform_translation and not skip_translation:
-            if '-' in nt_sequence:
+            if "-" in nt_sequence:
                sequence_with_ambiguity = []
                for codon_in_sequence in range(0, len(nt_sequence), 3):
                    codon = nt_sequence[codon_in_sequence : codon_in_sequence + 3]
@ -247,72 +247,9 @@ def output_as_csv(gene: str, problems: list[list[str]], output_path: str):
        writer.writerows(problems)
-def main(args):
+def main():
    logging.basicConfig(level=args.log_level.upper())
    msa_records = list(read_msa_file(args.input))
    info(f"MSA records read complete. Found {len(msa_records)} records.")
    genes = []
    if args.gene_list:
        genes = read_genes_from_csv(args.gene_list)
        info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
    else:
        if args.gene_name and args.start and args.end:
            genes.append([args.gene_name, args.start, args.end])
            info(
                f"Extracting {args.gene_name} starting at {args.start} to "
                f"{args.end}."
            )
        else:
            raise Exception(
                "Need either a gene list by --gene-list or a start and end "
                "via --start, and --end respectively."
            )
    for gene_name, start, end in genes:
        info(f"Started on gene {gene_name} ({start} - {end})")
        (
            nt_sequence_records,
            nt_no_stop_sequence_records,
            aa_sequence_records,
            aa_no_stop_sequence_records,
            problems,
        ) = trim(
            start,
            end,
            args.gen_cut_stop_codon,
            args.do_translate,
            msa_records,
            correction_range=args.correction_range,
        )
        if len(problems) > 0:
            warning(
                f"There were {len(problems)} problems " f"during trimming {gene_name}!"
            )
        if args.catalogue_problems:
            output_as_csv(
                gene_name,
                problems,
                os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
            )
        write_to_file(
            args.output_dir,
            gene_name,
            start,
            end,
            args.full_suffix,
            args.ns_suffix,
            args.aa_suffix,
            nt_sequence_records,
            nt_no_stop_sequence_records,
            aa_sequence_records,
            aa_no_stop_sequence_records,
        )
        info(f"Completed gene {gene_name} ({start} - {end})")
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
-        prog="msa_splitter",
+        prog="splitmsa",
        description="""
            The MSA splitter is a simple program that takes in two positions
            and a MSA file and produces two separate FASTA files
@ -453,4 +390,70 @@ if __name__ == "__main__":
        action="store_true",
    )
-    main(parser.parse_args())
+    args = parser.parse_args()
    logging.basicConfig(level=args.log_level.upper())
    msa_records = list(read_msa_file(args.input))
    info(f"MSA records read complete. Found {len(msa_records)} records.")
    genes = []
    if args.gene_list:
        genes = read_genes_from_csv(args.gene_list)
        info(f"Gene list read from {args.gene_list} resulted in {len(genes)} " "genes.")
    else:
        if args.gene_name and args.start and args.end:
            genes.append([args.gene_name, args.start, args.end])
            info(
                f"Extracting {args.gene_name} starting at {args.start} to "
                f"{args.end}."
            )
        else:
            raise Exception(
                "Need either a gene list by --gene-list or a start and end "
                "via --start, and --end respectively."
            )
    for gene_name, start, end in genes:
        info(f"Started on gene {gene_name} ({start} - {end})")
        (
            nt_sequence_records,
            nt_no_stop_sequence_records,
            aa_sequence_records,
            aa_no_stop_sequence_records,
            problems,
        ) = trim(
            start,
            end,
            args.gen_cut_stop_codon,
            args.do_translate,
            msa_records,
            correction_range=args.correction_range,
        )
        if len(problems) > 0:
            warning(
                f"There were {len(problems)} problems " f"during trimming {gene_name}!"
            )
        if args.catalogue_problems:
            output_as_csv(
                gene_name,
                problems,
                os.path.join(args.output_dir, f"{gene_name} - problems.csv"),
            )
        write_to_file(
            args.output_dir,
            gene_name,
            start,
            end,
            args.full_suffix,
            args.ns_suffix,
            args.aa_suffix,
            nt_sequence_records,
            nt_no_stop_sequence_records,
            aa_sequence_records,
            aa_no_stop_sequence_records,
        )
        info(f"Completed gene {gene_name} ({start} - {end})")
 if __name__ == "__main__":
    main()
`@ -1,4 +1,4 @@`
	`# MSA Splitter`	`# SplitMSA`

	`Simple FASTA file splitter. Capable of batch trimming a large number of sequences in the form of an MSA in a FASTA file.`	`Simple FASTA file splitter. Capable of batch trimming a large number of sequences in the form of an MSA in a FASTA file.`