Added python project description files

Added 'Jenkinsfile' and 'environment.yml'
Removed bad parameter
2023-04-21 11:47:28 -05:00 · 2023-04-21 11:44:01 -05:00 · 2023-04-20 15:16:34 -05:00 · 2023-04-20 12:24:26 -05:00 · 2023-04-20 10:07:39 -05:00
12 changed files with 272 additions and 194 deletions
--- a/36
+++ b/36
@@ -0,0 +1,36 @@
 // CI pipeline: clear old artifacts, update the conda environment, build the
 // distribution, install the built wheel, and publish from master branches.
 pipeline {
    agent any
    stages {
        stage("clean") {
            steps {
                // Remove artifacts of earlier builds so the later dist/* globs
                // only match files produced by this run.
                sh 'rm -rf ./dist/*'
            }
        }
        stage("install") {
            steps {
                sh 'mamba env update --file environment.yml'
                // Append the activation line only when it is not already
                // present; an unconditional append adds one line per build.
                sh 'grep -qxF "mamba activate bmlsa" ~/.bashrc 2>/dev/null || echo "mamba activate bmlsa" >> ~/.bashrc'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test") {
            steps {
                // Smoke test: the freshly built wheel must at least install.
                sh "pip install dist/*.whl"
            }
        }
        stage("publish") {
            when {
                branch '**/master'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // Single-quoted on purpose: the shell expands USER/PASS from
                    // the environment set by withCredentials, so the secret is
                    // never interpolated into the Groovy string itself.
                    sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/${USER}/pypi" -u "${USER}" -p "${PASS}" --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
 }
--- a/protannot/init.py
+++ b/protannot/init.py
--- a/bmlsa/aligner.py
+++ b/bmlsa/aligner.py
@@ -0,0 +1,43 @@
 from Bio.Align import PairwiseAligner, substitution_matrices
 from exceptions import UnexpectedAlignmentResult
 from datatypes import AlignedSequence
 def protein_align_many_to_one_ssw(
    sequence: str, queries: dict[str, AlignedSequence]
 ) -> dict[str, tuple[AlignedSequence, AlignedSequence]]:
    """Locally align every query against ``sequence``.

    Uses a ``PairwiseAligner`` in local mode with the BLOSUM62 matrix, a gap
    open score of -11 and a gap extension score of -1.

    Args:
        sequence: The sequence each query is aligned against.
        queries: Query records keyed by ID; ``query.sequence`` is aligned.

    Returns:
        A dict keyed by query ID. Each value is a pair: a copy of the original
        query record, and a record carrying the alignment score and the start
        and end of the aligned region within ``sequence``. Queries for which
        the aligner raised ``ValueError`` or returned no alignment are absent.

    Raises:
        UnexpectedAlignmentResult: If a query yields more than one alignment.
    """
    annotation_pairs = {}
    aligner = PairwiseAligner()
    aligner.mode = "local"
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
    aligner.extend_gap_score = -1
    aligner.open_gap_score = -11
    for query_id, query in queries.items():
        try:
            alignments = aligner.align(sequence, query.sequence)
        except ValueError:
            # Best effort: a query the aligner rejects is skipped, not fatal.
            # Presumably raised for letters outside the matrix alphabet.
            continue
        if len(alignments) > 1:
            raise UnexpectedAlignmentResult(
                "More than one alignment resulted from a single query."
            )
        for alignment in alignments:
            # aligned[0] lists the (start, end) blocks on ``sequence``. A
            # gapped alignment has several blocks, so the span runs from the
            # start of the first block to the end of the last one.
            target_blocks = alignment.aligned[0]
            aligned_start = target_blocks[0][0]
            aligned_end = target_blocks[-1][1]
            annotation_pairs[query_id] = (
                AlignedSequence(
                    query_id,
                    query.sequence,
                    query.name,
                    query.description,
                    query.start,
                    query.end,
                    query.score,
                ),
                AlignedSequence(
                    query_id,
                    # NOTE(review): alignment.query looks like the whole query
                    # sequence rather than only the aligned slice; confirm.
                    alignment.query,
                    query.name,
                    query.description,
                    aligned_start,
                    aligned_end,
                    alignment.score,
                ),
            )
    return annotation_pairs
--- a/bmlsa/cli.py
+++ b/bmlsa/cli.py
@@ -0,0 +1,94 @@
 import os
 import argparse
 from Bio import SeqIO
 from aligner import protein_align_many_to_one_ssw
 from persistence import read_annotations_from_csv, save_alignments_to_csv
 def main():
    """Command-line entry point: align CSV annotations against FASTA records.

    Reads the annotation CSV once. For every record in the FASTA file it
    aligns all annotations against that record and writes one CSV into the
    output directory. The file name is the record ID with "|" replaced by "+"
    and "." replaced by "_", followed by ".csv".
    """
    argparser = argparse.ArgumentParser("bmlsa")
    argparser.add_argument(
        "annotations",
        type=str,
        help=(
            "Path to CSV containing the sequences to align as well as the "
            "annotations for the respective sequences"
        ),
        metavar="a",
    )
    argparser.add_argument(
        "sequence",
        type=str,
        help=(
            "Path to the sequence to annotate in FASTA format. "
            "If multiple sequences are present, annotations will be run on each"
        ),
        metavar="s",
    )
    argparser.add_argument(
        "output", type=str, help="Path to output location", metavar="o"
    )
    argparser.add_argument(
        "-I",
        "--id-header",
        type=str,
        help="The header for the ID of the annotation",
        # The CSV reader always looks this column up, so it must be given.
        required=True,
    )
    argparser.add_argument(
        "-N",
        "--name-header",
        type=str,
        help="The header for the name of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-D",
        "--desc-header",
        type=str,
        help="The header for the description of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-T",
        "--start-header",
        type=str,
        help="The header for the start of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-E",
        "--end-header",
        type=str,
        help="The header for the end of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-S",
        "--seq-header",
        type=str,
        help="The header for the sequence of the annotation",
        # The CSV reader always looks this column up, so it must be given.
        required=True,
    )
    args = argparser.parse_args()
    given_annotations = read_annotations_from_csv(
        args.annotations,
        args.id_header,
        args.name_header,
        args.desc_header,
        args.start_header,
        args.end_header,
        args.seq_header,
    )
    # Results are written as files inside the output path, so make sure the
    # directory exists before the first write.
    os.makedirs(args.output, exist_ok=True)
    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
            aligned_annotations = protein_align_many_to_one_ssw(
                str(sequence.seq), given_annotations
            )
            # "|" and "." are replaced to keep the record ID usable as a
            # file name.
            output_name = sequence.id.replace("|", "+").replace(".", "_") + ".csv"
            save_alignments_to_csv(
                aligned_annotations,
                os.path.join(args.output, output_name),
            )
 if __name__ == "__main__":
    main()
--- a/protannot/Datatypes.py
+++ b/protannot/Datatypes.py
@@ -1,4 +1,4 @@
-class Annotation:
+class AlignedSequence:
    def __init__(
        self,
        id: str,
--- a/bmlsa/exceptions.py
+++ b/bmlsa/exceptions.py
@@ -0,0 +1,2 @@
 class UnexpectedAlignmentResult(Exception):
    """Signals that an alignment produced a result the caller did not expect."""
--- a/bmlsa/persistence.py
+++ b/bmlsa/persistence.py
@@ -0,0 +1,70 @@
 import csv
 from datatypes import AlignedSequence
 def read_annotations_from_csv(
    csv_path: str,
    id_header: str,
    name_header: str,
    desc_header: str,
    start_header: str,
    end_header: str,
    sequence_header: str,
 ):
    """Load annotated sequences from a CSV file, keyed by their ID column.

    The first row is treated as the header row and is used to locate the
    requested columns.

    Args:
        csv_path: Path of the CSV file to read.
        id_header: Header of the ID column (always looked up).
        name_header: Header of the name column; falsy to skip it.
        desc_header: Header of the description column; falsy to skip it.
        start_header: Header of the start column; falsy to skip it.
        end_header: Header of the end column; falsy to skip it.
        sequence_header: Header of the sequence column (always looked up).

    Returns:
        A dict mapping each row's ID to an ``AlignedSequence``. A later row
        with the same ID replaces an earlier one. Skipped columns and empty
        start/end cells become ``None``. An empty file gives an empty dict.

    Raises:
        ValueError: If a requested header is missing from the first row, or a
            non-empty start/end cell is not an integer.
    """
    annotations = {}
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back intact.
    with open(csv_path, "r", newline="") as csv_fd:
        reader = csv.reader(csv_fd)
        header_row = next(reader, None)
        if header_row is None:
            return annotations
        id_ind = header_row.index(id_header)
        name_ind = header_row.index(name_header) if name_header else None
        desc_ind = header_row.index(desc_header) if desc_header else None
        start_ind = header_row.index(start_header) if start_header else None
        end_ind = header_row.index(end_header) if end_header else None
        sequence_ind = header_row.index(sequence_header)
        for row in reader:
            row_id = row[id_ind]
            name = row[name_ind] if name_ind is not None else None
            desc = row[desc_ind] if desc_ind is not None else None
            start = row[start_ind] if start_ind is not None else None
            end = row[end_ind] if end_ind is not None else None
            annotations[row_id] = AlignedSequence(
                row_id,
                row[sequence_ind],
                name,
                desc,
                int(start) if start else None,
                int(end) if end else None,
            )
    return annotations
 def save_alignments_to_csv(
    aligned_pairs: "dict[str, tuple[AlignedSequence, AlignedSequence]]",
    output_path: str,
 ):
    """Write (original, aligned) record pairs to a CSV file.

    The header row is derived from the attribute names of the first original
    record: each name appears once prefixed with "original" and once prefixed
    with "aligned". Every pair then becomes one row in that column order.
    With no pairs, the file is created empty and has no header.

    Args:
        aligned_pairs: Pairs of (original, aligned) records keyed by ID.
        output_path: Path of the CSV file to create or overwrite.
    """
    # newline="" stops the text layer from translating the line endings the
    # csv writer emits, which would otherwise double them on Windows.
    with open(output_path, "w", newline="") as output_fd:
        writer = csv.writer(output_fd)
        header_order = None
        for original, aligned in aligned_pairs.values():
            original_vars = vars(original)
            aligned_vars = vars(aligned)
            if header_order is None:
                # The first record fixes the column order for the whole file.
                header_order = list(original_vars.keys())
                writer.writerow(
                    ["original" + key for key in header_order]
                    + ["aligned" + key for key in header_order]
                )
            writer.writerow(
                [original_vars[key] for key in header_order]
                + [aligned_vars[key] for key in header_order]
            )
--- a/environment.yml
+++ b/environment.yml
@@ -0,0 +1,8 @@
 # Conda environment definition; the Jenkinsfile applies it with
 # `mamba env update --file environment.yml`.
 name: bmlsa
 channels:
  - conda-forge
 dependencies:
  # Pinned to the same version as install_requires in setup.cfg.
  - biopython=1.81
  # Provides `python -m build`, run by the Jenkinsfile build stage.
  - build=0.7
  # NOTE(review): no Jenkinsfile stage invokes pytest yet.
  - pytest=7
  # Provides `python -m twine upload`, run by the Jenkinsfile publish stage.
  - twine=4
--- a/protannot/protannot.py
+++ b/protannot/protannot.py
@@ -1,193 +0,0 @@
 import csv
 import os
 from skbio.alignment import StripedSmithWaterman
 import argparse
 from Bio import SeqIO
 from Datatypes import Annotation
 def read_annotations_from_csv(
    csv_path: str,
    id_header: str,
    name_header: str,
    desc_header: str,
    start_header: str,
    end_header: str,
    sequence_header: str,
 ):
    """Load annotations from a CSV file, keyed by their ID column.

    The first row is treated as the header row and is used to locate the
    requested columns.

    Args:
        csv_path: Path of the CSV file to read.
        id_header: Header of the ID column (always looked up).
        name_header: Header of the name column; falsy to skip it.
        desc_header: Header of the description column; falsy to skip it.
        start_header: Header of the start column; falsy to skip it.
        end_header: Header of the end column; falsy to skip it.
        sequence_header: Header of the sequence column (always looked up).

    Returns:
        A dict mapping each row's ID to an ``Annotation``. A later row with
        the same ID replaces an earlier one. Skipped columns and empty
        start/end cells become ``None``. An empty file gives an empty dict.

    Raises:
        ValueError: If a requested header is missing from the first row, or a
            non-empty start/end cell is not an integer.
    """
    annotations = {}
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back intact.
    with open(csv_path, "r", newline="") as csv_fd:
        reader = csv.reader(csv_fd)
        header_row = next(reader, None)
        if header_row is None:
            return annotations
        id_ind = header_row.index(id_header)
        name_ind = header_row.index(name_header) if name_header else None
        desc_ind = header_row.index(desc_header) if desc_header else None
        start_ind = header_row.index(start_header) if start_header else None
        end_ind = header_row.index(end_header) if end_header else None
        sequence_ind = header_row.index(sequence_header)
        for row in reader:
            row_id = row[id_ind]
            name = row[name_ind] if name_ind is not None else None
            desc = row[desc_ind] if desc_ind is not None else None
            start = row[start_ind] if start_ind is not None else None
            end = row[end_ind] if end_ind is not None else None
            annotations[row_id] = Annotation(
                row_id,
                row[sequence_ind],
                name,
                desc,
                int(start) if start else None,
                int(end) if end else None,
            )
    return annotations
 def generate_annotated_positions(sequence: str, annotations: dict[str, Annotation]):
    """Align every annotation's sequence against ``sequence``.

    Builds one ``StripedSmithWaterman`` aligner for ``sequence`` and calls it
    with each annotation's sequence. Returns a dict keyed by annotation ID.
    Each value is a pair: a copy of the input annotation, and an annotation
    carrying the aligned target sequence, the alignment's ``query_begin`` and
    ``query_end``, and its optimal alignment score.
    """
    pairs = {}
    ssw = StripedSmithWaterman(sequence)
    for key, source in annotations.items():
        result = ssw(source.sequence)
        best_score = result.optimal_alignment_score
        begin = result.query_begin
        finish = result.query_end
        source_copy = Annotation(
            key,
            source.sequence,
            source.name,
            source.description,
            source.start,
            source.end,
            source.score,
        )
        aligned_record = Annotation(
            key,
            result.aligned_target_sequence,
            source.name,
            source.description,
            begin,
            finish,
            best_score,
        )
        pairs[key] = (source_copy, aligned_record)
    return pairs
 def save_alignments_to_csv(
    aligned_pairs: "dict[str, tuple[Annotation, Annotation]]",
    output_path: str,
 ):
    """Write (original, aligned) annotation pairs to a CSV file.

    The header row is derived from the attribute names of the first original
    annotation: each name appears once prefixed with "original" and once
    prefixed with "aligned". Every pair then becomes one row in that column
    order. With no pairs, the file is created empty and has no header.

    Args:
        aligned_pairs: Pairs of (original, aligned) annotations keyed by ID.
        output_path: Path of the CSV file to create or overwrite.
    """
    # newline="" stops the text layer from translating the line endings the
    # csv writer emits, which would otherwise double them on Windows.
    with open(output_path, "w", newline="") as output_fd:
        writer = csv.writer(output_fd)
        header_order = None
        for original, aligned in aligned_pairs.values():
            original_vars = vars(original)
            aligned_vars = vars(aligned)
            if header_order is None:
                # The first annotation fixes the column order for the file.
                header_order = list(original_vars.keys())
                writer.writerow(
                    ["original" + key for key in header_order]
                    + ["aligned" + key for key in header_order]
                )
            writer.writerow(
                [original_vars[key] for key in header_order]
                + [aligned_vars[key] for key in header_order]
            )
 def main():
    """Command-line entry point: annotate FASTA records from a CSV.

    Reads the annotation CSV once. For every record in the FASTA file it
    aligns all annotations against that record and writes one CSV into the
    output directory. The file name is the record ID with "|" replaced by "+"
    and "." replaced by "_", followed by ".csv".
    """
    argparser = argparse.ArgumentParser("protannot")
    argparser.add_argument(
        "annotations",
        type=str,
        help=(
            "Path to CSV containing the sequences to align as well as the "
            "annotations for the respective sequences"
        ),
        metavar="a",
    )
    argparser.add_argument(
        "sequence",
        type=str,
        help=(
            "Path to the sequence to annotate in FASTA format. "
            "If multiple sequences are present, annotations will be run on each"
        ),
        metavar="s",
    )
    argparser.add_argument(
        "output", type=str, help="Path to output location", metavar="o"
    )
    argparser.add_argument(
        "-I",
        "--id-header",
        type=str,
        help="The header for the ID of the annotation",
        # The CSV reader always looks this column up, so it must be given.
        required=True,
    )
    argparser.add_argument(
        "-N",
        "--name-header",
        type=str,
        help="The header for the name of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-D",
        "--desc-header",
        type=str,
        help="The header for the description of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-T",
        "--start-header",
        type=str,
        help="The header for the start of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-E",
        "--end-header",
        type=str,
        help="The header for the end of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-S",
        "--seq-header",
        type=str,
        help="The header for the sequence of the annotation",
        # The CSV reader always looks this column up, so it must be given.
        required=True,
    )
    args = argparser.parse_args()
    given_annotations = read_annotations_from_csv(
        args.annotations,
        args.id_header,
        args.name_header,
        args.desc_header,
        args.start_header,
        args.end_header,
        args.seq_header,
    )
    # Results are written as files inside the output path, so make sure the
    # directory exists before the first write.
    os.makedirs(args.output, exist_ok=True)
    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
            aligned_annotations = generate_annotated_positions(
                str(sequence.seq), given_annotations
            )
            # "|" and "." are replaced to keep the record ID usable as a
            # file name.
            output_name = sequence.id.replace("|", "+").replace(".", "_") + ".csv"
            save_alignments_to_csv(
                aligned_annotations,
                os.path.join(args.output, output_name),
            )
 if __name__ == "__main__":
    main()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
 # Build configuration: the package is built through the setuptools backend.
 [build-system]
 build-backend = "setuptools.build_meta"
 # Packages that must be available before the backend is invoked.
 requires = ["setuptools", "wheel"]
--- a/setup.cfg
+++ b/setup.cfg
@@ -0,0 +1,12 @@
 # Declarative package configuration for setuptools (setup.py calls setup()
 # with no arguments).
 [metadata]
 name = bmlsa
 version = 0.0.1
 # Ships the single `bmlsa` package; the biopython pin matches environment.yml.
 [options]
 packages = bmlsa
 install_requires =
    biopython ==1.81
 # Installs a `bmlsa` command that calls main() in bmlsa/cli.py.
 # NOTE(review): bmlsa/cli.py imports its siblings as top-level modules
 # (`from aligner import ...`); confirm they resolve via this entry point.
 [options.entry_points]
 console_scripts =
    bmlsa = bmlsa.cli:main
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,3 @@
 # Minimal setuptools shim: setup() is called with no arguments, so the
 # package metadata and options are expected to come from setup.cfg.
 from setuptools import setup
 setup()
Author	SHA1	Message	Date
Harrison	2a80c2aea2	Added python project description files All checks were successful ydeng/bmlsa/pipeline/head This commit looks good Details	2023-04-21 11:47:28 -05:00
Harrison	6b3c9c312e	Added 'Jenkinsfile' and 'environment.yml' Some checks failed ydeng/bmlsa/pipeline/head There was a failure building this commit Details	2023-04-21 11:44:01 -05:00
Harrison	7f9f9405c3	Removed bad parameter	2023-04-20 15:16:34 -05:00
Harrison	04f730cacb	Changed alignment library	2023-04-20 12:24:26 -05:00
Harrison	f22070e8c3	Renamed project	2023-04-20 10:07:39 -05:00
		`@@ -0,0 +1,2 @@`
							`class UnexpectedAlignmentResult(Exception):`
							`pass`