Compare commits

...

23 Commits

Author SHA1 Message Date
5e59eb4302 Updated publication credentials
All checks were successful
bmlsa/pipeline/head This commit looks good
2024-11-14 20:19:48 +00:00
884787f31d Fixed package name
Some checks failed
bmlsa/pipeline/head There was a failure building this commit
2024-11-14 20:01:57 +00:00
9d98d5d8d1 Updated CI to new serverside config and added devcontainer config
Some checks failed
bmlsa/pipeline/head There was a failure building this commit
2024-11-14 19:57:55 +00:00
4d2c491733 Merge branch 'develop'
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-05-03 08:21:42 -05:00
842458f20c Updated 'Jenkinsfile' to no longer activate environment
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-05-03 08:17:34 -05:00
40c8afe68c Updated 'Jenkinsfile' to conform to new build container usage
Some checks failed
ydeng/bmlsa/pipeline/head There was a failure building this commit
2023-05-03 08:13:18 -05:00
a5b7d2773e Pipeline no longer fails build if testing step fails
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 21:47:18 +00:00
1ae59d0a8f Updated system path insertion in 'conf.py'
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 15:54:55 -05:00
d6edc0e317 Changing documentation generation structure
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 15:47:27 -05:00
a8b820523a Merge branch 'develop'
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:29:46 -05:00
cf104c859d Added some more help to 'README.md'
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:29:34 -05:00
ebb27556b5 Merge branch 'develop'
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:11:37 -05:00
7bfe49445a Moved steps and removed 'clean' stage
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:11:24 -05:00
f0da142377 Updated documentation publishing title and 'README.md' to link to CI
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:10:15 -05:00
4383e36c1f Removed space in doc title
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 14:01:10 -05:00
cbf32d253b Added docs cleaning step to pipeline
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 13:56:10 -05:00
adf7473587 Fixed some doc typos
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 13:46:58 -05:00
e942b65ebd Added '--force' to 'sphinx-apidoc' step
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 13:42:38 -05:00
61149b88b7 Added sphinx to build environment
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 13:32:50 -05:00
47cdd8bc28 Added basic automatic documentation generation
Some checks reported errors
ydeng/bmlsa/pipeline/head Something is wrong with the build of this commit
2023-04-28 13:28:12 -05:00
7e3f43434e Added documentation
Some checks failed
ydeng/bmlsa/pipeline/head There was a failure building this commit
2023-04-28 12:44:59 -05:00
d75a83585c Merge branch 'master' of https://git.reslate.systems/ydeng/bmlsa and fingerprint archives
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-28 08:25:38 -05:00
b9f4c74168 Pipeline now tests the installed command
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
2023-04-22 16:20:12 -05:00
17 changed files with 280 additions and 56 deletions

View File

@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}

2
.gitignore vendored
View File

@@ -213,3 +213,5 @@ pyrightconfig.json
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
output
!docs/source/
docs/source/autodoc

View File

@@ -7,5 +7,6 @@
"FASTA"
],
"python.testing.pytestEnabled": true,
"python.analysis.inlayHints.pytestParameters": true
"python.analysis.inlayHints.pytestParameters": true,
"autoDocstring.docstringFormat": "sphinx"
}

30
Jenkinsfile vendored
View File

@@ -1,33 +1,41 @@
pipeline {
agent any
stages {
stage("clean") {
steps {
sh 'rm -rf ./dist/*'
}
agent {
kubernetes {
cloud 'rsys-devel'
defaultContainer 'pip'
inheritFrom 'pip'
}
}
stages {
stage("install") {
steps {
sh 'mamba env update --file environment.yml || mamba env create --force --file environment.yml'
sh 'echo "mamba activate bmlsa" >> ~/.bashrc'
sh 'pip install -r requirements.txt'
}
}
stage("unit tests") {
steps {
sh "python -m pytest --junitxml=test_results.xml"
sh returnStatus: true, script: "python -m pytest --junitxml=test_results.xml"
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
}
}
stage("build") {
steps {
sh 'rm -rf ./dist/*'
sh 'rm -rf ./docs/build/*'
sh "python -m build"
container('sphinx') {
sh 'sphinx-apidoc -o docs/source/autodoc/bmlsa src/bmlsa --force'
sh 'sphinx-build -M html ./docs/source ./docs/build'
publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'Documentation', reportTitles: ''])
}
}
}
stage("test installation") {
steps {
sh "pip install dist/*.whl --force-reinstall"
sh "bmlsa -h"
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta /output/"
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/"
}
}
stage("archive") {
@@ -40,7 +48,7 @@ pipeline {
branch '**/master'
}
steps {
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
}
}

View File

@@ -1,6 +1,8 @@
[![Build Status](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/badge/icon)](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/)
# BMLSA
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm.
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm. May be used as a Python library, or as a full command line interface.
## Features
@@ -11,3 +13,21 @@ A Basic multi local sequence alignment tool using the Biopython implementation o
- Automatically align with all sequences in reference FASTA
- All produced output is human readable!
## Install via `pip`
Just run `pip install --index-url https://git.reslate.systems/api/packages/ydeng/pypi/simple/ bmlsa` in a console that is capable of running `pip`!
## CLI Demo from Git Repo
Resources have been provided in this package for CI purposes, but you may use them to try out the program yourself!
1. Install the program
2. Clone this repository
3. Change into the cloned repository directory
4. Run `bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/`
The results will show up in an `output` folder (which will be generated if it doesn't exist). Use `bmlsa -h` to see what each part of the above command does!
## More Information
For all live downloadable artifacts, build statuses, unit test results, and documentation, check out the continuous integration page for the [master branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) ([development branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/develop/)).

20
docs/Makefile Normal file
View File

@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

35
docs/make.bat Normal file
View File

@@ -0,0 +1,35 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

33
docs/source/conf.py Normal file
View File

@@ -0,0 +1,33 @@
import os
import sys
# Sphinx documentation builder configuration for BMLSA.
#
# The complete list of built-in configuration values is documented at:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Module search path ------------------------------------------------------
# Prepend the absolute form of "../../src/" (resolved against the current
# working directory) so that modules under it can be imported while the
# documentation is built.
sys.path.insert(
    0,
    os.path.abspath("../../src/"),
)

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = "BMLSA"
author = "Harrison"
copyright = "2023, Harrison"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
    "sphinx.ext.autodoc",
]
templates_path = [
    "_templates",
]
exclude_patterns = []

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "alabaster"
html_static_path = [
    "_static",
]

22
docs/source/index.rst Normal file
View File

@@ -0,0 +1,22 @@
.. BMLSA documentation master file, created by
sphinx-quickstart on Fri Apr 28 13:04:16 2023.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to BMLSA's documentation!
=================================
.. toctree::
:glob:
:maxdepth: 2
:caption: Contents:
autodoc/**/modules
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -7,4 +7,6 @@ dependencies:
- hypothesis=6.56
- twine=4
- python-build=0.10.0
- setuptools
- setuptools=67.6
- sphinx=6.2
prefix: ./env

6
requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
biopython==1.81
pytest
hypothesis
twine
build
setuptools

View File

@@ -1,20 +1,36 @@
import logging
from typing import Iterable
from typing import Generator, Iterable
from Bio.Align import PairwiseAligner, substitution_matrices
from bmlsa.datatypes import AlignedSequence
from bmlsa.exceptions import UnexpectedAlignmentResult
from bmlsa.datatypes import QuerySequence
logger = logging.getLogger(__name__)
def align_many_to_one_ssw(
reference_sequence: str,
queries: Iterable[AlignedSequence],
queries: Iterable[QuerySequence],
extend_gap_score: int,
open_gap_score: int,
alignment_mode: str,
substitution_matrix: str = "BLOSUM62",
):
) -> Generator[tuple[QuerySequence, QuerySequence], None, None]:
"""Aligns :obj:`bmlsa.datatypes.QuerySequence` objects to a given reference sequence
:param reference_sequence: The reference sequence to align to
:type reference_sequence: str
:param queries: An iterable sequence of :obj:`bmlsa.datatypes.QuerySequence`
:type queries: Iterable[QuerySequence]
:param extend_gap_score: The extend gap score to use for alignment. Typically negative.
:type extend_gap_score: int
:param open_gap_score: The open gap score to use for alignment. Typically negative.
:type open_gap_score: int
:param alignment_mode: The alignment mode to use. Either "local" or "global".
:type alignment_mode: str
:param substitution_matrix: The name of the substitution matrix available in :mod:`Bio.Align`, defaults to "BLOSUM62"
:type substitution_matrix: str, optional
:yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is the original, and the second is the aligned version
:rtype: a generator of pairs (tuples) of :obj:`bmlsa.datatypes.QuerySequence` objects
"""
# TODO Consider using the built in "scoring" parameter
aligner = PairwiseAligner()
aligner.substitution_matrix = substitution_matrices.load(substitution_matrix)
@@ -30,14 +46,10 @@ def align_many_to_one_ssw(
)
continue
# TODO Implement comparison with input positions to choose best
if len(alignments) > 1:
raise UnexpectedAlignmentResult(
"More than one alignment resulted from a single query."
)
for alignment in alignments:
score, query_aligned = (alignment.score, alignment.aligned[0][0])
aligned_start, aligned_end = query_aligned
yield AlignedSequence(
yield QuerySequence(
query.id,
query.sequence,
query.name,
@@ -45,7 +57,7 @@ def align_many_to_one_ssw(
query.start,
query.end,
query.score,
), AlignedSequence(
), QuerySequence(
query.id,
alignment.query,
query.name,

View File

@@ -4,7 +4,7 @@ from Bio import SeqIO
import logging
from bmlsa.aligner import align_many_to_one_ssw
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
from bmlsa.io import queries_from_csv, save_alignments_to_csv
logger = logging.getLogger(__name__)
@@ -29,8 +29,8 @@ def run(args):
"more information."
)
exit(3)
queries = read_annotations_from_csv(
args.annotations,
queries = queries_from_csv(
args.queries,
args.id_header,
args.seq_header,
args.name_header,
@@ -59,11 +59,11 @@ def run(args):
os.makedirs(args.output, exist_ok=True)
with open(args.sequence, "r") as sequence_fd:
for sequence in SeqIO.parse(sequence_fd, "fasta"):
aligned_annotations = align_many_to_one_ssw(
aligned_queries = align_many_to_one_ssw(
str(sequence.seq), queries, **scoring_parameter
)
save_alignments_to_csv(
aligned_annotations,
aligned_queries,
os.path.join(
args.output,
sequence.id.replace("|", "+").replace(".", "_") + ".csv",
@@ -74,11 +74,11 @@ def run(args):
def main():
arg_parser = argparse.ArgumentParser("bmlsa")
arg_parser.add_argument(
"annotations",
"queries",
type=str,
help=(
"Path to CSV containing the sequences to align as well as the "
"annotations for the respective sequences."
"queries for the respective sequences."
),
metavar="a",
)

View File

@@ -1,4 +1,6 @@
class AlignedSequence:
class QuerySequence:
"""Represents a sequence that may be aligned."""
def __init__(
self,
id: str,
@@ -9,6 +11,23 @@ class AlignedSequence:
end: int = None,
score: int = None,
) -> None:
"""Instantiates a :obj:`bmlsa.datatypes.QuerySequence` object
:param id: The id of the query sequence
:type id: str
:param sequence: The sequence itself
:type sequence: str
:param name: The name of the sequence, defaults to None
:type name: str, optional
:param description: The description of the sequence, defaults to None
:type description: str, optional
:param start: The start of the sequence, defaults to None
:type start: int, optional
:param end: The end of the sequence, defaults to None
:type end: int, optional
:param score: The alignment score of the sequence, defaults to None
:type score: int, optional
"""
self._description = description
self._start = start
self._end = end

View File

@@ -1,10 +1,10 @@
import csv
from typing import Iterable
from typing import Generator, Iterable
from bmlsa.datatypes import AlignedSequence
from bmlsa.datatypes import QuerySequence
def read_annotations_from_csv(
def queries_from_csv(
csv_path: str,
id_header: str,
sequence_header: str,
@@ -12,7 +12,26 @@ def read_annotations_from_csv(
desc_header: str = None,
start_header: str = None,
end_header: str = None,
):
) -> Generator[QuerySequence, None, None]:
"""Generates :obj:`bmlsa.datatypes.QuerySequence` instances from a CSV file.
:param csv_path: Path to CSV to use
:type csv_path: str
:param id_header: The column title for the unique identifier for each query sequence
:type id_header: str
:param sequence_header: The column title for the sequences themselves
:type sequence_header: str
:param name_header: The column title for the name of the sequence, defaults to None
:type name_header: str, optional
:param desc_header: The column title for the description of the sequence, defaults to None
:type desc_header: str, optional
:param start_header: The column title for the start position of the sequence, defaults to None
:type start_header: str, optional
:param end_header: The column title for the end position of the sequence, defaults to None
:type end_header: str, optional
:yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row
:rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence`
"""
with open(csv_path, "r") as csv_fd:
reader = csv.reader(csv_fd)
id_ind = None
@@ -38,7 +57,7 @@ def read_annotations_from_csv(
desc = row[desc_ind] if desc_header else None
start = row[start_ind] if start_header else None
end = row[end_ind] if end_header else None
yield AlignedSequence(
yield QuerySequence(
id,
sequence,
name,
@@ -49,8 +68,15 @@ def read_annotations_from_csv(
def save_alignments_to_csv(
aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]], output_path: str
):
aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]], output_path: str
) -> None:
"""Saves alignments to a CSV.
:param aligned_pairs: An iterable of the original sequence and aligned sequences
:type aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]]
:param output_path: A path to the output CSV file
:type output_path: str
"""
with open(output_path, "w") as output_fd:
writer = csv.writer(output_fd)
header_wrote = False

View File

@@ -2,7 +2,7 @@ import pytest
from Bio import SeqIO
from bmlsa.aligner import align_many_to_one_ssw
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
from bmlsa.datatypes import AlignedSequence
from bmlsa.datatypes import QuerySequence
from collections.abc import Iterable
@@ -16,7 +16,7 @@ def reference_sequence():
@pytest.fixture
def queries():
return [
AlignedSequence(
QuerySequence(
"ORF10",
"ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
"GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG",
@@ -38,8 +38,8 @@ def test_align_many_to_one_returns_correct_data_structure(reference_sequence, qu
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
)
for original, aligned_seq in results:
assert isinstance(original, AlignedSequence)
assert isinstance(aligned_seq, AlignedSequence)
assert isinstance(original, QuerySequence)
assert isinstance(aligned_seq, QuerySequence)
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):

View File

@@ -1,21 +1,17 @@
from csv import reader
from os import path
from bmlsa.datatypes import AlignedSequence
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
from bmlsa.datatypes import QuerySequence
from bmlsa.io import queries_from_csv, save_alignments_to_csv
from collections.abc import Iterable
def test_read_annotations_from_csv_has_data():
results = read_annotations_from_csv(
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
)
def test_queries_from_csv_has_data():
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
assert isinstance(results, Iterable)
def test_read_annotations_from_csv_data_valid():
results = read_annotations_from_csv(
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
)
def test_queries_from_csv_data_valid():
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
for aligned_seq in results:
assert isinstance(aligned_seq.id, str)
assert isinstance(aligned_seq.sequence, str)
@@ -23,7 +19,7 @@ def test_read_annotations_from_csv_data_valid():
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
output_path = path.join(tmpdir, "alignment_results.csv")
dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
dummy_sequence = QuerySequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
alignments = [(dummy_sequence, dummy_sequence)]
save_alignments_to_csv(alignments, output_path)