Compare commits
23 Commits
587c2e753a
...
master
Author | SHA1 | Date | |
---|---|---|---|
5e59eb4302 | |||
884787f31d | |||
9d98d5d8d1 | |||
4d2c491733 | |||
842458f20c | |||
40c8afe68c | |||
a5b7d2773e | |||
1ae59d0a8f | |||
d6edc0e317 | |||
a8b820523a | |||
cf104c859d | |||
ebb27556b5 | |||
7bfe49445a | |||
f0da142377 | |||
4383e36c1f | |||
cbf32d253b | |||
adf7473587 | |||
e942b65ebd | |||
61149b88b7 | |||
47cdd8bc28 | |||
7e3f43434e | |||
d75a83585c | |||
b9f4c74168 |
22
.devcontainer/devcontainer.json
Normal file
22
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||||
|
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||||
|
{
|
||||||
|
"name": "Python 3",
|
||||||
|
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||||
|
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
|
||||||
|
|
||||||
|
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||||
|
// "features": {},
|
||||||
|
|
||||||
|
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||||
|
// "forwardPorts": [],
|
||||||
|
|
||||||
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
|
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||||
|
|
||||||
|
// Configure tool-specific properties.
|
||||||
|
// "customizations": {},
|
||||||
|
|
||||||
|
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||||
|
// "remoteUser": "root"
|
||||||
|
}
|
4
.gitignore
vendored
4
.gitignore
vendored
@@ -212,4 +212,6 @@ pyrightconfig.json
|
|||||||
|
|
||||||
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
output
|
output
|
||||||
|
!docs/source/
|
||||||
|
docs/source/autodoc
|
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -7,5 +7,6 @@
|
|||||||
"FASTA"
|
"FASTA"
|
||||||
],
|
],
|
||||||
"python.testing.pytestEnabled": true,
|
"python.testing.pytestEnabled": true,
|
||||||
"python.analysis.inlayHints.pytestParameters": true
|
"python.analysis.inlayHints.pytestParameters": true,
|
||||||
|
"autoDocstring.docstringFormat": "sphinx"
|
||||||
}
|
}
|
30
Jenkinsfile
vendored
30
Jenkinsfile
vendored
@@ -1,33 +1,41 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
stages {
|
kubernetes {
|
||||||
stage("clean") {
|
cloud 'rsys-devel'
|
||||||
steps {
|
defaultContainer 'pip'
|
||||||
sh 'rm -rf ./dist/*'
|
inheritFrom 'pip'
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stages {
|
||||||
stage("install") {
|
stage("install") {
|
||||||
steps {
|
steps {
|
||||||
sh 'mamba env update --file environment.yml || mamba env create --force --file environment.yml'
|
sh 'pip install -r requirements.txt'
|
||||||
sh 'echo "mamba activate bmlsa" >> ~/.bashrc'
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("unit tests") {
|
stage("unit tests") {
|
||||||
steps {
|
steps {
|
||||||
sh "python -m pytest --junitxml=test_results.xml"
|
sh returnStatus: true, script: "python -m pytest --junitxml=test_results.xml"
|
||||||
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
|
sh 'rm -rf ./dist/*'
|
||||||
|
sh 'rm -rf ./docs/build/*'
|
||||||
sh "python -m build"
|
sh "python -m build"
|
||||||
|
container('sphinx') {
|
||||||
|
sh 'sphinx-apidoc -o docs/source/autodoc/bmlsa src/bmlsa --force'
|
||||||
|
sh 'sphinx-build -M html ./docs/source ./docs/build'
|
||||||
|
publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'Documentation', reportTitles: ''])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("test installation") {
|
stage("test installation") {
|
||||||
steps {
|
steps {
|
||||||
sh "pip install dist/*.whl --force-reinstall"
|
sh "pip install dist/*.whl --force-reinstall"
|
||||||
sh "bmlsa -h"
|
sh "bmlsa -h"
|
||||||
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta /output/"
|
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("archive") {
|
stage("archive") {
|
||||||
@@ -40,7 +48,7 @@ pipeline {
|
|||||||
branch '**/master'
|
branch '**/master'
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
||||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
|
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
22
README.md
22
README.md
@@ -1,6 +1,8 @@
|
|||||||
|
[](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/)
|
||||||
|
|
||||||
# BMLSA
|
# BMLSA
|
||||||
|
|
||||||
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm.
|
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm. May be used as a python library, or as a full command line interface.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
@@ -11,3 +13,21 @@ A Basic multi local sequence alignment tool using the Biopython implementation o
|
|||||||
- Automatically align with all sequences in reference FASTA
|
- Automatically align with all sequences in reference FASTA
|
||||||
- All produced output is human readable!
|
- All produced output is human readable!
|
||||||
|
|
||||||
|
## Install via `pip`
|
||||||
|
|
||||||
|
Just run `pip install --index-url https://git.reslate.systems/api/packages/ydeng/pypi/simple/ bmlsa` in a console that is capable of running `pip`!
|
||||||
|
|
||||||
|
## CLI Demo from Git Repo
|
||||||
|
|
||||||
|
Resources have been provided in this package for CI purposes, but you may use them to try out the program yourself!
|
||||||
|
|
||||||
|
1. Install the program
|
||||||
|
2. Clone this repository
|
||||||
|
3. Change into the cloned repository directory
|
||||||
|
4. Run `bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/`
|
||||||
|
|
||||||
|
The results will show up in an `output` folder (which will be generated if it doesn't exist). Use `bmlsa -h` to see what each part of the above command does!
|
||||||
|
|
||||||
|
## More Information
|
||||||
|
|
||||||
|
For all live downloadable artifacts, build statuses, unit test results, and documentation, check out the continuous integration page for the [master branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) ([development branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/develop/)).
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Minimal makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line, and also
|
||||||
|
# from the environment for the first two.
|
||||||
|
SPHINXOPTS ?=
|
||||||
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
SOURCEDIR = source
|
||||||
|
BUILDDIR = build
|
||||||
|
|
||||||
|
# Put it first so that "make" without argument is like "make help".
|
||||||
|
help:
|
||||||
|
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||||
|
|
||||||
|
.PHONY: help Makefile
|
||||||
|
|
||||||
|
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||||
|
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||||
|
%: Makefile
|
||||||
|
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
@ECHO OFF
|
||||||
|
|
||||||
|
pushd %~dp0
|
||||||
|
|
||||||
|
REM Command file for Sphinx documentation
|
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" (
|
||||||
|
set SPHINXBUILD=sphinx-build
|
||||||
|
)
|
||||||
|
set SOURCEDIR=source
|
||||||
|
set BUILDDIR=build
|
||||||
|
|
||||||
|
%SPHINXBUILD% >NUL 2>NUL
|
||||||
|
if errorlevel 9009 (
|
||||||
|
echo.
|
||||||
|
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||||
|
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||||
|
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||||
|
echo.may add the Sphinx directory to PATH.
|
||||||
|
echo.
|
||||||
|
echo.If you don't have Sphinx installed, grab it from
|
||||||
|
echo.https://www.sphinx-doc.org/
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "" goto help
|
||||||
|
|
||||||
|
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
goto end
|
||||||
|
|
||||||
|
:help
|
||||||
|
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
|
||||||
|
:end
|
||||||
|
popd
|
33
docs/source/conf.py
Normal file
33
docs/source/conf.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Configure system path
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.abspath("../../src/"))
|
||||||
|
|
||||||
|
# Configuration file for the Sphinx documentation builder.
|
||||||
|
#
|
||||||
|
# For the full list of built-in configuration values, see the documentation:
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||||
|
|
||||||
|
# -- Project information -----------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||||
|
|
||||||
|
project = "BMLSA"
|
||||||
|
copyright = "2023, Harrison"
|
||||||
|
author = "Harrison"
|
||||||
|
|
||||||
|
# -- General configuration ---------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||||
|
|
||||||
|
extensions = ["sphinx.ext.autodoc"]
|
||||||
|
|
||||||
|
templates_path = ["_templates"]
|
||||||
|
exclude_patterns = []
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for HTML output -------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||||
|
|
||||||
|
html_theme = "alabaster"
|
||||||
|
html_static_path = ["_static"]
|
22
docs/source/index.rst
Normal file
22
docs/source/index.rst
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
.. BMLSA documentation master file, created by
|
||||||
|
sphinx-quickstart on Fri Apr 28 13:04:16 2023.
|
||||||
|
You can adapt this file completely to your liking, but it should at least
|
||||||
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
|
Welcome to BMLSA's documentation!
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:glob:
|
||||||
|
:maxdepth: 2
|
||||||
|
:caption: Contents:
|
||||||
|
|
||||||
|
autodoc/**/modules
|
||||||
|
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
@@ -7,4 +7,6 @@ dependencies:
|
|||||||
- hypothesis=6.56
|
- hypothesis=6.56
|
||||||
- twine=4
|
- twine=4
|
||||||
- python-build=0.10.0
|
- python-build=0.10.0
|
||||||
- setuptools
|
- setuptools=67.6
|
||||||
|
- sphinx=6.2
|
||||||
|
prefix: ./env
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
biopython==1.81
|
||||||
|
pytest
|
||||||
|
hypothesis
|
||||||
|
twine
|
||||||
|
build
|
||||||
|
setuptools
|
@@ -1,20 +1,36 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Iterable
|
from typing import Generator, Iterable
|
||||||
from Bio.Align import PairwiseAligner, substitution_matrices
|
from Bio.Align import PairwiseAligner, substitution_matrices
|
||||||
from bmlsa.datatypes import AlignedSequence
|
from bmlsa.datatypes import QuerySequence
|
||||||
from bmlsa.exceptions import UnexpectedAlignmentResult
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def align_many_to_one_ssw(
|
def align_many_to_one_ssw(
|
||||||
reference_sequence: str,
|
reference_sequence: str,
|
||||||
queries: Iterable[AlignedSequence],
|
queries: Iterable[QuerySequence],
|
||||||
extend_gap_score: int,
|
extend_gap_score: int,
|
||||||
open_gap_score: int,
|
open_gap_score: int,
|
||||||
alignment_mode: str,
|
alignment_mode: str,
|
||||||
substitution_matrix: str = "BLOSUM62",
|
substitution_matrix: str = "BLOSUM62",
|
||||||
):
|
) -> Generator[tuple[QuerySequence, QuerySequence], None, None]:
|
||||||
|
"""Aligns :obj:`bmlsa.datatypes.QuerySequence` objects to a given reference sequence
|
||||||
|
|
||||||
|
:param reference_sequence: The reference sequence to align to
|
||||||
|
:type reference_sequence: str
|
||||||
|
:param queries: An iterable sequence of :obj:`bmlsa.datatypes.QuerySequence`
|
||||||
|
:type queries: Iterable[QuerySequence]
|
||||||
|
:param extend_gap_score: The extend gap score to use for alignment. Typically negative.
|
||||||
|
:type extend_gap_score: int
|
||||||
|
:param open_gap_score: The open gap score to use for alignment. Typically negative.
|
||||||
|
:type open_gap_score: int
|
||||||
|
:param alignment_mode: The alignment mode to use. Either "local" or "global".
|
||||||
|
:type alignment_mode: str
|
||||||
|
:param substitution_matrix: The name of the substitution matrix available in :mod:`Bio.Align`, defaults to "BLOSUM62"
|
||||||
|
:type substitution_matrix: str, optional
|
||||||
|
:yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is the original, and the second is the aligned version
|
||||||
|
:rtype: a generator of pairs (tuples) of :obj:`bmlsa.datatypes.QuerySequence` objects
|
||||||
|
"""
|
||||||
# TODO Consider using the built in "scoring" parameter
|
# TODO Consider using the built in "scoring" parameter
|
||||||
aligner = PairwiseAligner()
|
aligner = PairwiseAligner()
|
||||||
aligner.substitution_matrix = substitution_matrices.load(substitution_matrix)
|
aligner.substitution_matrix = substitution_matrices.load(substitution_matrix)
|
||||||
@@ -30,14 +46,10 @@ def align_many_to_one_ssw(
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
# TODO Implement comparison with input positions to choose best
|
# TODO Implement comparison with input positions to choose best
|
||||||
if len(alignments) > 1:
|
|
||||||
raise UnexpectedAlignmentResult(
|
|
||||||
"More than one alignment resulted from a single query."
|
|
||||||
)
|
|
||||||
for alignment in alignments:
|
for alignment in alignments:
|
||||||
score, query_aligned = (alignment.score, alignment.aligned[0][0])
|
score, query_aligned = (alignment.score, alignment.aligned[0][0])
|
||||||
aligned_start, aligned_end = query_aligned
|
aligned_start, aligned_end = query_aligned
|
||||||
yield AlignedSequence(
|
yield QuerySequence(
|
||||||
query.id,
|
query.id,
|
||||||
query.sequence,
|
query.sequence,
|
||||||
query.name,
|
query.name,
|
||||||
@@ -45,7 +57,7 @@ def align_many_to_one_ssw(
|
|||||||
query.start,
|
query.start,
|
||||||
query.end,
|
query.end,
|
||||||
query.score,
|
query.score,
|
||||||
), AlignedSequence(
|
), QuerySequence(
|
||||||
query.id,
|
query.id,
|
||||||
alignment.query,
|
alignment.query,
|
||||||
query.name,
|
query.name,
|
||||||
|
@@ -4,7 +4,7 @@ from Bio import SeqIO
|
|||||||
import logging
|
import logging
|
||||||
from bmlsa.aligner import align_many_to_one_ssw
|
from bmlsa.aligner import align_many_to_one_ssw
|
||||||
|
|
||||||
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
|
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -29,8 +29,8 @@ def run(args):
|
|||||||
"more information."
|
"more information."
|
||||||
)
|
)
|
||||||
exit(3)
|
exit(3)
|
||||||
queries = read_annotations_from_csv(
|
queries = queries_from_csv(
|
||||||
args.annotations,
|
args.queries,
|
||||||
args.id_header,
|
args.id_header,
|
||||||
args.seq_header,
|
args.seq_header,
|
||||||
args.name_header,
|
args.name_header,
|
||||||
@@ -59,11 +59,11 @@ def run(args):
|
|||||||
os.makedirs(args.output, exist_ok=True)
|
os.makedirs(args.output, exist_ok=True)
|
||||||
with open(args.sequence, "r") as sequence_fd:
|
with open(args.sequence, "r") as sequence_fd:
|
||||||
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
||||||
aligned_annotations = align_many_to_one_ssw(
|
aligned_queries = align_many_to_one_ssw(
|
||||||
str(sequence.seq), queries, **scoring_parameter
|
str(sequence.seq), queries, **scoring_parameter
|
||||||
)
|
)
|
||||||
save_alignments_to_csv(
|
save_alignments_to_csv(
|
||||||
aligned_annotations,
|
aligned_queries,
|
||||||
os.path.join(
|
os.path.join(
|
||||||
args.output,
|
args.output,
|
||||||
sequence.id.replace("|", "+").replace(".", "_") + ".csv",
|
sequence.id.replace("|", "+").replace(".", "_") + ".csv",
|
||||||
@@ -74,11 +74,11 @@ def run(args):
|
|||||||
def main():
|
def main():
|
||||||
arg_parser = argparse.ArgumentParser("bmlsa")
|
arg_parser = argparse.ArgumentParser("bmlsa")
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"annotations",
|
"queries",
|
||||||
type=str,
|
type=str,
|
||||||
help=(
|
help=(
|
||||||
"Path to CSV containing the sequences to align as well as the "
|
"Path to CSV containing the sequences to align as well as the "
|
||||||
"annotations for the respective sequences."
|
"queries for the respective sequences."
|
||||||
),
|
),
|
||||||
metavar="a",
|
metavar="a",
|
||||||
)
|
)
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
class AlignedSequence:
|
class QuerySequence:
|
||||||
|
"""Represents a sequence that may be aligned."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
id: str,
|
id: str,
|
||||||
@@ -9,6 +11,23 @@ class AlignedSequence:
|
|||||||
end: int = None,
|
end: int = None,
|
||||||
score: int = None,
|
score: int = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Instantiates a :obj:`bmlsa.datatypes.QuerySequence` object
|
||||||
|
|
||||||
|
:param id: The id of the query sequence
|
||||||
|
:type id: str
|
||||||
|
:param sequence: The sequence itself
|
||||||
|
:type sequence: str
|
||||||
|
:param name: The name of the sequence, defaults to None
|
||||||
|
:type name: str, optional
|
||||||
|
:param description: The description of the sequence, defaults to None
|
||||||
|
:type description: str, optional
|
||||||
|
:param start: The start of the sequence, defaults to None
|
||||||
|
:type start: int, optional
|
||||||
|
:param end: The end of the sequence, defaults to None
|
||||||
|
:type end: int, optional
|
||||||
|
:param score: The alignment score of the sequence, defaults to None
|
||||||
|
:type score: int, optional
|
||||||
|
"""
|
||||||
self._description = description
|
self._description = description
|
||||||
self._start = start
|
self._start = start
|
||||||
self._end = end
|
self._end = end
|
||||||
|
@@ -1,10 +1,10 @@
|
|||||||
import csv
|
import csv
|
||||||
from typing import Iterable
|
from typing import Generator, Iterable
|
||||||
|
|
||||||
from bmlsa.datatypes import AlignedSequence
|
from bmlsa.datatypes import QuerySequence
|
||||||
|
|
||||||
|
|
||||||
def read_annotations_from_csv(
|
def queries_from_csv(
|
||||||
csv_path: str,
|
csv_path: str,
|
||||||
id_header: str,
|
id_header: str,
|
||||||
sequence_header: str,
|
sequence_header: str,
|
||||||
@@ -12,7 +12,26 @@ def read_annotations_from_csv(
|
|||||||
desc_header: str = None,
|
desc_header: str = None,
|
||||||
start_header: str = None,
|
start_header: str = None,
|
||||||
end_header: str = None,
|
end_header: str = None,
|
||||||
):
|
) -> Generator[QuerySequence, None, None]:
|
||||||
|
"""Generates :obj:`bmlsa.datatypes.QuerySequence` instances from a CSV file.
|
||||||
|
|
||||||
|
:param csv_path: Path to CSV to use
|
||||||
|
:type csv_path: str
|
||||||
|
:param id_header: The column title for the unique identifier for each query sequence
|
||||||
|
:type id_header: str
|
||||||
|
:param sequence_header: The column title for the sequences themselves
|
||||||
|
:type sequence_header: str
|
||||||
|
:param name_header: The column title for the name of the sequence, defaults to None
|
||||||
|
:type name_header: str, optional
|
||||||
|
:param desc_header: The column title for the description of the sequence, defaults to None
|
||||||
|
:type desc_header: str, optional
|
||||||
|
:param start_header: The column title for the start position of the sequence, defaults to None
|
||||||
|
:type start_header: str, optional
|
||||||
|
:param end_header: The column title for the end position of the sequence, defaults to None
|
||||||
|
:type end_header: str, optional
|
||||||
|
:yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row
|
||||||
|
:rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence`
|
||||||
|
"""
|
||||||
with open(csv_path, "r") as csv_fd:
|
with open(csv_path, "r") as csv_fd:
|
||||||
reader = csv.reader(csv_fd)
|
reader = csv.reader(csv_fd)
|
||||||
id_ind = None
|
id_ind = None
|
||||||
@@ -38,7 +57,7 @@ def read_annotations_from_csv(
|
|||||||
desc = row[desc_ind] if desc_header else None
|
desc = row[desc_ind] if desc_header else None
|
||||||
start = row[start_ind] if start_header else None
|
start = row[start_ind] if start_header else None
|
||||||
end = row[end_ind] if end_header else None
|
end = row[end_ind] if end_header else None
|
||||||
yield AlignedSequence(
|
yield QuerySequence(
|
||||||
id,
|
id,
|
||||||
sequence,
|
sequence,
|
||||||
name,
|
name,
|
||||||
@@ -49,8 +68,15 @@ def read_annotations_from_csv(
|
|||||||
|
|
||||||
|
|
||||||
def save_alignments_to_csv(
|
def save_alignments_to_csv(
|
||||||
aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]], output_path: str
|
aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]], output_path: str
|
||||||
):
|
) -> None:
|
||||||
|
"""Saves alignments to a CSV.
|
||||||
|
|
||||||
|
:param aligned_pairs: An iterable of the original sequence and aligned sequences
|
||||||
|
:type aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]]
|
||||||
|
:param output_path: A path to the output CSV file
|
||||||
|
:type output_path: str
|
||||||
|
"""
|
||||||
with open(output_path, "w") as output_fd:
|
with open(output_path, "w") as output_fd:
|
||||||
writer = csv.writer(output_fd)
|
writer = csv.writer(output_fd)
|
||||||
header_wrote = False
|
header_wrote = False
|
||||||
|
@@ -2,7 +2,7 @@ import pytest
|
|||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
from bmlsa.aligner import align_many_to_one_ssw
|
from bmlsa.aligner import align_many_to_one_ssw
|
||||||
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
|
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
|
||||||
from bmlsa.datatypes import AlignedSequence
|
from bmlsa.datatypes import QuerySequence
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
|
||||||
@@ -16,7 +16,7 @@ def reference_sequence():
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def queries():
|
def queries():
|
||||||
return [
|
return [
|
||||||
AlignedSequence(
|
QuerySequence(
|
||||||
"ORF10",
|
"ORF10",
|
||||||
"ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
|
"ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
|
||||||
"GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG",
|
"GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG",
|
||||||
@@ -38,8 +38,8 @@ def test_align_many_to_one_returns_correct_data_structure(reference_sequence, qu
|
|||||||
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
||||||
)
|
)
|
||||||
for original, aligned_seq in results:
|
for original, aligned_seq in results:
|
||||||
assert isinstance(original, AlignedSequence)
|
assert isinstance(original, QuerySequence)
|
||||||
assert isinstance(aligned_seq, AlignedSequence)
|
assert isinstance(aligned_seq, QuerySequence)
|
||||||
|
|
||||||
|
|
||||||
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):
|
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):
|
||||||
|
@@ -1,21 +1,17 @@
|
|||||||
from csv import reader
|
from csv import reader
|
||||||
from os import path
|
from os import path
|
||||||
from bmlsa.datatypes import AlignedSequence
|
from bmlsa.datatypes import QuerySequence
|
||||||
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
|
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
|
||||||
def test_read_annotations_from_csv_has_data():
|
def test_queries_from_csv_has_data():
|
||||||
results = read_annotations_from_csv(
|
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||||
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
|
|
||||||
)
|
|
||||||
assert isinstance(results, Iterable)
|
assert isinstance(results, Iterable)
|
||||||
|
|
||||||
|
|
||||||
def test_read_annotations_from_csv_data_valid():
|
def test_queries_from_csv_data_valid():
|
||||||
results = read_annotations_from_csv(
|
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||||
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
|
|
||||||
)
|
|
||||||
for aligned_seq in results:
|
for aligned_seq in results:
|
||||||
assert isinstance(aligned_seq.id, str)
|
assert isinstance(aligned_seq.id, str)
|
||||||
assert isinstance(aligned_seq.sequence, str)
|
assert isinstance(aligned_seq.sequence, str)
|
||||||
@@ -23,7 +19,7 @@ def test_read_annotations_from_csv_data_valid():
|
|||||||
|
|
||||||
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
|
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
|
||||||
output_path = path.join(tmpdir, "alignment_results.csv")
|
output_path = path.join(tmpdir, "alignment_results.csv")
|
||||||
dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
|
dummy_sequence = QuerySequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
|
||||||
alignments = [(dummy_sequence, dummy_sequence)]
|
alignments = [(dummy_sequence, dummy_sequence)]
|
||||||
save_alignments_to_csv(alignments, output_path)
|
save_alignments_to_csv(alignments, output_path)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user