Compare commits
36 Commits
ae3732eba2
...
master
Author | SHA1 | Date | |
---|---|---|---|
5e59eb4302 | |||
884787f31d | |||
9d98d5d8d1 | |||
4d2c491733 | |||
842458f20c | |||
40c8afe68c | |||
a5b7d2773e | |||
1ae59d0a8f | |||
d6edc0e317 | |||
a8b820523a | |||
cf104c859d | |||
ebb27556b5 | |||
7bfe49445a | |||
f0da142377 | |||
4383e36c1f | |||
cbf32d253b | |||
adf7473587 | |||
e942b65ebd | |||
61149b88b7 | |||
47cdd8bc28 | |||
7e3f43434e | |||
587c2e753a | |||
00cedbb181 | |||
11d5590355 | |||
c34b219306 | |||
6017eadb2c | |||
3c5e934c7c | |||
9eebaa2f91 | |||
78b4a74bc8 | |||
beef3ee6a5 | |||
2f86ec050f | |||
51c2bd1f7b | |||
515c130146 | |||
927a8a170a | |||
d75a83585c | |||
b9f4c74168 |
22
.devcontainer/devcontainer.json
Normal file
22
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,22 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||
{
|
||||
"name": "Python 3",
|
||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -212,3 +212,6 @@ pyrightconfig.json
|
||||
|
||||
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
|
||||
output
|
||||
!docs/source/
|
||||
docs/source/autodoc
|
26
.vscode/launch.json
vendored
Normal file
26
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Module",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"module": "bmlsa.cli",
|
||||
"args": [
|
||||
"-I",
|
||||
"id",
|
||||
"-S",
|
||||
"sequence",
|
||||
"-B",
|
||||
"BLASTp",
|
||||
"${workspaceFolder}/tests/resources/SARS_CoV-2_genes.csv",
|
||||
"${workspaceFolder}/tests/resources/NC_045512_coding.fasta",
|
||||
"${workspaceFolder}/output"
|
||||
],
|
||||
"justMyCode": true
|
||||
}
|
||||
]
|
||||
}
|
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -7,5 +7,6 @@
|
||||
"FASTA"
|
||||
],
|
||||
"python.testing.pytestEnabled": true,
|
||||
"python.analysis.inlayHints.pytestParameters": true
|
||||
"python.analysis.inlayHints.pytestParameters": true,
|
||||
"autoDocstring.docstringFormat": "sphinx"
|
||||
}
|
31
Jenkinsfile
vendored
31
Jenkinsfile
vendored
@@ -1,37 +1,46 @@
|
||||
pipeline {
|
||||
agent any
|
||||
stages {
|
||||
stage("clean") {
|
||||
steps {
|
||||
sh 'rm -rf ./dist/*'
|
||||
}
|
||||
agent {
|
||||
kubernetes {
|
||||
cloud 'rsys-devel'
|
||||
defaultContainer 'pip'
|
||||
inheritFrom 'pip'
|
||||
}
|
||||
}
|
||||
|
||||
stages {
|
||||
stage("install") {
|
||||
steps {
|
||||
sh 'mamba env update --file environment.yml'
|
||||
sh 'echo "mamba activate bmlsa" >> ~/.bashrc'
|
||||
sh 'pip install -r requirements.txt'
|
||||
}
|
||||
}
|
||||
stage("unit tests") {
|
||||
steps {
|
||||
sh "python -m pytest --junitxml=test_results.xml"
|
||||
sh returnStatus: true, script: "python -m pytest --junitxml=test_results.xml"
|
||||
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
||||
}
|
||||
}
|
||||
stage("build") {
|
||||
steps {
|
||||
sh 'rm -rf ./dist/*'
|
||||
sh 'rm -rf ./docs/build/*'
|
||||
sh "python -m build"
|
||||
container('sphinx') {
|
||||
sh 'sphinx-apidoc -o docs/source/autodoc/bmlsa src/bmlsa --force'
|
||||
sh 'sphinx-build -M html ./docs/source ./docs/build'
|
||||
publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'Documentation', reportTitles: ''])
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("test installation") {
|
||||
steps {
|
||||
sh "pip install dist/*.whl --force-reinstall"
|
||||
sh "bmlsa -h"
|
||||
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/"
|
||||
}
|
||||
}
|
||||
stage("archive") {
|
||||
steps {
|
||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
|
||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
stage("publish") {
|
||||
@@ -39,7 +48,7 @@ pipeline {
|
||||
branch '**/master'
|
||||
}
|
||||
steps {
|
||||
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
||||
withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
}
|
||||
}
|
||||
|
22
README.md
22
README.md
@@ -1,6 +1,8 @@
|
||||
[Build Status](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/)
|
||||
|
||||
# BMLSA
|
||||
|
||||
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm.
|
||||
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm. It may be used as a Python library or as a full command-line interface.
|
||||
|
||||
## Features
|
||||
|
||||
@@ -11,3 +13,21 @@ A Basic multi local sequence alignment tool using the Biopython implementation o
|
||||
- Automatically align with all sequences in reference FASTA
|
||||
- All produced output is human readable!
|
||||
|
||||
## Install via `pip`
|
||||
|
||||
Just run `pip install --index-url https://git.reslate.systems/api/packages/ydeng/pypi/simple/ bmlsa` in a console that is capable of running `pip`!
|
||||
|
||||
## CLI Demo from Git Repo
|
||||
|
||||
Resources have been provided in this package for CI purposes, but you may use them to try out the program yourself!
|
||||
|
||||
1. Install the program
|
||||
2. Clone this repository
|
||||
3. Change into the cloned repository directory
|
||||
4. Run `bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/`
|
||||
|
||||
The results will show up in an `output` folder (which will be generated if it doesn't exist). Use `bmlsa -h` to see what each part of the above command does!
|
||||
|
||||
## More Information
|
||||
|
||||
For all live downloadable artifacts, build statuses, unit test results, and documentation, check out the continuous integration page for the [master branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) ([development branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/develop/)).
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
@@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
33
docs/source/conf.py
Normal file
33
docs/source/conf.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Configure system path
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../src/"))
|
||||
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "BMLSA"
|
||||
copyright = "2023, Harrison"
|
||||
author = "Harrison"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = ["sphinx.ext.autodoc"]
|
||||
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = []
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_theme = "alabaster"
|
||||
html_static_path = ["_static"]
|
22
docs/source/index.rst
Normal file
22
docs/source/index.rst
Normal file
@@ -0,0 +1,22 @@
|
||||
.. BMLSA documentation master file, created by
|
||||
sphinx-quickstart on Fri Apr 28 13:04:16 2023.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to BMLSA's documentation!
|
||||
=================================
|
||||
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
autodoc/**/modules
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
@@ -3,9 +3,10 @@ channels:
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- biopython=1.81
|
||||
- build=0.7
|
||||
- pytest=7.3
|
||||
- hypothesis=6.56
|
||||
- twine=4
|
||||
- python-build=0.10.0
|
||||
- setuptools
|
||||
- setuptools=67.6
|
||||
- sphinx=6.2
|
||||
prefix: ./env
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
biopython==1.81
|
||||
pytest
|
||||
hypothesis
|
||||
twine
|
||||
build
|
||||
setuptools
|
@@ -1,20 +1,36 @@
|
||||
import logging
|
||||
from typing import Iterable
|
||||
from typing import Generator, Iterable
|
||||
from Bio.Align import PairwiseAligner, substitution_matrices
|
||||
from bmlsa.datatypes import AlignedSequence
|
||||
from bmlsa.exceptions import UnexpectedAlignmentResult
|
||||
from bmlsa.datatypes import QuerySequence
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def align_many_to_one_ssw(
|
||||
reference_sequence: str,
|
||||
queries: Iterable[AlignedSequence],
|
||||
queries: Iterable[QuerySequence],
|
||||
extend_gap_score: int,
|
||||
open_gap_score: int,
|
||||
alignment_mode: str,
|
||||
substitution_matrix: str = "BLOSUM62",
|
||||
):
|
||||
) -> Generator[tuple[QuerySequence, QuerySequence], None, None]:
|
||||
"""Aligns :obj:`bmlsa.datatypes.QuerySequence` objects to a given reference sequence
|
||||
|
||||
:param reference_sequence: The reference sequence to align to
|
||||
:type reference_sequence: str
|
||||
:param queries: An iterable sequence of :obj:`bmlsa.datatypes.QuerySequence`
|
||||
:type queries: Iterable[QuerySequence]
|
||||
:param extend_gap_score: The gap extension score to use for alignment. Typically negative.
|
||||
:type extend_gap_score: int
|
||||
:param open_gap_score: The open gap score to use for alignment. Typically negative.
|
||||
:type open_gap_score: int
|
||||
:param alignment_mode: The alignment mode to use. Either "local" or "global".
|
||||
:type alignment_mode: str
|
||||
:param substitution_matrix: The name of the substitution matrix available in :mod:`Bio.Align`, defaults to "BLOSUM62"
|
||||
:type substitution_matrix: str, optional
|
||||
:yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is the original, and the second is the aligned version
|
||||
:rtype: a generator of pairs of :obj:`bmlsa.datatypes.QuerySequence` objects
|
||||
"""
|
||||
# TODO Consider using the built in "scoring" parameter
|
||||
aligner = PairwiseAligner()
|
||||
aligner.substitution_matrix = substitution_matrices.load(substitution_matrix)
|
||||
@@ -30,14 +46,10 @@ def align_many_to_one_ssw(
|
||||
)
|
||||
continue
|
||||
# TODO Implement comparison with input positions to choose best
|
||||
if len(alignments) > 1:
|
||||
raise UnexpectedAlignmentResult(
|
||||
"More than one alignment resulted from a single query."
|
||||
)
|
||||
for alignment in alignments:
|
||||
score, query_aligned = (alignment.score, alignment.aligned[0][0])
|
||||
aligned_start, aligned_end = query_aligned
|
||||
yield AlignedSequence(
|
||||
yield QuerySequence(
|
||||
query.id,
|
||||
query.sequence,
|
||||
query.name,
|
||||
@@ -45,7 +57,7 @@ def align_many_to_one_ssw(
|
||||
query.start,
|
||||
query.end,
|
||||
query.score,
|
||||
), AlignedSequence(
|
||||
), QuerySequence(
|
||||
query.id,
|
||||
alignment.query,
|
||||
query.name,
|
||||
|
@@ -4,7 +4,7 @@ from Bio import SeqIO
|
||||
import logging
|
||||
from bmlsa.aligner import align_many_to_one_ssw
|
||||
|
||||
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
|
||||
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -28,9 +28,9 @@ def run(args):
|
||||
'"--alignment-mode", or, at least "--behave-as". See help (-h) for '
|
||||
"more information."
|
||||
)
|
||||
exit(1)
|
||||
queries = read_annotations_from_csv(
|
||||
args.annotations,
|
||||
exit(3)
|
||||
queries = queries_from_csv(
|
||||
args.queries,
|
||||
args.id_header,
|
||||
args.seq_header,
|
||||
args.name_header,
|
||||
@@ -39,6 +39,10 @@ def run(args):
|
||||
args.end_header,
|
||||
)
|
||||
|
||||
if args.behave_as and args.behave_as not in DEFAULT_ALIGNMENT_PARAMETERS:
|
||||
logger.error('"--behave-as" received bad option.')
|
||||
exit(3)
|
||||
|
||||
scoring_parameter = (
|
||||
{
|
||||
"extend_gap_score": args.extend_gap_score,
|
||||
@@ -52,13 +56,14 @@ def run(args):
|
||||
else DEFAULT_ALIGNMENT_PARAMETERS[args.behave_as]
|
||||
)
|
||||
|
||||
os.makedirs(args.output, exist_ok=True)
|
||||
with open(args.sequence, "r") as sequence_fd:
|
||||
for sequence in SeqIO.parse(sequence_fd, "fasta"):
|
||||
aligned_annotations = align_many_to_one_ssw(
|
||||
aligned_queries = align_many_to_one_ssw(
|
||||
str(sequence.seq), queries, **scoring_parameter
|
||||
)
|
||||
save_alignments_to_csv(
|
||||
aligned_annotations,
|
||||
aligned_queries,
|
||||
os.path.join(
|
||||
args.output,
|
||||
sequence.id.replace("|", "+").replace(".", "_") + ".csv",
|
||||
@@ -69,11 +74,11 @@ def run(args):
|
||||
def main():
|
||||
arg_parser = argparse.ArgumentParser("bmlsa")
|
||||
arg_parser.add_argument(
|
||||
"annotations",
|
||||
"queries",
|
||||
type=str,
|
||||
help=(
|
||||
"Path to CSV containing the sequences to align as well as the "
|
||||
"annotations for the respective sequences."
|
||||
"queries for the respective sequences."
|
||||
),
|
||||
metavar="a",
|
||||
)
|
||||
@@ -166,7 +171,7 @@ def main():
|
||||
arg_parser.add_argument(
|
||||
"-m",
|
||||
"--substitution-matrix",
|
||||
type="str",
|
||||
type=str,
|
||||
help="The name of the substitution matrix.",
|
||||
required=False,
|
||||
default=None,
|
||||
@@ -182,7 +187,6 @@ def main():
|
||||
required=False,
|
||||
default=None,
|
||||
)
|
||||
arg_parser.add
|
||||
args = arg_parser.parse_args()
|
||||
run(args)
|
||||
|
||||
|
@@ -1,4 +1,6 @@
|
||||
class AlignedSequence:
|
||||
class QuerySequence:
|
||||
"""Represents a sequence that may be aligned."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
id: str,
|
||||
@@ -9,12 +11,29 @@ class AlignedSequence:
|
||||
end: int = None,
|
||||
score: int = None,
|
||||
) -> None:
|
||||
"""Instantiates a :obj:`bmlsa.datatypes.QuerySequence` object
|
||||
|
||||
:param id: The id of the query sequence
|
||||
:type id: str
|
||||
:param sequence: The sequence itself
|
||||
:type sequence: str
|
||||
:param name: The name of the sequence, defaults to None
|
||||
:type name: str, optional
|
||||
:param description: The description of the sequence, defaults to None
|
||||
:type description: str, optional
|
||||
:param start: The start of the sequence, defaults to None
|
||||
:type start: int, optional
|
||||
:param end: The end of the sequence, defaults to None
|
||||
:type end: int, optional
|
||||
:param score: The alignment score of the sequence, defaults to None
|
||||
:type score: int, optional
|
||||
"""
|
||||
self._description = description
|
||||
self._start = start
|
||||
self._end = end
|
||||
self._id = id
|
||||
self._name = name
|
||||
self._sequence = sequence
|
||||
self._sequence = sequence.replace("\n", "").replace("\r", "")
|
||||
self._score = score
|
||||
|
||||
@property
|
||||
|
@@ -1,10 +1,10 @@
|
||||
import csv
|
||||
from typing import Iterable
|
||||
from typing import Generator, Iterable
|
||||
|
||||
from bmlsa.datatypes import AlignedSequence
|
||||
from bmlsa.datatypes import QuerySequence
|
||||
|
||||
|
||||
def read_annotations_from_csv(
|
||||
def queries_from_csv(
|
||||
csv_path: str,
|
||||
id_header: str,
|
||||
sequence_header: str,
|
||||
@@ -12,8 +12,26 @@ def read_annotations_from_csv(
|
||||
desc_header: str = None,
|
||||
start_header: str = None,
|
||||
end_header: str = None,
|
||||
):
|
||||
annotations = {}
|
||||
) -> Generator[QuerySequence, None, None]:
|
||||
"""Generates :obj:`bmlsa.datatypes.QuerySequence` instances from a CSV file.
|
||||
|
||||
:param csv_path: Path to CSV to use
|
||||
:type csv_path: str
|
||||
:param id_header: The column title for the unique identifier for each query sequence
|
||||
:type id_header: str
|
||||
:param sequence_header: The column title for the sequences themselves
|
||||
:type sequence_header: str
|
||||
:param name_header: The column title for the name of the sequence, defaults to None
|
||||
:type name_header: str, optional
|
||||
:param desc_header: The column title for the description of the sequence, defaults to None
|
||||
:type desc_header: str, optional
|
||||
:param start_header: The column title for the start position of the sequence, defaults to None
|
||||
:type start_header: str, optional
|
||||
:param end_header: The column title for the end position of the sequence, defaults to None
|
||||
:type end_header: str, optional
|
||||
:yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row
|
||||
:rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence`
|
||||
"""
|
||||
with open(csv_path, "r") as csv_fd:
|
||||
reader = csv.reader(csv_fd)
|
||||
id_ind = None
|
||||
@@ -39,7 +57,7 @@ def read_annotations_from_csv(
|
||||
desc = row[desc_ind] if desc_header else None
|
||||
start = row[start_ind] if start_header else None
|
||||
end = row[end_ind] if end_header else None
|
||||
yield AlignedSequence(
|
||||
yield QuerySequence(
|
||||
id,
|
||||
sequence,
|
||||
name,
|
||||
@@ -50,8 +68,15 @@ def read_annotations_from_csv(
|
||||
|
||||
|
||||
def save_alignments_to_csv(
|
||||
aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]], output_path: str
|
||||
):
|
||||
aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]], output_path: str
|
||||
) -> None:
|
||||
"""Saves alignments to a CSV.
|
||||
|
||||
:param aligned_pairs: An iterable of the original sequence and aligned sequences
|
||||
:type aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]]
|
||||
:param output_path: A path to the output CSV file
|
||||
:type output_path: str
|
||||
"""
|
||||
with open(output_path, "w") as output_fd:
|
||||
writer = csv.writer(output_fd)
|
||||
header_wrote = False
|
||||
|
@@ -2,7 +2,7 @@ import pytest
|
||||
from Bio import SeqIO
|
||||
from bmlsa.aligner import align_many_to_one_ssw
|
||||
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
|
||||
from bmlsa.datatypes import AlignedSequence
|
||||
from bmlsa.datatypes import QuerySequence
|
||||
from collections.abc import Iterable
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ def reference_sequence():
|
||||
@pytest.fixture
|
||||
def queries():
|
||||
return [
|
||||
AlignedSequence(
|
||||
QuerySequence(
|
||||
"ORF10",
|
||||
"ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
|
||||
"GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG",
|
||||
@@ -38,8 +38,8 @@ def test_align_many_to_one_returns_correct_data_structure(reference_sequence, qu
|
||||
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
||||
)
|
||||
for original, aligned_seq in results:
|
||||
assert isinstance(original, AlignedSequence)
|
||||
assert isinstance(aligned_seq, AlignedSequence)
|
||||
assert isinstance(original, QuerySequence)
|
||||
assert isinstance(aligned_seq, QuerySequence)
|
||||
|
||||
|
||||
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):
|
||||
|
@@ -1,21 +1,17 @@
|
||||
from csv import reader
|
||||
from os import path
|
||||
from bmlsa.datatypes import AlignedSequence
|
||||
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
|
||||
from bmlsa.datatypes import QuerySequence
|
||||
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||
from collections.abc import Iterable
|
||||
|
||||
|
||||
def test_read_annotations_from_csv_has_data():
|
||||
results = read_annotations_from_csv(
|
||||
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
|
||||
)
|
||||
def test_queries_from_csv_has_data():
|
||||
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||
assert isinstance(results, Iterable)
|
||||
|
||||
|
||||
def test_read_annotations_from_csv_data_valid():
|
||||
results = read_annotations_from_csv(
|
||||
"tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
|
||||
)
|
||||
def test_queries_from_csv_data_valid():
|
||||
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||
for aligned_seq in results:
|
||||
assert isinstance(aligned_seq.id, str)
|
||||
assert isinstance(aligned_seq.sequence, str)
|
||||
@@ -23,7 +19,7 @@ def test_read_annotations_from_csv_data_valid():
|
||||
|
||||
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
|
||||
output_path = path.join(tmpdir, "alignment_results.csv")
|
||||
dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
|
||||
dummy_sequence = QuerySequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
|
||||
alignments = [(dummy_sequence, dummy_sequence)]
|
||||
save_alignments_to_csv(alignments, output_path)
|
||||
|
||||
|
Reference in New Issue
Block a user