Compare commits
28 Commits
features/l...develop
SHA1
06dbb56c28
79fcce8b84
f4064f087e
276665f5fd
fd536862e2
576dc303f4
2822a483e3
b8cebb8ba4
7384895578
5a03c7e8d8
ddf9cde175
2e8cdd8da9
d0318536b2
765cf9d418
348c3d00b4
1c3f7f9ed8
e4ddaf2e8c
73aade2bde
af8590baa7
36bca1b70d
09a693b696
f76bf86ef6
a60daf3ee2
fbfd993269
ba606c35a9
4183840ba0
7fb3eab5b6
175a51f968
.devcontainer/Dockerfile (Normal file, 11 lines)
@@ -0,0 +1,11 @@
+FROM mcr.microsoft.com/devcontainers/anaconda:1-3
+
+# Copy environment.yml (if found) to a temp location so we update the environment. Also
+# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
+COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
+RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
+    && rm -rf /tmp/conda-tmp
+
+# [Optional] Uncomment this section to install additional OS packages.
+# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+#     && apt-get -y install --no-install-recommends <your-package-list-here>
.devcontainer/devcontainer.json

@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Anaconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    }

     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,14 +14,7 @@
     // "forwardPorts": [],

     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "mechatroner.rainbow-csv"
-            ]
-        }
-    }
+    // "postCreateCommand": "python --version",

     // Configure tool-specific properties.
+    // "customizations": {},
.devcontainer/noop.txt (Normal file, 3 lines)
@@ -0,0 +1,3 @@
+This file is copied into the container along with environment.yml* from the parent
+folder. It is included to prevent the Dockerfile COPY instruction from
+failing if no environment.yml is found.
.gitignore (vendored, 159 lines)
@@ -1,6 +1,6 @@
 # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
-# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
-# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
+# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python

 ### Linux ###
 *~
@@ -17,146 +17,6 @@
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*

-### Node ###
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-.pnpm-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# Snowpack dependency directory (https://snowpack.dev/)
-web_modules/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional stylelint cache
-.stylelintcache
-
-# Microbundle cache
-.rpt2_cache/
-.rts2_cache_cjs/
-.rts2_cache_es/
-.rts2_cache_umd/
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variable files
-.env
-.env.development.local
-.env.test.local
-.env.production.local
-.env.local
-
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-.parcel-cache
-
-# Next.js build output
-.next
-out
-
-# Nuxt.js build / generate output
-.nuxt
-dist
-
-# Gatsby files
-.cache/
-# Comment in the public line in if your project uses Gatsby and not Next.js
-# https://nextjs.org/blog/next-9-1#public-directory-support
-# public
-
-# vuepress build output
-.vuepress/dist
-
-# vuepress v2.x temp and cache directory
-.temp
-
-# Docusaurus cache and generated files
-.docusaurus
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# TernJS port file
-.tern-port
-
-# Stores VSCode versions used for testing VSCode extensions
-.vscode-test
-
-# yarn v2
-.yarn/cache
-.yarn/unplugged
-.yarn/build-state.yml
-.yarn/install-state.gz
-.pnp.*
-
-### Node Patch ###
-# Serverless Webpack directories
-.webpack/
-
-# Optional stylelint cache
-
-# SvelteKit build / generate output
-.svelte-kit
-
 ### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -202,6 +62,7 @@ htmlcov/
 .nox/
 .coverage
 .coverage.*
+.cache
 nosetests.xml
 coverage.xml
 *.cover
@@ -215,6 +76,7 @@ cover/
 *.pot

 # Django stuff:
+*.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
@@ -278,6 +140,7 @@ celerybeat.pid
 *.sage.py

 # Environments
+.env
 .venv
 env/
 venv/
@@ -326,13 +189,6 @@ poetry.toml
 # LSP config files
 pyrightconfig.json

-### Svelte ###
-# gitignore template for the SvelteKit, frontend web component framework
-# website: https://kit.svelte.dev/
-
-.svelte-kit/
-package
-
 ### VisualStudioCode ###
 .vscode/*
 !.vscode/settings.json
@@ -352,9 +208,8 @@ package
 .history
 .ionide

-# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
+# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python

 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)

 output
 *.private.*
+conda-bld
.vscode/extensions.json (vendored, Normal file, 5 lines)
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "piotrpalarz.vscode-gitignore-generator"
+    ]
+}
Jenkinsfile (vendored, 14 lines)
@@ -2,14 +2,14 @@ pipeline {
     agent {
         kubernetes {
             cloud 'rsys-devel'
-            defaultContainer 'pip'
-            inheritFrom 'pip'
+            defaultContainer 'miniforge3'
+            inheritFrom 'miniforge'
         }
     }
     stages {
         stage("install") {
             steps {
-                sh 'python -m pip install -r requirements.txt'
+                sh 'conda env update -n base -f environment.yml'
             }
         }
         stage("unit tests") {
@@ -22,11 +22,14 @@ pipeline {
         stage("build") {
             steps {
                 sh "python -m build"
+                sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+                sh "python scripts/patch_recipe.py"
+                sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
             }
         }
         stage("archive") {
             steps {
-                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
             }
         }
         stage("publish") {
@@ -36,7 +39,8 @@ pipeline {
                 CREDS = credentials('username-password-rs-git')
             }
             steps {
-                sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
             }
         }
         stage ("pypi.org") {
README.md

@@ -1,6 +1,6 @@
 # autoBIGS.Engine

-A python library implementing common BIGSdb MLST schemes and databases. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
+A python library implementing common BIGSdb MLST schemes and database accesses for the purpose of automatically typing sequences. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.

 ## Features
autobigs-engine/meta.yaml (Normal file, 44 lines)
@@ -0,0 +1,44 @@
+{% set name = "autoBIGS.engine" %}
+{% set version = "0.12.1.dev1+gb8cebb8.d20250221" %}
+
+package:
+  name: {{ name|lower|replace(".", "-") }}
+  version: {{ version }}
+
+source:
+  url: file:///workspaces/autoBIGS.engine/dist/autobigs_engine-0.12.1.dev1%2Bgb8cebb8.d20250221.tar.gz
+  sha256: c86441b94f935cfa414ff28ca4c026a070e0fb15988ea3bb7d1a942859a09b16
+
+build:
+  noarch: python
+  script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
+  number: 0
+  run_exports:
+    - {{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}
+requirements:
+  host:
+    - python >=3.12
+    - setuptools >=64
+    - setuptools-scm >=8
+    - pip
+  run:
+    - python >=3.12
+    - biopython ==1.85
+    - aiohttp ==3.11.*
+
+test:
+  imports:
+    - autobigs
+  commands:
+    - pip check
+  requires:
+    - pip
+
+about:
+  summary: A library to rapidly fetch MLST profiles given sequences for various diseases.
+  license: GPL-3.0-or-later
+  license_file: LICENSE
+  home: https://github.com/Syph-and-VPD-Lab/autoBIGS.engine
+extra:
+  recipe-maintainers:
+    - Harrison Deng
environment.yml (Normal file, 16 lines)
@@ -0,0 +1,16 @@
+name: ci
+channels:
+  - bioconda
+  - conda-forge
+dependencies:
+  - aiohttp==3.11.*
+  - biopython==1.85
+  - pytest
+  - pytest-asyncio
+  - python-build
+  - conda-build
+  - twine==6.0.1
+  - setuptools_scm
+  - pytest-cov
+  - grayskull
+  - curl
pyproject.toml

@@ -13,11 +13,12 @@ dependencies = [
 ]
 requires-python = ">=3.12"
 description = "A library to rapidly fetch MLST profiles given sequences for various diseases."
+license = {text = "GPL-3.0-or-later"}

 [project.urls]
-Homepage = "https://github.com/RealYHD/autoBIGS.engine"
-Source = "https://github.com/RealYHD/autoBIGS.engine"
-Issues = "https://github.com/RealYHD/autoBIGS.engine/issues"
+Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+Source = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine/issues"

 [tool.setuptools_scm]
requirements.txt (deleted)

@@ -1,8 +0,0 @@
-aiohttp[speedups]==3.11.*
-biopython==1.85
-pytest
-pytest-asyncio
-build
-twine
-setuptools_scm
-pytest-cov
scripts/patch_recipe.py (Normal file, 103 lines)
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+import argparse
+from os import fdopen, path
+import os
+import re
+import shutil
+from sys import argv
+import tempfile
+
+INDENTATION = "  "
+GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
+RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
+LICENSE_SUFFIX = "-or-later"
+HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+
+def _calc_indentation(line: str):
+    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0
+
+def read_grayskull_output():
+    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
+    original_meta = path.join(original_recipe, "meta.yaml")
+    meta_file = open(original_meta)
+    lines = meta_file.readlines()
+    meta_file.close()
+    return lines
+
+def update_naming_scheme(lines):
+    modified_lines = []
+    for line in lines:
+        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
+        modified_line = line
+        for match in matches:
+            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
+        modified_lines.append(modified_line)
+    return modified_lines
+
+def inject_run_exports(lines: list[str]):
+    package_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "build:\n" and indentation_count == 0:
+            package_indent = True
+            modified_lines.append(line)
+        elif package_indent and indentation_count == 0:
+            modified_lines.append(INDENTATION*1 + "run_exports:\n")
+            modified_lines.append(INDENTATION*2 + "- " + RUN_EXPORTED_VALUE + "\n")
+            package_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def suffix_license(lines: list[str]):
+    about_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "about:\n" and indentation_count == 0:
+            about_indent = True
+            modified_lines.append(line)
+        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
+            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
+            about_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def inject_home_page(lines: list[str]):
+    about_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "about:\n" and indentation_count == 0:
+            about_indent = True
+            modified_lines.append(line)
+        elif about_indent and indentation_count == 0:
+            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
+            about_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def write_to_original(lines: list[str]):
+    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
+    original_meta = path.join(original_recipe, "meta.yaml")
+    with open(original_meta, "w") as file:
+        file.writelines(lines)
+
+def rename_recipe_dir():
+    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
+    shutil.rmtree(new_recipe_name, ignore_errors=True)
+    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)
+
+if __name__ == "__main__":
+    original_grayskull_out = read_grayskull_output()
+    modified_recipe_meta = None
+    modified_recipe_meta = update_naming_scheme(original_grayskull_out)
+    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
+    modified_recipe_meta = suffix_license(modified_recipe_meta)
+    modified_recipe_meta = inject_home_page(modified_recipe_meta)
+    write_to_original(modified_recipe_meta)
+    rename_recipe_dir()
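The naming rewrite above hinges on one regex: update_naming_scheme captures the empty position right after "name|lower" in grayskull's Jinja templating and splices in a replace filter. A minimal sanity-check sketch, assuming the script can be imported as a module (the scripts.patch_recipe import path is hypothetical; in CI the file is simply run as a script):

    # Hypothetical import path; scripts/patch_recipe.py is invoked directly in the Jenkinsfile.
    from scripts.patch_recipe import update_naming_scheme

    line = '  name: {{ name|lower }}\n'
    patched = update_naming_scheme([line])
    # The empty capture group sits right after "name|lower", so the filter chain grows in place:
    assert patched == ['  name: {{ name|lower|replace(".", "-") }}\n']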
autobigs.engine.analysis.aligners (deleted)

@@ -1,70 +0,0 @@
-import asyncio
-from concurrent.futures import Future, ThreadPoolExecutor
-from contextlib import AbstractContextManager
-from typing import Any, Set, Union
-from Bio.Align import PairwiseAligner
-from queue import Queue
-
-from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment
-
-class AsyncBiopythonPairwiseAlignmentEngine(AbstractContextManager):
-    def __enter__(self):
-        self._thread_pool = ThreadPoolExecutor(self._max_threads, thread_name_prefix="async-pairwise-alignment")
-        return self
-
-    def __init__(self, aligner: PairwiseAligner, max_threads: int = 4):
-        self._max_threads = max_threads
-        self._aligner = aligner
-        self._work_left: Set[Future] = set()
-        self._work_complete: Queue[Future] = Queue()
-
-    def align(self, reference: str, query: str, **associated_data):
-        work = self._thread_pool.submit(
-            self.work, reference, query, **associated_data)
-        work.add_done_callback(self._on_complete)
-        self._work_left.add(work)
-
-    def _on_complete(self, future: Future):
-        self._work_left.remove(future)
-        self._work_complete.put(future)
-
-    def work(self, reference, query, **associated_data):
-        alignments = self._aligner.align(reference, query)
-        top_alignment = alignments[0]
-        top_alignment_stats = top_alignment.counts()
-        top_alignment_gaps = top_alignment_stats.gaps
-        top_alignment_identities = top_alignment_stats.identities
-        top_alignment_mismatches = top_alignment_stats.mismatches
-        top_alignment_score = top_alignment.score # type: ignore
-        return PairwiseAlignment(
-            top_alignment.sequences[0],
-            top_alignment.sequences[1],
-            tuple(top_alignment.indices[0]),
-            tuple(top_alignment.indices[1]),
-            AlignmentStats(
-                percent_identity=top_alignment_identities/top_alignment.length,
-                mismatches=top_alignment_mismatches,
-                gaps=top_alignment_gaps,
-                match_metric=top_alignment_score
-            )), associated_data
-
-    async def next_completed(self) -> Union[tuple[PairwiseAlignment, dict[str, Any]], None]:
-        if self._work_complete.empty() and len(self._work_left):
-            return None
-        completed_alignment = await asyncio.wrap_future(self._work_complete.get())
-        return completed_alignment
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.shutdown()
-
-    def __aiter__(self):
-        return self
-
-    async def __anext__(self):
-        result = await self.next_completed()
-        if result is None:
-            raise StopAsyncIteration
-        return result
-
-    def shutdown(self):
-        self._thread_pool.shutdown(wait=True, cancel_futures=True)
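For reference, the engine deleted above was a thin ThreadPoolExecutor wrapper around Biopython's PairwiseAligner; every number it packed into AlignmentStats came from the top alignment's counts(). A minimal sketch of the same underlying Biopython calls, with toy sequences:

    from Bio.Align import PairwiseAligner

    # The same aligner configuration the deleted engine and its tests used.
    aligner = PairwiseAligner("blastn")
    aligner.mode = "local"

    alignments = aligner.align("ACGTACGTACGT", "ACGTACGT")  # toy reference/query pair
    top = alignments[0]
    counts = top.counts()  # .gaps, .identities, .mismatches fed AlignmentStats
    print(top.score, counts.gaps, counts.identities, counts.mismatches)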
autobigs.engine.analysis.bigsdb

@@ -11,7 +11,6 @@ from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequen

 from aiohttp import ClientSession, ClientTimeout

-from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
 from autobigs.engine.reading import read_fasta
 from autobigs.engine.structures.alignment import PairwiseAlignment
 from autobigs.engine.structures.genomics import NamedString
@@ -23,15 +22,15 @@ from Bio.Align import PairwiseAligner
 class BIGSdbMLSTProfiler(AbstractAsyncContextManager):

     @abstractmethod
-    def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
+    def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         pass

     @abstractmethod
-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         pass

     @abstractmethod
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         pass

     @abstractmethod
@@ -53,14 +52,14 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
     async def __aenter__(self):
         return self

-    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[str], str]) -> AsyncGenerator[Allele, Any]:
+    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
         uri_path = "sequence"
-        if isinstance(query_sequence_strings, str):
+        if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
             query_sequence_strings = [query_sequence_strings]
         for sequence_string in query_sequence_strings:
             async with self._http_client.post(uri_path, json={
-                "sequence": sequence_string,
+                "sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
                 "partial_matches": True
             }) as response:
                 sequence_response: dict = await response.json()
@@ -71,7 +70,8 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                     for allele_loci, alleles in exact_matches.items():
                         for allele in alleles:
                             alelle_id = allele["allele_id"]
-                            yield Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                            result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                            yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
                 elif "partial_matches" in sequence_response:
                     partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
                     for allele_loci, partial_match in partial_matches.items():
@@ -83,23 +83,33 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                             gaps=int(partial_match["gaps"]),
                             match_metric=int(partial_match["bitscore"])
                         )
-                        yield Allele(
+                        result_allele = Allele(
                             allele_locus=allele_loci,
                             allele_variant=str(partial_match["allele"]),
                             partial_match_profile=partial_match_profile
                         )
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
                 else:
-                    raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
+                    raise NoBIGSdbMatchesException(self._database_name, self._schema_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)

-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         uri_path = "designations"
         allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
+        names_list = []
+        def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
+            if isinstance(allele, Allele):
+                allele_val = allele
+            else:
+                allele_val = allele[1]
+                names_list.append(allele[0])
+            allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})

         if isinstance(alleles, AsyncIterable):
             async for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         else:
             for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         request_json = {
             "designations": allele_request_dict
         }
@@ -112,164 +122,36 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
             schema_fields_returned.setdefault("clonal_complex", "unknown")
             schema_exact_matches: dict = response_json["exact_matches"]
             for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
                 if len(exact_match_alleles) > 1:
                     raise ValueError(f"Unexpected number of alleles returned for exact match (Expected 1, retrieved {len(exact_match_alleles)})")
                 allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
             if len(allele_set) == 0:
                 raise ValueError("Passed in no alleles.")
-            return MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            result_mlst_profile = MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            if len(names_list) > 0:
+                result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)), result_mlst_profile)
+            return result_mlst_profile

-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         alleles = self.determine_mlst_allele_variants(query_sequence_strings)
         return await self.determine_mlst_st(alleles)

     async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
+        tasks = []
         async for named_strings in query_named_string_groups:
             for named_string in named_strings:
-                try:
-                    yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence])))
-                except NoBIGSdbMatchesException as e:
-                    if stop_on_fail:
-                        raise e
-                    yield NamedMLSTProfile(named_string.name, None)
+                tasks.append(self.profile_string(named_strings))
+        for task in asyncio.as_completed(tasks):
+            try:
+                yield await task
+            except NoBIGSdbMatchesException as e:
+                if stop_on_fail:
+                    raise e
+                causal_name = e.get_causal_query_name()
+                if causal_name is None:
+                    raise ValueError("Missing query name despite requiring names.")
+                yield NamedMLSTProfile(causal_name, None)

     async def close(self):
         await self._http_client.close()

     async def __aexit__(self, exc_type, exc_value, traceback):
         await self.close()

-
-class LocalBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
-    async def __aenter__(self):
-        if self._prepare:
-            await self.update_scheme_locis()
-            await asyncio.gather(
-                self.download_alleles_cache_data(),
-                self.download_scheme_profiles()
-            )
-            await self.load_scheme_profiles()
-        return self
-
-    def __init__(self, database_api: str, database_name: str, schema_id: int, cache_path: Union[str, None] = None, prepare: bool =True):
-        self._database_api = database_api
-        self._database_name = database_name
-        self._schema_id = schema_id
-        self._base_url = f"{self._database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
-        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
-        if cache_path is None:
-            self._cache_path = tempfile.mkdtemp("BIGSdb")
-            self._cleanup_required = True
-        else:
-            self._cache_path = cache_path
-            self._cleanup_required = False
-        self._loci: list[str] = []
-        self._profiles_st_map = {}
-        self._prepare = prepare
-
-    async def update_scheme_locis(self):
-        self._loci.clear()
-        async with self._http_client.get(f"/api/db/{self._database_name}/schemes/{self._schema_id}") as schema_response:
-            schema_json = await schema_response.json()
-            for locus in schema_json["loci"]:
-                locus_name = path.basename(locus)
-                self._loci.append(locus_name)
-        self._loci.sort()
-
-    async def load_scheme_profiles(self):
-        self._profiles_st_map.clear()
-        with open(self.get_scheme_profile_path()) as profile_cache_handle:
-            reader = csv.DictReader(profile_cache_handle, delimiter="\t")
-            for line in reader:
-                alleles = []
-                for locus in self._loci:
-                    alleles.append(line[locus])
-                self._profiles_st_map[tuple(alleles)] = (line["ST"], line["clonal_complex"])
-
-    def get_locus_cache_path(self, locus) -> str:
-        return path.join(self._cache_path, locus + "." + "fasta")
-
-    def get_scheme_profile_path(self):
-        return path.join(self._cache_path, "profiles.csv")
-
-    async def download_alleles_cache_data(self):
-        for locus in self._loci:
-            with open(self.get_locus_cache_path(locus), "wb") as fasta_handle:
-                async with self._http_client.get(f"/api/db/{self._database_name}/loci/{locus}/alleles_fasta") as fasta_response:
-                    async for chunk, eof in fasta_response.content.iter_chunks():
-                        fasta_handle.write(chunk)
-
-    async def download_scheme_profiles(self):
-        with open(self.get_scheme_profile_path(), "wb") as profile_cache_handle:
-            async with self._http_client.get("profiles_csv") as profiles_response:
-                async for chunk, eof in profiles_response.content.iter_chunks():
-                    profile_cache_handle.write(chunk)
-        await self.load_scheme_profiles()
-
-    async def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
-        aligner = PairwiseAligner("blastn")
-        aligner.mode = "local"
-        with AsyncBiopythonPairwiseAlignmentEngine(aligner, max_threads=4) as aligner_engine:
-            for query_sequence_string in query_sequence_strings:
-                for locus in self._loci:
-                    async for allele_variant in read_fasta(self.get_locus_cache_path(locus)):
-                        aligner_engine.align(allele_variant.sequence, query_sequence_string, variant_name=allele_variant.name, full=True)
-                        break # start a bunch of full alignments for each variant to select segments
-            alignment_rankings: dict[str, set[tuple[PairwiseAlignment, str]]] = defaultdict(set)
-            async for alignment_result, additional_information in aligner_engine:
-                result_variant_name = additional_information["variant_name"]
-                result_locus, variant_id = result_variant_name.split("_")
-                full_alignment = additional_information["full"]
-                if full_alignment:
-                    if alignment_result.alignment_stats.gaps == 0 and alignment_result.alignment_stats.mismatches == 0:
-                        # I.e., 100% exactly the same
-                        yield Allele(result_locus, variant_id, None)
-                        continue
-                    else:
-                        alignment_rankings[result_locus].add((alignment_result, variant_id))
-                        interest_sequence = full_alignment[alignment_result.query_indices[0]:alignment_result.query_indices[-1]]
-                        async for allele_variant in read_fasta(self.get_locus_cache_path(result_locus)):
-                            if result_variant_name == allele_variant.name:
-                                continue # Skip if we just finished aligning this
-                            aligner_engine.align(allele_variant.sequence, interest_sequence, variant_name=result_variant_name.name, full=False)
-                else:
-                    alignment_rankings[result_locus].add((alignment_result, variant_id))
-        for final_locus, alignments in alignment_rankings.items():
-            closest_alignment, closest_variant_id = sorted(alignments, key=lambda index: index[0].alignment_stats.match_metric)[0]
-            yield Allele(final_locus, closest_variant_id, closest_alignment.alignment_stats)
-
-    async def determine_mlst_st(self, alleles):
-        allele_variants: dict[str, Allele] = {}
-        if isinstance(alleles, AsyncIterable):
-            async for allele in alleles:
-                allele_variants[allele.allele_locus] = allele
-        else:
-            for allele in alleles:
-                allele_variants[allele.allele_locus] = allele
-        ordered_profile = []
-        for locus in self._loci:
-            ordered_profile.append(allele_variants[locus].allele_variant)
-
-        st, clonal_complex = self._profiles_st_map[tuple(ordered_profile)]
-        return MLSTProfile(set(allele_variants.values()), st, clonal_complex)
-
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
-        alleles = self.determine_mlst_allele_variants(query_sequence_strings)
-        return await self.determine_mlst_st(alleles)
-
-    async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
-        async for named_strings in query_named_string_groups:
-            for named_string in named_strings:
-                try:
-                    yield NamedMLSTProfile(named_string.name, await self.profile_string([named_string.sequence]))
-                except NoBIGSdbMatchesException as e:
-                    if stop_on_fail:
-                        raise e
-                    yield NamedMLSTProfile(named_string.name, None)
-
-    async def close(self):
-        await self._http_client.close()
-        if self._cleanup_required:
-            shutil.rmtree(self._cache_path)
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
@@ -334,5 +216,5 @@ class BIGSdbIndex(AbstractAsyncContextManager):

 def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, schema_id: int):
     if local:
-        return LocalBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, schema_id=schema_id)
+        raise NotImplementedError()
     return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, schema_id=schema_id)
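Taken together, these changes let callers pass NamedStrings straight through the profiler and get NamedMLSTProfiles back, with the factory now refusing local profiling. A minimal usage sketch, assuming the Pasteur database and schema ID used in the tests and a FASTA file on disk:

    import asyncio

    from autobigs.engine.analysis.bigsdb import get_BIGSdb_MLST_profiler
    from autobigs.engine.reading import read_fasta

    async def main():
        # local=True now raises NotImplementedError; only remote profiling remains.
        async with get_BIGSdb_MLST_profiler(
            local=False,
            database_api="https://bigsdb.pasteur.fr/api",
            database_name="pubmlst_bordetella_seqdef",
            schema_id=3,
        ) as profiler:
            named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
            # NamedString inputs yield a NamedMLSTProfile rather than a bare MLSTProfile.
            profile = await profiler.profile_string(named_strings)
            print(profile)

    asyncio.run(main())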
@@ -1,26 +0,0 @@
-import asyncio
-from contextlib import AbstractAsyncContextManager
-import tempfile
-from typing import Iterable, Union
-from Bio import Entrez
-from Bio import SeqIO
-
-from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation
-
-async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
-    with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
-        record = SeqIO.read(fetch_stream, "genbank")
-        sequence_features = list()
-        for feature in record.features:
-            start = int(feature.location.start)
-            end = int(feature.location.end)
-            qualifiers = feature.qualifiers
-            for qualifier_key in qualifiers:
-                qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
-            sequence_features.append(StringAnnotation(
-                type=feature.type,
-                start=start,
-                end=end+1, # Position is exclusive
-                feature_properties=qualifiers
-            ))
-        return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
autobigs.engine.exceptions.database

@@ -5,8 +5,12 @@ class BIGSDbDatabaseAPIException(Exception):


 class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
-    def __init__(self, database_name: str, database_schema_id: int, *args):
+    def __init__(self, database_name: str, database_schema_id: int, query_name: Union[None, str], *args):
+        self._query_name = query_name
         super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)

+    def get_causal_query_name(self) -> Union[str, None]:
+        return self._query_name
+
 class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
     def __init__(self, database_name: str, database_schema_id: int, *args):
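The new query_name parameter threads the name of the offending query through the exception, which is what the reworked profile_multiple_strings uses to emit a NamedMLSTProfile(name, None) placeholder on failure. A small sketch of the round trip (the database name, schema ID, and query name here are illustrative values, not ones the library hard-codes):

    from autobigs.engine.exceptions.database import NoBIGSdbMatchesException

    try:
        # Illustrative values; the profiler raises this when a named query has no matches.
        raise NoBIGSdbMatchesException("pubmlst_bordetella_seqdef", 3, "contig-1")
    except NoBIGSdbMatchesException as e:
        # Returns None when the caller profiled a bare string instead of a NamedString.
        name = e.get_causal_query_name()
        print(f"No matches for {name!r}")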
autobigs.engine.reading

@@ -5,12 +5,16 @@ from Bio import SeqIO

 from autobigs.engine.structures.genomics import NamedString

-async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
+async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]:
     fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
+    results = []
     for fasta_sequence in await fasta_sequences:
-        yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
+        results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq)))
+    return results

-async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]:
+async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
+    tasks = []
     for handle in handles:
-        async for named_seq in read_fasta(handle):
-            yield named_seq
+        tasks.append(read_fasta(handle))
+    for task in asyncio.as_completed(tasks):
+        yield await task
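Under the new signatures, read_fasta is awaited once and hands back every record, while read_multiple_fastas yields one record group per input file as each parse completes. A minimal consumption sketch, assuming the test FASTA paths from this repository:

    import asyncio

    from autobigs.engine.reading import read_fasta, read_multiple_fastas

    async def main():
        # One await now returns the whole list of NamedStrings for a file.
        records = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
        print([record.name for record in records])

        # Groups arrive in completion order, one Iterable[NamedString] per file.
        paths = ["tests/resources/tohama_I_bpertussis.fasta",
                 "tests/resources/2014-102_hinfluenza.fasta"]
        async for group in read_multiple_fastas(paths):
            for named_string in group:
                print(named_string.name, len(named_string.sequence))

    asyncio.run(main())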
autobigs.engine.writing

@@ -3,28 +3,32 @@ import csv
 from os import PathLike
 from typing import AsyncIterable, Collection, Mapping, Sequence, Union

-from autobigs.engine.structures.mlst import Allele, MLSTProfile
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile


-def alleles_to_map(alleles: Collection[Allele]) -> Mapping[str, Union[list[str], str]]:
+def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
     result = defaultdict(list)
     for allele in alleles:
-        result[allele.allele_locus].append(allele.allele_variant)
+        result[allele.allele_locus].append(allele.allele_variant + ("*" if allele.partial_match_profile is not None else ""))
     for locus in result.keys():
         if len(result[locus]) == 1:
             result[locus] = result[locus][0] # Take the only one
         else:
             result[locus] = tuple(result[locus]) # type: ignore
     return dict(result)

-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
     failed = list()
     with open(handle, "w", newline='') as filehandle:
         header = None
         writer: Union[csv.DictWriter, None] = None
-        async for name, mlst_profile in mlst_profiles_iterable:
+        async for named_mlst_profile in mlst_profiles_iterable:
+            name = named_mlst_profile.name
+            mlst_profile = named_mlst_profile.mlst_profile
             if mlst_profile is None:
                 failed.append(name)
                 continue
-            allele_mapping = alleles_to_map(mlst_profile.alleles)
+            allele_mapping = alleles_to_text_map(mlst_profile.alleles)
             if writer is None:
                 header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
                 writer = csv.DictWriter(filehandle, fieldnames=header)
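The renamed alleles_to_text_map appends "*" to variants that only matched partially, collapses single-variant loci to a bare string, and turns multi-variant loci into tuples; the new test file later in this diff asserts exactly this shape. A compact sketch of the mapping:

    from autobigs.engine.structures.alignment import AlignmentStats
    from autobigs.engine.structures.mlst import Allele
    from autobigs.engine.writing import alleles_to_text_map

    alleles = (
        Allele("A", "1", None),
        Allele("C", "1", None),
        Allele("C", "2", AlignmentStats(90, 10, 0, 90)),  # partial match, rendered "2*"
    )
    # Lone variants collapse to a string; multiple variants become a tuple.
    assert alleles_to_text_map(alleles) == {"A": "1", "C": ("1", "2*")}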
@@ -1,42 +0,0 @@
-from Bio import SeqIO
-from Bio.Align import PairwiseAligner
-from pytest import mark
-from pytest import fixture
-from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
-from autobigs.engine.structures.alignment import PairwiseAlignment
-
-@fixture
-def tohamaI_bpertussis_adk():
-    return str(SeqIO.read("tests/resources/tohama_I_bpertussis_adk.fasta", format="fasta").seq)
-
-@fixture
-def tohamaI_bpertussis_genome():
-    return str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", format="fasta").seq)
-
-@fixture
-def fdaargos_1560_hinfluenza_adk():
-    return str(SeqIO.read("tests/resources/fdaargos_1560_hinfluenza_adk.fasta", format="fasta").seq)
-
-@fixture
-def fdaargos_1560_hinfluenza_genome():
-    return str(SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", format="fasta").seq)
-
-
-@fixture(params=[1, 2])
-def dummy_engine(request):
-    aligner = PairwiseAligner("blastn")
-    aligner.mode = "local"
-    with AsyncBiopythonPairwiseAlignmentEngine(aligner, request.param) as engine:
-        yield engine
-
-class TestAsyncPairwiseAlignmentEngine:
-    async def test_single_alignment_no_errors_single_alignment(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
-        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
-        async for alignment, additional_information in dummy_engine:
-            assert isinstance(alignment, PairwiseAlignment)
-
-    async def test_single_alignment_no_errors_multiple(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk, fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
-        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
-        dummy_engine.align(fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk)
-        async for alignment, additional_information in dummy_engine:
-            assert isinstance(alignment, PairwiseAlignment)
@@ -9,7 +9,7 @@ from autobigs.engine.structures import mlst
 from autobigs.engine.structures.genomics import NamedString
 from autobigs.engine.structures.mlst import Allele, MLSTProfile
 from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
-from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, LocalBIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
+from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler

 async def generate_async_iterable(normal_iterable):
     for dummy_sequence in normal_iterable:
@@ -50,33 +50,30 @@ bpertussis_tohamaI_bad_profile = MLSTProfile((
     Allele("pgm", "5", None),
 ), "unknown", "unknown")

-hinfluenzae_fdaargos_profile = MLSTProfile((
-    Allele("adk", "1", None),
-    Allele("atpG", "1", None),
-    Allele("frdB", "1", None),
-    Allele("fucK", "1", None),
-    Allele("mdh", "1", None),
-    Allele("pgi", "1", None),
-), "3", "ST-3 complex")
+hinfluenzae_2014_102_profile = MLSTProfile((
+    Allele("adk", "28", None),
+    Allele("atpG", "33", None),
+    Allele("frdB", "7", None),
+    Allele("fucK", "18", None),
+    Allele("mdh", "11", None),
+    Allele("pgi", "125", None),
+    Allele("recA", "89", None)
+), "478", "unknown")
+
+hinfluenzae_2014_102_bad_profile = MLSTProfile((
+    Allele("adk", "3", None),
+    Allele("atpG", "121", None),
+    Allele("frdB", "6", None),
+    Allele("fucK", "5", None),
+    Allele("mdh", "12", None),
+    Allele("pgi", "4", None),
+    Allele("recA", "5", None)
+), "unknown", "unknown")

-hinfluenzae_fdaargos_bad_profile = MLSTProfile((
-    Allele("adk", "1", None),
-    Allele("atpG", "1", None),
-    Allele("frdB", "1", None),
-    Allele("fucK", "1", None),
-    Allele("mdh", "1", None),
-    Allele("pgi", "1", None),
-    Allele("recA", "5", None)
-), "3", "ST-3 complex")
-
-hinfluenzae_fdaargos_sequence = str(SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", "fasta").seq)
-
-hinfluenzae_fdaargos_fragmented_sequence = tuple(SeqIO.parse("tests/resources/tohama_I_bpertussis_features.fasta", "fasta"))

 @pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
     (False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
     (True, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
+    (False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
 ])
 class TestBIGSdbMLSTProfiler:
     async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
@@ -202,7 +199,6 @@ class TestBIGSdbIndex:
         assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"

     @pytest.mark.parametrize("local", [
-        (True),
         (False)
     ])
     async def test_bigsdb_index_instantiates_correct_profiler(self, local):
@@ -2,6 +2,6 @@ from autobigs.engine.reading import read_fasta


 async def test_fasta_reader_not_none():
-    named_strings = read_fasta("tests/resources/tohama_I_bpertussis.fasta")
-    async for named_string in named_strings:
+    named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
+    for named_string in named_strings:
         assert named_string.name == "BX470248.1"
@@ -0,0 +1,47 @@
+from typing import AsyncIterable, Iterable
+
+import pytest
+from autobigs.engine.structures.alignment import AlignmentStats
+from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
+import tempfile
+from csv import reader
+from os import path
+
+
+@pytest.fixture
+def dummy_alphabet_mlst_profile():
+    return NamedMLSTProfile("name", MLSTProfile((
+        Allele("A", "1", None),
+        Allele("D", "1", None),
+        Allele("B", "1", None),
+        Allele("C", "1", None),
+        Allele("C", "2", AlignmentStats(90, 10, 0, 90))
+    ), "mysterious", "very mysterious"))
+
+async def iterable_to_asynciterable(iterable: Iterable):
+    for iterated in iterable:
+        yield iterated
+
+async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile):
+    dummy_profiles = [dummy_alphabet_mlst_profile]
+    with tempfile.TemporaryDirectory() as temp_dir:
+        output_path = path.join(temp_dir, "out.csv")
+        await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
+        with open(output_path) as csv_handle:
+            csv_reader = reader(csv_handle)
+            lines = list(csv_reader)
+            target_columns = lines[4:]
+            assert target_columns == sorted(target_columns)
+
+async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
+    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore
+    expected_mapping = {
+        "A": "1",
+        "B": "1",
+        "C": ("1", "2*"),
+        "D": "1"
+    }
+    for allele_name, allele_ids in mapping.items():
+        assert allele_name in expected_mapping
+        assert allele_ids == expected_mapping[allele_name]
tests/resources/2014-102_hinfluenza.fasta (Normal file, 28244 lines)
File diff suppressed because it is too large
tests/resources/2014-102_hinfluenza_features.fasta (Normal file, 27751 lines)
File diff suppressed because it is too large

File diff suppressed because it is too large
@@ -1,11 +0,0 @@
->lcl|CP085952.1_gene_371 [gene=adk] [locus_tag=LK401_01855] [location=complement(365128..365772)] [gbkey=Gene]
-ATGAAAATTATTCTTTTAGGTGCACCGGGTGCAGGTAAAGGCACTCAAGCACAATTTATTATGAACAAAT
-TTGGTATCCCGCAAATTTCAACTGGTGATATGTTCCGTGCTGCAATCAAAGCGGGGACTGAACTTGGCAA
-ACAAGCTAAAGCATTAATGGATGAAGGTAAATTAGTGCCAGATGAATTAACCGTTGCCCTTGTAAAAGAT
-CGTATTGCTCAAGCTGACTGCACAAATGGTTTCTTGTTAGATGGTTTCCCTCGTACTATTCCACAAGCGG
-ATGCACTGAAAGATTCAGGTGTTAAAATTGACTTTGTTTTAGAATTTGATGTGCCAGACGAAGTGATTGT
-TGAACGTATGAGTGGCCGTCGCGTACACCAAGCGTCTGGCCGTTCTTACCACATCGTTTATAATCCACCA
-AAAGTGGAAGGTAAAGATGATGTAACAGGCGAAGATTTAATTATTCGTGCAGACGATAAACCAGAAACTG
-TATTAGATCGTTTAGCCGTATATCATAAACAAACTAGCCCATTAATTGATTATTACCAAGCAGAAGCGAA
-AGCGGGGAATACTCAATATTTCCGTTTAGACGGTACACAAAAAGTAGAAGAAGTTAGCCAAGAGTTAGAT
-AAAATCTTAGGCTAA
File diff suppressed because it is too large