Copied over most code from another of my projects
This commit is contained in:
commit
02985d5e37
22
.devcontainer/devcontainer.json
Normal file
22
.devcontainer/devcontainer.json
Normal file
@ -0,0 +1,22 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||
{
|
||||
"name": "Python 3",
|
||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
"postCreateCommand": "pip3 install --user -r requirements.txt"
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
359
.gitignore
vendored
Normal file
359
.gitignore
vendored
Normal file
@ -0,0 +1,359 @@
|
||||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### Node ###
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
### Node Patch ###
|
||||
# Serverless Webpack directories
|
||||
.webpack/
|
||||
|
||||
# Optional stylelint cache
|
||||
|
||||
# SvelteKit build / generate output
|
||||
.svelte-kit
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### Svelte ###
|
||||
# gitignore template for the SvelteKit, frontend web component framework
|
||||
# website: https://kit.svelte.dev/
|
||||
|
||||
.svelte-kit/
|
||||
package
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
||||
|
||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
|
||||
output
|
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "CLI ipdbmlst",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/src/automlst/cli/root.py",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"-fa",
|
||||
"${workspaceFolder}/tests/resources/tohama_I_bpertussis.fasta",
|
||||
"-ipdbmlst",
|
||||
"pubmlst_bordetella_seqdef",
|
||||
"${workspaceFolder}/output"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/src",
|
||||
"env": {
|
||||
"PYTHONPATH": "${workspaceFolder}/src"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
4
.vscode/settings.json
vendored
Normal file
4
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true
|
||||
}
|
50
Jenkinsfile
vendored
Normal file
50
Jenkinsfile
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
cloud 'rsys-devel'
|
||||
defaultContainer 'homebrew'
|
||||
inheritFrom 'homebrew'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage("install") {
|
||||
steps {
|
||||
sh 'brew install python@3.11 sphinx-doc'
|
||||
sh 'python3.11 -m pip install -r requirements.txt'
|
||||
}
|
||||
}
|
||||
stage("unit tests") {
|
||||
steps {
|
||||
sh returnStatus: true, script: "python3.11 -m pytest --junitxml=test_results.xml"
|
||||
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
||||
}
|
||||
}
|
||||
stage("build") {
|
||||
steps {
|
||||
sh "python3.11 -m build"
|
||||
}
|
||||
}
|
||||
stage("test installation") {
|
||||
steps {
|
||||
sh "python3.11 -m pip install dist/*.whl --force-reinstall"
|
||||
sh "automlst -h"
|
||||
}
|
||||
}
|
||||
stage("archive") {
|
||||
steps {
|
||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
stage("publish") {
|
||||
environment {
|
||||
CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
|
||||
}
|
||||
when {
|
||||
branch '**/main'
|
||||
}
|
||||
steps {
|
||||
sh returnStatus: true, script: 'python3.11 -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# FASTA-MLST
|
||||
|
||||
A CLI tool for rapidly performing MLST typing by querying the pubMLST and Institut Pasteur MLST databases.
|
50
pyproject.toml
Normal file
50
pyproject.toml
Normal file
@ -0,0 +1,50 @@
|
||||
[build-system]
|
||||
requires = ["setuptools >= 61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "automlst"
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"biopython",
|
||||
"aiohttp[speedups]",
|
||||
]
|
||||
requires-python = ">=3.11"
|
||||
description = "A tool to rapidly fetch MLST profiles given sequences for various diseases."
|
||||
|
||||
[project.scripts]
|
||||
automlst = "automlst.cli.root:cli"
|
||||
nsbdiagtk = "automlst.cli.root:cli"
|
||||
|
||||
[tool.pyright]
|
||||
extraPaths = ["src"]
|
||||
exclude = [
|
||||
"**/node_modules",
|
||||
"**/__pycache__"
|
||||
]
|
||||
executionEnvironments = [
|
||||
{root = "src"}
|
||||
]
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = [
|
||||
"tests"
|
||||
]
|
||||
pythonpath = [
|
||||
"src"
|
||||
]
|
||||
addopts = [
|
||||
"--import-mode=importlib",
|
||||
]
|
||||
asyncio_mode = "auto"
|
||||
|
||||
[tool.pylint.main]
|
||||
source-roots = "src"
|
||||
|
||||
[tool.pylint.format]
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length = 88
|
||||
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
||||
aiohttp[speedups]
|
||||
biopython
|
||||
pytest
|
||||
pytest-asyncio
|
||||
build
|
||||
twine
|
23
src/automlst/cli/aggregator.py
Normal file
23
src/automlst/cli/aggregator.py
Normal file
@ -0,0 +1,23 @@
|
||||
from os import path
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence
|
||||
from automlst.engine.data.MLST import MLSTProfile
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.local.abif import read_abif
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
from automlst.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
|
||||
|
||||
|
||||
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]:
    """Yield every input sequence as a plain string, FASTA inputs first.

    Args:
        fastas: Paths passed one by one to ``read_fasta``; the ``sequence``
            of each record it produces is yielded in order.
        abifs: Paths passed one by one to ``read_abif``; the elements of the
            returned trace's ``sequence`` are joined into a single string.

    Yields:
        One string per FASTA record, followed by one string per ABIF file.
    """
    for fasta_file in fastas:
        async for record in read_fasta(fasta_file):
            yield record.sequence

    for abif_file in abifs:
        trace = await read_abif(abif_file)
        bases = "".join(trace.sequence)
        yield bases
|
||||
|
||||
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
    """Profile each incoming string against one Institut Pasteur database.

    A single ``InstitutPasteurProfiler`` is opened for ``database_name`` and
    reused for every string; results are collected in arrival order.

    Args:
        strings: Asynchronous stream of sequence strings to profile.
        database_name: Forwarded to ``InstitutPasteurProfiler``.

    Returns:
        The results of ``profile_string`` for every input, in input order.
    """
    collected: list[MLSTProfile] = []
    async with InstitutPasteurProfiler(database_name=database_name) as session:
        async for genetic_string in strings:
            profile = await session.profile_string(genetic_string)
            collected.append(profile)
    return collected
|
70
src/automlst/cli/root.py
Normal file
70
src/automlst/cli/root.py
Normal file
@ -0,0 +1,70 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
from os import path
|
||||
import os
|
||||
|
||||
from automlst.cli import aggregator
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.local.abif import read_abif
|
||||
from automlst.engine.local.csv import write_mlst_profiles_as_csv
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
|
||||
|
||||
# Command-line interface definition. The parser is built once at import time
# and consumed by ``cli``.
#
# NOTE: ``--fasta`` and ``--abif`` both take one or more values, so argparse
# consumes every following non-option token for them. Place the positional
# ``out`` after another option (or after ``--``) so it is not swallowed.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--run-name", "-name",
    dest="run_name",
    required=False,
    # Evaluated once, when this module is imported.
    default=datetime.datetime.now().strftime(r"%Y%m%d%H%M%S"),
    type=str,
    help="The name of the run. Will use a date and time string if not provided."
)
parser.add_argument(
    "--fasta", "-fa", "-fst",
    nargs="+",
    action='extend',
    dest="fastas",
    required=False,
    default=[],
    type=str,
    help="The FASTA files to process. Multiple can be listed."
)
parser.add_argument(
    "--abif", "-abi", "-ab1",
    # Without ``nargs`` the ``extend`` action receives one plain string and
    # extends the list with its individual characters; ``nargs="+"`` makes
    # each path a list element, matching ``--fasta`` and the help text.
    nargs="+",
    action='extend',
    dest="abifs",
    required=False,
    default=[],
    type=str,
    help="The ABIF files to process. Multiple can be listed."
)
parser.add_argument(
    "--institut-pasteur-mlst",
    "-ipdbmlst",
    dest="institut_pasteur_db",
    required=False,
    default=None,
    type=str,
    help="The Institut Pasteur MLST database to use."
)
parser.add_argument(
    "out",
    # NOTE(review): a positional without ``nargs`` is required, so argparse
    # never falls back to this default. Kept as-is so existing invocations
    # parse exactly as before.
    default="./.",
    help="The output folder. Files will be named by the provided (or default) run name."
)
|
||||
|
||||
|
||||
async def _profile_and_write(args: argparse.Namespace) -> None:
    """Profile every input sequence and write the profiles to a CSV file.

    Runs the whole asynchronous pipeline inside a single event loop: the
    sequence generator, the remote profiling and the CSV writer.
    """
    gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs)
    mlst_profiles = await aggregator.profile_all_genetic_strings(
        gen_strings, args.institut_pasteur_db)
    csv_path = str(path.join(args.out, "MLST_" + args.run_name + ".csv"))
    await write_mlst_profiles_as_csv(mlst_profiles, csv_path)


def cli():
    """Entry point of the command-line tool.

    Parses the process arguments, makes sure the output folder exists and,
    when an Institut Pasteur database was named, writes
    ``MLST_<run name>.csv`` into that folder. Nothing else is produced when
    no database is given.
    """
    args = parser.parse_args()
    os.makedirs(args.out, exist_ok=True)
    if args.institut_pasteur_db is not None:
        # One asyncio.run for the whole pipeline instead of one event loop
        # for profiling and a second one for writing.
        asyncio.run(_profile_and_write(args))


if __name__ == "__main__":
    cli()
|
0
src/automlst/engine/__init__.py
Normal file
0
src/automlst/engine/__init__.py
Normal file
44
src/automlst/engine/annotate.py
Normal file
44
src/automlst/engine/annotate.py
Normal file
@ -0,0 +1,44 @@
|
||||
import asyncio
|
||||
from collections.abc import Set
|
||||
from typing import Any, Generator, List, Sequence
|
||||
from Bio.Align import PairwiseAligner
|
||||
from Bio import Entrez
|
||||
from Bio import SeqIO
|
||||
import numpy as np
|
||||
|
||||
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
|
||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
||||
|
||||
|
||||
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length: int = 512, gene_targets: Set = frozenset()):
    """Transfer gene annotations from a GenBank reference onto a query string.

    The reference is fetched with ``fetch_ncbi_genbank``. Each of its
    annotations of type ``"gene"`` that carries a ``"gene"`` property is
    locally aligned against ``query_string``; the extent of the chosen
    alignment on the query becomes the start/end of a new annotation.

    Args:
        genbank_id: Identifier forwarded to ``fetch_ncbi_genbank``.
        query_name: Name given to the returned ``AnnotatedString``.
        query_string: Sequence to annotate.
        max_annotation_length: Reference annotations longer than this
            (``end - start``) are skipped; values <= 0 disable the limit.
        gene_targets: When non-empty, only annotations whose ``"gene"``
            property set intersects this set are considered.

    Returns:
        An ``AnnotatedString`` holding ``query_string`` and the annotations
        that could be placed on it.
    """
    # TODO implement asynchronous alignment algorithm
    reference_annotations = await fetch_ncbi_genbank(genbank_id=genbank_id)
    query_annotations = list()
    aligner = PairwiseAligner("blastn")
    aligner.mode = "local"
    for annotation in reference_annotations.annotations:
        if annotation.type != "gene" or "gene" not in annotation.feature_properties:
            continue
        # The "gene" key is guaranteed to exist past the guard above.
        if len(gene_targets) > 0 and not annotation.feature_properties["gene"].intersection(gene_targets):
            continue
        if max_annotation_length > 0 and annotation.end - annotation.start > max_annotation_length:
            # TODO implement a failsafe
            continue
        feature_string_sequence = get_feature_coding(annotated_string=reference_annotations, string_annotation=annotation)
        alignments = aligner.align(query_string, feature_string_sequence)
        if len(alignments) < 1:
            # TODO implement a failsafe
            continue
        # NOTE(review): this materializes and sorts every alignment and takes
        # the smallest one - confirm that this ordering really selects the
        # intended "top" alignment.
        top_alignment = sorted(alignments)[0]
        # TODO Check if alternatives are better
        query_annotations.append(StringAnnotation(
            type=annotation.type,  # same as original
            start=np.min(top_alignment.aligned[0]),  # We only care about the start of first chunk
            end=np.max(top_alignment.aligned[0]),  # and the end of the last chunk
            feature_properties=dict(annotation.feature_properties)  # same as original
        ))
    return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
||||
|
||||
def get_feature_coding(annotated_string: AnnotatedString, string_annotation: StringAnnotation) -> str:
    """Return the part of the sequence that the annotation spans.

    The annotation's ``start`` and ``end`` are used as ordinary slice
    bounds on ``annotated_string.sequence``.
    """
    span = slice(string_annotation.start, string_annotation.end)
    return annotated_string.sequence[span]
|
13
src/automlst/engine/data/MLST.py
Normal file
13
src/automlst/engine/data/MLST.py
Normal file
@ -0,0 +1,13 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping, Sequence
|
||||
|
||||
@dataclass
class Allele:
    """One allele call: a locus name paired with a variant identifier."""

    # Name of the locus this allele belongs to.
    allele_loci: str
    # Identifier of the variant found at that locus.
    allele_variant: str
|
||||
|
||||
@dataclass
class MLSTProfile:
    """An MLST typing result: allele calls plus sequence type and clonal complex."""

    # Allele calls grouped by locus name.
    alleles: Mapping[str, Sequence[Allele]]
    # Sequence type (ST) number of the profile.
    sequence_type: int
    # Clonal complex label of the profile.
    clonal_complex: str
|
0
src/automlst/engine/data/__init__.py
Normal file
0
src/automlst/engine/data/__init__.py
Normal file
105
src/automlst/engine/data/genomics.py
Normal file
105
src/automlst/engine/data/genomics.py
Normal file
@ -0,0 +1,105 @@
|
||||
from dataclasses import dataclass
|
||||
from numbers import Number
|
||||
from typing import Mapping, Sequence, Set, Union
|
||||
|
||||
|
||||
@dataclass
class StringAnnotation:
    """A typed, positioned feature on a sequence string."""

    # Feature type label, e.g. "gene".
    type: str
    # Start position of the feature on the sequence.
    start: int
    # End position of the feature on the sequence.
    # presumably exclusive (slice-style) - TODO confirm against producers
    end: int
    # Named properties of the feature; each property holds a set of values.
    feature_properties: Mapping[str, Set[str]]
|
||||
|
||||
@dataclass
class NamedString:
    """A sequence string together with the name that identifies it."""

    # Identifier of the sequence.
    name: str
    # The sequence itself.
    sequence: str
|
||||
|
||||
@dataclass
class AnnotatedString(NamedString):
    """A named sequence string plus the annotations placed on it."""

    # Features located on ``sequence``.
    annotations: Sequence[StringAnnotation]
|
||||
|
||||
@dataclass
class SangerTraceData:
    """Flat record of the values read from one ABIF (Sanger trace) file.

    Field order matters: ``read_abif`` builds instances positionally, one
    ABIF tag per field after ``sequence``. The tag named in each comment is
    the one ``read_abif`` reads for that field.

    NOTE(review): ``analysis_rpto_settings_ver``, ``inection_voltage_v`` and
    ``run_temperature_setting_celcius`` look misspelled ("proto",
    "injection", "celsius"). They are part of the constructor interface, so
    renaming them would break keyword callers.
    """

    sequence: Sequence[str]  # the record's sequence as read by Biopython
    seq_param_file_name: str  # ABIF tag APFN2
    analysis_proto_settings_name: str  # ABIF tag APrN1
    analysis_rpto_settings_ver: str  # ABIF tag APrV1
    analysis_proto_xml_data: str  # ABIF tag APrX1
    analysis_proto_xml_schema_ver: str  # ABIF tag APXV1
    sample_comment: Union[None, str]  # ABIF tag CMNT1
    capillary_machine: bool  # ABIF tag CpEP1
    container_identifier: str  # ABIF tag CTID1
    container_name: str  # ABIF tag CTNM1
    comment_title: str  # ABIF tag CTTL1
    channel_1: Sequence[Number]  # ABIF tag DATA1
    channel_2: Sequence[Number]  # ABIF tag DATA2
    channel_3: Sequence[Number]  # ABIF tag DATA3
    channel_4: Sequence[Number]  # ABIF tag DATA4
    measured_voltage_dv: Sequence[Number]  # ABIF tag DATA5
    measured_current_ma: Sequence[Number]  # ABIF tag DATA6
    measured_power_mw: Sequence[Number]  # ABIF tag DATA7
    measured_temperature_celsius: Sequence[Number]  # ABIF tag DATA8
    down_sample_factor: Number  # ABIF tag DSam1
    dye_1: str  # ABIF tag DyeN1
    dye_2: str  # ABIF tag DyeN2
    dye_3: str  # ABIF tag DyeN3
    dye_4: str  # ABIF tag DyeN4
    dye_wavelength_1: str  # ABIF tag DyeW1
    dye_wavelength_2: str  # ABIF tag DyeW2
    dye_wavelength_3: str  # ABIF tag DyeW3
    dye_wavelength_4: str  # ABIF tag DyeW4
    dye_set_name: str  # ABIF tag DySN1
    electrophoresis_voltage_setting_v: Number  # ABIF tag EPVt1
    start_run_event: str  # ABIF tag EVNT1
    stop_run_event: str  # ABIF tag EVNT2
    start_collection_event: str  # ABIF tag EVNT3
    stop_collection_event: str  # ABIF tag EVNT4
    base_order: Sequence[str]  # ABIF tag FWO_1
    gel_type_desc: str  # ABIF tag GTyp1
    injection_time_sec: Number  # ABIF tag InSc1
    inection_voltage_v: Number  # ABIF tag InVt1
    lane_or_capillary: Number  # ABIF tag LANE1
    sample_tracking_id: str  # ABIF tag LIMS1
    length_to_detector_cm: Number  # ABIF tag LNTD1
    laser_power_mw: Number  # ABIF tag LsrP1
    instrument_name_and_serial: str  # ABIF tag MCHN1
    data_collection_module_file: str  # ABIF tag MODF1
    model_number: str  # ABIF tag MODL1
    pixels_avg_per_lane: Number  # ABIF tag NAVG1
    number_of_capillaries: Number  # ABIF tag NLNE1
    marked_off_scale_scans: Union[None, Sequence[Number]]  # ABIF tag OfSc1
    # Skipped Ovrl, OvrV
    mobility_file: str  # ABIF tag PDMF1
    # Skipped PRJT, PROJ
    pixel_bin_size: Number  # ABIF tag PXLB1
    # Skipped scan rate
    results_group_comment: Union[None, str]  # ABIF tag RGCm1
    results_group_name: str  # ABIF tag RGNm1
    run_module_ver: str  # ABIF tag RMdV1
    run_module_xml: str  # ABIF tag RMdX1
    run_module_xml_ver: str  # ABIF tag RMXV1
    run_proto_name: str  # ABIF tag RPrN1
    run_proto_ver: str  # ABIF tag RPrV1
    run_start_date: str  # Date time object; ABIF tag RUND1
    run_stop_date: str  # Date time object; ABIF tag RUND2
    data_collection_start_date: str  # ABIF tag RUND3
    data_collection_stop_date: str  # ABIF tag RUND4
    run_name: str  # ABIF tag RunN1
    run_start_time: str  # time object; ABIF tag RUNT1
    run_stop_time: str  # time object; ABIF tag RUNT2
    collection_start_time: str  # time object; ABIF tag RUNT3
    collection_stop_time: str  # time object; ABIF tag RUNT4
    saturated_data_points: Union[None, Sequence[Number]]  # ABIF tag Satd
    color_rescaling_divisor: Number  # ABIF tag Scal1
    scan_count: Number  # ABIF tag SCAN1
    polymer_lot_expiration: str  # date time object; ABIF tag SMED1
    polymer_lot_number: Number  # ABIF tag SMLt
    sample_name: str  # ABIF tag SMPL1
    # Skipped genescan data
    # Skipped size standard file name
    data_collection_software_ver: str  # ABIF tag SVER1
    data_collection_firmware_ver: str  # ABIF tag SVER3
    run_temperature_setting_celcius: Number  # ABIF tag Tmpr1
    well_id: str  # ABIF tag TUBE
    plate_user_name: str  # ABIF tag User
|
104
src/automlst/engine/local/abif.py
Normal file
104
src/automlst/engine/local/abif.py
Normal file
@ -0,0 +1,104 @@
|
||||
import asyncio
|
||||
from numbers import Number
|
||||
from os import path
|
||||
from typing import Sequence, Union
|
||||
from automlst.engine.data.genomics import SangerTraceData
|
||||
from Bio.SeqRecord import SeqRecord
|
||||
from Bio import SeqIO
|
||||
|
||||
|
||||
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
    """Read the record of an ABIF file with Biopython.

    Blocking helper: opens ``seq_path`` in binary mode and parses it with
    the ``"abi"`` format of ``SeqIO.read``.
    """
    with open(seq_path, "rb") as abif_stream:
        record = SeqIO.read(abif_stream, "abi")
    return record
|
||||
|
||||
|
||||
# File extensions accepted by read_abif (as returned by os.path.splitext,
# i.e. including the leading dot).
_ABIF_EXTENSIONS = (".ab1", ".abi")

# Keys looked up in Biopython's ``abif_raw`` mapping, listed in the exact
# positional order of the SangerTraceData fields that follow ``sequence``.
# Biopython names every key "<tag name><tag number>", so each entry carries
# its numeric suffix.
_ABIF_TRACE_TAGS = (
    "APFN2", "APrN1", "APrV1", "APrX1", "APXV1",
    "CMNT1", "CpEP1", "CTID1", "CTNM1", "CTTL1",
    "DATA1", "DATA2", "DATA3", "DATA4",
    "DATA5", "DATA6", "DATA7", "DATA8",
    "DSam1",
    "DyeN1", "DyeN2", "DyeN3", "DyeN4",
    "DyeW1", "DyeW2", "DyeW3", "DyeW4",
    "DySN1", "EPVt1",
    "EVNT1", "EVNT2", "EVNT3", "EVNT4",
    "FWO_1", "GTyp1", "InSc1", "InVt1", "LANE1", "LIMS1", "LNTD1", "LsrP1",
    "MCHN1", "MODF1", "MODL1", "NAVG1", "NLNE1", "OfSc1", "PDMF1", "PXLB1",
    "RGCm1", "RGNm1", "RMdV1", "RMdX1", "RMXV1", "RPrN1", "RPrV1",
    "RUND1", "RUND2", "RUND3", "RUND4",
    "RunN1",
    "RUNT1", "RUNT2", "RUNT3", "RUNT4",
    "Satd1", "Scal1", "SCAN1", "SMED1", "SMLt1", "SMPL1",
    "SVER1", "SVER3", "Tmpr1", "TUBE1", "User1",
)


async def read_abif(seq_path: str) -> SangerTraceData:
    """Read an ABIF trace file into a ``SangerTraceData`` record.

    The file is parsed by Biopython in a worker thread so the event loop is
    not blocked while reading.

    Args:
        seq_path: Path of the trace file; must end in ``.ab1`` or ``.abi``
            (case-insensitive).

    Returns:
        A ``SangerTraceData`` whose ``sequence`` is the Biopython record's
        sequence and whose remaining fields are the raw ABIF tag values.
        Tags missing from the file yield ``None``.

    Raises:
        ValueError: If ``seq_path`` has a different file extension.
    """
    ext = path.splitext(seq_path)[1].lower()
    # splitext keeps the leading dot, so both accepted values include it.
    if ext not in _ABIF_EXTENSIONS:
        raise ValueError(
            'seq_path must have file extension of "ab1", or "abi".')
    biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
    biopython_annotations = biopython_seq.annotations

    # Type ignoring since Biopython did not define their typing.
    biopython_abif_raw = biopython_annotations["abif_raw"]  # type: ignore
    trace_data = SangerTraceData(
        biopython_seq.seq,
        *(biopython_abif_raw.get(tag) for tag in _ABIF_TRACE_TAGS),  # type: ignore
    )
    return trace_data
|
31
src/automlst/engine/local/csv.py
Normal file
31
src/automlst/engine/local/csv.py
Normal file
@ -0,0 +1,31 @@
|
||||
import csv
|
||||
from io import TextIOWrapper
|
||||
from os import PathLike
|
||||
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from automlst.engine.data.MLST import Allele, MLSTProfile
|
||||
|
||||
|
||||
def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
    """Reduce a locus -> alleles mapping to a locus -> variant-id-list mapping.

    Every key of ``alleles_map`` is kept (in iteration order) and mapped to
    a new list holding the ``allele_variant`` of each of its alleles.
    """
    return {
        locus: [allele.allele_variant for allele in locus_alleles]
        for locus, locus_alleles in alleles_map.items()
    }
|
||||
|
||||
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
    """Write MLST profiles to a CSV file, one row per profile.

    The columns are ``st``, ``clonal-complex`` and then one column per locus
    of the first profile. Each locus cell holds the list of variant ids
    returned by ``loci_alleles_variants_from_loci`` for that locus.

    Args:
        mlst_profiles_iterable: Profiles to write; consumed completely.
        handle: Path of the file to create or overwrite.

    An empty iterable produces a file that contains only the ``st`` and
    ``clonal-complex`` header instead of raising ``IndexError``.
    """
    mlst_profiles = list(mlst_profiles_iterable)
    header = ["st", "clonal-complex"]
    if mlst_profiles:
        # The loci of the first profile define the remaining columns.
        header.extend(mlst_profiles[0].alleles.keys())
    with open(handle, "w", newline='') as filehandle:
        writer = csv.DictWriter(filehandle, fieldnames=header)
        writer.writeheader()
        for mlst_profile in mlst_profiles:
            row_dictionary = {
                "st": mlst_profile.sequence_type,
                "clonal-complex": mlst_profile.clonal_complex,
                **loci_alleles_variants_from_loci(mlst_profile.alleles)
            }

            writer.writerow(rowdict=row_dictionary)
|
11
src/automlst/engine/local/fasta.py
Normal file
11
src/automlst/engine/local/fasta.py
Normal file
@ -0,0 +1,11 @@
|
||||
import asyncio
|
||||
from io import TextIOWrapper
|
||||
from typing import Any, AsyncGenerator, Generator, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
|
||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    """Yield every record of a FASTA source as a ``NamedString``.

    ``SeqIO.parse`` is invoked in a worker thread through
    ``asyncio.to_thread``; the records it returns are then iterated in the
    calling coroutine.

    Args:
        handle: File path or open text handle passed straight to
            ``SeqIO.parse``.

    Yields:
        One ``NamedString`` per record, built from the record's ``id`` and
        the string form of its sequence.
    """
    parsed_records = await asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
    for record in parsed_records:
        yield NamedString(record.id, str(record.seq))
|
@ -0,0 +1,62 @@
|
||||
from collections import defaultdict
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
import re
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Generator, Iterable, Sequence, Union
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
from automlst.engine.data.MLST import Allele, MLSTProfile
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
|
||||
class InstitutPasteurProfiler(AbstractAsyncContextManager):
    """Async client that performs MLST profiling through the Institut Pasteur BIGSdb REST API.

    The instance owns an ``aiohttp`` ``ClientSession`` rooted at
    ``https://bigsdb.pasteur.fr/api/db/<database_name>/``.  Use it as an async
    context manager (or call :meth:`close`) so the session is closed.
    """

    def __init__(self, database_name: str, schema_id: int = 3):
        """Create the HTTP session for one sequence-definition database.

        Args:
            database_name: Database path segment, e.g.
                ``"pubmlst_bordetella_seqdef"``.
            schema_id: Numeric id of the scheme to query.  Defaults to ``3``,
                the value that used to be hard-coded in the request paths.
        """
        self._base_url = f"https://bigsdb.pasteur.fr/api/db/{database_name}/"
        self._schema_id = schema_id
        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))

    async def __aenter__(self):
        """Return the profiler itself; the session already exists after ``__init__``."""
        return self

    async def fetch_mlst_allele_variants(self, sequence_string: str) -> AsyncGenerator[Allele, Any]:
        """Yield the alleles of the scheme that exactly match ``sequence_string``.

        Args:
            sequence_string: Sequence posted to the scheme's ``sequence``
                endpoint.

        Yields:
            One ``Allele`` per entry of the response's ``exact_matches``
            object, carrying the locus name and the reported ``allele_id``.

        Raises:
            KeyError: If the response JSON has no ``exact_matches`` key.
        """
        # Available schemes are listed at e.g.
        # https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
        uri_path = f"schemes/{self._schema_id}/sequence"
        # Read the body inside the context manager so the connection is always
        # released, and before yielding so it is not held while the caller
        # iterates.
        async with self._http_client.post(uri_path, json={"sequence": sequence_string}) as response:
            sequence_response: dict = await response.json()

        exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
        for allele_loci, alleles in exact_matches.items():
            for allele in alleles:
                yield Allele(allele_loci=allele_loci, allele_variant=allele["allele_id"])

    async def fetch_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
        """Resolve a set of alleles to its sequence type and clonal complex.

        Args:
            alleles: Alleles to look up, as a regular or asynchronous
                iterable.

        Returns:
            An ``MLSTProfile`` built from the response's ``exact_matches``
            together with the ``ST`` and ``clonal_complex`` entries of its
            ``fields`` object.

        Raises:
            KeyError: If the response JSON lacks ``fields``,
                ``exact_matches``, ``ST`` or ``clonal_complex``.
        """
        uri_path = f"schemes/{self._schema_id}/designations"

        # Drain an async source first so a single loop builds the payload.
        if isinstance(alleles, AsyncIterable):
            alleles = [allele async for allele in alleles]

        allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
        for allele in alleles:
            allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})

        async with self._http_client.post(uri_path, json={"designations": allele_request_dict}) as response:
            response_json = await response.json()

        schema_fields_returned = response_json["fields"]
        schema_exact_matches = response_json["exact_matches"]

        allele_map: dict[str, list[Allele]] = defaultdict(list)
        for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
            for exact_match_allele in exact_match_alleles:
                allele_map[exact_match_loci].append(Allele(exact_match_loci, exact_match_allele["allele_id"]))

        return MLSTProfile(allele_map, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])

    async def profile_string(self, string: str) -> MLSTProfile:
        """Profile a raw sequence: find its exact allele matches, then resolve them to a profile."""
        alleles = self.fetch_mlst_allele_variants(string)
        return await self.fetch_mlst_st(alleles)

    async def close(self):
        """Close the underlying HTTP session."""
        await self._http_client.close()

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the session when the ``async with`` block exits."""
        await self.close()
|
27
src/automlst/engine/remote/databases/ncbi/genbank.py
Normal file
27
src/automlst/engine/remote/databases/ncbi/genbank.py
Normal file
@ -0,0 +1,27 @@
|
||||
import asyncio
|
||||
from Bio import Entrez
|
||||
from Bio import SeqIO
|
||||
|
||||
# TODO: Replace the hard-coded e-mail address below with a configurable value (e.g. a setting or an environment variable).
|
||||
# Module-level side effect: assigns Biopython's Entrez.email attribute as soon
# as this module is imported.
# NOTE(review): presumably this is the contact address Biopython passes to NCBI
# with each Entrez request - confirm against the Bio.Entrez documentation.
Entrez.email = "yunyangdeng@outlook.com"
|
||||
|
||||
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
|
||||
|
||||
|
||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Download a GenBank record from NCBI and convert it to an ``AnnotatedString``.

    Args:
        genbank_id: Identifier passed to ``Entrez.efetch`` against the
            ``nucleotide`` database; it is also used as the name of the
            returned string.

    Returns:
        An ``AnnotatedString`` holding the record's sequence and one
        ``StringAnnotation`` per feature.  Each annotation carries the
        feature type, its start and end positions, and the feature's
        qualifiers with every value list converted to a ``set``.
    """
    def _download_record():
        # Opening the stream and parsing it both read from the network, so
        # both run in the worker thread rather than on the event loop.
        with Entrez.efetch(db="nucleotide", id=genbank_id, rettype="gb", retmode="text") as fetch_stream:
            return SeqIO.read(fetch_stream, "genbank")

    record = await asyncio.to_thread(_download_record)

    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # Biopython locations use Python slice semantics, so ``end`` is
            # already exclusive; adding 1 would overshoot by one position.
            end=int(feature.location.end),
            feature_properties={
                qualifier_key: set(qualifier_values)
                for qualifier_key, qualifier_values in feature.qualifiers.items()
            },
        )
        for feature in record.features
    ]
    return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
8
tests/nsbdiagnosistoolkit/engine/local/test_abif.py
Normal file
8
tests/nsbdiagnosistoolkit/engine/local/test_abif.py
Normal file
@ -0,0 +1,8 @@
|
||||
import os
|
||||
|
||||
from automlst.engine.local.abif import read_abif
|
||||
|
||||
async def test_load_sanger_sequence_has_data():
    """Reading the bundled ABIF trace file produces a non-None result."""
    trace_path = "tests/resources/1I1_F_P1815443_047.ab1"
    # Check the fixture first so a missing file is not mistaken for a reader bug.
    assert os.path.exists(trace_path)
    trace_data = await read_abif(trace_path)
    assert trace_data is not None
|
7
tests/nsbdiagnosistoolkit/engine/local/test_fasta.py
Normal file
7
tests/nsbdiagnosistoolkit/engine/local/test_fasta.py
Normal file
@ -0,0 +1,7 @@
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
|
||||
|
||||
async def test_fasta_reader_not_none():
    """The FASTA reader yields at least one record, each named ``BX470248.1``."""
    records_seen = 0
    async for named_string in read_fasta("tests/resources/tohama_I_bpertussis.fasta"):
        assert named_string.name == "BX470248.1"
        records_seen += 1
    # Without this check the test would pass vacuously if nothing was yielded.
    assert records_seen > 0
|
@ -0,0 +1,35 @@
|
||||
from Bio import SeqIO
|
||||
from automlst.engine.data.MLST import Allele, MLSTProfile
|
||||
from automlst.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
|
||||
|
||||
|
||||
async def test_profiling_results_in_exact_matches_when_exact():
    """Profiling the Tohama I genome reports each expected locus exactly once, as allele 1."""
    genome_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(genome_record.seq)
    expected_loci = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
        async for exact_match in dummy_profiler.fetch_mlst_allele_variants(sequence_string=sequence):
            assert isinstance(exact_match, Allele)
            # Every locus of Tohama I is expected to carry allele id 1.
            assert exact_match.allele_variant == '1'
            # remove() raises KeyError for an unexpected or repeated locus.
            expected_loci.remove(exact_match.allele_loci)

        assert not expected_loci
|
||||
|
||||
async def test_profiling_results_in_correct_st():
    """Looking up allele 1 at all seven loci resolves to ST 1 in the ST-2 complex."""
    # The lookup needs only these alleles; the genome FASTA is not read here.
    dummy_alleles = [
        Allele("adk", "1"),
        Allele("fumC", "1"),
        Allele("glyA", "1"),
        Allele("tyrB", "1"),
        Allele("icd", "1"),
        Allele("pepA", "1"),
        Allele("pgm", "1"),
    ]
    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
        mlst_st_data = await dummy_profiler.fetch_mlst_st(dummy_alleles)
        assert mlst_st_data is not None
        assert isinstance(mlst_st_data, MLSTProfile)
        assert mlst_st_data.clonal_complex == "ST-2 complex"
        assert mlst_st_data.sequence_type == "1"
|
@ -0,0 +1,5 @@
|
||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
||||
|
||||
|
||||
async def test_fetch_ncbi_genbank_with_id_works():
    """Fetching a known GenBank id returns a record with a non-empty sequence."""
    fetched_record = await fetch_ncbi_genbank("CP011448.1")
    assert len(fetched_record.sequence) > 0
|
12
tests/nsbdiagnosistoolkit/engine/test_annotate.py
Normal file
12
tests/nsbdiagnosistoolkit/engine/test_annotate.py
Normal file
@ -0,0 +1,12 @@
|
||||
from automlst.engine.annotate import annotate_from_genbank, fetch_ncbi_genbank
|
||||
from Bio import SeqIO
|
||||
|
||||
from automlst.engine.data.genomics import AnnotatedString
|
||||
|
||||
async def test_annotate_from_genbank_for_adk_annotation():
    """Annotating Tohama I from GenBank with target ``adk`` puts an ``adk`` gene annotation first."""
    genome_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(genome_record.seq)
    annotated_sequence = await annotate_from_genbank(
        "CP011448.1",
        "bpertussis_tohamaI",
        sequence,
        max_annotation_length=750,
        gene_targets={"adk"},
    )
    assert isinstance(annotated_sequence, AnnotatedString)
    assert len(annotated_sequence.annotations) >= 1
    first_annotation = annotated_sequence.annotations[0]
    assert first_annotation.type == "gene"
    assert "adk" in first_annotation.feature_properties["gene"]
|
BIN
tests/resources/1I1_F_P1815443_047.ab1
Normal file
BIN
tests/resources/1I1_F_P1815443_047.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I1_R_P1815443_094.ab1
Normal file
BIN
tests/resources/1I1_R_P1815443_094.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I2_F_P1815443_048.ab1
Normal file
BIN
tests/resources/1I2_F_P1815443_048.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I2_R_P1815443_011.ab1
Normal file
BIN
tests/resources/1I2_R_P1815443_011.ab1
Normal file
Binary file not shown.
58377
tests/resources/tohama_I_bpertussis.fasta
Normal file
58377
tests/resources/tohama_I_bpertussis.fasta
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user