Compare commits
No commits in common. "develop" and "0.8.0" have entirely different histories.
@ -1,11 +0,0 @@
|
||||
FROM mcr.microsoft.com/devcontainers/anaconda:1-3

# Stage environment.yml (when the build context has one) in a scratch directory so the
# base conda environment can be updated from it. The trailing wildcard together with the
# always-present "noop.txt" keeps COPY from failing when no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/

# Update the base environment only if an environment.yml was actually copied, then
# remove the scratch directory in the same layer.
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then \
        umask 0002 \
        && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; \
    fi \
    && rm -rf /tmp/conda-tmp

# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
#     && apt-get -y install --no-install-recommends <your-package-list-here>
|
@ -1,11 +1,9 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||
{
|
||||
"name": "Anaconda (Python 3)",
|
||||
"build": {
|
||||
"context": "..",
|
||||
"dockerfile": "Dockerfile"
|
||||
}
|
||||
"name": "Python 3",
|
||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
@ -14,7 +12,14 @@
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
// "postCreateCommand": "python --version",
|
||||
"postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"mechatroner.rainbow-csv"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
@ -1,3 +0,0 @@
|
||||
This file is copied into the container along with environment.yml* from the parent
|
||||
folder. This file is included to prevent the Dockerfile COPY instruction from
|
||||
failing if no environment.yml is found.
|
159
.gitignore
vendored
159
.gitignore
vendored
@ -1,6 +1,6 @@
|
||||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
@ -17,6 +17,146 @@
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### Node ###
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
### Node Patch ###
|
||||
# Serverless Webpack directories
|
||||
.webpack/
|
||||
|
||||
# Optional stylelint cache
|
||||
|
||||
# SvelteKit build / generate output
|
||||
.svelte-kit
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
@ -62,7 +202,6 @@ htmlcov/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
@ -76,7 +215,6 @@ cover/
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
@ -140,7 +278,6 @@ celerybeat.pid
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
@ -189,6 +326,13 @@ poetry.toml
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### Svelte ###
|
||||
# gitignore template for the SvelteKit, frontend web component framework
|
||||
# website: https://kit.svelte.dev/
|
||||
|
||||
.svelte-kit/
|
||||
package
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
@ -208,8 +352,9 @@ pyrightconfig.json
|
||||
.history
|
||||
.ionide
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
||||
|
||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
|
||||
conda-bld
|
||||
output
|
||||
*.private.*
|
6
.vscode/extensions.json
vendored
6
.vscode/extensions.json
vendored
@ -1,6 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"piotrpalarz.vscode-gitignore-generator",
|
||||
"gruntfuggly.todo-tree"
|
||||
]
|
||||
}
|
25
.vscode/launch.json
vendored
Normal file
25
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "autobigs info -lschema pubmlst_bordetella_seqdef",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/src/autobigs/cli/program.py",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"info",
|
||||
"-lschemas",
|
||||
"pubmlst_bordetella_seqdef"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/src",
|
||||
"env": {
|
||||
"PYTHONPATH": "${workspaceFolder}/src"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
22
Jenkinsfile
vendored
22
Jenkinsfile
vendored
@ -2,14 +2,14 @@ pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
cloud 'rsys-devel'
|
||||
defaultContainer 'miniforge3'
|
||||
inheritFrom 'miniforge'
|
||||
defaultContainer 'pip'
|
||||
inheritFrom 'pip'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage("install") {
|
||||
steps {
|
||||
sh 'conda env update -n base -f environment.yml'
|
||||
sh 'python -m pip install -r requirements.txt'
|
||||
}
|
||||
}
|
||||
stage("unit tests") {
|
||||
@ -22,36 +22,26 @@ pipeline {
|
||||
stage("build") {
|
||||
steps {
|
||||
sh "python -m build"
|
||||
sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
|
||||
sh "python scripts/patch_recipe.py"
|
||||
sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
|
||||
}
|
||||
}
|
||||
stage("archive") {
|
||||
steps {
|
||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
stage("publish") {
|
||||
parallel {
|
||||
stage ("git.reslate.systems") {
|
||||
when {
|
||||
not {
|
||||
tag '*.*.*'
|
||||
}
|
||||
}
|
||||
|
||||
environment {
|
||||
CREDS = credentials('username-password-rs-git')
|
||||
}
|
||||
steps {
|
||||
sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
|
||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
}
|
||||
}
|
||||
stage ("pypi.org") {
|
||||
when {
|
||||
tag '*.*.*'
|
||||
tag '*.*'
|
||||
}
|
||||
environment {
|
||||
TOKEN = credentials('pypi.org')
|
||||
|
21
README.md
21
README.md
@ -1,14 +1,13 @@
|
||||
# autoBIGS.engine
|
||||
|
||||
A Python library implementing common BIGSdb MLST schemes and database accesses for the purpose of typing sequences automatically. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
|
||||
# autoBIGS.Engine
|
||||
|
||||
A python library implementing common BIGSdb MLST schemes and databases. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
|
||||
|
||||
## Features
|
||||
|
||||
Briefly, this library can:
|
||||
- Import multiple `FASTA` files
|
||||
- Fetch the available BIGSdb databases that are currently live and available
|
||||
- Fetch the available BIGSdb database schemes for a given MLST database
|
||||
- Fetch the available BIGSdb database schemas for a given MLST database
|
||||
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
||||
- Retrieve MLST sequence type IDs based off a sequence
|
||||
- Output all results to a single CSV
|
||||
@ -23,16 +22,4 @@ Then, it's as easy as running `pip install autobigs-engine` in any terminal that
|
||||
|
||||
### CLI usage
|
||||
|
||||
This is an independent Python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this library's functions. Alternatively, you may use `autobigs-cli`, a `Python` package that implements a CLI for calling this library.
|
||||
|
||||
## Versioning
|
||||
|
||||
The autoBIGS project follows [semantic versioning](https://semver.org/) where the three numbers may be interpreted as MAJOR.MINOR.PATCH.
|
||||
|
||||
Regarding major version 0 ([spec item 4](https://semver.org/#spec-item-4)), the autoBIGS project adapts the semantic versioning definition as follows:
|
||||
|
||||
1. Given x.Y.z, Y is only incremented when a backwards incompatible change is made.
|
||||
|
||||
2. Given x.y.Z, Z is only incremented when a backwards compatible change is made.
|
||||
|
||||
Versions of autoBIGS items with a major version number of 0 will introduce numerous changes and patches. As such, changes between such versions should be considered highly variable.
|
||||
This is an independent Python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this library's functions. Alternatively, you may use `autobigs-cli`, a `Python` package that implements a CLI for calling this library.
|
@ -1,16 +0,0 @@
|
||||
name: ci
|
||||
channels:
|
||||
- bioconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- aiohttp==3.11.*
|
||||
- biopython==1.85
|
||||
- pytest
|
||||
- pytest-asyncio
|
||||
- python-build
|
||||
- conda-build
|
||||
- twine==6.0.1
|
||||
- setuptools_scm
|
||||
- pytest-cov
|
||||
- grayskull
|
||||
- curl
|
@ -13,12 +13,10 @@ dependencies = [
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
description = "A library to rapidly fetch MLST profiles given sequences for various diseases."
|
||||
license = {text = "GPL-3.0-or-later"}
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
|
||||
Source = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
|
||||
Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine/issues"
|
||||
Repository = "https://github.com/RealYHD/autoBIGS.engine"
|
||||
Issues = "https://github.com/RealYHD/autoBIGS.engine/issues"
|
||||
|
||||
[tool.setuptools_scm]
|
||||
|
||||
|
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@ -0,0 +1,8 @@
|
||||
aiohttp[speedups]==3.11.*
|
||||
biopython==1.85
|
||||
pytest
|
||||
pytest-asyncio
|
||||
build
|
||||
twine
|
||||
setuptools_scm
|
||||
pytest-cov
|
@ -1,103 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
from os import fdopen, path
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from sys import argv
|
||||
import tempfile
|
||||
|
||||
INDENTATION = " "
|
||||
GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
|
||||
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
|
||||
LICENSE_SUFFIX = "-or-later"
|
||||
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
|
||||
|
||||
def _calc_indentation(line: str):
    """Return how many ``INDENTATION`` units make up the leading whitespace of ``line``.

    Args:
        line: A single line of the recipe, with or without its trailing newline.

    Returns:
        The number of non-overlapping ``INDENTATION`` matches found in the
        whitespace that precedes the first non-whitespace character. Blank
        lines, whitespace-only lines and the empty string all count as level 0.
    """
    # A line without visible content has no indentation level of its own. The
    # previous implementation only special-cased "\n" and raised
    # "ValueError: empty separator" for "" or lines such as "  \n".
    if not line.strip():
        return 0
    leading_whitespace = line[:len(line) - len(line.lstrip())]
    return len(re.findall(INDENTATION, leading_whitespace))
|
||||
|
||||
def read_grayskull_output():
    """Read the ``meta.yaml`` found under ``GRAYSKULL_OUTPUT_PATH``.

    Returns:
        The file's lines (newline terminators included), as returned by
        ``readlines()``.

    Raises:
        OSError: If ``meta.yaml`` cannot be opened or read.
    """
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    # The context manager closes the handle even when reading fails.
    with open(original_meta) as meta_file:
        return meta_file.readlines()
|
||||
|
||||
def update_naming_scheme(lines):
    """Append a ``replace(".", "-")`` filter to every ``{{ name|lower }}`` expression.

    Args:
        lines: Iterable of recipe lines.

    Returns:
        A new list with one entry per input line. Lines without a
        ``{{ name|lower }}`` expression are returned unchanged.
    """
    # Group 1 is everything up to and including "lower"; group 2 is the
    # whitespace plus closing braces. The filter is inserted between the two.
    expression = re.compile(r"(\{\{\s*name\|lower)(\s+\}\})")
    replace_filter = r'|replace(".", "-")'

    def _insert_filter(match):
        return match.group(1) + replace_filter + match.group(2)

    # re.sub handles every occurrence on a line in one pass. The previous
    # implementation applied offsets computed on the original line to an
    # already-modified string, so a second match was patched in the wrong place.
    return [expression.sub(_insert_filter, line) for line in lines]
|
||||
|
||||
def inject_run_exports(lines: list[str]):
    """Add a ``run_exports`` entry at the end of the top-level ``build:`` section.

    Args:
        lines: Recipe lines, each expected to end with a newline.

    Returns:
        A new list of lines in which ``run_exports:`` and a single list item
        holding ``RUN_EXPORTED_VALUE`` follow the last line of the ``build:``
        section. All input lines are preserved.
    """
    run_exports_lines = [
        INDENTATION * 1 + "run_exports:\n",
        INDENTATION * 2 + "- " + RUN_EXPORTED_VALUE + "\n",
    ]
    in_build_section = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "build:\n" and indentation_count == 0:
            in_build_section = True
        elif in_build_section and indentation_count == 0:
            # First level-0 line after "build:" closes the section: inject
            # before it. The previous implementation injected *instead of*
            # this line, silently dropping it from the recipe.
            modified_lines.extend(run_exports_lines)
            in_build_section = False
        modified_lines.append(line)
    if in_build_section:
        # "build:" was the last section, so no level-0 line ever closed it.
        if modified_lines and not modified_lines[-1].endswith("\n"):
            modified_lines[-1] += "\n"
        modified_lines.extend(run_exports_lines)
    return modified_lines
|
||||
|
||||
def suffix_license(lines: list[str]):
    """Append ``LICENSE_SUFFIX`` to the ``license:`` entry of the ``about:`` section.

    Args:
        lines: Recipe lines.

    Returns:
        A new list of lines. The first ``license:`` line at indentation level 1
        seen after a top-level ``about:`` line has its trailing whitespace
        stripped and ``LICENSE_SUFFIX`` plus a newline appended; every other
        line is copied through untouched.
    """
    inside_about = False
    patched = []
    for current in lines:
        depth = _calc_indentation(current)
        if current == "about:\n" and depth == 0:
            inside_about = True
            patched.append(current)
            continue
        if inside_about and depth == 1 and current.lstrip().startswith("license:"):
            patched.append(current.rstrip() + LICENSE_SUFFIX + "\n")
            # Only one license line is rewritten per "about:" header.
            inside_about = False
            continue
        patched.append(current)
    return patched
|
||||
|
||||
def inject_home_page(lines: list[str]):
    """Add a ``home:`` entry at the end of the top-level ``about:`` section.

    Args:
        lines: Recipe lines, each expected to end with a newline.

    Returns:
        A new list of lines in which a ``home:`` entry pointing at
        ``HOME_PAGE`` follows the last line of the ``about:`` section. All
        input lines are preserved.
    """
    home_line = INDENTATION + "home: " + HOME_PAGE + "\n"
    in_about_section = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            in_about_section = True
        elif in_about_section and indentation_count == 0:
            # First level-0 line after "about:" closes the section: inject
            # before it. The previous implementation injected *instead of*
            # this line, silently dropping it from the recipe.
            modified_lines.append(home_line)
            in_about_section = False
        modified_lines.append(line)
    if in_about_section:
        # "about:" was the last section, so no level-0 line ever closed it.
        if modified_lines and not modified_lines[-1].endswith("\n"):
            modified_lines[-1] += "\n"
        modified_lines.append(home_line)
    return modified_lines
|
||||
|
||||
def write_to_original(lines: list[str]):
    """Overwrite the ``meta.yaml`` under ``GRAYSKULL_OUTPUT_PATH`` with ``lines``.

    Args:
        lines: Complete recipe content, one string per line.
    """
    meta_path = path.join(path.abspath(GRAYSKULL_OUTPUT_PATH), "meta.yaml")
    with open(meta_path, "w") as meta_file:
        meta_file.writelines(lines)
|
||||
|
||||
def rename_recipe_dir():
    """Move the recipe directory to its lower-case, dash-separated name.

    The target name is ``GRAYSKULL_OUTPUT_PATH`` with every "." replaced by
    "-" and lower-cased. Any directory already at the target path is removed
    first (errors during removal are ignored).
    """
    target_dir = path.abspath(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower())
    shutil.rmtree(target_dir, ignore_errors=True)
    source_dir = path.abspath(GRAYSKULL_OUTPUT_PATH)
    os.replace(source_dir, target_dir)
|
||||
|
||||
if __name__ == "__main__":
    # Read the generated recipe, run each patch step over it in order, write
    # the result back, then rename the recipe directory.
    recipe_lines = read_grayskull_output()
    for patch_step in (
        update_naming_scheme,
        inject_run_exports,
        suffix_license,
        inject_home_page,
    ):
        recipe_lines = patch_step(recipe_lines)
    write_to_original(recipe_lines)
    rename_recipe_dir()
|
@ -1,277 +0,0 @@
|
||||
from abc import abstractmethod
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
import csv
|
||||
from os import path
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
||||
|
||||
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
|
||||
|
||||
from autobigs.engine.reading import read_fasta
|
||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||
from autobigs.engine.structures.genomics import NamedString
|
||||
from autobigs.engine.structures.mlst import Allele, NamedMLSTProfile, AlignmentStats, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import BIGSdbResponseNotOkay, NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
||||
|
||||
from Bio.Align import PairwiseAligner
|
||||
|
||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
    """Abstract asynchronous interface for MLST profilers.

    Concrete profilers implement allele lookup, sequence-type lookup, the
    combined profiling helpers and ``close``. The class is an async context
    manager through ``AbstractAsyncContextManager``.
    """

    @abstractmethod
    def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
        """Produce alleles (or ``(name, allele)`` pairs) for one or more query sequences."""
        ...

    @abstractmethod
    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
        """Resolve a collection of alleles into an MLST profile."""
        ...

    @abstractmethod
    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
        """Produce the MLST profile for one group of query sequences."""
        ...

    @abstractmethod
    def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
        """Produce one named MLST profile per group of named query sequences."""
        ...

    @abstractmethod
    async def close(self):
        """Release any resources held by the profiler."""
        ...
|
||||
|
||||
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
    """MLST profiler that talks to the REST API of a remote BIGSdb instance.

    Every request goes through a single ``ClientSession`` whose base URL is
    ``{database_api}/db/{database_name}/schemes/{scheme_id}/``.
    """

    def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5):
        """Create the profiler and its HTTP session.

        Args:
            database_api: Root URL of the BIGSdb REST API.
            database_name: Name of the database to query.
            scheme_id: Numeric ID of the scheme within that database.
            retry_requests: Maximum number of attempts per request when a
                retryable connection error occurs.
        """
        self._retry_limit = retry_requests
        self._database_name = database_name
        self._scheme_id = scheme_id
        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(300))

    async def __aenter__(self):
        return self

    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
        """Query the scheme's ``sequence`` endpoint for each query sequence.

        Args:
            query_sequence_strings: One sequence or an iterable of sequences,
                each either a plain ``str`` or a ``NamedString``.

        Yields:
            An ``Allele`` for plain-string queries, or ``(name, Allele)`` for
            ``NamedString`` queries. Exact matches carry no alignment stats;
            partial matches carry an ``AlignmentStats``. If every attempt for
            a sequence fails with a retryable connection error, a placeholder
            ``Allele("error", "error", None)`` is yielded instead.

        Raises:
            NoBIGSdbMatchesException: The response had status 200 but neither
                exact nor partial matches.
            BIGSdbResponseNotOkay: The response had no matches and a non-200
                status.
        """
        # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
        uri_path = "sequence"
        if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
            query_sequence_strings = [query_sequence_strings]

        for sequence_string in query_sequence_strings:
            attempts = 0
            success = False
            last_error = None
            while not success and attempts < self._retry_limit:
                attempts += 1
                request = self._http_client.post(uri_path, json={
                    "sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
                    "partial_matches": True
                })
                try:
                    async with request as response:
                        sequence_response: dict = await response.json()

                        if "exact_matches" in sequence_response:
                            # loci -> list of alleles with id and loci
                            exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
                            for allele_loci, alleles in exact_matches.items():
                                for allele in alleles:
                                    allele_id = allele["allele_id"]
                                    result_allele = Allele(allele_locus=allele_loci, allele_variant=allele_id, partial_match_profile=None)
                                    yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
                        elif "partial_matches" in sequence_response:
                            partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
                            for allele_loci, partial_match in partial_matches.items():
                                if len(partial_match) <= 0:
                                    continue
                                partial_match_profile = AlignmentStats(
                                    percent_identity=float(partial_match["identity"]),
                                    mismatches=int(partial_match["mismatches"]),
                                    gaps=int(partial_match["gaps"]),
                                    match_metric=int(partial_match["bitscore"])
                                )
                                result_allele = Allele(
                                    allele_locus=allele_loci,
                                    allele_variant=str(partial_match["allele"]),
                                    partial_match_profile=partial_match_profile
                                )
                                yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
                        else:
                            if response.status == 200:
                                raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
                            else:
                                raise BIGSdbResponseNotOkay(sequence_response)
                except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:  # Errors we will retry
                    last_error = e
                    success = False
                    await asyncio.sleep(5)  # In case the connection issue is due to rate issues
                else:
                    success = True
            if not success and last_error is not None:
                try:
                    raise last_error
                except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:  # Non-fatal errors
                    # NOTE(review): the placeholder is yielded bare even for
                    # NamedString queries - confirm consumers expect that.
                    yield Allele("error", "error", None)

    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
        """Query the scheme's ``designations`` endpoint for the given alleles.

        Args:
            alleles: Sync or async iterable of ``Allele`` objects or
                ``(name, Allele)`` pairs.

        Returns:
            An ``MLSTProfile`` built from the response's exact matches, with
            ``ST`` and ``clonal_complex`` defaulting to ``"unknown"``. When
            any input carried a name, the profile is wrapped in a
            ``NamedMLSTProfile``.

        Raises:
            ValueError: The response contained no exact matches, or every
                attempt failed with a retryable connection error.
        """
        uri_path = "designations"
        allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
        names_list = []

        def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
            if isinstance(allele, Allele):
                allele_val = allele
            else:
                allele_val = allele[1]
                names_list.append(allele[0])
            allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})

        if isinstance(alleles, AsyncIterable):
            async for allele in alleles:
                insert_allele_to_request_dict(allele)
        else:
            for allele in alleles:
                insert_allele_to_request_dict(allele)
        request_json = {
            "designations": allele_request_dict
        }

        attempts = 0
        success = False
        last_error = None
        while attempts < self._retry_limit and not success:
            attempts += 1
            try:
                async with self._http_client.post(uri_path, json=request_json) as response:
                    response_json: dict = await response.json()
                    allele_set: Set[Allele] = set()
                    response_json.setdefault("fields", dict())
                    scheme_fields_returned: dict[str, str] = response_json["fields"]
                    scheme_fields_returned.setdefault("ST", "unknown")
                    scheme_fields_returned.setdefault("clonal_complex", "unknown")
                    scheme_exact_matches: dict = response_json["exact_matches"]
                    for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
                        allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
                    if len(allele_set) == 0:
                        raise ValueError("Passed in no alleles.")
                    result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
                    if len(names_list) > 0:
                        # Several distinct names collapse into a stringified tuple.
                        result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
                    return result_mlst_profile
            except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
                last_error = e
                success = False
                await asyncio.sleep(5)
            else:
                success = True
        # NOTE(review): the ":Error" profile built below is never returned; the
        # method ends by raising ValueError. This mirrors the original statement
        # order - confirm whether returning the error profile was intended.
        try:
            if last_error is not None:
                raise last_error
        except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
            result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
        raise ValueError("Last error was not recorded.")

    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
        """Look up alleles for the given sequences and resolve them into a profile."""
        alleles = self.determine_mlst_allele_variants(query_sequence_strings)
        return await self.determine_mlst_st(alleles)

    async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
        """Profile several groups of named sequences, yielding results as they complete.

        Args:
            query_named_string_groups: Async iterable of sequence groups; each
                group is profiled with ``profile_string``.
            stop_on_fail: When true, a ``NoBIGSdbMatchesException`` from any
                group is re-raised. Otherwise a ``NamedMLSTProfile`` holding
                ``None`` is yielded under the failing query's name.

        Yields:
            One ``NamedMLSTProfile`` per group, in completion order.

        Raises:
            TypeError: A group produced an un-named profile.
            ValueError: A no-match failure did not carry the query name.
        """
        tasks: list[Coroutine[Any, Any, Union[NamedMLSTProfile, MLSTProfile]]] = []
        async for named_strings in query_named_string_groups:
            tasks.append(self.profile_string(named_strings))
        for task in asyncio.as_completed(tasks):
            # The await must sit inside the try: it is the only statement that
            # can raise NoBIGSdbMatchesException. Previously it was outside, so
            # the handler (and stop_on_fail) could never take effect.
            try:
                named_mlst_profile = await task
            except NoBIGSdbMatchesException as e:
                if stop_on_fail:
                    raise
                causal_name = e.get_causal_query_name()
                if causal_name is None:
                    raise ValueError("Missing query name despite requiring names.") from e
                yield NamedMLSTProfile(causal_name, None)
                continue
            if isinstance(named_mlst_profile, NamedMLSTProfile):
                yield named_mlst_profile
            else:
                raise TypeError("MLST profile is not named.")

    async def close(self):
        """Close the underlying HTTP session."""
        await self._http_client.close()

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.close()
|
||||
|
||||
class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
KNOWN_BIGSDB_APIS = {
|
||||
"https://bigsdb.pasteur.fr/api",
|
||||
"https://rest.pubmlst.org"
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self._http_client = ClientSession()
|
||||
self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
|
||||
self._seqdefdb_schemes: dict[str, Union[Mapping[str, int], None]] = dict()
|
||||
super().__init__()
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def get_known_seqdef_dbs(self, force: bool = False) -> Mapping[str, str]:
|
||||
if self._known_seqdef_dbs_origin is not None and not force:
|
||||
return self._known_seqdef_dbs_origin
|
||||
known_seqdef_dbs = dict()
|
||||
for known_bigsdb in BIGSdbIndex.KNOWN_BIGSDB_APIS:
|
||||
async with self._http_client.get(f"{known_bigsdb}/db") as response:
|
||||
response_json_databases = await response.json()
|
||||
for database_group in response_json_databases:
|
||||
for database_info in database_group["databases"]:
|
||||
if str(database_info["name"]).endswith("seqdef"):
|
||||
known_seqdef_dbs[database_info["name"]] = known_bigsdb
|
||||
self._known_seqdef_dbs_origin = dict(known_seqdef_dbs)
|
||||
return self._known_seqdef_dbs_origin
|
||||
|
||||
async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str:
|
||||
known_databases = await self.get_known_seqdef_dbs()
|
||||
if seqdef_db_name not in known_databases:
|
||||
raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
|
||||
return known_databases[seqdef_db_name]
|
||||
|
||||
async def get_schemes_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
|
||||
if seqdef_db_name in self._seqdefdb_schemes and not force:
|
||||
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
|
||||
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
|
||||
async with self._http_client.get(uri_path) as response:
|
||||
response_json = await response.json()
|
||||
scheme_descriptions: Mapping[str, int] = dict()
|
||||
for scheme_definition in response_json["schemes"]:
|
||||
scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
|
||||
scheme_desc: str = scheme_definition["description"]
|
||||
scheme_descriptions[scheme_desc] = scheme_id
|
||||
self._seqdefdb_schemes[seqdef_db_name] = scheme_descriptions
|
||||
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore
|
||||
|
||||
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
|
||||
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
|
||||
|
||||
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
|
||||
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}"
|
||||
async with self._http_client.get(uri_path) as response:
|
||||
response_json = await response.json()
|
||||
loci = response_json["loci"]
|
||||
results = []
|
||||
for locus in loci:
|
||||
results.append(path.basename(locus))
|
||||
return results
|
||||
|
||||
async def close(self):
    """Close the HTTP client held in ``self._http_client``."""
    http_client = self._http_client
    await http_client.close()
|
||||
|
||||
async def __aexit__(self, exc_type, exc_value, traceback):
    """Async context-manager exit: delegate to ``close``.

    The exception arguments are not inspected and nothing is returned, so an
    exception raised inside the ``async with`` body is not suppressed.
    """
    pending_close = self.close()
    await pending_close
|
||||
|
||||
def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, scheme_id: int):
    """Factory for MLST profilers.

    Args:
        local: Request a local profiler; not implemented.
        database_api: Base URL of the BIGSdb API.
        database_name: Name of the sequence definition database.
        scheme_id: Numeric ID of the scheme.

    Returns:
        A ``RemoteBIGSdbMLSTProfiler`` built from the three remaining arguments
        when ``local`` is false.

    Raises:
        NotImplementedError: When ``local`` is true.
    """
    if not local:
        return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, scheme_id=scheme_id)
    raise NotImplementedError()
|
41
src/autobigs/engine/data/local/csv.py
Normal file
41
src/autobigs/engine/data/local/csv.py
Normal file
@ -0,0 +1,41 @@
|
||||
import csv
|
||||
from os import PathLike
|
||||
from typing import AsyncIterable, Mapping, Sequence, Union
|
||||
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
|
||||
|
||||
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
    """Flatten a locus -> alleles mapping into a locus -> variant(s) mapping.

    A locus with exactly one allele maps to that allele's ``allele_variant``;
    any other count (including zero) maps to a list of the variants.
    """
    result_dict: dict[str, Union[list[str], str]] = {}
    for locus_name, locus_alleles in alleles_map.items():
        variants = [entry.allele_variant for entry in locus_alleles]
        result_dict[locus_name] = variants[0] if len(variants) == 1 else variants
    return result_dict
|
||||
|
||||
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
    """Write named MLST profiles to a CSV file and report the names that had no profile.

    The header is ``id``, ``st``, ``clonal-complex`` followed by the loci of the
    first non-``None`` profile. If every profile is ``None`` the file is left
    empty.

    Args:
        mlst_profiles_iterable: Async stream of ``(name, profile_or_None)`` pairs.
        handle: Path of the file to open for writing.

    Returns:
        Names whose profile was ``None``, in the order encountered.
    """
    failed_names = list()
    with open(handle, "w", newline='') as csv_file:
        csv_writer: Union[csv.DictWriter, None] = None
        async for name, mlst_profile in mlst_profiles_iterable:
            if mlst_profile is None:
                failed_names.append(name)
                continue
            if csv_writer is None:
                # Column layout is fixed by the first successful profile.
                columns = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
                csv_writer = csv.DictWriter(csv_file, fieldnames=columns)
                csv_writer.writeheader()
            row = {
                "st": mlst_profile.sequence_type,
                "clonal-complex": mlst_profile.clonal_complex,
                "id": name,
            }
            row.update(dict_loci_alleles_variants_from_loci(mlst_profile.alleles))
            csv_writer.writerow(rowdict=row)
    return failed_names
|
16
src/autobigs/engine/data/local/fasta.py
Normal file
16
src/autobigs/engine/data/local/fasta.py
Normal file
@ -0,0 +1,16 @@
|
||||
import asyncio
|
||||
from io import TextIOWrapper
|
||||
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
|
||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    """Asynchronously yield every record of a FASTA file as a ``NamedString``.

    Args:
        handle: Path to, or open text handle of, the FASTA file.

    Yields:
        ``NamedString(record.id, str(record.seq))`` for each record, in file order.
    """
    # SeqIO.parse only constructs a lazy iterator, so handing the bare call to
    # a worker thread would still leave all reading and parsing on the event
    # loop. Consume the iterator inside the thread instead.
    fasta_records = await asyncio.to_thread(lambda: list(SeqIO.parse(handle, "fasta")))
    for fasta_record in fasta_records:
        yield NamedString(fasta_record.id, str(fasta_record.seq))
|
||||
|
||||
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]:
    """Yield the records of several FASTA sources one after another.

    Sources are read sequentially in the order given; each is delegated to
    ``read_fasta``.
    """
    for fasta_source in handles:
        async for record in read_fasta(fasta_source):
            yield record
|
166
src/autobigs/engine/data/remote/databases/bigsdb.py
Normal file
166
src/autobigs/engine/data/remote/databases/bigsdb.py
Normal file
@ -0,0 +1,166 @@
|
||||
from collections import defaultdict
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
from numbers import Number
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
||||
|
||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
    """Asynchronous client that MLST-profiles sequences against one BIGSdb scheme.

    All requests are sent relative to
    ``{database_api}/db/{database_name}/schemes/{schema_id}/``. Use the
    instance as an async context manager, or call ``close`` explicitly, so the
    HTTP session is shut down.
    """

    def __init__(self, database_api: str, database_name: str, schema_id: int):
        """Remember the target scheme and open an HTTP session rooted at its URL.

        Args:
            database_api: Base URL of the BIGSdb REST API.
            database_name: Name of the sequence definition database.
            schema_id: Numeric ID of the scheme to query.
        """
        self._database_name = database_name
        self._schema_id = schema_id
        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))

    async def __aenter__(self):
        """Return the profiler itself; the session is already open."""
        return self

    async def fetch_mlst_allele_variants(self, sequence_string: str, exact: bool) -> AsyncGenerator[Allele, Any]:
        """Query the scheme's ``sequence`` endpoint and yield the alleles it reports.

        Args:
            sequence_string: Sequence to submit.
            exact: When true, only exact matches are acceptable and partial
                matches are not requested.

        Yields:
            One ``Allele`` per exact match. If the response has no exact
            matches, one ``Allele`` (carrying a ``PartialAllelicMatchProfile``)
            per non-empty partial match instead.

        Raises:
            NoBIGSdbExactMatchesException: ``exact`` is true but the response
                only contains partial matches.
            NoBIGSdbMatchesException: The response contains neither kind of match.
        """
        # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
        uri_path = "sequence"
        # Use the request as a context manager so the response (and its pooled
        # connection) is released as soon as the JSON body has been read,
        # rather than being left open for the lifetime of the generator.
        async with self._http_client.post(uri_path, json={
            "sequence": sequence_string,
            "partial_matches": not exact
        }) as response:
            sequence_response: dict = await response.json()

        if "exact_matches" in sequence_response:
            # loci -> list of alleles with id and loci
            exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
            for allele_loci, alleles in exact_matches.items():
                for allele in alleles:
                    allele_id = allele["allele_id"]
                    yield Allele(allele_loci=allele_loci, allele_variant=allele_id, partial_match_profile=None)
        elif "partial_matches" in sequence_response:
            if exact:
                raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
            partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
            for allele_loci, partial_match in partial_matches.items():
                if len(partial_match) <= 0:
                    # An empty entry carries no match data for this locus.
                    continue
                partial_match_profile = PartialAllelicMatchProfile(
                    percent_identity=float(partial_match["identity"]),
                    mismatches=int(partial_match["mismatches"]),
                    bitscore=float(partial_match["bitscore"]),
                    gaps=int(partial_match["gaps"])
                )
                yield Allele(
                    allele_loci=allele_loci,
                    allele_variant=str(partial_match["allele"]),
                    partial_match_profile=partial_match_profile
                )
        else:
            raise NoBIGSdbMatchesException(self._database_name, self._schema_id)

    async def fetch_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
        """Resolve a set of alleles to an MLST profile via the ``designations`` endpoint.

        Args:
            alleles: Sync or async iterable of alleles to submit.

        Returns:
            An ``MLSTProfile`` built from the response's ``exact_matches``; the
            sequence type and clonal complex fall back to ``"unknown"`` when
            the response does not provide them.

        Raises:
            ValueError: The response's ``exact_matches`` yielded no alleles.
        """
        uri_path = "designations"
        allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
        if isinstance(alleles, AsyncIterable):
            async for allele in alleles:
                allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
        else:
            for allele in alleles:
                allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
        request_json = {
            "designations": allele_request_dict
        }
        async with self._http_client.post(uri_path, json=request_json) as response:
            response_json: dict = await response.json()
        allele_map: dict[str, list[Allele]] = defaultdict(list)
        response_json.setdefault("fields", dict())
        schema_fields_returned: dict[str, str] = response_json["fields"]
        schema_fields_returned.setdefault("ST", "unknown")
        schema_fields_returned.setdefault("clonal_complex", "unknown")
        schema_exact_matches: dict = response_json["exact_matches"]
        for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
            for exact_match_allele in exact_match_alleles:
                allele_map[exact_match_loci].append(Allele(exact_match_loci, exact_match_allele["allele_id"], None))
        if len(allele_map) == 0:
            raise ValueError("Passed in no alleles.")
        return MLSTProfile(dict(allele_map), schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])

    async def profile_string(self, string: str, exact: bool = False) -> MLSTProfile:
        """Profile one sequence: look up its alleles, then resolve them to a profile."""
        alleles = self.fetch_mlst_allele_variants(string, exact)
        return await self.fetch_mlst_st(alleles)

    async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], exact: bool = False, stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
        """Profile a stream of named sequences.

        Args:
            namedStrings: Async stream of sequences to profile.
            exact: Forwarded to ``profile_string``.
            stop_on_fail: When true, re-raise the first
                ``NoBIGSdbMatchesException`` instead of reporting it.

        Yields:
            ``(name, profile)`` pairs; ``profile`` is ``None`` for a sequence
            that raised ``NoBIGSdbMatchesException`` while ``stop_on_fail`` is
            false.
        """
        async for named_string in namedStrings:
            try:
                yield (named_string.name, await self.profile_string(named_string.sequence, exact))
            except NoBIGSdbMatchesException as e:
                if stop_on_fail:
                    raise e
                yield (named_string.name, None)

    async def close(self):
        """Close the underlying HTTP session."""
        await self._http_client.close()

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the session on context exit; exceptions are not suppressed."""
        await self.close()
|
||||
|
||||
class BIGSdbIndex(AbstractAsyncContextManager):
    """Directory of sequence definition databases across the known BIGSdb APIs.

    Database and schema listings are fetched on demand and cached on the
    instance. Use as an async context manager, or call ``close``, to shut the
    HTTP session down.
    """

    # Base URLs of the BIGSdb REST APIs that are queried for databases.
    KNOWN_BIGSDB_APIS = {
        "https://bigsdb.pasteur.fr/api",
        "https://rest.pubmlst.org"
    }

    def __init__(self):
        """Open an HTTP session and start with empty caches."""
        self._http_client = ClientSession()
        # database name -> API base URL; None until first fetched.
        self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
        # database name -> (schema description -> schema ID).
        self._seqdefdb_schemas: dict[str, Union[Mapping[str, int], None]] = dict()
        super().__init__()

    async def __aenter__(self):
        """Return the index itself."""
        return self

    async def get_known_seqdef_dbs(self, force: bool = False) -> Mapping[str, str]:
        """Return a mapping of sequence definition database name -> hosting API URL.

        Every API in ``KNOWN_BIGSDB_APIS`` is asked for its ``/db`` listing and
        databases whose name ends in ``seqdef`` are kept. The result is cached;
        pass ``force=True`` to fetch again.
        """
        cached = self._known_seqdef_dbs_origin
        if not force and cached is not None:
            return cached
        origin_by_database = dict()
        for api_root in BIGSdbIndex.KNOWN_BIGSDB_APIS:
            async with self._http_client.get(f"{api_root}/db") as response:
                database_groups = await response.json()
                for group in database_groups:
                    for info in group["databases"]:
                        if not str(info["name"]).endswith("seqdef"):
                            continue
                        origin_by_database[info["name"]] = api_root
        self._known_seqdef_dbs_origin = dict(origin_by_database)
        return self._known_seqdef_dbs_origin

    async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str:
        """Return the API base URL hosting ``seqdef_db_name``.

        Raises:
            NoSuchBIGSdbDatabaseException: The database is not among the known ones.
        """
        api_by_database = await self.get_known_seqdef_dbs()
        if seqdef_db_name in api_by_database:
            return api_by_database[seqdef_db_name]
        raise NoSuchBIGSdbDatabaseException(seqdef_db_name)

    async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
        """Return schema description -> schema ID for a database, caching the result.

        The ID is the last path segment of each entry's ``"scheme"`` value.
        Pass ``force=True`` to bypass the cache.
        """
        if force or seqdef_db_name not in self._seqdefdb_schemas:
            api_root = await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)
            async with self._http_client.get(f"{api_root}/db/{seqdef_db_name}/schemes") as response:
                payload = await response.json()
                schemas_by_description: dict[str, int] = dict()
                for entry in payload["schemes"]:
                    numeric_id = int(str(entry["scheme"]).split("/")[-1])
                    schemas_by_description[entry["description"]] = numeric_id
                self._seqdefdb_schemas[seqdef_db_name] = schemas_by_description
        return self._seqdefdb_schemas[seqdef_db_name]  # type: ignore

    async def build_profiler_from_seqdefdb(self, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
        """Create a ``BIGSdbMLSTProfiler`` for the given database and schema."""
        database_api = await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)
        return BIGSdbMLSTProfiler(database_api, dbseqdef_name, schema_id)

    async def close(self):
        """Close the underlying HTTP session."""
        await self._http_client.close()

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the session on context exit; exceptions are not suppressed."""
        await self.close()
|
||||
|
0
src/autobigs/engine/data/structures/__init__.py
Normal file
0
src/autobigs/engine/data/structures/__init__.py
Normal file
@ -25,7 +25,7 @@ class SangerTraceData(NamedString):
|
||||
analysis_proto_settings_name: str
|
||||
analysis_rpto_settings_ver: str
|
||||
analysis_proto_xml_data: str
|
||||
analysis_proto_xml_scheme_ver: str
|
||||
analysis_proto_xml_schema_ver: str
|
||||
sample_comment: Union[None, str]
|
||||
capillary_machine: bool
|
||||
container_identifier: str
|
21
src/autobigs/engine/data/structures/mlst.py
Normal file
21
src/autobigs/engine/data/structures/mlst.py
Normal file
@ -0,0 +1,21 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping, Sequence, Union
|
||||
|
||||
@dataclass(frozen=True)
class PartialAllelicMatchProfile:
    """Immutable statistics describing a partial (non-exact) allele match."""

    # Identity of the match, stored as a float.
    percent_identity: float
    # Number of mismatches in the match.
    mismatches: int
    # Bit score of the match.
    bitscore: float
    # Number of gaps in the match.
    gaps: int
|
||||
|
||||
@dataclass(frozen=True)
class Allele:
    """Immutable description of one allele call at a locus."""

    # Name of the locus the allele belongs to.
    allele_loci: str
    # Identifier of the allele variant.
    allele_variant: str
    # Match statistics, or None when no partial-match profile is attached.
    partial_match_profile: Union[None, PartialAllelicMatchProfile]
|
||||
|
||||
@dataclass(frozen=True)
class MLSTProfile:
    """Immutable MLST result: alleles per locus plus the scheme-level fields."""

    # Locus name -> alleles found for that locus.
    alleles: Mapping[str, Sequence[Allele]]
    # Sequence type, kept as a string.
    sequence_type: str
    # Clonal complex, kept as a string.
    clonal_complex: str
|
@ -3,25 +3,19 @@ from typing import Union
|
||||
class BIGSDbDatabaseAPIException(Exception):
|
||||
pass
|
||||
|
||||
class BIGSdbResponseNotOkay(BIGSDbDatabaseAPIException):
|
||||
pass
|
||||
|
||||
class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_scheme_id: int, query_name: Union[None, str], *args):
|
||||
self._query_name = query_name
|
||||
super().__init__(f"No matches found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||
|
||||
def get_causal_query_name(self) -> Union[str, None]:
|
||||
return self._query_name
|
||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
||||
super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
||||
|
||||
class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
|
||||
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||
super().__init__(f"No exact match found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
||||
super().__init__(f"No exact match found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
||||
|
||||
class NoSuchBIGSdbDatabaseException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, *args):
|
||||
super().__init__(f"No database \"{database_name}\" found.", *args)
|
||||
|
||||
class NoSuchBigSdbschemeException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||
super().__init__(f"No scheme with ID {database_scheme_id} in \"{database_name}\" found.", *args)
|
||||
class NoSuchBigSdbSchemaException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
||||
super().__init__(f"No schema with ID {database_schema_id} in \"{database_name}\" found.", *args)
|
||||
|
@ -1,21 +0,0 @@
|
||||
import asyncio
|
||||
from io import TextIOWrapper
|
||||
from os import path
|
||||
from typing import Any, AsyncGenerator, Iterable, Union
|
||||
from Bio import SeqIO
|
||||
|
||||
from autobigs.engine.structures.genomics import NamedString
|
||||
|
||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]:
    """Read every record of a FASTA file into a list of ``NamedString`` objects.

    Each record is named ``"<file basename>:<record id>"``.

    Args:
        handle: Path to, or open text handle of, the FASTA file.

    Returns:
        The records as ``NamedString`` objects, in file order.
    """
    def _parse_all() -> list:
        # SeqIO.parse only builds a lazy iterator; consuming it here keeps the
        # actual file reading and parsing in the worker thread instead of on
        # the event loop.
        parsed = []
        for fasta_sequence in SeqIO.parse(handle, "fasta"):
            source_name = path.basename(handle.name if isinstance(handle, TextIOWrapper) else handle)
            parsed.append(NamedString("{0}:{1}".format(source_name, fasta_sequence.id), str(fasta_sequence.seq)))
        return parsed

    return await asyncio.to_thread(_parse_all)
|
||||
|
||||
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
    """Read several FASTA sources concurrently, yielding each file's records as it finishes.

    One ``read_fasta`` coroutine is created per handle; results are yielded in
    completion order, which need not match the input order.
    """
    pending_reads = [read_fasta(fasta_source) for fasta_source in handles]
    for next_finished in asyncio.as_completed(pending_reads):
        yield await next_finished
|
@ -1,18 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
from numbers import Number
|
||||
from typing import Sequence
|
||||
|
||||
@dataclass(frozen=True)
class AlignmentStats:
    """Immutable summary statistics of an alignment."""

    # Identity of the alignment, stored as a float.
    percent_identity: float
    # Number of mismatches.
    mismatches: int
    # Number of gaps.
    gaps: int
    # Integer match metric; its exact meaning is not defined in this block.
    match_metric: int
|
||||
|
||||
@dataclass(frozen=True)
class PairwiseAlignment:
    """Immutable pairwise alignment of a query against a reference."""

    # Reference string of the alignment.
    reference: str
    # Query string of the alignment.
    query: str
    # Numeric indices associated with the reference.
    reference_indices: Sequence[Number]
    # Numeric indices associated with the query.
    query_indices: Sequence[Number]
    # Summary statistics for this alignment.
    alignment_stats: AlignmentStats
|
@ -1,33 +0,0 @@
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Collection, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from autobigs.engine.structures.alignment import AlignmentStats
|
||||
|
||||
@dataclass(frozen=True)
class Allele:
    """Immutable description of one allele call at a locus."""

    # Name of the locus the allele belongs to.
    allele_locus: str
    # Identifier of the allele variant.
    allele_variant: str
    # Alignment statistics, or None when no partial-match profile is attached.
    partial_match_profile: Union[None, AlignmentStats]
|
||||
|
||||
@dataclass(frozen=True)
class MLSTProfile:
    """Immutable MLST result: a collection of alleles plus the scheme-level fields."""

    # Alleles that make up the profile.
    alleles: Collection[Allele]
    # Sequence type, kept as a string.
    sequence_type: str
    # Clonal complex, kept as a string.
    clonal_complex: str
|
||||
|
||||
@dataclass(frozen=True)
class NamedMLSTProfile:
    """Immutable pairing of a name with an optional MLST profile."""

    # Name the profile is reported under.
    name: str
    # The profile itself, or None when there is none for this name.
    mlst_profile: Union[None, MLSTProfile]
|
||||
|
||||
|
||||
def alleles_to_mapping(alleles: Iterable[Allele]):
    """Group allele variants by locus.

    Returns:
        A dict of locus -> variant. A locus seen once maps to the bare variant;
        a locus seen several times maps to the list of its variants in input
        order.
    """
    variants_by_locus = defaultdict(list)
    for entry in alleles:
        variants_by_locus[entry.allele_locus].append(entry.allele_variant)
    return {
        locus: (variants[0] if len(variants) == 1 else variants)
        for locus, variants in variants_by_locus.items()
    }
|
@ -1,43 +0,0 @@
|
||||
from collections import defaultdict
|
||||
import csv
|
||||
from os import PathLike
|
||||
from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||
|
||||
|
||||
def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
    """Render alleles as text grouped by locus.

    Each variant is suffixed with ``*`` when the allele carries a partial-match
    profile. A locus with one allele maps to that single string; a locus with
    several maps to a tuple of strings.
    """
    texts_by_locus = defaultdict(list)
    for entry in alleles:
        variant = entry.allele_variant
        suffix = "*" if entry.partial_match_profile is not None else ""
        texts_by_locus[entry.allele_locus].append(variant + suffix)
    return {
        locus: (texts[0] if len(texts) == 1 else tuple(texts))
        for locus, texts in texts_by_locus.items()
    }
|
||||
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
    """Write named MLST profiles to a CSV file and report the names that had no profile.

    The header is ``id``, ``st``, ``clonal-complex`` followed by the sorted
    ``allele_names``; it is written when the first non-``None`` profile is
    seen, so the file stays empty if there is none.

    Args:
        mlst_profiles_iterable: Async stream of named profiles.
        handle: Path of the file to open for writing.
        allele_names: Locus names used as the allele columns.

    Returns:
        Names whose profile was ``None``, in the order encountered.
    """
    failed_names = list()
    with open(handle, "w", newline='') as csv_file:
        csv_writer: Union[csv.DictWriter, None] = None
        async for named_profile in mlst_profiles_iterable:
            profile = named_profile.mlst_profile
            if profile is None:
                failed_names.append(named_profile.name)
                continue
            allele_cells = alleles_to_text_map(profile.alleles)
            if csv_writer is None:
                columns = ["id", "st", "clonal-complex", *sorted(allele_names)]
                csv_writer = csv.DictWriter(csv_file, fieldnames=columns)
                csv_writer.writeheader()
            row = {
                "st": profile.sequence_type,
                "clonal-complex": profile.clonal_complex,
                "id": named_profile.name,
            }
            row.update(allele_cells)
            csv_writer.writerow(rowdict=row)
    return failed_names
|
@ -1,233 +0,0 @@
|
||||
from os import path
|
||||
import random
|
||||
import re
|
||||
from typing import Callable, Collection, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
import pytest
|
||||
from autobigs.engine.analysis import bigsdb
|
||||
from autobigs.engine.structures import mlst
|
||||
from autobigs.engine.structures.genomics import NamedString
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
||||
|
||||
async def generate_async_iterable(normal_iterable):
    """Adapt a regular iterable into an async generator yielding the same items in order."""
    for element in normal_iterable:
        yield element
|
||||
|
||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
    """Deterministically mutate positions of ``gene``.

    The random generator is seeded with ``gene`` itself, so the same input
    always produces the same output.

    Args:
        gene: Sequence to mutate.
        mutation_site_count: Number of positions to draw; a float is treated
            as a fraction of ``len(gene)``.
        alphabet: Symbols a drawn position may be replaced with.

    Returns:
        The mutated sequence, same length as ``gene``. Positions are drawn with
        replacement and a replacement may equal the original symbol, so fewer
        than ``mutation_site_count`` characters may actually differ.
    """
    rng = random.Random(gene)
    if isinstance(mutation_site_count, float):
        site_total = int(mutation_site_count * len(gene))
    else:
        site_total = mutation_site_count
    symbols = list(gene)
    for site in rng.choices(range(len(gene)), k=site_total):
        symbols[site] = rng.choice(alphabet)
    return "".join(symbols)
|
||||
|
||||
def get_first_sequence_from_fasta(resource: str):
    """Return, as a string, the sequence of the single record in a FASTA test resource.

    ``resource`` is resolved relative to ``tests/resources/``.
    """
    resource_path = path.join("tests/resources/", resource)
    record = SeqIO.read(resource_path, "fasta")
    return str(record.seq)
|
||||
|
||||
def get_multiple_sequences_from_fasta(resource: str):
    """Return all records of a FASTA test resource as a tuple.

    ``resource`` is resolved relative to ``tests/resources/``.
    """
    resource_path = path.join("tests/resources/", resource)
    records = SeqIO.parse(resource_path, "fasta")
    return tuple(records)
|
||||
|
||||
# Expected profile for the B. pertussis Tohama I fixture.
bpertussis_tohamaI_profile = MLSTProfile(
    (
        Allele("adk", "1", None),
        Allele("fumC", "1", None),
        Allele("glyA", "1", None),
        Allele("tyrB", "1", None),
        Allele("icd", "1", None),
        Allele("pepA", "1", None),
        Allele("pgm", "1", None),
    ),
    "1",
    "ST-2 complex",
)

# Allele combination for the same loci that is expected to resolve to no known ST.
bpertussis_tohamaI_bad_profile = MLSTProfile(
    (
        Allele("adk", "1", None),
        Allele("fumC", "2", None),
        Allele("glyA", "36", None),
        Allele("tyrB", "4", None),
        Allele("icd", "4", None),
        Allele("pepA", "1", None),
        Allele("pgm", "5", None),
    ),
    "unknown",
    "unknown",
)

# Expected profile for the H. influenzae 2014-102 fixture.
hinfluenzae_2014_102_profile = MLSTProfile(
    (
        Allele("adk", "28", None),
        Allele("atpG", "33", None),
        Allele("frdB", "7", None),
        Allele("fucK", "18", None),
        Allele("mdh", "11", None),
        Allele("pgi", "125", None),
        Allele("recA", "89", None),
    ),
    "478",
    "unknown",
)

# Allele combination for the same loci that is expected to resolve to no known ST.
hinfluenzae_2014_102_bad_profile = MLSTProfile(
    (
        Allele("adk", "3", None),
        Allele("atpG", "121", None),
        Allele("frdB", "6", None),
        Allele("fucK", "5", None),
        Allele("mdh", "12", None),
        Allele("pgi", "4", None),
        Allele("recA", "5", None),
    ),
    "unknown",
    "unknown",
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("local_db,database_api,database_name,scheme_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
    (False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
    (False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
])
class TestBIGSdbMLSTProfiler:
    """Tests for the MLST profiler, run once per parametrized database/scheme.

    Each case builds its profiler through ``bigsdb.get_BIGSdb_MLST_profiler``
    with the parametrized API URL, so the tests talk to the listed services.
    """

    async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """Every reported allele is an expected one, and every expected locus is reported."""
        query = get_first_sequence_from_fasta(seq_path)
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
            unseen_loci = set(mlst.alleles_to_mapping(expected_profile.alleles).keys())
            async for found in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[query]):
                assert isinstance(found, Allele)
                assert found.allele_locus in expected_alleles
                assert found.allele_variant == expected_alleles[found.allele_locus]
                unseen_loci.remove(found.allele_locus)

            assert len(unseen_loci) == 0

    async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """Scrambled copies of the scheme's genes come back as partial matches."""
        feature_records = get_multiple_sequences_from_fasta(feature_seqs_path)
        pending_genes = {locus.lower() for locus in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as profiler:
            for record in feature_records:
                gene_tag = re.fullmatch(r".*\[gene=([\w\d]+)\].*", record.description)
                if gene_tag is None:
                    continue
                gene = gene_tag.group(1).lower()
                if gene not in pending_genes:
                    continue
                mutated = gene_scrambler(str(record.seq), 0.125)
                async for partial_match in profiler.determine_mlst_allele_variants([mutated]):
                    assert isinstance(partial_match, Allele)
                    assert partial_match.partial_match_profile is not None
                    pending_genes.remove(gene)

            assert len(pending_genes) == 0

    async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """The expected alleles resolve to the expected ST and clonal complex."""
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            resolved = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
            assert resolved is not None
            assert isinstance(resolved, MLSTProfile)
            assert resolved.clonal_complex == expected_profile.clonal_complex
            assert resolved.sequence_type == expected_profile.sequence_type

    async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """An allele combination with no known ST resolves to ``unknown`` fields."""
        unmatched_alleles = bad_profile.alleles
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            resolved = await dummy_profiler.determine_mlst_st(unmatched_alleles)
            assert isinstance(resolved, MLSTProfile)
            assert resolved.clonal_complex == "unknown"
            assert resolved.sequence_type == "unknown"

    async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """Profiling the same sequence under two names gives the expected profile both times."""
        query = get_first_sequence_from_fasta(seq_path)
        batches = [[NamedString("seq1", query)], [NamedString("seq2", query)]]
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(batches)):
                name, profile = named_profile.name, named_profile.mlst_profile
                assert isinstance(profile, MLSTProfile)
                assert profile.clonal_complex == expected_profile.clonal_complex
                assert profile.sequence_type == expected_profile.sequence_type

    async def test_bigsdb_profile_named_string_no_repeat_name(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """``profile_string`` returns a named profile that keeps the input name."""
        query = get_first_sequence_from_fasta(seq_path)
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            named_profile = await dummy_profiler.profile_string([NamedString("BX470248.1", query)])
            assert isinstance(named_profile, NamedMLSTProfile)
            name, profile = named_profile.name, named_profile.mlst_profile
            assert isinstance(profile, MLSTProfile)
            assert profile.clonal_complex == expected_profile.clonal_complex
            assert profile.sequence_type == expected_profile.sequence_type
            assert name == "BX470248.1"

    async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """With the second positional argument ``True``, a scrambled middle sequence still gets an ``unknown`` profile and the run continues."""
        valid_seq = get_first_sequence_from_fasta(seq_path)
        batches = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(batches), True):
                name, profile = named_profile.name, named_profile.mlst_profile

                assert profile is not None
                if name == "should_fail":
                    assert profile.clonal_complex == "unknown"
                    assert profile.sequence_type == "unknown"
                    assert len(profile.alleles) > 0
                else:
                    assert isinstance(profile, MLSTProfile)
                    assert profile.clonal_complex == expected_profile.clonal_complex
                    assert profile.sequence_type == expected_profile.sequence_type

    async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        """With the second positional argument ``False``, a scrambled middle sequence still gets an ``unknown`` profile and the run continues."""
        valid_seq = get_first_sequence_from_fasta(seq_path)
        batches = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]

        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
            async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(batches), False):
                name, profile = named_profile.name, named_profile.mlst_profile

                assert profile is not None
                if name == "should_fail":
                    assert profile.clonal_complex == "unknown"
                    assert profile.sequence_type == "unknown"
                    assert len(profile.alleles) > 0
                else:
                    assert isinstance(profile, MLSTProfile)
                    assert profile.clonal_complex == expected_profile.clonal_complex
                    assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
class TestBIGSdbIndex:
    """Tests for ``BIGSdbIndex``; each test opens its own index."""

    async def test_bigsdb_index_all_databases_is_not_empty(self):
        """At least one sequence definition database is known."""
        async with BIGSdbIndex() as bigsdb_index:
            known = await bigsdb_index.get_known_seqdef_dbs()
            assert len(known) > 0

    async def test_bigsdb_index_references_pubmlst_correctly(self):
        """The H. influenzae database is attributed to the PubMLST API."""
        async with BIGSdbIndex() as bigsdb_index:
            api_url = await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")
            assert api_url == "https://rest.pubmlst.org"

    async def test_bigsdb_index_references_institutpasteur_correctly(self):
        """The Bordetella database is attributed to the Institut Pasteur API."""
        async with BIGSdbIndex() as bigsdb_index:
            api_url = await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")
            assert api_url == "https://bigsdb.pasteur.fr/api"

    async def test_bigsdb_index_get_schemes_for_bordetella(self):
        """The Bordetella database lists an ``MLST`` scheme with an integer ID."""
        async with BIGSdbIndex() as index:
            schemes = await index.get_schemes_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
            assert len(schemes.keys()) > 0
            assert "MLST" in schemes
            assert isinstance(schemes["MLST"], int)

    async def test_bigsdb_index_get_databases_has_only_seqdef(self):
        """Only names ending in ``seqdef`` are listed, including Bordetella's."""
        async with BIGSdbIndex() as index:
            databases = await index.get_known_seqdef_dbs()
            assert len(databases.keys()) > 0
            for database_name in databases.keys():
                assert database_name.endswith("seqdef")
            assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"

    @pytest.mark.parametrize("local", [
        (False)
    ])
    async def test_bigsdb_index_instantiates_correct_profiler(self, local):
        """A profiler built by the index profiles the Tohama I sequence as ST 1, ST-2 complex."""
        tohama_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
        sequence = str(tohama_record.seq)
        async with BIGSdbIndex() as bigsdb_index:
            async with await bigsdb_index.build_profiler_from_seqdefdb(local, "pubmlst_bordetella_seqdef", 3) as profiler:
                assert isinstance(profiler, BIGSdbMLSTProfiler)
                profile = await profiler.profile_string(sequence)
                assert isinstance(profile, MLSTProfile)
                assert profile.clonal_complex == "ST-2 complex"
                assert profile.sequence_type == "1"

    @pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
        ("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
    ])
    async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
        """The scheme's loci match the expected set, regardless of order."""
        async with BIGSdbIndex() as bigsdb_index:
            loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
            assert set(loci) == set(expected)
|
21
tests/autobigs/engine/data/local/test_csv.py
Normal file
21
tests/autobigs/engine/data/local/test_csv.py
Normal file
@ -0,0 +1,21 @@
|
||||
from autobigs.engine.data.local.csv import dict_loci_alleles_variants_from_loci
|
||||
from autobigs.engine.data.structures.mlst import Allele
|
||||
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
    """A locus with a single allele is reported as a plain string variant, not a list."""
    single_allele_map = {"adk": [Allele("adk", "1", None)]}
    variants_by_locus = dict_loci_alleles_variants_from_loci(single_allele_map)
    for _locus, reported_variant in variants_by_locus.items():
        assert isinstance(reported_variant, str)
        assert reported_variant == "1"
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
    """A locus with two alleles is reported as a list holding both variants."""
    adk_alleles = [Allele("adk", "1", None), Allele("adk", "2", None)]
    variants_by_locus = dict_loci_alleles_variants_from_loci({"adk": adk_alleles})
    for _locus, reported_variants in variants_by_locus.items():
        assert isinstance(reported_variants, list)
        assert len(reported_variants) == 2
|
7
tests/autobigs/engine/data/local/test_fasta.py
Normal file
7
tests/autobigs/engine/data/local/test_fasta.py
Normal file
@ -0,0 +1,7 @@
|
||||
from autobigs.engine.data.local.fasta import read_fasta
|
||||
|
||||
|
||||
async def test_fasta_reader_not_none():
    """Every record read from the Tohama I FASTA carries the expected sequence id."""
    # read_fasta is consumed as an async iterator here, so it is not awaited.
    async for record in read_fasta("tests/resources/tohama_I_bpertussis.fasta"):
        assert record.name == "BX470248.1"
|
244
tests/autobigs/engine/data/remote/databases/test_bigsdb.py
Normal file
244
tests/autobigs/engine/data/remote/databases/test_bigsdb.py
Normal file
@ -0,0 +1,244 @@
|
||||
import random
|
||||
import re
|
||||
from typing import Collection, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
import pytest
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
|
||||
|
||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ("A", "T", "C", "G")):
    """Return a deterministically mutated copy of ``gene``.

    The random generator is seeded with ``gene`` itself, so the same inputs
    always produce the same output.

    Args:
        gene: The sequence to mutate.
        mutation_site_count: How many positions to draw. A float is treated
            as a fraction of ``len(gene)`` and truncated to an int.
        alphabet: Symbols a drawn position may be overwritten with.

    Returns:
        A string of the same length as ``gene``. Positions are drawn with
        replacement and the replacement symbol may equal the original one,
        so fewer than ``mutation_site_count`` characters may actually differ.
    """
    rand = random.Random(gene)
    if isinstance(mutation_site_count, float):
        mutation_site_count = int(mutation_site_count * len(gene))
    # Nothing to mutate: also avoids drawing positions from an empty range.
    if not gene or mutation_site_count <= 0:
        return gene
    random_locations = rand.choices(range(len(gene)), k=mutation_site_count)
    scrambled = list(gene)
    for random_location in random_locations:
        scrambled[random_location] = rand.choice(alphabet)
    return "".join(scrambled)
|
||||
|
||||
async def test_institutpasteur_profiling_results_in_exact_matches_when_exact():
    """Exact matching on the Tohama I genome yields allele "1" once for each MLST locus."""
    genome = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        remaining_loci = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
        async for allele in profiler.fetch_mlst_allele_variants(sequence_string=genome, exact=True):
            assert isinstance(allele, Allele)
            assert allele.allele_variant == '1'  # Every Tohama I locus is expected to be allele 1
            # remove() raises KeyError if a locus is unexpected or reported twice.
            remaining_loci.remove(allele.allele_loci)

        assert len(remaining_loci) == 0
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_non_exact_returns_non_exact():
    """Scrambled MLST genes still produce partial matches for every target locus."""
    coding_records = list(SeqIO.parse("tests/resources/tohama_I_bpertussis_coding.fasta", "fasta"))
    pending_targets = {"adk", "fumc", "glya", "tyrb", "icd", "pepa", "pgm"}
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        for record in coding_records:
            # The gene name is carried in the FASTA description as "[gene=<name>]".
            gene_tag = re.fullmatch(r".*\[gene=([\w\d]+)\].*", record.description)
            if gene_tag is None:
                continue
            gene_key = gene_tag.group(1).lower()
            if gene_key not in pending_targets:
                continue
            mutated = gene_scrambler(str(record.seq), 0.125)
            async for partial in profiler.fetch_mlst_allele_variants(mutated, False):
                assert partial.partial_match_profile is not None
                pending_targets.remove(gene_key)

        assert len(pending_targets) == 0
|
||||
|
||||
async def test_institutpasteur_profiling_results_in_correct_mlst_st():
    """Seven loci all at allele "1" resolve to ST "1" in the "ST-2 complex"."""
    async def all_ones_alleles():
        # Loci are yielded in the same order as the original fixture list.
        for locus in ("adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"):
            yield Allele(locus, "1", None)

    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        resolved = await profiler.fetch_mlst_st(all_ones_alleles())
        assert resolved is not None
        assert isinstance(resolved, MLSTProfile)
        assert resolved.clonal_complex == "ST-2 complex"
        assert resolved.sequence_type == "1"
|
||||
|
||||
async def test_institutpasteur_profiling_non_exact_results_in_list_of_mlsts():
    """This allele combination resolves to an "unknown" ST and clonal complex."""
    variant_by_locus = (
        ("adk", "1"),
        ("fumC", "2"),
        ("glyA", "36"),
        ("tyrB", "4"),
        ("icd", "4"),
        ("pepA", "1"),
        ("pgm", "5"),
    )
    # A plain list (not an async generator) is handed to fetch_mlst_st here.
    allele_list = [Allele(locus, variant, None) for locus, variant in variant_by_locus]
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        resolved = await profiler.fetch_mlst_st(allele_list)
        assert resolved.clonal_complex == "unknown"
        assert resolved.sequence_type == "unknown"
|
||||
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_is_correct():
    """Profiling the whole Tohama I genome yields ST "1" in the "ST-2 complex"."""
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    genome = str(fasta_record.seq)
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        result = await profiler.profile_string(genome)
        assert result is not None
        assert isinstance(result, MLSTProfile)
        assert result.clonal_complex == "ST-2 complex"
        assert result.sequence_type == "1"
|
||||
|
||||
|
||||
async def test_pubmlst_profiling_results_in_exact_matches_when_exact():
    """Exact matching on FDAARGOS_1560 yields exactly the expected set of alleles."""
    expected_alleles = {
        Allele("adk", "1", None),
        Allele("atpG", "1", None),
        Allele("frdB", "1", None),
        Allele("fucK", "1", None),
        Allele("mdh", "1", None),
        Allele("pgi", "1", None),
        Allele("recA", "5", None),
    }
    genome = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as profiler:
        async for found in profiler.fetch_mlst_allele_variants(sequence_string=genome, exact=True):
            assert isinstance(found, Allele)
            # remove() raises KeyError if an allele outside the expected set is reported.
            expected_alleles.remove(found)

        assert len(expected_alleles) == 0
|
||||
|
||||
async def test_pubmlst_profiling_results_in_correct_st():
    """The H. influenzae allele set resolves to ST "3" in the "ST-3 complex"."""
    async def known_alleles():
        locus_variants = (
            ("adk", "1"),
            ("atpG", "1"),
            ("frdB", "1"),
            ("fucK", "1"),
            ("mdh", "1"),
            ("pgi", "1"),
            ("recA", "5"),
        )
        for locus, variant in locus_variants:
            yield Allele(locus, variant, None)

    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as profiler:
        resolved = await profiler.fetch_mlst_st(known_alleles())
        assert resolved is not None
        assert isinstance(resolved, MLSTProfile)
        assert resolved.clonal_complex == "ST-3 complex"
        assert resolved.sequence_type == "3"
|
||||
|
||||
async def test_pubmlst_sequence_profiling_is_correct():
    """Profiling the FDAARGOS_1560 genome yields ST "3" in the "ST-3 complex"."""
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    genome = str(fasta_record.seq)
    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as profiler:
        result = await profiler.profile_string(genome)
        assert result is not None
        assert isinstance(result, MLSTProfile)
        assert result.clonal_complex == "ST-3 complex"
        assert result.sequence_type == "3"
|
||||
|
||||
async def test_bigsdb_index_all_databases_is_not_empty():
    """The index knows at least one seqdef database."""
    async with BIGSdbIndex() as index:
        known_databases = await index.get_known_seqdef_dbs()
        assert len(known_databases) > 0
|
||||
|
||||
async def test_bigsdb_index_references_pubmlst_correctly():
    """The H. influenzae seqdef database maps to the PubMLST REST endpoint."""
    async with BIGSdbIndex() as index:
        api_url = await index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")
        assert api_url == "https://rest.pubmlst.org"
|
||||
|
||||
async def test_bigsdb_index_references_institutpasteur_correctly():
    """The Bordetella seqdef database maps to the Institut Pasteur endpoint."""
    async with BIGSdbIndex() as index:
        api_url = await index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")
        assert api_url == "https://bigsdb.pasteur.fr/api"
|
||||
|
||||
|
||||
async def test_bigsdb_index_instantiates_correct_profiler():
    """A profiler built through the index profiles the Tohama I genome as ST 1."""
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    genome = str(fasta_record.seq)
    async with BIGSdbIndex() as index:
        async with await index.build_profiler_from_seqdefdb("pubmlst_bordetella_seqdef", 3) as built_profiler:
            result = await built_profiler.profile_string(genome)
            assert result.clonal_complex == "ST-2 complex"
            assert result.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_same_string_twice():
    """Profiling the same genome under two names gives the same ST 1 profile each time."""
    genome = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    named_inputs = [NamedString("seq1", genome), NamedString("seq2", genome)]

    async def stream_inputs():
        for named_input in named_inputs:
            yield named_input

    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        async for _name, result in profiler.profile_multiple_strings(stream_inputs()):
            assert result is not None
            assert isinstance(result, MLSTProfile)
            assert result.clonal_complex == "ST-2 complex"
            assert result.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop():
    """With exact matching, the scrambled entry yields ``None`` and the others still profile."""
    genome = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    named_inputs = [
        NamedString("seq1", genome),
        NamedString("should_fail", gene_scrambler(genome, 0.3)),
        NamedString("seq3", genome),
    ]

    async def stream_inputs():
        for named_input in named_inputs:
            yield named_input

    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        async for name, result in profiler.profile_multiple_strings(stream_inputs(), True):
            if name == "should_fail":
                assert result is None
                continue
            assert result is not None
            assert isinstance(result, MLSTProfile)
            assert result.clonal_complex == "ST-2 complex"
            assert result.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop():
    """With non-exact matching, the scrambled entry yields an "unknown" profile with alleles."""
    genome = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    named_inputs = [
        NamedString("seq1", genome),
        NamedString("should_fail", gene_scrambler(genome, 0.3)),
        NamedString("seq3", genome),
    ]

    async def stream_inputs():
        for named_input in named_inputs:
            yield named_input

    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        async for name, result in profiler.profile_multiple_strings(stream_inputs(), False):
            if name == "should_fail":
                assert result is not None
                assert result.clonal_complex == "unknown"
                assert result.sequence_type == "unknown"
                assert len(result.alleles) > 0
                continue
            assert result is not None
            assert isinstance(result, MLSTProfile)
            assert result.clonal_complex == "ST-2 complex"
            assert result.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_fail_second_stop():
    """With ``stop_on_fail=True``, the unmatched genome raises ``NoBIGSdbMatchesException``."""
    matching_genome = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    foreign_genome = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
    named_inputs = [
        NamedString("seq1", matching_genome),
        NamedString("should_fail", foreign_genome),
        NamedString("seq3", matching_genome),
    ]

    async def stream_inputs():
        for named_input in named_inputs:
            yield named_input

    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        with pytest.raises(NoBIGSdbMatchesException):
            async for name, result in profiler.profile_multiple_strings(stream_inputs(), exact=True, stop_on_fail=True):
                if name == "should_fail":
                    # Reaching this point means the failing entry was yielded instead of raising.
                    pytest.fail("Exception should have been thrown, no exception was thrown.")
                else:
                    assert result is not None
                    assert isinstance(result, MLSTProfile)
                    assert result.clonal_complex == "ST-2 complex"
                    assert result.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_index_get_schemas_for_bordetella():
    """The Bordetella database lists schemas, including an integer-valued "MLST" entry."""
    async with BIGSdbIndex() as bigsdb_index:
        available_schemas = await bigsdb_index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
        assert len(available_schemas.keys()) > 0
        assert "MLST" in available_schemas
        assert isinstance(available_schemas["MLST"], int)
|
||||
|
||||
async def test_bigsdb_index_get_databases_has_only_seqdef():
    """Every known database name ends in "seqdef" and Bordetella points at Pasteur."""
    async with BIGSdbIndex() as bigsdb_index:
        known_databases = await bigsdb_index.get_known_seqdef_dbs()
        assert len(known_databases.keys()) > 0
        for db_name in known_databases.keys():
            assert db_name.endswith("seqdef")
        assert known_databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"
|
@ -1,12 +0,0 @@
|
||||
from autobigs.engine.reading import read_fasta
|
||||
|
||||
|
||||
async def test_fasta_reader_not_none():
    """Every record returned for the Tohama I FASTA has a name."""
    # In this version read_fasta is awaited and its result iterated synchronously.
    records = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
    for record in records:
        assert record.name is not None
|
||||
|
||||
async def test_fasta_reader_name_contains_file_and_id():
    """Record names take the form "<file name>:<sequence id>"."""
    records = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
    for record in records:
        assert record.name == "tohama_I_bpertussis.fasta:BX470248.1"
|
@ -1,70 +0,0 @@
|
||||
from typing import AsyncIterable, Iterable
|
||||
|
||||
import pytest
|
||||
from autobigs.engine.structures.alignment import AlignmentStats
|
||||
from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||
import tempfile
|
||||
from csv import reader
|
||||
from os import path
|
||||
|
||||
|
||||
@pytest.fixture
def dummy_alphabet_mlst_profile():
    """Named profile with loci A, D, B, C, where C carries a second allele with alignment stats."""
    alleles = (
        Allele("A", "1", None),
        Allele("D", "1", None),
        Allele("B", "1", None),
        Allele("C", "1", None),
        Allele("C", "2", AlignmentStats(90, 10, 0, 90)),
    )
    profile = MLSTProfile(alleles, "mysterious", "very mysterious")
    return NamedMLSTProfile("name", profile)
|
||||
|
||||
async def iterable_to_asynciterable(iterable: Iterable):
    """Expose a synchronous iterable as an async generator yielding the same items in order."""
    for element in iterable:
        yield element
|
||||
|
||||
async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: NamedMLSTProfile):
    """The locus columns of the written CSV header come out in sorted order.

    The fixture provides a ``NamedMLSTProfile``; the loci are passed to the
    writer unsorted ("A", "D", "B", "C") so that ordering is actually exercised.
    """
    dummy_profiles = [dummy_alphabet_mlst_profile]
    with tempfile.TemporaryDirectory() as temp_dir:
        output_path = path.join(temp_dir, "out.csv")
        await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
        # The file must be read before the temporary directory is cleaned up.
        with open(output_path) as csv_handle:
            csv_reader = reader(csv_handle)
            lines = list(csv_reader)
            # The first three header columns are skipped; the rest are compared as locus columns.
            target_columns = lines[0][3:]
            assert target_columns == sorted(target_columns)
|
||||
|
||||
async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_alphabet_mlst_profile):
    """The first data row of the written CSV starts with the plain sample name."""
    profiles = [dummy_alphabet_mlst_profile]
    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, "out.csv")
        await write_mlst_profiles_as_csv(iterable_to_asynciterable(profiles), csv_path, ["A", "D", "B", "C"])
        # The file must be read before the temporary directory is cleaned up.
        with open(csv_path) as handle:
            rows = list(reader(handle))
            first_cell = rows[1][0]
            assert first_cell == "name"
|
||||
|
||||
|
||||
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
    """Each mapped locus has the expected text; the allele with alignment stats is starred."""
    profile = dummy_alphabet_mlst_profile.mlst_profile
    assert profile is not None
    actual = alleles_to_text_map(profile.alleles)
    expected = {
        "A": "1",
        "B": "1",
        "C": ("1", "2*"),
        "D": "1"
    }
    # Only the keys present in the produced mapping are checked.
    for locus, text in actual.items():
        assert locus in expected
        assert text == expected[locus]
|
||||
|
||||
async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_profile: NamedMLSTProfile):
    """The written CSV marks the non-exact allele with an asterisk in column index 5."""
    profiles = [dummy_alphabet_mlst_profile]
    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, "out.csv")
        await write_mlst_profiles_as_csv(iterable_to_asynciterable(profiles), csv_path, ["A", "D", "B", "C"])
        # The file must be read before the temporary directory is cleaned up.
        with open(csv_path) as handle:
            rows = list(reader(handle))
            assert '*' in rows[1][5]
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,133 +0,0 @@
|
||||
>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||
>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene]
|
||||
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||
>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene]
|
||||
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||
>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene]
|
||||
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||
>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene]
|
||||
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||
>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene]
|
||||
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||
>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene]
|
||||
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||
GGTGCTGAAGTAG
|
@ -1,133 +0,0 @@
|
||||
>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||
>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS]
|
||||
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||
>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS]
|
||||
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||
>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS]
|
||||
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||
>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS]
|
||||
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||
>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS]
|
||||
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||
>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS]
|
||||
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||
GGTGCTGAAGTAG
|
23635
tests/resources/FDAARGOS_1560.fasta
Normal file
23635
tests/resources/FDAARGOS_1560.fasta
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,11 +0,0 @@
|
||||
>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
Loading…
x
Reference in New Issue
Block a user