Compare commits
27 Commits
62ce1c9b2f
...
0.14.1
Author | SHA1 | Date | |
---|---|---|---|
14d760a266 | |||
f8d92a4aad | |||
34bf02c75a | |||
3cb10a4609 | |||
1776f5aa51 | |||
96d715fdcb | |||
e088d1080b | |||
8ffc7c7fb5 | |||
af7edf0942 | |||
481870db97 | |||
62fdada9c1 | |||
3074997db6 | |||
3d6f36a722 | |||
bbd9e67c8c | |||
2ea2f63f29 | |||
17932ecd71 | |||
6cdc4ff4ae | |||
4b34036d17 | |||
27ae89fde7 | |||
06dbb56c28 | |||
79fcce8b84 | |||
f4064f087e | |||
276665f5fd | |||
fd536862e2 | |||
576dc303f4 | |||
2822a483e3 | |||
b8cebb8ba4 |
11
.devcontainer/Dockerfile
Normal file
11
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
FROM mcr.microsoft.com/devcontainers/anaconda:1-3
|
||||||
|
|
||||||
|
# Copy environment.yml (if found) to a temp location so we update the environment. Also
|
||||||
|
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
|
||||||
|
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
|
||||||
|
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
|
||||||
|
&& rm -rf /tmp/conda-tmp
|
||||||
|
|
||||||
|
# [Optional] Uncomment this section to install additional OS packages.
|
||||||
|
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||||
|
# && apt-get -y install --no-install-recommends <your-package-list-here>
|
@@ -1,9 +1,11 @@
|
|||||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||||
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
|
||||||
{
|
{
|
||||||
"name": "Python 3",
|
"name": "Anaconda (Python 3)",
|
||||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
"build": {
|
||||||
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
|
"context": "..",
|
||||||
|
"dockerfile": "Dockerfile"
|
||||||
|
}
|
||||||
|
|
||||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||||
// "features": {},
|
// "features": {},
|
||||||
@@ -12,14 +14,7 @@
|
|||||||
// "forwardPorts": [],
|
// "forwardPorts": [],
|
||||||
|
|
||||||
// Use 'postCreateCommand' to run commands after the container is created.
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
"postCreateCommand": "pip3 install --user -r requirements.txt",
|
// "postCreateCommand": "python --version",
|
||||||
"customizations": {
|
|
||||||
"vscode": {
|
|
||||||
"extensions": [
|
|
||||||
"mechatroner.rainbow-csv"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Configure tool-specific properties.
|
// Configure tool-specific properties.
|
||||||
// "customizations": {},
|
// "customizations": {},
|
||||||
|
3
.devcontainer/noop.txt
Normal file
3
.devcontainer/noop.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
This file copied into the container along with environment.yml* from the parent
|
||||||
|
folder. This file is included to prevents the Dockerfile COPY instruction from
|
||||||
|
failing if no environment.yml is found.
|
159
.gitignore
vendored
159
.gitignore
vendored
@@ -1,6 +1,6 @@
|
|||||||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||||
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||||
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
|
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
|
||||||
|
|
||||||
### Linux ###
|
### Linux ###
|
||||||
*~
|
*~
|
||||||
@@ -17,146 +17,6 @@
|
|||||||
# .nfs files are created when an open file is removed but is still being accessed
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
.nfs*
|
.nfs*
|
||||||
|
|
||||||
### Node ###
|
|
||||||
# Logs
|
|
||||||
logs
|
|
||||||
*.log
|
|
||||||
npm-debug.log*
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
.pnpm-debug.log*
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
node_modules/
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# Snowpack dependency directory (https://snowpack.dev/)
|
|
||||||
web_modules/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
.stylelintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variable files
|
|
||||||
.env
|
|
||||||
.env.development.local
|
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
.cache
|
|
||||||
.parcel-cache
|
|
||||||
|
|
||||||
# Next.js build output
|
|
||||||
.next
|
|
||||||
out
|
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
.cache/
|
|
||||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
|
||||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
|
||||||
# public
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# vuepress v2.x temp and cache directory
|
|
||||||
.temp
|
|
||||||
|
|
||||||
# Docusaurus cache and generated files
|
|
||||||
.docusaurus
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# yarn v2
|
|
||||||
.yarn/cache
|
|
||||||
.yarn/unplugged
|
|
||||||
.yarn/build-state.yml
|
|
||||||
.yarn/install-state.gz
|
|
||||||
.pnp.*
|
|
||||||
|
|
||||||
### Node Patch ###
|
|
||||||
# Serverless Webpack directories
|
|
||||||
.webpack/
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
|
|
||||||
# SvelteKit build / generate output
|
|
||||||
.svelte-kit
|
|
||||||
|
|
||||||
### Python ###
|
### Python ###
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
@@ -202,6 +62,7 @@ htmlcov/
|
|||||||
.nox/
|
.nox/
|
||||||
.coverage
|
.coverage
|
||||||
.coverage.*
|
.coverage.*
|
||||||
|
.cache
|
||||||
nosetests.xml
|
nosetests.xml
|
||||||
coverage.xml
|
coverage.xml
|
||||||
*.cover
|
*.cover
|
||||||
@@ -215,6 +76,7 @@ cover/
|
|||||||
*.pot
|
*.pot
|
||||||
|
|
||||||
# Django stuff:
|
# Django stuff:
|
||||||
|
*.log
|
||||||
local_settings.py
|
local_settings.py
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
db.sqlite3-journal
|
db.sqlite3-journal
|
||||||
@@ -278,6 +140,7 @@ celerybeat.pid
|
|||||||
*.sage.py
|
*.sage.py
|
||||||
|
|
||||||
# Environments
|
# Environments
|
||||||
|
.env
|
||||||
.venv
|
.venv
|
||||||
env/
|
env/
|
||||||
venv/
|
venv/
|
||||||
@@ -326,13 +189,6 @@ poetry.toml
|
|||||||
# LSP config files
|
# LSP config files
|
||||||
pyrightconfig.json
|
pyrightconfig.json
|
||||||
|
|
||||||
### Svelte ###
|
|
||||||
# gitignore template for the SvelteKit, frontend web component framework
|
|
||||||
# website: https://kit.svelte.dev/
|
|
||||||
|
|
||||||
.svelte-kit/
|
|
||||||
package
|
|
||||||
|
|
||||||
### VisualStudioCode ###
|
### VisualStudioCode ###
|
||||||
.vscode/*
|
.vscode/*
|
||||||
!.vscode/settings.json
|
!.vscode/settings.json
|
||||||
@@ -352,9 +208,8 @@ package
|
|||||||
.history
|
.history
|
||||||
.ionide
|
.ionide
|
||||||
|
|
||||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
|
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||||
|
|
||||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
output
|
conda-bld
|
||||||
*.private.*
|
|
6
.vscode/extensions.json
vendored
Normal file
6
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"recommendations": [
|
||||||
|
"piotrpalarz.vscode-gitignore-generator",
|
||||||
|
"gruntfuggly.todo-tree"
|
||||||
|
]
|
||||||
|
}
|
25
.vscode/launch.json
vendored
25
.vscode/launch.json
vendored
@@ -1,25 +0,0 @@
|
|||||||
{
|
|
||||||
// Use IntelliSense to learn about possible attributes.
|
|
||||||
// Hover to view descriptions of existing attributes.
|
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
|
|
||||||
{
|
|
||||||
"name": "autobigs info -lschema pubmlst_bordetella_seqdef",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceFolder}/src/autobigs/cli/program.py",
|
|
||||||
"console": "integratedTerminal",
|
|
||||||
"args": [
|
|
||||||
"info",
|
|
||||||
"-lschemas",
|
|
||||||
"pubmlst_bordetella_seqdef"
|
|
||||||
],
|
|
||||||
"cwd": "${workspaceFolder}/src",
|
|
||||||
"env": {
|
|
||||||
"PYTHONPATH": "${workspaceFolder}/src"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
22
Jenkinsfile
vendored
22
Jenkinsfile
vendored
@@ -2,14 +2,14 @@ pipeline {
|
|||||||
agent {
|
agent {
|
||||||
kubernetes {
|
kubernetes {
|
||||||
cloud 'rsys-devel'
|
cloud 'rsys-devel'
|
||||||
defaultContainer 'pip'
|
defaultContainer 'miniforge3'
|
||||||
inheritFrom 'pip'
|
inheritFrom 'miniforge'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
stage("install") {
|
stage("install") {
|
||||||
steps {
|
steps {
|
||||||
sh 'python -m pip install -r requirements.txt'
|
sh 'conda env update -n base -f environment.yml'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("unit tests") {
|
stage("unit tests") {
|
||||||
@@ -22,26 +22,36 @@ pipeline {
|
|||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
sh "python -m build"
|
sh "python -m build"
|
||||||
|
sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
|
||||||
|
sh "python scripts/patch_recipe.py"
|
||||||
|
sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("archive") {
|
stage("archive") {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("publish") {
|
stage("publish") {
|
||||||
parallel {
|
parallel {
|
||||||
stage ("git.reslate.systems") {
|
stage ("git.reslate.systems") {
|
||||||
|
when {
|
||||||
|
not {
|
||||||
|
tag '*.*.*'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
environment {
|
environment {
|
||||||
CREDS = credentials('username-password-rs-git')
|
CREDS = credentials('username-password-rs-git')
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
|
sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage ("pypi.org") {
|
stage ("pypi.org") {
|
||||||
when {
|
when {
|
||||||
tag '*.*'
|
tag '*.*.*'
|
||||||
}
|
}
|
||||||
environment {
|
environment {
|
||||||
TOKEN = credentials('pypi.org')
|
TOKEN = credentials('pypi.org')
|
||||||
|
@@ -8,7 +8,7 @@ A python library implementing common BIGSdb MLST schemes and databases accesses
|
|||||||
Briefly, this library can:
|
Briefly, this library can:
|
||||||
- Import multiple `FASTA` files
|
- Import multiple `FASTA` files
|
||||||
- Fetch the available BIGSdb databases that is currently live and available
|
- Fetch the available BIGSdb databases that is currently live and available
|
||||||
- Fetch the available BIGSdb database schemas for a given MLST database
|
- Fetch the available BIGSdb database schemes for a given MLST database
|
||||||
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
||||||
- Retrieve MLST sequence type IDs based off a sequence
|
- Retrieve MLST sequence type IDs based off a sequence
|
||||||
- Output all results to a single CSV
|
- Output all results to a single CSV
|
||||||
|
16
environment.yml
Normal file
16
environment.yml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
name: ci
|
||||||
|
channels:
|
||||||
|
- bioconda
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- aiohttp==3.11.*
|
||||||
|
- biopython==1.85
|
||||||
|
- pytest
|
||||||
|
- pytest-asyncio
|
||||||
|
- python-build
|
||||||
|
- conda-build
|
||||||
|
- twine==6.0.1
|
||||||
|
- setuptools_scm
|
||||||
|
- pytest-cov
|
||||||
|
- grayskull
|
||||||
|
- curl
|
@@ -1,8 +0,0 @@
|
|||||||
aiohttp[speedups]==3.11.*
|
|
||||||
biopython==1.85
|
|
||||||
pytest
|
|
||||||
pytest-asyncio
|
|
||||||
build
|
|
||||||
twine
|
|
||||||
setuptools_scm
|
|
||||||
pytest-cov
|
|
103
scripts/patch_recipe.py
Normal file
103
scripts/patch_recipe.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from os import fdopen, path
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from sys import argv
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
INDENTATION = " "
|
||||||
|
GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
|
||||||
|
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
|
||||||
|
LICENSE_SUFFIX = "-or-later"
|
||||||
|
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
|
||||||
|
|
||||||
|
def _calc_indentation(line: str):
|
||||||
|
return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0
|
||||||
|
|
||||||
|
def read_grayskull_output():
|
||||||
|
original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
|
||||||
|
original_meta = path.join(original_recipe, "meta.yaml")
|
||||||
|
meta_file = open(original_meta)
|
||||||
|
lines = meta_file.readlines()
|
||||||
|
meta_file.close()
|
||||||
|
return lines
|
||||||
|
|
||||||
|
def update_naming_scheme(lines):
|
||||||
|
modified_lines = []
|
||||||
|
for line in lines:
|
||||||
|
matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
|
||||||
|
modified_line = line
|
||||||
|
for match in matches:
|
||||||
|
modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
|
||||||
|
modified_lines.append(modified_line)
|
||||||
|
return modified_lines
|
||||||
|
|
||||||
|
def inject_run_exports(lines: list[str]):
|
||||||
|
package_indent = False
|
||||||
|
modified_lines = []
|
||||||
|
for line in lines:
|
||||||
|
indentation_count = _calc_indentation(line)
|
||||||
|
if line == "build:\n" and indentation_count == 0:
|
||||||
|
package_indent = True
|
||||||
|
modified_lines.append(line)
|
||||||
|
elif package_indent and indentation_count == 0:
|
||||||
|
modified_lines.append(INDENTATION*1 + "run_exports:\n")
|
||||||
|
modified_lines.append(INDENTATION*2 + "- " + RUN_EXPORTED_VALUE + "\n")
|
||||||
|
package_indent = False
|
||||||
|
else:
|
||||||
|
modified_lines.append(line)
|
||||||
|
return modified_lines
|
||||||
|
|
||||||
|
def suffix_license(lines: list[str]):
|
||||||
|
about_indent = False
|
||||||
|
modified_lines = []
|
||||||
|
for line in lines:
|
||||||
|
indentation_count = _calc_indentation(line)
|
||||||
|
if line == "about:\n" and indentation_count == 0:
|
||||||
|
about_indent = True
|
||||||
|
modified_lines.append(line)
|
||||||
|
elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
|
||||||
|
modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
|
||||||
|
about_indent = False
|
||||||
|
else:
|
||||||
|
modified_lines.append(line)
|
||||||
|
return modified_lines
|
||||||
|
|
||||||
|
def inject_home_page(lines: list[str]):
|
||||||
|
about_indent = False
|
||||||
|
modified_lines = []
|
||||||
|
for line in lines:
|
||||||
|
indentation_count = _calc_indentation(line)
|
||||||
|
if line == "about:\n" and indentation_count == 0:
|
||||||
|
about_indent = True
|
||||||
|
modified_lines.append(line)
|
||||||
|
elif about_indent and indentation_count == 0:
|
||||||
|
modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
|
||||||
|
about_indent = False
|
||||||
|
else:
|
||||||
|
modified_lines.append(line)
|
||||||
|
return modified_lines
|
||||||
|
|
||||||
|
def write_to_original(lines: list[str]):
|
||||||
|
original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
|
||||||
|
original_meta = path.join(original_recipe, "meta.yaml")
|
||||||
|
with open(original_meta, "w") as file:
|
||||||
|
file.writelines(lines)
|
||||||
|
|
||||||
|
def rename_recipe_dir():
|
||||||
|
new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
|
||||||
|
shutil.rmtree(new_recipe_name, ignore_errors=True)
|
||||||
|
os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
original_grayskull_out = read_grayskull_output()
|
||||||
|
modified_recipe_meta = None
|
||||||
|
modified_recipe_meta = update_naming_scheme(original_grayskull_out)
|
||||||
|
modified_recipe_meta = inject_run_exports(modified_recipe_meta)
|
||||||
|
modified_recipe_meta = suffix_license(modified_recipe_meta)
|
||||||
|
modified_recipe_meta = inject_home_page(modified_recipe_meta)
|
||||||
|
write_to_original(modified_recipe_meta)
|
||||||
|
rename_recipe_dir()
|
@@ -7,9 +7,9 @@ from os import path
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequence, Set, Union
|
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
||||||
|
|
||||||
from aiohttp import ClientSession, ClientTimeout
|
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
|
||||||
|
|
||||||
from autobigs.engine.reading import read_fasta
|
from autobigs.engine.reading import read_fasta
|
||||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||||
@@ -22,15 +22,15 @@ from Bio.Align import PairwiseAligner
|
|||||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
|
def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
|
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@@ -43,98 +43,157 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
|||||||
|
|
||||||
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||||
|
|
||||||
def __init__(self, database_api: str, database_name: str, schema_id: int):
|
def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5, timeout: int = 600):
|
||||||
|
self._timeout = timeout
|
||||||
|
self._retry_limit = retry_requests
|
||||||
self._database_name = database_name
|
self._database_name = database_name
|
||||||
self._schema_id = schema_id
|
self._scheme_id = scheme_id
|
||||||
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
|
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
|
||||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
|
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(timeout))
|
||||||
|
|
||||||
async def __aenter__(self):
|
async def __aenter__(self):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[str], str]) -> AsyncGenerator[Allele, Any]:
|
async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
|
||||||
# See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
|
# See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
|
||||||
uri_path = "sequence"
|
uri_path = "sequence"
|
||||||
if isinstance(query_sequence_strings, str):
|
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
|
||||||
query_sequence_strings = [query_sequence_strings]
|
query_sequence_strings = [query_sequence_strings]
|
||||||
|
|
||||||
for sequence_string in query_sequence_strings:
|
for sequence_string in query_sequence_strings:
|
||||||
async with self._http_client.post(uri_path, json={
|
attempts = 0
|
||||||
"sequence": sequence_string,
|
success = False
|
||||||
"partial_matches": True
|
last_error = None
|
||||||
}) as response:
|
while not success and attempts < self._retry_limit:
|
||||||
sequence_response: dict = await response.json()
|
attempts += 1
|
||||||
|
request = self._http_client.post(uri_path, json={
|
||||||
|
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||||
|
"partial_matches": True
|
||||||
|
})
|
||||||
|
try:
|
||||||
|
async with request as response:
|
||||||
|
sequence_response: dict = await response.json()
|
||||||
|
|
||||||
if "exact_matches" in sequence_response:
|
if "exact_matches" in sequence_response:
|
||||||
# loci -> list of alleles with id and loci
|
# loci -> list of alleles with id and loci
|
||||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||||
for allele_loci, alleles in exact_matches.items():
|
for allele_loci, alleles in exact_matches.items():
|
||||||
for allele in alleles:
|
for allele in alleles:
|
||||||
alelle_id = allele["allele_id"]
|
alelle_id = allele["allele_id"]
|
||||||
yield Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||||
elif "partial_matches" in sequence_response:
|
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
elif "partial_matches" in sequence_response:
|
||||||
for allele_loci, partial_match in partial_matches.items():
|
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||||
if len(partial_match) <= 0:
|
for allele_loci, partial_match in partial_matches.items():
|
||||||
continue
|
if len(partial_match) <= 0:
|
||||||
partial_match_profile = AlignmentStats(
|
continue
|
||||||
percent_identity=float(partial_match["identity"]),
|
partial_match_profile = AlignmentStats(
|
||||||
mismatches=int(partial_match["mismatches"]),
|
percent_identity=float(partial_match["identity"]),
|
||||||
gaps=int(partial_match["gaps"]),
|
mismatches=int(partial_match["mismatches"]),
|
||||||
match_metric=int(partial_match["bitscore"])
|
gaps=int(partial_match["gaps"]),
|
||||||
)
|
match_metric=int(partial_match["bitscore"])
|
||||||
yield Allele(
|
)
|
||||||
allele_locus=allele_loci,
|
result_allele = Allele(
|
||||||
allele_variant=str(partial_match["allele"]),
|
allele_locus=allele_loci,
|
||||||
partial_match_profile=partial_match_profile
|
allele_variant=str(partial_match["allele"]),
|
||||||
)
|
partial_match_profile=partial_match_profile
|
||||||
|
)
|
||||||
|
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||||
|
else:
|
||||||
|
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
|
||||||
|
last_error = e
|
||||||
|
success = False
|
||||||
|
await asyncio.sleep(5) # In case the connection issue is due to rate issues
|
||||||
else:
|
else:
|
||||||
raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
|
success = True
|
||||||
|
if not success and last_error is not None:
|
||||||
|
try:
|
||||||
|
raise last_error
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Non-fatal errors
|
||||||
|
yield Allele("error", "error", None)
|
||||||
|
|
||||||
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
|
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
||||||
uri_path = "designations"
|
uri_path = "designations"
|
||||||
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
|
||||||
|
names_list = []
|
||||||
|
def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
|
||||||
|
if isinstance(allele, Allele):
|
||||||
|
allele_val = allele
|
||||||
|
else:
|
||||||
|
allele_val = allele[1]
|
||||||
|
names_list.append(allele[0])
|
||||||
|
allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})
|
||||||
|
|
||||||
if isinstance(alleles, AsyncIterable):
|
if isinstance(alleles, AsyncIterable):
|
||||||
async for allele in alleles:
|
async for allele in alleles:
|
||||||
allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
|
insert_allele_to_request_dict(allele)
|
||||||
else:
|
else:
|
||||||
for allele in alleles:
|
for allele in alleles:
|
||||||
allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
|
insert_allele_to_request_dict(allele)
|
||||||
request_json = {
|
request_json = {
|
||||||
"designations": allele_request_dict
|
"designations": allele_request_dict
|
||||||
}
|
}
|
||||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
|
||||||
response_json: dict = await response.json()
|
|
||||||
allele_set: Set[Allele] = set()
|
|
||||||
response_json.setdefault("fields", dict())
|
|
||||||
schema_fields_returned: dict[str, str] = response_json["fields"]
|
|
||||||
schema_fields_returned.setdefault("ST", "unknown")
|
|
||||||
schema_fields_returned.setdefault("clonal_complex", "unknown")
|
|
||||||
schema_exact_matches: dict = response_json["exact_matches"]
|
|
||||||
for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
|
|
||||||
if len(exact_match_alleles) > 1:
|
|
||||||
raise ValueError(f"Unexpected number of alleles returned for exact match (Expected 1, retrieved {len(exact_match_alleles)})")
|
|
||||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
|
||||||
if len(allele_set) == 0:
|
|
||||||
raise ValueError("Passed in no alleles.")
|
|
||||||
return MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
|
|
||||||
|
|
||||||
async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
|
attempts = 0
|
||||||
|
success = False
|
||||||
|
last_error = None
|
||||||
|
while attempts < self._retry_limit and not success:
|
||||||
|
attempts += 1
|
||||||
|
try:
|
||||||
|
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||||
|
response_json: dict = await response.json()
|
||||||
|
allele_set: Set[Allele] = set()
|
||||||
|
response_json.setdefault("fields", dict())
|
||||||
|
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
||||||
|
scheme_fields_returned.setdefault("ST", "unknown")
|
||||||
|
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
||||||
|
scheme_exact_matches: dict = response_json["exact_matches"]
|
||||||
|
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
||||||
|
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||||
|
if len(allele_set) == 0:
|
||||||
|
raise ValueError("Passed in no alleles.")
|
||||||
|
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
||||||
|
if len(names_list) > 0:
|
||||||
|
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
||||||
|
return result_mlst_profile
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||||
|
last_error = e
|
||||||
|
success = False
|
||||||
|
await asyncio.sleep(5)
|
||||||
|
else:
|
||||||
|
success = True
|
||||||
|
try:
|
||||||
|
if last_error is not None:
|
||||||
|
raise last_error
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||||
|
result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
|
||||||
|
raise ValueError("Last error was not recorded.")
|
||||||
|
|
||||||
|
|
||||||
|
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
||||||
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
||||||
return await self.determine_mlst_st(alleles)
|
return await self.determine_mlst_st(alleles)
|
||||||
|
|
||||||
async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
|
async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
|
||||||
|
tasks: list[Coroutine[Any, Any, Union[NamedMLSTProfile, MLSTProfile]]] = []
|
||||||
async for named_strings in query_named_string_groups:
|
async for named_strings in query_named_string_groups:
|
||||||
names: list[str] = list()
|
tasks.append(self.profile_string(named_strings))
|
||||||
sequences: list[str] = list()
|
for task in asyncio.as_completed(tasks):
|
||||||
for named_string in named_strings:
|
named_mlst_profile = await task
|
||||||
names.append(named_string.name)
|
|
||||||
sequences.append(named_string.sequence)
|
|
||||||
try:
|
try:
|
||||||
yield NamedMLSTProfile("-".join(names), (await self.profile_string(sequences)))
|
if isinstance(named_mlst_profile, NamedMLSTProfile):
|
||||||
|
yield named_mlst_profile
|
||||||
|
else:
|
||||||
|
raise TypeError("MLST profile is not named.")
|
||||||
except NoBIGSdbMatchesException as e:
|
except NoBIGSdbMatchesException as e:
|
||||||
if stop_on_fail:
|
if stop_on_fail:
|
||||||
raise e
|
raise e
|
||||||
yield NamedMLSTProfile("-".join(names), None)
|
causal_name = e.get_causal_query_name()
|
||||||
|
if causal_name is None:
|
||||||
|
raise ValueError("Missing query name despite requiring names.")
|
||||||
|
else:
|
||||||
|
yield NamedMLSTProfile(causal_name, None)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._http_client.close()
|
await self._http_client.close()
|
||||||
@@ -151,7 +210,7 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._http_client = ClientSession()
|
self._http_client = ClientSession()
|
||||||
self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
|
self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
|
||||||
self._seqdefdb_schemas: dict[str, Union[Mapping[str, int], None]] = dict()
|
self._seqdefdb_schemes: dict[str, Union[Mapping[str, int], None]] = dict()
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
async def __aenter__(self):
|
async def __aenter__(self):
|
||||||
@@ -177,22 +236,32 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
|||||||
raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
|
raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
|
||||||
return known_databases[seqdef_db_name]
|
return known_databases[seqdef_db_name]
|
||||||
|
|
||||||
async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
|
async def get_schemes_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
|
||||||
if seqdef_db_name in self._seqdefdb_schemas and not force:
|
if seqdef_db_name in self._seqdefdb_schemes and not force:
|
||||||
return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
|
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
|
||||||
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
|
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
|
||||||
async with self._http_client.get(uri_path) as response:
|
async with self._http_client.get(uri_path) as response:
|
||||||
response_json = await response.json()
|
response_json = await response.json()
|
||||||
schema_descriptions: Mapping[str, int] = dict()
|
scheme_descriptions: Mapping[str, int] = dict()
|
||||||
for scheme_definition in response_json["schemes"]:
|
for scheme_definition in response_json["schemes"]:
|
||||||
scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
|
scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
|
||||||
scheme_desc: str = scheme_definition["description"]
|
scheme_desc: str = scheme_definition["description"]
|
||||||
schema_descriptions[scheme_desc] = scheme_id
|
scheme_descriptions[scheme_desc] = scheme_id
|
||||||
self._seqdefdb_schemas[seqdef_db_name] = schema_descriptions
|
self._seqdefdb_schemes[seqdef_db_name] = scheme_descriptions
|
||||||
return self._seqdefdb_schemas[seqdef_db_name] # type: ignore
|
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore
|
||||||
|
|
||||||
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
|
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
|
||||||
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
|
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
|
||||||
|
|
||||||
|
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
|
||||||
|
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}"
|
||||||
|
async with self._http_client.get(uri_path) as response:
|
||||||
|
response_json = await response.json()
|
||||||
|
loci = response_json["loci"]
|
||||||
|
results = []
|
||||||
|
for locus in loci:
|
||||||
|
results.append(path.basename(locus))
|
||||||
|
return results
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._http_client.close()
|
await self._http_client.close()
|
||||||
@@ -200,7 +269,7 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
|||||||
async def __aexit__(self, exc_type, exc_value, traceback):
|
async def __aexit__(self, exc_type, exc_value, traceback):
|
||||||
await self.close()
|
await self.close()
|
||||||
|
|
||||||
def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, schema_id: int):
|
def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, scheme_id: int):
|
||||||
if local:
|
if local:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, schema_id=schema_id)
|
return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, scheme_id=scheme_id)
|
@@ -5,17 +5,21 @@ class BIGSDbDatabaseAPIException(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
|
class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
|
||||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
def __init__(self, database_name: str, database_scheme_id: int, query_name: Union[None, str], *args):
|
||||||
super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
self._query_name = query_name
|
||||||
|
super().__init__(f"No matches found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||||
|
|
||||||
|
def get_causal_query_name(self) -> Union[str, None]:
|
||||||
|
return self._query_name
|
||||||
|
|
||||||
class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
|
class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
|
||||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||||
super().__init__(f"No exact match found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
super().__init__(f"No exact match found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||||
|
|
||||||
class NoSuchBIGSdbDatabaseException(BIGSDbDatabaseAPIException):
|
class NoSuchBIGSdbDatabaseException(BIGSDbDatabaseAPIException):
|
||||||
def __init__(self, database_name: str, *args):
|
def __init__(self, database_name: str, *args):
|
||||||
super().__init__(f"No database \"{database_name}\" found.", *args)
|
super().__init__(f"No database \"{database_name}\" found.", *args)
|
||||||
|
|
||||||
class NoSuchBigSdbSchemaException(BIGSDbDatabaseAPIException):
|
class NoSuchBigSdbschemeException(BIGSDbDatabaseAPIException):
|
||||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||||
super().__init__(f"No schema with ID {database_schema_id} in \"{database_name}\" found.", *args)
|
super().__init__(f"No scheme with ID {database_scheme_id} in \"{database_name}\" found.", *args)
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from io import TextIOWrapper
|
from io import TextIOWrapper
|
||||||
|
from os import path
|
||||||
from typing import Any, AsyncGenerator, Iterable, Union
|
from typing import Any, AsyncGenerator, Iterable, Union
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
|
|
||||||
@@ -9,9 +10,12 @@ async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]
|
|||||||
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
||||||
results = []
|
results = []
|
||||||
for fasta_sequence in await fasta_sequences:
|
for fasta_sequence in await fasta_sequences:
|
||||||
results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq)))
|
results.append(NamedString("{0}:{1}".format(path.basename(handle.name if isinstance(handle, TextIOWrapper) else handle), fasta_sequence.id), str(fasta_sequence.seq)))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
|
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
|
||||||
|
tasks = []
|
||||||
for handle in handles:
|
for handle in handles:
|
||||||
yield await read_fasta(handle)
|
tasks.append(read_fasta(handle))
|
||||||
|
for task in asyncio.as_completed(tasks):
|
||||||
|
yield await task
|
@@ -25,7 +25,7 @@ class SangerTraceData(NamedString):
|
|||||||
analysis_proto_settings_name: str
|
analysis_proto_settings_name: str
|
||||||
analysis_rpto_settings_ver: str
|
analysis_rpto_settings_ver: str
|
||||||
analysis_proto_xml_data: str
|
analysis_proto_xml_data: str
|
||||||
analysis_proto_xml_schema_ver: str
|
analysis_proto_xml_scheme_ver: str
|
||||||
sample_comment: Union[None, str]
|
sample_comment: Union[None, str]
|
||||||
capillary_machine: bool
|
capillary_machine: bool
|
||||||
container_identifier: str
|
container_identifier: str
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import csv
|
import csv
|
||||||
from os import PathLike
|
from os import PathLike
|
||||||
from typing import AsyncIterable, Collection, Mapping, Sequence, Union
|
from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
|
||||||
|
|
||||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
|
|||||||
result[locus] = tuple(result[locus]) # type: ignore
|
result[locus] = tuple(result[locus]) # type: ignore
|
||||||
return dict(result)
|
return dict(result)
|
||||||
|
|
||||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
|
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
|
||||||
failed = list()
|
failed = list()
|
||||||
with open(handle, "w", newline='') as filehandle:
|
with open(handle, "w", newline='') as filehandle:
|
||||||
header = None
|
header = None
|
||||||
@@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named
|
|||||||
continue
|
continue
|
||||||
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
|
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
|
||||||
if writer is None:
|
if writer is None:
|
||||||
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
|
header = ["id", "st", "clonal-complex", *sorted(allele_names)]
|
||||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
row_dictionary = {
|
row_dictionary = {
|
||||||
|
@@ -7,7 +7,7 @@ import pytest
|
|||||||
from autobigs.engine.analysis import bigsdb
|
from autobigs.engine.analysis import bigsdb
|
||||||
from autobigs.engine.structures import mlst
|
from autobigs.engine.structures import mlst
|
||||||
from autobigs.engine.structures.genomics import NamedString
|
from autobigs.engine.structures.genomics import NamedString
|
||||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile
|
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||||
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
||||||
|
|
||||||
@@ -71,14 +71,14 @@ hinfluenzae_2014_102_bad_profile = MLSTProfile((
|
|||||||
), "unknown", "unknown")
|
), "unknown", "unknown")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
@pytest.mark.parametrize("local_db,database_api,database_name,scheme_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||||
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||||
(False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
|
(False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
|
||||||
])
|
])
|
||||||
class TestBIGSdbMLSTProfiler:
|
class TestBIGSdbMLSTProfiler:
|
||||||
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
sequence = get_first_sequence_from_fasta(seq_path)
|
sequence = get_first_sequence_from_fasta(seq_path)
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
|
expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
|
||||||
targets_left = set(mlst.alleles_to_mapping(expected_profile.alleles).keys())
|
targets_left = set(mlst.alleles_to_mapping(expected_profile.alleles).keys())
|
||||||
async for exact_match in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[sequence]):
|
async for exact_match in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[sequence]):
|
||||||
@@ -89,10 +89,10 @@ class TestBIGSdbMLSTProfiler:
|
|||||||
|
|
||||||
assert len(targets_left) == 0
|
assert len(targets_left) == 0
|
||||||
|
|
||||||
async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
target_sequences = get_multiple_sequences_from_fasta(feature_seqs_path)
|
target_sequences = get_multiple_sequences_from_fasta(feature_seqs_path)
|
||||||
mlst_targets = {x.lower() for x in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
|
mlst_targets = {x.lower() for x in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as profiler:
|
||||||
for target_sequence in target_sequences:
|
for target_sequence in target_sequences:
|
||||||
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", target_sequence.description)
|
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", target_sequence.description)
|
||||||
if match is None:
|
if match is None:
|
||||||
@@ -102,42 +102,54 @@ class TestBIGSdbMLSTProfiler:
|
|||||||
continue
|
continue
|
||||||
scrambled = gene_scrambler(str(target_sequence.seq), 0.125)
|
scrambled = gene_scrambler(str(target_sequence.seq), 0.125)
|
||||||
async for partial_match in profiler.determine_mlst_allele_variants([scrambled]):
|
async for partial_match in profiler.determine_mlst_allele_variants([scrambled]):
|
||||||
|
assert isinstance(partial_match, Allele)
|
||||||
assert partial_match.partial_match_profile is not None
|
assert partial_match.partial_match_profile is not None
|
||||||
mlst_targets.remove(gene)
|
mlst_targets.remove(gene)
|
||||||
|
|
||||||
assert len(mlst_targets) == 0
|
assert len(mlst_targets) == 0
|
||||||
|
|
||||||
async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
mlst_st_data = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
|
mlst_st_data = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
|
||||||
assert mlst_st_data is not None
|
assert mlst_st_data is not None
|
||||||
assert isinstance(mlst_st_data, MLSTProfile)
|
assert isinstance(mlst_st_data, MLSTProfile)
|
||||||
assert mlst_st_data.clonal_complex == expected_profile.clonal_complex
|
assert mlst_st_data.clonal_complex == expected_profile.clonal_complex
|
||||||
assert mlst_st_data.sequence_type == expected_profile.sequence_type
|
assert mlst_st_data.sequence_type == expected_profile.sequence_type
|
||||||
|
|
||||||
async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
dummy_alleles = bad_profile.alleles
|
dummy_alleles = bad_profile.alleles
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
mlst_profile = await dummy_profiler.determine_mlst_st(dummy_alleles)
|
mlst_profile = await dummy_profiler.determine_mlst_st(dummy_alleles)
|
||||||
|
assert isinstance(mlst_profile, MLSTProfile)
|
||||||
assert mlst_profile.clonal_complex == "unknown"
|
assert mlst_profile.clonal_complex == "unknown"
|
||||||
assert mlst_profile.sequence_type == "unknown"
|
assert mlst_profile.sequence_type == "unknown"
|
||||||
|
|
||||||
|
|
||||||
async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
sequence = get_first_sequence_from_fasta(seq_path)
|
sequence = get_first_sequence_from_fasta(seq_path)
|
||||||
dummy_sequences = [[NamedString("seq1", sequence)], [NamedString("seq2", sequence)]]
|
dummy_sequences = [[NamedString("seq1", sequence)], [NamedString("seq2", sequence)]]
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
|
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
|
||||||
name, profile = named_profile.name, named_profile.mlst_profile
|
name, profile = named_profile.name, named_profile.mlst_profile
|
||||||
assert profile is not None
|
|
||||||
assert isinstance(profile, MLSTProfile)
|
assert isinstance(profile, MLSTProfile)
|
||||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||||
assert profile.sequence_type == expected_profile.sequence_type
|
assert profile.sequence_type == expected_profile.sequence_type
|
||||||
|
|
||||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_bigsdb_profile_named_string_no_repeat_name(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
|
sequence = get_first_sequence_from_fasta(seq_path)
|
||||||
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
|
named_profile = await dummy_profiler.profile_string([NamedString("BX470248.1", sequence)])
|
||||||
|
assert isinstance(named_profile, NamedMLSTProfile)
|
||||||
|
name, profile = named_profile.name, named_profile.mlst_profile
|
||||||
|
assert isinstance(profile, MLSTProfile)
|
||||||
|
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||||
|
assert profile.sequence_type == expected_profile.sequence_type
|
||||||
|
assert name == "BX470248.1"
|
||||||
|
|
||||||
|
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), True):
|
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), True):
|
||||||
name, profile = name_profile.name, name_profile.mlst_profile
|
name, profile = name_profile.name, name_profile.mlst_profile
|
||||||
|
|
||||||
@@ -151,11 +163,11 @@ class TestBIGSdbMLSTProfiler:
|
|||||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||||
assert profile.sequence_type == expected_profile.sequence_type
|
assert profile.sequence_type == expected_profile.sequence_type
|
||||||
|
|
||||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||||
|
|
||||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), False):
|
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), False):
|
||||||
name, profile = named_profile.name, named_profile.mlst_profile
|
name, profile = named_profile.name, named_profile.mlst_profile
|
||||||
|
|
||||||
@@ -183,12 +195,12 @@ class TestBIGSdbIndex:
|
|||||||
async with BIGSdbIndex() as bigsdb_index:
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"
|
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"
|
||||||
|
|
||||||
async def test_bigsdb_index_get_schemas_for_bordetella(self):
|
async def test_bigsdb_index_get_schemes_for_bordetella(self):
|
||||||
async with BIGSdbIndex() as index:
|
async with BIGSdbIndex() as index:
|
||||||
schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
schemes = await index.get_schemes_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
||||||
assert len(schemas.keys()) > 0
|
assert len(schemes.keys()) > 0
|
||||||
assert "MLST" in schemas
|
assert "MLST" in schemes
|
||||||
assert isinstance(schemas["MLST"], int)
|
assert isinstance(schemes["MLST"], int)
|
||||||
|
|
||||||
async def test_bigsdb_index_get_databases_has_only_seqdef(self):
|
async def test_bigsdb_index_get_databases_has_only_seqdef(self):
|
||||||
async with BIGSdbIndex() as index:
|
async with BIGSdbIndex() as index:
|
||||||
@@ -207,5 +219,15 @@ class TestBIGSdbIndex:
|
|||||||
async with await bigsdb_index.build_profiler_from_seqdefdb(local, "pubmlst_bordetella_seqdef", 3) as profiler:
|
async with await bigsdb_index.build_profiler_from_seqdefdb(local, "pubmlst_bordetella_seqdef", 3) as profiler:
|
||||||
assert isinstance(profiler, BIGSdbMLSTProfiler)
|
assert isinstance(profiler, BIGSdbMLSTProfiler)
|
||||||
profile = await profiler.profile_string(sequence)
|
profile = await profiler.profile_string(sequence)
|
||||||
|
assert isinstance(profile, MLSTProfile)
|
||||||
assert profile.clonal_complex == "ST-2 complex"
|
assert profile.clonal_complex == "ST-2 complex"
|
||||||
assert profile.sequence_type == "1"
|
assert profile.sequence_type == "1"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
|
||||||
|
("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
|
||||||
|
])
|
||||||
|
async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
|
||||||
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
|
loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
|
||||||
|
assert set(loci) == set(expected)
|
@@ -4,4 +4,9 @@ from autobigs.engine.reading import read_fasta
|
|||||||
async def test_fasta_reader_not_none():
|
async def test_fasta_reader_not_none():
|
||||||
named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
||||||
for named_string in named_strings:
|
for named_string in named_strings:
|
||||||
assert named_string.name == "BX470248.1"
|
assert named_string.name is not None
|
||||||
|
|
||||||
|
async def test_fasta_reader_name_contains_file_and_id():
|
||||||
|
named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
||||||
|
for named_string in named_strings:
|
||||||
|
assert named_string.name == "tohama_I_bpertussis.fasta:BX470248.1"
|
||||||
|
@@ -27,15 +27,28 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
|
|||||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
output_path = path.join(temp_dir, "out.csv")
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
with open(output_path) as csv_handle:
|
with open(output_path) as csv_handle:
|
||||||
csv_reader = reader(csv_handle)
|
csv_reader = reader(csv_handle)
|
||||||
lines = list(csv_reader)
|
lines = list(csv_reader)
|
||||||
target_columns = lines[4:]
|
target_columns = lines[0][3:]
|
||||||
assert target_columns == sorted(target_columns)
|
assert target_columns == sorted(target_columns)
|
||||||
|
|
||||||
|
async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_alphabet_mlst_profile):
|
||||||
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
|
with open(output_path) as csv_handle:
|
||||||
|
csv_reader = reader(csv_handle)
|
||||||
|
lines = list(csv_reader)
|
||||||
|
sample_name = lines[1][0]
|
||||||
|
assert sample_name == "name"
|
||||||
|
|
||||||
|
|
||||||
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
|
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
|
||||||
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore
|
assert dummy_alphabet_mlst_profile.mlst_profile is not None
|
||||||
|
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles)
|
||||||
expected_mapping = {
|
expected_mapping = {
|
||||||
"A": "1",
|
"A": "1",
|
||||||
"B": "1",
|
"B": "1",
|
||||||
@@ -45,3 +58,13 @@ async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profil
|
|||||||
for allele_name, allele_ids in mapping.items():
|
for allele_name, allele_ids in mapping.items():
|
||||||
assert allele_name in expected_mapping
|
assert allele_name in expected_mapping
|
||||||
assert allele_ids == expected_mapping[allele_name]
|
assert allele_ids == expected_mapping[allele_name]
|
||||||
|
|
||||||
|
async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_profile: NamedMLSTProfile):
|
||||||
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
|
with open(output_path) as csv_handle:
|
||||||
|
csv_reader = reader(csv_handle)
|
||||||
|
lines = list(csv_reader)
|
||||||
|
assert '*' in lines[1][5]
|
133
tests/resources/B3913_bpertussis_minimized_features.fasta
Normal file
133
tests/resources/B3913_bpertussis_minimized_features.fasta
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene]
|
||||||
|
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||||
|
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||||
|
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||||
|
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||||
|
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||||
|
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||||
|
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||||
|
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||||
|
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||||
|
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||||
|
>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene]
|
||||||
|
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||||
|
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||||
|
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||||
|
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||||
|
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||||
|
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||||
|
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||||
|
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||||
|
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||||
|
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||||
|
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||||
|
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||||
|
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||||
|
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||||
|
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||||
|
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||||
|
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||||
|
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||||
|
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||||
|
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||||
|
>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene]
|
||||||
|
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||||
|
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||||
|
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||||
|
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||||
|
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||||
|
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||||
|
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||||
|
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||||
|
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||||
|
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||||
|
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||||
|
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||||
|
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||||
|
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||||
|
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||||
|
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||||
|
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||||
|
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||||
|
>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene]
|
||||||
|
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||||
|
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||||
|
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||||
|
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||||
|
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||||
|
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||||
|
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||||
|
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||||
|
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||||
|
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||||
|
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||||
|
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||||
|
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||||
|
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||||
|
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||||
|
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||||
|
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||||
|
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||||
|
>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene]
|
||||||
|
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||||
|
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||||
|
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||||
|
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||||
|
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||||
|
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||||
|
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||||
|
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||||
|
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||||
|
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||||
|
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||||
|
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||||
|
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||||
|
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||||
|
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||||
|
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||||
|
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||||
|
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||||
|
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||||
|
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||||
|
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||||
|
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||||
|
>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene]
|
||||||
|
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||||
|
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||||
|
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||||
|
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||||
|
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||||
|
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||||
|
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||||
|
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||||
|
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||||
|
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||||
|
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||||
|
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||||
|
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||||
|
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||||
|
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||||
|
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||||
|
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||||
|
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||||
|
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||||
|
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||||
|
>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene]
|
||||||
|
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||||
|
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||||
|
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||||
|
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||||
|
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||||
|
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||||
|
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||||
|
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||||
|
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||||
|
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||||
|
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||||
|
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||||
|
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||||
|
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||||
|
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||||
|
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||||
|
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||||
|
GGTGCTGAAGTAG
|
133
tests/resources/B3921_bpertussis_minimized_features.fasta
Normal file
133
tests/resources/B3921_bpertussis_minimized_features.fasta
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS]
|
||||||
|
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||||
|
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||||
|
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||||
|
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||||
|
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||||
|
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||||
|
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||||
|
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||||
|
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||||
|
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||||
|
>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS]
|
||||||
|
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||||
|
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||||
|
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||||
|
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||||
|
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||||
|
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||||
|
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||||
|
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||||
|
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||||
|
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||||
|
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||||
|
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||||
|
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||||
|
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||||
|
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||||
|
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||||
|
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||||
|
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||||
|
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||||
|
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||||
|
>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS]
|
||||||
|
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||||
|
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||||
|
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||||
|
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||||
|
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||||
|
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||||
|
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||||
|
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||||
|
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||||
|
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||||
|
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||||
|
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||||
|
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||||
|
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||||
|
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||||
|
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||||
|
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||||
|
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||||
|
>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS]
|
||||||
|
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||||
|
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||||
|
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||||
|
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||||
|
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||||
|
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||||
|
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||||
|
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||||
|
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||||
|
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||||
|
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||||
|
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||||
|
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||||
|
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||||
|
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||||
|
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||||
|
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||||
|
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||||
|
>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS]
|
||||||
|
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||||
|
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||||
|
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||||
|
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||||
|
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||||
|
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||||
|
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||||
|
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||||
|
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||||
|
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||||
|
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||||
|
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||||
|
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||||
|
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||||
|
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||||
|
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||||
|
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||||
|
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||||
|
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||||
|
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||||
|
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||||
|
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||||
|
>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS]
|
||||||
|
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||||
|
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||||
|
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||||
|
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||||
|
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||||
|
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||||
|
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||||
|
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||||
|
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||||
|
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||||
|
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||||
|
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||||
|
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||||
|
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||||
|
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||||
|
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||||
|
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||||
|
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||||
|
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||||
|
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||||
|
>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS]
|
||||||
|
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||||
|
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||||
|
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||||
|
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||||
|
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||||
|
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||||
|
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||||
|
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||||
|
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||||
|
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||||
|
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||||
|
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||||
|
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||||
|
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||||
|
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||||
|
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||||
|
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||||
|
GGTGCTGAAGTAG
|
Reference in New Issue
Block a user