Compare commits
24 Commits
fd536862e2
...
0.14.1
Author | SHA1 | Date | |
---|---|---|---|
14d760a266 | |||
f8d92a4aad | |||
34bf02c75a | |||
3cb10a4609 | |||
1776f5aa51 | |||
96d715fdcb | |||
e088d1080b | |||
8ffc7c7fb5 | |||
af7edf0942 | |||
481870db97 | |||
62fdada9c1 | |||
3074997db6 | |||
3d6f36a722 | |||
bbd9e67c8c | |||
2ea2f63f29 | |||
17932ecd71 | |||
6cdc4ff4ae | |||
4b34036d17 | |||
27ae89fde7 | |||
06dbb56c28 | |||
79fcce8b84 | |||
f4064f087e | |||
276665f5fd | |||
62ce1c9b2f |
3
.vscode/extensions.json
vendored
3
.vscode/extensions.json
vendored
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"piotrpalarz.vscode-gitignore-generator"
|
||||
"piotrpalarz.vscode-gitignore-generator",
|
||||
"gruntfuggly.todo-tree"
|
||||
]
|
||||
}
|
25
.vscode/launch.json
vendored
25
.vscode/launch.json
vendored
@@ -1,25 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "autobigs info -lschema pubmlst_bordetella_seqdef",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/src/autobigs/cli/program.py",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"info",
|
||||
"-lschemas",
|
||||
"pubmlst_bordetella_seqdef"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/src",
|
||||
"env": {
|
||||
"PYTHONPATH": "${workspaceFolder}/src"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
10
Jenkinsfile
vendored
10
Jenkinsfile
vendored
@@ -35,17 +35,23 @@ pipeline {
|
||||
stage("publish") {
|
||||
parallel {
|
||||
stage ("git.reslate.systems") {
|
||||
when {
|
||||
not {
|
||||
tag '*.*.*'
|
||||
}
|
||||
}
|
||||
|
||||
environment {
|
||||
CREDS = credentials('username-password-rs-git')
|
||||
}
|
||||
steps {
|
||||
sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||
sh 'curl --user ${CREDS_USR}:${CRED_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CRED_USR}/conda/$(basename conda-bld/**/*.conda)'
|
||||
sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
|
||||
}
|
||||
}
|
||||
stage ("pypi.org") {
|
||||
when {
|
||||
tag '*.*'
|
||||
tag '*.*.*'
|
||||
}
|
||||
environment {
|
||||
TOKEN = credentials('pypi.org')
|
||||
|
19
README.md
19
README.md
@@ -1,13 +1,14 @@
|
||||
# autoBIGS.Engine
|
||||
# autoBIGS.engine
|
||||
|
||||
A python library implementing common BIGSdb MLST schemes and databases accesses for the purpose of typing sequences automatically. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
Briefly, this library can:
|
||||
- Import multiple `FASTA` files
|
||||
- Fetch the available BIGSdb databases that is currently live and available
|
||||
- Fetch the available BIGSdb database schemas for a given MLST database
|
||||
- Fetch the available BIGSdb database schemes for a given MLST database
|
||||
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
||||
- Retrieve MLST sequence type IDs based off a sequence
|
||||
- Output all results to a single CSV
|
||||
@@ -22,4 +23,16 @@ Then, it's as easy as running `pip install autobigs-engine` in any terminal that
|
||||
|
||||
### CLI usage
|
||||
|
||||
This is a independent python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this libraries functions. Alternatively, you may use `autobigs-cli`, a `Python` package that implements a CLI for calling this library.
|
||||
This is a independent python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this libraries functions. Alternatively, you may use `autobigs-cli`, a `Python` package that implements a CLI for calling this library.
|
||||
|
||||
## Versioning
|
||||
|
||||
the autoBIGS project follows [semantic versioning](https://semver.org/) where the three numbers may be interpreted as MAJOR.MINOR.PATCH.
|
||||
|
||||
Note regarding major version 0 ([spec item 4](https://semver.org/#spec-item-4)), the following adaptation of semantic versioning definition is as follows:
|
||||
|
||||
1. Given x.Y.z, Y is only incremented when a backwards incompatible change is made.
|
||||
|
||||
2. Given x.y.Z, Z is only incremented when a backwards compatible change is made.
|
||||
|
||||
Versions of autoBIGS items with a major version number of 0 will introduce numerous changes and patches. As such, changes between such versions should be considered highly variable.
|
@@ -1,44 +0,0 @@
|
||||
{% set name = "autoBIGS.engine" %}
|
||||
{% set version = "0.12.1.dev1+gb8cebb8.d20250221" %}
|
||||
|
||||
package:
|
||||
name: {{ name|lower|replace(".", "-") }}
|
||||
version: {{ version }}
|
||||
|
||||
source:
|
||||
url: file:///workspaces/autoBIGS.engine/dist/autobigs_engine-0.12.1.dev1%2Bgb8cebb8.d20250221.tar.gz
|
||||
sha256: c86441b94f935cfa414ff28ca4c026a070e0fb15988ea3bb7d1a942859a09b16
|
||||
|
||||
build:
|
||||
noarch: python
|
||||
script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
|
||||
number: 0
|
||||
run_exports:
|
||||
- {{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}
|
||||
requirements:
|
||||
host:
|
||||
- python >=3.12
|
||||
- setuptools >=64
|
||||
- setuptools-scm >=8
|
||||
- pip
|
||||
run:
|
||||
- python >=3.12
|
||||
- biopython ==1.85
|
||||
- aiohttp ==3.11.*
|
||||
|
||||
test:
|
||||
imports:
|
||||
- autobigs
|
||||
commands:
|
||||
- pip check
|
||||
requires:
|
||||
- pip
|
||||
|
||||
about:
|
||||
summary: A library to rapidly fetch fetch MLST profiles given sequences for various diseases.
|
||||
license: GPL-3.0-or-later
|
||||
license_file: LICENSE
|
||||
home: https://github.com/Syph-and-VPD-Lab/autoBIGS.engine
|
||||
extra:
|
||||
recipe-maintainers:
|
||||
- Harrison Deng
|
@@ -12,4 +12,5 @@ dependencies:
|
||||
- twine==6.0.1
|
||||
- setuptools_scm
|
||||
- pytest-cov
|
||||
- grayskull
|
||||
- grayskull
|
||||
- curl
|
@@ -7,9 +7,9 @@ from os import path
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequence, Set, Union
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
||||
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
|
||||
|
||||
from autobigs.engine.reading import read_fasta
|
||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||
@@ -43,11 +43,13 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
|
||||
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
|
||||
def __init__(self, database_api: str, database_name: str, schema_id: int):
|
||||
def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5, timeout: int = 600):
|
||||
self._timeout = timeout
|
||||
self._retry_limit = retry_requests
|
||||
self._database_name = database_name
|
||||
self._schema_id = schema_id
|
||||
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
|
||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
|
||||
self._scheme_id = scheme_id
|
||||
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
|
||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(timeout))
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
@@ -57,40 +59,59 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
uri_path = "sequence"
|
||||
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
|
||||
query_sequence_strings = [query_sequence_strings]
|
||||
|
||||
for sequence_string in query_sequence_strings:
|
||||
async with self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||
"partial_matches": True
|
||||
}) as response:
|
||||
sequence_response: dict = await response.json()
|
||||
attempts = 0
|
||||
success = False
|
||||
last_error = None
|
||||
while not success and attempts < self._retry_limit:
|
||||
attempts += 1
|
||||
request = self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||
"partial_matches": True
|
||||
})
|
||||
try:
|
||||
async with request as response:
|
||||
sequence_response: dict = await response.json()
|
||||
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
elif "partial_matches" in sequence_response:
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = AlignmentStats(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
gaps=int(partial_match["gaps"]),
|
||||
match_metric=int(partial_match["bitscore"])
|
||||
)
|
||||
result_allele = Allele(
|
||||
allele_locus=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
elif "partial_matches" in sequence_response:
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = AlignmentStats(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
gaps=int(partial_match["gaps"]),
|
||||
match_metric=int(partial_match["bitscore"])
|
||||
)
|
||||
result_allele = Allele(
|
||||
allele_locus=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
|
||||
last_error = e
|
||||
success = False
|
||||
await asyncio.sleep(5) # In case the connection issue is due to rate issues
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._schema_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||
success = True
|
||||
if not success and last_error is not None:
|
||||
try:
|
||||
raise last_error
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Non-fatal errors
|
||||
yield Allele("error", "error", None)
|
||||
|
||||
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
||||
uri_path = "designations"
|
||||
@@ -113,42 +134,66 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
request_json = {
|
||||
"designations": allele_request_dict
|
||||
}
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json: dict = await response.json()
|
||||
allele_set: Set[Allele] = set()
|
||||
response_json.setdefault("fields", dict())
|
||||
schema_fields_returned: dict[str, str] = response_json["fields"]
|
||||
schema_fields_returned.setdefault("ST", "unknown")
|
||||
schema_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
schema_exact_matches: dict = response_json["exact_matches"]
|
||||
for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
|
||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||
if len(allele_set) == 0:
|
||||
raise ValueError("Passed in no alleles.")
|
||||
result_mlst_profile = MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
|
||||
if len(names_list) > 0:
|
||||
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)), result_mlst_profile)
|
||||
return result_mlst_profile
|
||||
|
||||
attempts = 0
|
||||
success = False
|
||||
last_error = None
|
||||
while attempts < self._retry_limit and not success:
|
||||
attempts += 1
|
||||
try:
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json: dict = await response.json()
|
||||
allele_set: Set[Allele] = set()
|
||||
response_json.setdefault("fields", dict())
|
||||
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
||||
scheme_fields_returned.setdefault("ST", "unknown")
|
||||
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
scheme_exact_matches: dict = response_json["exact_matches"]
|
||||
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||
if len(allele_set) == 0:
|
||||
raise ValueError("Passed in no alleles.")
|
||||
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
||||
if len(names_list) > 0:
|
||||
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
||||
return result_mlst_profile
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||
last_error = e
|
||||
success = False
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
success = True
|
||||
try:
|
||||
if last_error is not None:
|
||||
raise last_error
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||
result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
|
||||
raise ValueError("Last error was not recorded.")
|
||||
|
||||
|
||||
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
||||
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
||||
return await self.determine_mlst_st(alleles)
|
||||
|
||||
async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
|
||||
tasks = []
|
||||
tasks: list[Coroutine[Any, Any, Union[NamedMLSTProfile, MLSTProfile]]] = []
|
||||
async for named_strings in query_named_string_groups:
|
||||
tasks.append(self.profile_string(named_strings))
|
||||
for task in asyncio.as_completed(tasks):
|
||||
try:
|
||||
yield await task
|
||||
except NoBIGSdbMatchesException as e:
|
||||
if stop_on_fail:
|
||||
raise e
|
||||
causal_name = e.get_causal_query_name()
|
||||
if causal_name is None:
|
||||
raise ValueError("Missing query name despite requiring names.")
|
||||
else:
|
||||
yield NamedMLSTProfile(causal_name, None)
|
||||
for task in asyncio.as_completed(tasks):
|
||||
named_mlst_profile = await task
|
||||
try:
|
||||
if isinstance(named_mlst_profile, NamedMLSTProfile):
|
||||
yield named_mlst_profile
|
||||
else:
|
||||
raise TypeError("MLST profile is not named.")
|
||||
except NoBIGSdbMatchesException as e:
|
||||
if stop_on_fail:
|
||||
raise e
|
||||
causal_name = e.get_causal_query_name()
|
||||
if causal_name is None:
|
||||
raise ValueError("Missing query name despite requiring names.")
|
||||
else:
|
||||
yield NamedMLSTProfile(causal_name, None)
|
||||
|
||||
async def close(self):
|
||||
await self._http_client.close()
|
||||
@@ -165,7 +210,7 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
def __init__(self):
|
||||
self._http_client = ClientSession()
|
||||
self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
|
||||
self._seqdefdb_schemas: dict[str, Union[Mapping[str, int], None]] = dict()
|
||||
self._seqdefdb_schemes: dict[str, Union[Mapping[str, int], None]] = dict()
|
||||
super().__init__()
|
||||
|
||||
async def __aenter__(self):
|
||||
@@ -191,22 +236,32 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
|
||||
return known_databases[seqdef_db_name]
|
||||
|
||||
async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
|
||||
if seqdef_db_name in self._seqdefdb_schemas and not force:
|
||||
return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
|
||||
async def get_schemes_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
|
||||
if seqdef_db_name in self._seqdefdb_schemes and not force:
|
||||
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
|
||||
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
|
||||
async with self._http_client.get(uri_path) as response:
|
||||
response_json = await response.json()
|
||||
schema_descriptions: Mapping[str, int] = dict()
|
||||
scheme_descriptions: Mapping[str, int] = dict()
|
||||
for scheme_definition in response_json["schemes"]:
|
||||
scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
|
||||
scheme_desc: str = scheme_definition["description"]
|
||||
schema_descriptions[scheme_desc] = scheme_id
|
||||
self._seqdefdb_schemas[seqdef_db_name] = schema_descriptions
|
||||
return self._seqdefdb_schemas[seqdef_db_name] # type: ignore
|
||||
scheme_descriptions[scheme_desc] = scheme_id
|
||||
self._seqdefdb_schemes[seqdef_db_name] = scheme_descriptions
|
||||
return self._seqdefdb_schemes[seqdef_db_name] # type: ignore
|
||||
|
||||
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
|
||||
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
|
||||
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
|
||||
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
|
||||
|
||||
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
|
||||
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}"
|
||||
async with self._http_client.get(uri_path) as response:
|
||||
response_json = await response.json()
|
||||
loci = response_json["loci"]
|
||||
results = []
|
||||
for locus in loci:
|
||||
results.append(path.basename(locus))
|
||||
return results
|
||||
|
||||
async def close(self):
|
||||
await self._http_client.close()
|
||||
@@ -214,7 +269,7 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
async def __aexit__(self, exc_type, exc_value, traceback):
|
||||
await self.close()
|
||||
|
||||
def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, schema_id: int):
|
||||
def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, scheme_id: int):
|
||||
if local:
|
||||
raise NotImplementedError()
|
||||
return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, schema_id=schema_id)
|
||||
return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, scheme_id=scheme_id)
|
@@ -5,21 +5,21 @@ class BIGSDbDatabaseAPIException(Exception):
|
||||
|
||||
|
||||
class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_schema_id: int, query_name: Union[None, str], *args):
|
||||
def __init__(self, database_name: str, database_scheme_id: int, query_name: Union[None, str], *args):
|
||||
self._query_name = query_name
|
||||
super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
||||
super().__init__(f"No matches found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||
|
||||
def get_causal_query_name(self) -> Union[str, None]:
|
||||
return self._query_name
|
||||
|
||||
class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
|
||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
||||
super().__init__(f"No exact match found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
|
||||
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||
super().__init__(f"No exact match found with scheme with ID {database_scheme_id} in the database \"{database_name}\".", *args)
|
||||
|
||||
class NoSuchBIGSdbDatabaseException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, *args):
|
||||
super().__init__(f"No database \"{database_name}\" found.", *args)
|
||||
|
||||
class NoSuchBigSdbSchemaException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_schema_id: int, *args):
|
||||
super().__init__(f"No schema with ID {database_schema_id} in \"{database_name}\" found.", *args)
|
||||
class NoSuchBigSdbschemeException(BIGSDbDatabaseAPIException):
|
||||
def __init__(self, database_name: str, database_scheme_id: int, *args):
|
||||
super().__init__(f"No scheme with ID {database_scheme_id} in \"{database_name}\" found.", *args)
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
from io import TextIOWrapper
|
||||
from os import path
|
||||
from typing import Any, AsyncGenerator, Iterable, Union
|
||||
from Bio import SeqIO
|
||||
|
||||
@@ -9,7 +10,7 @@ async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]
|
||||
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
||||
results = []
|
||||
for fasta_sequence in await fasta_sequences:
|
||||
results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq)))
|
||||
results.append(NamedString("{0}:{1}".format(path.basename(handle.name if isinstance(handle, TextIOWrapper) else handle), fasta_sequence.id), str(fasta_sequence.seq)))
|
||||
return results
|
||||
|
||||
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
|
||||
|
@@ -25,7 +25,7 @@ class SangerTraceData(NamedString):
|
||||
analysis_proto_settings_name: str
|
||||
analysis_rpto_settings_ver: str
|
||||
analysis_proto_xml_data: str
|
||||
analysis_proto_xml_schema_ver: str
|
||||
analysis_proto_xml_scheme_ver: str
|
||||
sample_comment: Union[None, str]
|
||||
capillary_machine: bool
|
||||
container_identifier: str
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from collections import defaultdict
|
||||
import csv
|
||||
from os import PathLike
|
||||
from typing import AsyncIterable, Collection, Mapping, Sequence, Union
|
||||
from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
|
||||
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||
|
||||
@@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
|
||||
result[locus] = tuple(result[locus]) # type: ignore
|
||||
return dict(result)
|
||||
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
|
||||
failed = list()
|
||||
with open(handle, "w", newline='') as filehandle:
|
||||
header = None
|
||||
@@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named
|
||||
continue
|
||||
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
|
||||
if writer is None:
|
||||
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
|
||||
header = ["id", "st", "clonal-complex", *sorted(allele_names)]
|
||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||
writer.writeheader()
|
||||
row_dictionary = {
|
||||
|
@@ -7,7 +7,7 @@ import pytest
|
||||
from autobigs.engine.analysis import bigsdb
|
||||
from autobigs.engine.structures import mlst
|
||||
from autobigs.engine.structures.genomics import NamedString
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
||||
|
||||
@@ -71,14 +71,14 @@ hinfluenzae_2014_102_bad_profile = MLSTProfile((
|
||||
), "unknown", "unknown")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||
@pytest.mark.parametrize("local_db,database_api,database_name,scheme_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||
(False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
|
||||
])
|
||||
class TestBIGSdbMLSTProfiler:
|
||||
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
sequence = get_first_sequence_from_fasta(seq_path)
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
|
||||
targets_left = set(mlst.alleles_to_mapping(expected_profile.alleles).keys())
|
||||
async for exact_match in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[sequence]):
|
||||
@@ -89,10 +89,10 @@ class TestBIGSdbMLSTProfiler:
|
||||
|
||||
assert len(targets_left) == 0
|
||||
|
||||
async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
target_sequences = get_multiple_sequences_from_fasta(feature_seqs_path)
|
||||
mlst_targets = {x.lower() for x in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as profiler:
|
||||
for target_sequence in target_sequences:
|
||||
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", target_sequence.description)
|
||||
if match is None:
|
||||
@@ -102,42 +102,54 @@ class TestBIGSdbMLSTProfiler:
|
||||
continue
|
||||
scrambled = gene_scrambler(str(target_sequence.seq), 0.125)
|
||||
async for partial_match in profiler.determine_mlst_allele_variants([scrambled]):
|
||||
assert isinstance(partial_match, Allele)
|
||||
assert partial_match.partial_match_profile is not None
|
||||
mlst_targets.remove(gene)
|
||||
|
||||
assert len(mlst_targets) == 0
|
||||
|
||||
async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
assert mlst_st_data.clonal_complex == expected_profile.clonal_complex
|
||||
assert mlst_st_data.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
dummy_alleles = bad_profile.alleles
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
mlst_profile = await dummy_profiler.determine_mlst_st(dummy_alleles)
|
||||
assert isinstance(mlst_profile, MLSTProfile)
|
||||
assert mlst_profile.clonal_complex == "unknown"
|
||||
assert mlst_profile.sequence_type == "unknown"
|
||||
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
sequence = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", sequence)], [NamedString("seq2", sequence)]]
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_bigsdb_profile_named_string_no_repeat_name(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
sequence = get_first_sequence_from_fasta(seq_path)
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
named_profile = await dummy_profiler.profile_string([NamedString("BX470248.1", sequence)])
|
||||
assert isinstance(named_profile, NamedMLSTProfile)
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
assert name == "BX470248.1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), True):
|
||||
name, profile = name_profile.name, name_profile.mlst_profile
|
||||
|
||||
@@ -151,11 +163,11 @@ class TestBIGSdbMLSTProfiler:
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), False):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
|
||||
@@ -183,12 +195,12 @@ class TestBIGSdbIndex:
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"
|
||||
|
||||
async def test_bigsdb_index_get_schemas_for_bordetella(self):
|
||||
async def test_bigsdb_index_get_schemes_for_bordetella(self):
|
||||
async with BIGSdbIndex() as index:
|
||||
schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
||||
assert len(schemas.keys()) > 0
|
||||
assert "MLST" in schemas
|
||||
assert isinstance(schemas["MLST"], int)
|
||||
schemes = await index.get_schemes_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
||||
assert len(schemes.keys()) > 0
|
||||
assert "MLST" in schemes
|
||||
assert isinstance(schemes["MLST"], int)
|
||||
|
||||
async def test_bigsdb_index_get_databases_has_only_seqdef(self):
|
||||
async with BIGSdbIndex() as index:
|
||||
@@ -207,5 +219,15 @@ class TestBIGSdbIndex:
|
||||
async with await bigsdb_index.build_profiler_from_seqdefdb(local, "pubmlst_bordetella_seqdef", 3) as profiler:
|
||||
assert isinstance(profiler, BIGSdbMLSTProfiler)
|
||||
profile = await profiler.profile_string(sequence)
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
|
||||
("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
|
||||
])
|
||||
async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
|
||||
assert set(loci) == set(expected)
|
@@ -4,4 +4,9 @@ from autobigs.engine.reading import read_fasta
|
||||
async def test_fasta_reader_not_none():
|
||||
named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
||||
for named_string in named_strings:
|
||||
assert named_string.name == "BX470248.1"
|
||||
assert named_string.name is not None
|
||||
|
||||
async def test_fasta_reader_name_contains_file_and_id():
|
||||
named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
||||
for named_string in named_strings:
|
||||
assert named_string.name == "tohama_I_bpertussis.fasta:BX470248.1"
|
||||
|
@@ -27,15 +27,28 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
|
||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
output_path = path.join(temp_dir, "out.csv")
|
||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||
with open(output_path) as csv_handle:
|
||||
csv_reader = reader(csv_handle)
|
||||
lines = list(csv_reader)
|
||||
target_columns = lines[4:]
|
||||
target_columns = lines[0][3:]
|
||||
assert target_columns == sorted(target_columns)
|
||||
|
||||
async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_alphabet_mlst_profile):
|
||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
output_path = path.join(temp_dir, "out.csv")
|
||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||
with open(output_path) as csv_handle:
|
||||
csv_reader = reader(csv_handle)
|
||||
lines = list(csv_reader)
|
||||
sample_name = lines[1][0]
|
||||
assert sample_name == "name"
|
||||
|
||||
|
||||
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
|
||||
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore
|
||||
assert dummy_alphabet_mlst_profile.mlst_profile is not None
|
||||
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles)
|
||||
expected_mapping = {
|
||||
"A": "1",
|
||||
"B": "1",
|
||||
@@ -45,3 +58,13 @@ async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profil
|
||||
for allele_name, allele_ids in mapping.items():
|
||||
assert allele_name in expected_mapping
|
||||
assert allele_ids == expected_mapping[allele_name]
|
||||
|
||||
async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_profile: NamedMLSTProfile):
|
||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
output_path = path.join(temp_dir, "out.csv")
|
||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||
with open(output_path) as csv_handle:
|
||||
csv_reader = reader(csv_handle)
|
||||
lines = list(csv_reader)
|
||||
assert '*' in lines[1][5]
|
133
tests/resources/B3913_bpertussis_minimized_features.fasta
Normal file
133
tests/resources/B3913_bpertussis_minimized_features.fasta
Normal file
@@ -0,0 +1,133 @@
|
||||
>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||
>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene]
|
||||
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||
>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene]
|
||||
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||
>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene]
|
||||
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||
>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene]
|
||||
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||
>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene]
|
||||
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||
>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene]
|
||||
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||
GGTGCTGAAGTAG
|
133
tests/resources/B3921_bpertussis_minimized_features.fasta
Normal file
133
tests/resources/B3921_bpertussis_minimized_features.fasta
Normal file
@@ -0,0 +1,133 @@
|
||||
>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||
>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS]
|
||||
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||
>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS]
|
||||
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||
>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS]
|
||||
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||
>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS]
|
||||
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||
>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS]
|
||||
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||
>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS]
|
||||
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||
TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||
GGTGCTGAAGTAG
|
Reference in New Issue
Block a user