Compare commits
6 Commits
af7edf0942
...
34bf02c75a
Author | SHA1 | Date | |
---|---|---|---|
34bf02c75a | |||
3cb10a4609 | |||
1776f5aa51 | |||
96d715fdcb | |||
e088d1080b | |||
8ffc7c7fb5 |
@ -9,7 +9,7 @@ import shutil
|
||||
import tempfile
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
||||
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
|
||||
|
||||
from autobigs.engine.reading import read_fasta
|
||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||
@ -43,11 +43,12 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
|
||||
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
|
||||
def __init__(self, database_api: str, database_name: str, scheme_id: int):
|
||||
def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5):
|
||||
self._retry_limit = retry_requests
|
||||
self._database_name = database_name
|
||||
self._scheme_id = scheme_id
|
||||
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
|
||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
|
||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(300))
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
@ -57,40 +58,59 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
uri_path = "sequence"
|
||||
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
|
||||
query_sequence_strings = [query_sequence_strings]
|
||||
for sequence_string in query_sequence_strings:
|
||||
async with self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||
"partial_matches": True
|
||||
}) as response:
|
||||
sequence_response: dict = await response.json()
|
||||
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
elif "partial_matches" in sequence_response:
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = AlignmentStats(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
gaps=int(partial_match["gaps"]),
|
||||
match_metric=int(partial_match["bitscore"])
|
||||
)
|
||||
result_allele = Allele(
|
||||
allele_locus=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
for sequence_string in query_sequence_strings:
|
||||
attempts = 0
|
||||
success = False
|
||||
last_error = None
|
||||
while not success and attempts < self._retry_limit:
|
||||
attempts += 1
|
||||
request = self._http_client.post(uri_path, json={
|
||||
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||
"partial_matches": True
|
||||
})
|
||||
try:
|
||||
async with request as response:
|
||||
sequence_response: dict = await response.json()
|
||||
|
||||
if "exact_matches" in sequence_response:
|
||||
# loci -> list of alleles with id and loci
|
||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||
for allele_loci, alleles in exact_matches.items():
|
||||
for allele in alleles:
|
||||
alelle_id = allele["allele_id"]
|
||||
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
elif "partial_matches" in sequence_response:
|
||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||
for allele_loci, partial_match in partial_matches.items():
|
||||
if len(partial_match) <= 0:
|
||||
continue
|
||||
partial_match_profile = AlignmentStats(
|
||||
percent_identity=float(partial_match["identity"]),
|
||||
mismatches=int(partial_match["mismatches"]),
|
||||
gaps=int(partial_match["gaps"]),
|
||||
match_metric=int(partial_match["bitscore"])
|
||||
)
|
||||
result_allele = Allele(
|
||||
allele_locus=allele_loci,
|
||||
allele_variant=str(partial_match["allele"]),
|
||||
partial_match_profile=partial_match_profile
|
||||
)
|
||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
|
||||
last_error = e
|
||||
success = False
|
||||
await asyncio.sleep(5) # In case the connection issue is due to rate issues
|
||||
else:
|
||||
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||
success = True
|
||||
if not success and last_error is not None:
|
||||
try:
|
||||
raise last_error
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Non-fatal errors
|
||||
yield Allele("error", "error", None)
|
||||
|
||||
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
||||
uri_path = "designations"
|
||||
@ -113,22 +133,42 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
request_json = {
|
||||
"designations": allele_request_dict
|
||||
}
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json: dict = await response.json()
|
||||
allele_set: Set[Allele] = set()
|
||||
response_json.setdefault("fields", dict())
|
||||
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
||||
scheme_fields_returned.setdefault("ST", "unknown")
|
||||
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
scheme_exact_matches: dict = response_json["exact_matches"]
|
||||
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||
if len(allele_set) == 0:
|
||||
raise ValueError("Passed in no alleles.")
|
||||
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
||||
if len(names_list) > 0:
|
||||
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
||||
return result_mlst_profile
|
||||
|
||||
attempts = 0
|
||||
success = False
|
||||
last_error = None
|
||||
while attempts < self._retry_limit and not success:
|
||||
attempts += 1
|
||||
try:
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json: dict = await response.json()
|
||||
allele_set: Set[Allele] = set()
|
||||
response_json.setdefault("fields", dict())
|
||||
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
||||
scheme_fields_returned.setdefault("ST", "unknown")
|
||||
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
scheme_exact_matches: dict = response_json["exact_matches"]
|
||||
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||
if len(allele_set) == 0:
|
||||
raise ValueError("Passed in no alleles.")
|
||||
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
||||
if len(names_list) > 0:
|
||||
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
||||
return result_mlst_profile
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||
last_error = e
|
||||
success = False
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
success = True
|
||||
try:
|
||||
if last_error is not None:
|
||||
raise last_error
|
||||
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||
result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
|
||||
raise ValueError("Last error was not recorded.")
|
||||
|
||||
|
||||
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
||||
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
||||
|
Loading…
x
Reference in New Issue
Block a user