Compare commits

..

6 Commits

Author SHA1 Message Date
34bf02c75a Increased timeout to 5 minutes
Some checks reported errors
autoBIGS.engine/pipeline/tag Something is wrong with the build of this commit
autoBIGS.engine/pipeline/head There was a failure building this commit
2025-03-13 18:37:33 +00:00
3cb10a4609 Added client error as acceptable exception
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-03-13 18:13:38 +00:00
1776f5aa51 Fixed exception syntax
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-03-13 16:51:15 +00:00
96d715fdcb Added a server disconnect error catch
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-03-13 16:27:59 +00:00
e088d1080b Added functionality to retry determining ST
Some checks reported errors
autoBIGS.engine/pipeline/head Something is wrong with the build of this commit
2025-03-13 16:13:14 +00:00
8ffc7c7fb5 Added retry functionality for allele variant determination
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-03-13 15:54:35 +00:00

View File

@ -9,7 +9,7 @@ import shutil
import tempfile
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
from aiohttp import ClientSession, ClientTimeout
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
from autobigs.engine.reading import read_fasta
from autobigs.engine.structures.alignment import PairwiseAlignment
@ -43,11 +43,12 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
def __init__(self, database_api: str, database_name: str, scheme_id: int):
def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5):
self._retry_limit = retry_requests
self._database_name = database_name
self._scheme_id = scheme_id
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(300))
async def __aenter__(self):
return self
@ -57,40 +58,59 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
uri_path = "sequence"
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
query_sequence_strings = [query_sequence_strings]
for sequence_string in query_sequence_strings:
async with self._http_client.post(uri_path, json={
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
"partial_matches": True
}) as response:
sequence_response: dict = await response.json()
if "exact_matches" in sequence_response:
# loci -> list of alleles with id and loci
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
for allele_loci, alleles in exact_matches.items():
for allele in alleles:
alelle_id = allele["allele_id"]
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
elif "partial_matches" in sequence_response:
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
for allele_loci, partial_match in partial_matches.items():
if len(partial_match) <= 0:
continue
partial_match_profile = AlignmentStats(
percent_identity=float(partial_match["identity"]),
mismatches=int(partial_match["mismatches"]),
gaps=int(partial_match["gaps"]),
match_metric=int(partial_match["bitscore"])
)
result_allele = Allele(
allele_locus=allele_loci,
allele_variant=str(partial_match["allele"]),
partial_match_profile=partial_match_profile
)
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
for sequence_string in query_sequence_strings:
attempts = 0
success = False
last_error = None
while not success and attempts < self._retry_limit:
attempts += 1
request = self._http_client.post(uri_path, json={
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
"partial_matches": True
})
try:
async with request as response:
sequence_response: dict = await response.json()
if "exact_matches" in sequence_response:
# loci -> list of alleles with id and loci
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
for allele_loci, alleles in exact_matches.items():
for allele in alleles:
alelle_id = allele["allele_id"]
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
elif "partial_matches" in sequence_response:
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
for allele_loci, partial_match in partial_matches.items():
if len(partial_match) <= 0:
continue
partial_match_profile = AlignmentStats(
percent_identity=float(partial_match["identity"]),
mismatches=int(partial_match["mismatches"]),
gaps=int(partial_match["gaps"]),
match_metric=int(partial_match["bitscore"])
)
result_allele = Allele(
allele_locus=allele_loci,
allele_variant=str(partial_match["allele"]),
partial_match_profile=partial_match_profile
)
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
else:
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
last_error = e
success = False
await asyncio.sleep(5) # In case the connection issue is due to rate issues
else:
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
success = True
if not success and last_error is not None:
try:
raise last_error
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Non-fatal errors
yield Allele("error", "error", None)
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
uri_path = "designations"
@ -113,22 +133,42 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
request_json = {
"designations": allele_request_dict
}
async with self._http_client.post(uri_path, json=request_json) as response:
response_json: dict = await response.json()
allele_set: Set[Allele] = set()
response_json.setdefault("fields", dict())
scheme_fields_returned: dict[str, str] = response_json["fields"]
scheme_fields_returned.setdefault("ST", "unknown")
scheme_fields_returned.setdefault("clonal_complex", "unknown")
scheme_exact_matches: dict = response_json["exact_matches"]
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
if len(allele_set) == 0:
raise ValueError("Passed in no alleles.")
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
if len(names_list) > 0:
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
return result_mlst_profile
attempts = 0
success = False
last_error = None
while attempts < self._retry_limit and not success:
attempts += 1
try:
async with self._http_client.post(uri_path, json=request_json) as response:
response_json: dict = await response.json()
allele_set: Set[Allele] = set()
response_json.setdefault("fields", dict())
scheme_fields_returned: dict[str, str] = response_json["fields"]
scheme_fields_returned.setdefault("ST", "unknown")
scheme_fields_returned.setdefault("clonal_complex", "unknown")
scheme_exact_matches: dict = response_json["exact_matches"]
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
if len(allele_set) == 0:
raise ValueError("Passed in no alleles.")
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
if len(names_list) > 0:
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
return result_mlst_profile
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
last_error = e
success = False
await asyncio.sleep(5)
else:
success = True
try:
if last_error is not None:
raise last_error
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
raise ValueError("Last error was not recorded.")
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
alleles = self.determine_mlst_allele_variants(query_sequence_strings)