Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
f8d92a4aad | |||
34bf02c75a | |||
3cb10a4609 | |||
1776f5aa51 | |||
96d715fdcb | |||
e088d1080b | |||
8ffc7c7fb5 |
3
.vscode/extensions.json
vendored
3
.vscode/extensions.json
vendored
@ -1,5 +1,6 @@
|
|||||||
{
|
{
|
||||||
"recommendations": [
|
"recommendations": [
|
||||||
"piotrpalarz.vscode-gitignore-generator"
|
"piotrpalarz.vscode-gitignore-generator",
|
||||||
|
"gruntfuggly.todo-tree"
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -9,7 +9,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
from typing import Any, AsyncGenerator, AsyncIterable, Coroutine, Iterable, Mapping, Sequence, Set, Union
|
||||||
|
|
||||||
from aiohttp import ClientSession, ClientTimeout
|
from aiohttp import ClientOSError, ClientSession, ClientTimeout, ServerDisconnectedError
|
||||||
|
|
||||||
from autobigs.engine.reading import read_fasta
|
from autobigs.engine.reading import read_fasta
|
||||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||||
@ -43,11 +43,12 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
|||||||
|
|
||||||
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||||
|
|
||||||
def __init__(self, database_api: str, database_name: str, scheme_id: int):
|
def __init__(self, database_api: str, database_name: str, scheme_id: int, retry_requests: int = 5):
|
||||||
|
self._retry_limit = retry_requests
|
||||||
self._database_name = database_name
|
self._database_name = database_name
|
||||||
self._scheme_id = scheme_id
|
self._scheme_id = scheme_id
|
||||||
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
|
self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._scheme_id}/"
|
||||||
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
|
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(300))
|
||||||
|
|
||||||
async def __aenter__(self):
|
async def __aenter__(self):
|
||||||
return self
|
return self
|
||||||
@ -57,40 +58,59 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
|||||||
uri_path = "sequence"
|
uri_path = "sequence"
|
||||||
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
|
if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
|
||||||
query_sequence_strings = [query_sequence_strings]
|
query_sequence_strings = [query_sequence_strings]
|
||||||
|
|
||||||
for sequence_string in query_sequence_strings:
|
for sequence_string in query_sequence_strings:
|
||||||
async with self._http_client.post(uri_path, json={
|
attempts = 0
|
||||||
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
success = False
|
||||||
"partial_matches": True
|
last_error = None
|
||||||
}) as response:
|
while not success and attempts < self._retry_limit:
|
||||||
sequence_response: dict = await response.json()
|
attempts += 1
|
||||||
|
request = self._http_client.post(uri_path, json={
|
||||||
|
"sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
|
||||||
|
"partial_matches": True
|
||||||
|
})
|
||||||
|
try:
|
||||||
|
async with request as response:
|
||||||
|
sequence_response: dict = await response.json()
|
||||||
|
|
||||||
if "exact_matches" in sequence_response:
|
if "exact_matches" in sequence_response:
|
||||||
# loci -> list of alleles with id and loci
|
# loci -> list of alleles with id and loci
|
||||||
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||||
for allele_loci, alleles in exact_matches.items():
|
for allele_loci, alleles in exact_matches.items():
|
||||||
for allele in alleles:
|
for allele in alleles:
|
||||||
alelle_id = allele["allele_id"]
|
alelle_id = allele["allele_id"]
|
||||||
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
|
||||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||||
elif "partial_matches" in sequence_response:
|
elif "partial_matches" in sequence_response:
|
||||||
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
|
||||||
for allele_loci, partial_match in partial_matches.items():
|
for allele_loci, partial_match in partial_matches.items():
|
||||||
if len(partial_match) <= 0:
|
if len(partial_match) <= 0:
|
||||||
continue
|
continue
|
||||||
partial_match_profile = AlignmentStats(
|
partial_match_profile = AlignmentStats(
|
||||||
percent_identity=float(partial_match["identity"]),
|
percent_identity=float(partial_match["identity"]),
|
||||||
mismatches=int(partial_match["mismatches"]),
|
mismatches=int(partial_match["mismatches"]),
|
||||||
gaps=int(partial_match["gaps"]),
|
gaps=int(partial_match["gaps"]),
|
||||||
match_metric=int(partial_match["bitscore"])
|
match_metric=int(partial_match["bitscore"])
|
||||||
)
|
)
|
||||||
result_allele = Allele(
|
result_allele = Allele(
|
||||||
allele_locus=allele_loci,
|
allele_locus=allele_loci,
|
||||||
allele_variant=str(partial_match["allele"]),
|
allele_variant=str(partial_match["allele"]),
|
||||||
partial_match_profile=partial_match_profile
|
partial_match_profile=partial_match_profile
|
||||||
)
|
)
|
||||||
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
|
||||||
|
else:
|
||||||
|
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Errors we will retry
|
||||||
|
last_error = e
|
||||||
|
success = False
|
||||||
|
await asyncio.sleep(5) # In case the connection issue is due to rate issues
|
||||||
else:
|
else:
|
||||||
raise NoBIGSdbMatchesException(self._database_name, self._scheme_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
|
success = True
|
||||||
|
if not success and last_error is not None:
|
||||||
|
try:
|
||||||
|
raise last_error
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e: # Non-fatal errors
|
||||||
|
yield Allele("error", "error", None)
|
||||||
|
|
||||||
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
|
||||||
uri_path = "designations"
|
uri_path = "designations"
|
||||||
@ -113,23 +133,43 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
|||||||
request_json = {
|
request_json = {
|
||||||
"designations": allele_request_dict
|
"designations": allele_request_dict
|
||||||
}
|
}
|
||||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
|
||||||
response_json: dict = await response.json()
|
|
||||||
allele_set: Set[Allele] = set()
|
|
||||||
response_json.setdefault("fields", dict())
|
|
||||||
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
|
||||||
scheme_fields_returned.setdefault("ST", "unknown")
|
|
||||||
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
|
||||||
scheme_exact_matches: dict = response_json["exact_matches"]
|
|
||||||
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
|
||||||
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
|
||||||
if len(allele_set) == 0:
|
|
||||||
raise ValueError("Passed in no alleles.")
|
|
||||||
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
|
||||||
if len(names_list) > 0:
|
|
||||||
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
|
||||||
return result_mlst_profile
|
|
||||||
|
|
||||||
|
attempts = 0
|
||||||
|
success = False
|
||||||
|
last_error = None
|
||||||
|
while attempts < self._retry_limit and not success:
|
||||||
|
attempts += 1
|
||||||
|
try:
|
||||||
|
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||||
|
response_json: dict = await response.json()
|
||||||
|
allele_set: Set[Allele] = set()
|
||||||
|
response_json.setdefault("fields", dict())
|
||||||
|
scheme_fields_returned: dict[str, str] = response_json["fields"]
|
||||||
|
scheme_fields_returned.setdefault("ST", "unknown")
|
||||||
|
scheme_fields_returned.setdefault("clonal_complex", "unknown")
|
||||||
|
scheme_exact_matches: dict = response_json["exact_matches"]
|
||||||
|
for exact_match_locus, exact_match_alleles in scheme_exact_matches.items():
|
||||||
|
allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
|
||||||
|
if len(allele_set) == 0:
|
||||||
|
raise ValueError("Passed in no alleles.")
|
||||||
|
result_mlst_profile = MLSTProfile(allele_set, scheme_fields_returned["ST"], scheme_fields_returned["clonal_complex"])
|
||||||
|
if len(names_list) > 0:
|
||||||
|
result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0], result_mlst_profile)
|
||||||
|
return result_mlst_profile
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||||
|
last_error = e
|
||||||
|
success = False
|
||||||
|
await asyncio.sleep(5)
|
||||||
|
else:
|
||||||
|
success = True
|
||||||
|
try:
|
||||||
|
if last_error is not None:
|
||||||
|
raise last_error
|
||||||
|
except (ConnectionError, ServerDisconnectedError, ClientOSError) as e:
|
||||||
|
result_mlst_profile = NamedMLSTProfile((str(tuple(names_list)) if len(set(names_list)) > 1 else names_list[0]) + ":Error", None)
|
||||||
|
raise ValueError("Last error was not recorded.")
|
||||||
|
|
||||||
|
|
||||||
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
|
||||||
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
alleles = self.determine_mlst_allele_variants(query_sequence_strings)
|
||||||
return await self.determine_mlst_st(alleles)
|
return await self.determine_mlst_st(alleles)
|
||||||
@ -211,6 +251,16 @@ class BIGSdbIndex(AbstractAsyncContextManager):
|
|||||||
|
|
||||||
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
|
async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, scheme_id: int) -> BIGSdbMLSTProfiler:
|
||||||
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
|
return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, scheme_id)
|
||||||
|
|
||||||
|
async def get_scheme_loci(self, dbseqdef_name: str, scheme_id: int) -> list[str]:
|
||||||
|
uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name)}/db/{dbseqdef_name}/schemes/{scheme_id}"
|
||||||
|
async with self._http_client.get(uri_path) as response:
|
||||||
|
response_json = await response.json()
|
||||||
|
loci = response_json["loci"]
|
||||||
|
results = []
|
||||||
|
for locus in loci:
|
||||||
|
results.append(path.basename(locus))
|
||||||
|
return results
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._http_client.close()
|
await self._http_client.close()
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import csv
|
import csv
|
||||||
from os import PathLike
|
from os import PathLike
|
||||||
from typing import AsyncIterable, Collection, Mapping, Sequence, Union
|
from typing import AsyncIterable, Collection, Iterable, Mapping, Sequence, Union
|
||||||
|
|
||||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
|
||||||
|
|
||||||
@ -17,7 +17,7 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
|
|||||||
result[locus] = tuple(result[locus]) # type: ignore
|
result[locus] = tuple(result[locus]) # type: ignore
|
||||||
return dict(result)
|
return dict(result)
|
||||||
|
|
||||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
|
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]], allele_names: Iterable[str]) -> Sequence[str]:
|
||||||
failed = list()
|
failed = list()
|
||||||
with open(handle, "w", newline='') as filehandle:
|
with open(handle, "w", newline='') as filehandle:
|
||||||
header = None
|
header = None
|
||||||
@ -30,7 +30,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[Named
|
|||||||
continue
|
continue
|
||||||
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
|
allele_mapping = alleles_to_text_map(mlst_profile.alleles)
|
||||||
if writer is None:
|
if writer is None:
|
||||||
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
|
header = ["id", "st", "clonal-complex", *sorted(allele_names)]
|
||||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
row_dictionary = {
|
row_dictionary = {
|
||||||
|
@ -222,3 +222,12 @@ class TestBIGSdbIndex:
|
|||||||
assert isinstance(profile, MLSTProfile)
|
assert isinstance(profile, MLSTProfile)
|
||||||
assert profile.clonal_complex == "ST-2 complex"
|
assert profile.clonal_complex == "ST-2 complex"
|
||||||
assert profile.sequence_type == "1"
|
assert profile.sequence_type == "1"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(["bigsdb_name", "scheme_id", "expected"], [
|
||||||
|
("pubmlst_bordetella_seqdef", 3, ["adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"])
|
||||||
|
])
|
||||||
|
async def test_bigsdb_index_fetches_loci_names(self, bigsdb_name, scheme_id, expected):
|
||||||
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
|
loci = await bigsdb_index.get_scheme_loci(bigsdb_name, scheme_id)
|
||||||
|
assert set(loci) == set(expected)
|
@ -27,7 +27,7 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
|
|||||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
output_path = path.join(temp_dir, "out.csv")
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
with open(output_path) as csv_handle:
|
with open(output_path) as csv_handle:
|
||||||
csv_reader = reader(csv_handle)
|
csv_reader = reader(csv_handle)
|
||||||
lines = list(csv_reader)
|
lines = list(csv_reader)
|
||||||
@ -38,7 +38,7 @@ async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_a
|
|||||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
output_path = path.join(temp_dir, "out.csv")
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
with open(output_path) as csv_handle:
|
with open(output_path) as csv_handle:
|
||||||
csv_reader = reader(csv_handle)
|
csv_reader = reader(csv_handle)
|
||||||
lines = list(csv_reader)
|
lines = list(csv_reader)
|
||||||
@ -63,7 +63,7 @@ async def test_csv_writing_includes_asterisk_for_non_exact(dummy_alphabet_mlst_p
|
|||||||
dummy_profiles = [dummy_alphabet_mlst_profile]
|
dummy_profiles = [dummy_alphabet_mlst_profile]
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
output_path = path.join(temp_dir, "out.csv")
|
output_path = path.join(temp_dir, "out.csv")
|
||||||
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
|
await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path, ["A", "D", "B", "C"])
|
||||||
with open(output_path) as csv_handle:
|
with open(output_path) as csv_handle:
|
||||||
csv_reader = reader(csv_handle)
|
csv_reader = reader(csv_handle)
|
||||||
lines = list(csv_reader)
|
lines = list(csv_reader)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user