Moved "LazyPersistentCachedBIGSdbMLSTProfiler" to separate branch and deleted from current branch
This commit is contained in:
parent
f75707e4fe
commit
f462e6d5e0
@ -131,116 +131,6 @@ class OnlineBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
|
||||
async def __aexit__(self, exc_type, exc_value, traceback):
    """Delegate async-context exit to close(), releasing the HTTP session."""
    return await self.close()
class LazyPersistentCachedBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
    """BIGSdb MLST profiler backed by an on-disk cache of scheme data.

    Locus allele FASTA files and the scheme profile table are downloaded once
    into ``cache_path`` (:meth:`download_alleles_cache_data`,
    :meth:`download_scheme_profiles`) and thereafter read lazily from disk
    while profiling, so repeated runs do not re-query the BIGSdb API.
    """

    def __init__(self, database_api: str, database_name: str, schema_id: int, cache_path: str):
        """
        :param database_api: Base URL of the BIGSdb API,
            e.g. ``https://bigsdb.pasteur.fr/api``.
        :param database_name: BIGSdb sequence-definition database name.
        :param schema_id: Numeric ID of the MLST scheme within the database.
        :param cache_path: Directory in which cache files are stored.
        """
        self._database_api = database_api
        self._database_name = database_name
        self._schema_id = schema_id
        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
        # NOTE(review): ClientTimeout's first positional argument is a *total*
        # timeout in seconds, so 10000 is ~2.8 hours — confirm this was not
        # meant to be milliseconds.
        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))
        self._cache_path = cache_path
        # Sorted locus names of the scheme; filled by load_scheme_locis().
        self._loci: list[str] = []
        # Ordered tuple of allele variant IDs -> (ST, clonal_complex).
        self._profiles: dict[tuple[str, ...], tuple[str, str]] = {}

    async def load_scheme_locis(self):
        """Fetch the scheme definition and (re)populate the sorted locus list."""
        self._loci.clear()
        async with self._http_client.get("") as schema_response:
            schema_json = await schema_response.json()
            # Each locus is reported as a URL; its basename is the locus name.
            self._loci.extend(path.basename(locus) for locus in schema_json["loci"])
        self._loci.sort()

    async def load_scheme_profiles(self):
        """Load the cached profile table into the in-memory ST lookup.

        Requires load_scheme_locis() and download_scheme_profiles() to have
        run first; raises ``FileNotFoundError`` if the cache file is absent.
        """
        self._profiles.clear()
        # The cached file is tab-separated despite its ".csv" name.
        with open(self.get_scheme_profile_path()) as profile_cache_handle:
            reader = csv.DictReader(profile_cache_handle, delimiter="\t")
            for line in reader:
                alleles = tuple(line[locus] for locus in self._loci)
                self._profiles[alleles] = (line["ST"], line["clonal_complex"])

    def get_locus_cache_path(self, locus: str) -> str:
        """Return the cache file path for one locus' allele FASTA."""
        return path.join(self._cache_path, locus + "." + "fasta")

    def get_scheme_profile_path(self) -> str:
        """Return the cache file path of the scheme profile table.

        Named ``profiles.csv`` although the downloaded content is
        tab-separated (see load_scheme_profiles()).
        """
        return path.join(self._cache_path, "profiles.csv")

    async def download_alleles_cache_data(self):
        """Stream every locus' allele FASTA from the API into the cache.

        Responses are written chunk by chunk so large FASTA files are never
        held fully in memory.
        """
        for locus in self._loci:
            # Build the full URL from the API base: a request path starting
            # with "/" is joined against the host only (RFC 3986), which
            # silently drops any path component of the API base such as the
            # "/api" in "https://bigsdb.pasteur.fr/api".
            fasta_url = f"{self._database_api}/db/{self._database_name}/loci/{locus}/alleles_fasta"
            # Open the HTTP response *before* truncating the cache file so a
            # failed request cannot destroy a previously downloaded cache.
            async with self._http_client.get(fasta_url) as fasta_response:
                with open(self.get_locus_cache_path(locus), "wb") as fasta_handle:
                    # iter_chunks() yields (data, end_of_http_chunk) pairs.
                    async for chunk, _eof in fasta_response.content.iter_chunks():  # TODO maybe allow chunking to be configurable
                        fasta_handle.write(chunk)

    async def download_scheme_profiles(self):
        """Stream the scheme's profile table from the API into the cache."""
        # Request first, then truncate the cache file (see
        # download_alleles_cache_data for the rationale).
        async with self._http_client.get("profiles_csv") as profiles_response:
            with open(self.get_scheme_profile_path(), "wb") as profile_cache_handle:
                async for chunk, _eof in profiles_response.content.iter_chunks():
                    profile_cache_handle.write(chunk)

    async def fetch_mlst_allele_variants(self, sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
        """Align input sequences against cached alleles and yield the calls.

        For each input sequence and each locus, every allele variant in the
        cached FASTA is aligned locally with BLASTN scoring. A gap- and
        mismatch-free alignment yields an exact Allele; otherwise the Allele
        carries a PartialAllelicMatchProfile describing the discrepancy.
        """
        aligner = PairwiseAligner("blastn")
        aligner.mode = "local"
        for sequence_string in sequence_strings:
            for locus in self._loci:
                async for fasta_seq in read_fasta(self.get_locus_cache_path(locus)):
                    allele_variant = fasta_seq.name
                    alignment_results = aligner.align(sequence_string, fasta_seq.sequence)
                    # aligner.align() returns co-optimal alignments; any one
                    # of them has the optimal score.
                    top_alignment = sorted(alignment_results)[0]
                    stats = top_alignment.counts()
                    if stats.gaps == 0 and stats.mismatches == 0:
                        yield Allele(locus, allele_variant, None)
                    else:
                        yield Allele(
                            locus,
                            allele_variant,
                            PartialAllelicMatchProfile(
                                percent_identity=stats.identities / top_alignment.length,
                                mismatches=stats.mismatches,
                                gaps=stats.gaps,
                            ),
                        )

    async def fetch_mlst_st(self, alleles):
        """Resolve a collection of allele calls to an MLSTProfile via the cache.

        Accepts either a synchronous or an asynchronous iterable of Allele.

        NOTE(review): a locus with no call, or an allele combination absent
        from the profile table, raises a bare ``KeyError`` here, which
        profile_multiple_strings() does not catch (it only handles
        NoBIGSdbMatchesException) — confirm whether that is intended.
        """
        allele_variants: dict[str, Allele] = {}
        if isinstance(alleles, AsyncIterable):
            async for allele in alleles:
                allele_variants[allele.allele_locus] = allele
        else:
            for allele in alleles:
                allele_variants[allele.allele_locus] = allele
        # Profile keys are allele variants ordered by the sorted locus list.
        ordered_profile = tuple(allele_variants[locus].allele_variant for locus in self._loci)
        st, clonal_complex = self._profiles[ordered_profile]
        return MLSTProfile(allele_variants, st, clonal_complex)

    async def profile_string(self, sequence_strings: Iterable[str]) -> MLSTProfile:
        """Profile a group of sequence strings into a single MLSTProfile."""
        alleles = self.fetch_mlst_allele_variants(sequence_strings)
        return await self.fetch_mlst_st(alleles)

    async def profile_multiple_strings(self, named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
        """Profile many named sequences, yielding a NamedMLSTProfile for each.

        :param stop_on_fail: If true, re-raise the first
            NoBIGSdbMatchesException; otherwise yield a NamedMLSTProfile with
            a ``None`` payload for the failing sequence and continue.
        """
        async for named_strings in named_string_groups:
            for named_string in named_strings:
                try:
                    yield NamedMLSTProfile(named_string.name, await self.profile_string([named_string.sequence]))
                except NoBIGSdbMatchesException as e:
                    if stop_on_fail:
                        raise e
                    yield NamedMLSTProfile(named_string.name, None)

    async def close(self):
        """Release the underlying HTTP client session."""
        await self._http_client.close()

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close HTTP resources when the async context exits."""
        await self.close()
|
||||
class BIGSdbIndex(AbstractAsyncContextManager):
|
||||
KNOWN_BIGSDB_APIS = {
|
||||
"https://bigsdb.pasteur.fr/api",
|
||||
|
Loading…
x
Reference in New Issue
Block a user