Now tracks failed profilings

This commit is contained in:
Harrison Deng 2025-01-09 17:27:15 +00:00
parent 2843d0d592
commit 9589761ddd
5 changed files with 68214 additions and 13 deletions

View File

@ -12,7 +12,7 @@
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "pip3 install --user -r requirements.txt",
"postCreateCommand": "pip3 install --user -r requirements.txt && pipx install . -e",
"customizations": {
"vscode": {
"extensions": [

View File

@ -41,7 +41,10 @@ async def run(args):
gen_strings = read_multiple_fastas(args.fastas)
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
await write_mlst_profiles_as_csv(mlst_profiles, args.out)
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
if len(failed) > 0:
    failed_names = "\n".join(failed)
    print(f"A total of {len(failed)} IDs failed:\n{failed_names}")
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
def run_asynchronously(args):
asyncio.run(run(args))

View File

@ -15,11 +15,15 @@ def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Alle
return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
failed = list()
with open(handle, "w", newline='') as filehandle:
header = None
writer: Union[csv.DictWriter, None] = None
async for name, mlst_profile in mlst_profiles_iterable:
if mlst_profile is None:
failed.append(name)
continue
if writer is None:
header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
writer = csv.DictWriter(filehandle, fieldnames=header)
@ -30,4 +34,5 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
"id": name,
**dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
}
writer.writerow(rowdict=row_dictionary)
writer.writerow(rowdict=row_dictionary)
return failed

View File

@ -46,17 +46,14 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
"designations": allele_request_dict
}
async with self._http_client.post(uri_path, json=request_json) as response:
response_json = await response.json()
response_json: dict = await response.json()
if "exact_matches" not in response_json:
raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
schema_exact_matches: dict = response_json["exact_matches"]
if "fields" not in response_json:
schema_fields_returned = {
"ST": "Unknown",
"Clonal Complex": "Unknown"
}
else:
schema_fields_returned: Mapping[str, str] = response_json["fields"]
response_json.setdefault("fields", {})
schema_fields_returned: dict[str, str] = response_json["fields"]
schema_fields_returned.setdefault("ST", "unknown")
schema_fields_returned.setdefault("clonal_complex", "unknown")
allele_map: dict[str, list[Allele]] = defaultdict(list)
for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
for exact_match_allele in exact_match_alleles:
@ -68,7 +65,7 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
return await self.fetch_mlst_st(alleles)
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[Union[tuple[str, MLSTProfile], tuple[str, None]], Any]:
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
async for named_string in namedStrings:
try:
yield (named_string.name, await self.profile_string(named_string.sequence))

68196
tests/resources/12822.fasta Normal file

File diff suppressed because it is too large Load Diff