Now tracks failed profiling attempts

This commit is contained in:
Harrison Deng 2025-01-09 17:27:15 +00:00
parent 2843d0d592
commit 9589761ddd
5 changed files with 68214 additions and 13 deletions

View File

@ -12,7 +12,7 @@
// "forwardPorts": [], // "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created. // Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "pip3 install --user -r requirements.txt", "postCreateCommand": "pip3 install --user -r requirements.txt && pipx install . -e",
"customizations": { "customizations": {
"vscode": { "vscode": {
"extensions": [ "extensions": [

View File

@ -41,7 +41,10 @@ async def run(args):
gen_strings = read_multiple_fastas(args.fastas) gen_strings = read_multiple_fastas(args.fastas)
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler: async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings) mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
await write_mlst_profiles_as_csv(mlst_profiles, args.out) failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
if len(failed) > 0:
print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
def run_asynchronously(args): def run_asynchronously(args):
asyncio.run(run(args)) asyncio.run(run(args))

View File

@ -15,11 +15,15 @@ def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Alle
return result_dict return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]): async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
failed = list()
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
header = None header = None
writer: Union[csv.DictWriter, None] = None writer: Union[csv.DictWriter, None] = None
async for name, mlst_profile in mlst_profiles_iterable: async for name, mlst_profile in mlst_profiles_iterable:
if mlst_profile is None:
failed.append(name)
continue
if writer is None: if writer is None:
header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()] header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)
@ -31,3 +35,4 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
**dict_loci_alleles_variants_from_loci(mlst_profile.alleles) **dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
} }
writer.writerow(rowdict=row_dictionary) writer.writerow(rowdict=row_dictionary)
return failed

View File

@ -46,17 +46,14 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
"designations": allele_request_dict "designations": allele_request_dict
} }
async with self._http_client.post(uri_path, json=request_json) as response: async with self._http_client.post(uri_path, json=request_json) as response:
response_json = await response.json() response_json: dict = await response.json()
if "exact_matches" not in response_json: if "exact_matches" not in response_json:
raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id) raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
schema_exact_matches: dict = response_json["exact_matches"] schema_exact_matches: dict = response_json["exact_matches"]
if "fields" not in response_json: response_json.setdefault("fields", dict)
schema_fields_returned = { schema_fields_returned: dict[str, str] = response_json["fields"]
"ST": "Unknown", schema_fields_returned.setdefault("ST", "unknown")
"Clonal Complex": "Unknown" schema_fields_returned.setdefault("clonal_complex", "unknown")
}
else:
schema_fields_returned: Mapping[str, str] = response_json["fields"]
allele_map: dict[str, list[Allele]] = defaultdict(list) allele_map: dict[str, list[Allele]] = defaultdict(list)
for exact_match_loci, exact_match_alleles in schema_exact_matches.items(): for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
for exact_match_allele in exact_match_alleles: for exact_match_allele in exact_match_alleles:
@ -68,7 +65,7 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
return await self.fetch_mlst_st(alleles) return await self.fetch_mlst_st(alleles)
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[Union[tuple[str, MLSTProfile], tuple[str, None]], Any]: async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
async for named_string in namedStrings: async for named_string in namedStrings:
try: try:
yield (named_string.name, await self.profile_string(named_string.sequence)) yield (named_string.name, await self.profile_string(named_string.sequence))

68196
tests/resources/12822.fasta Normal file

File diff suppressed because it is too large Load Diff