Now tracks failed profilings
This commit is contained in:
parent
2843d0d592
commit
9589761ddd
@ -12,7 +12,7 @@
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
"postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||
"postCreateCommand": "pip3 install --user -r requirements.txt && pipx install . -e",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
|
@ -41,7 +41,10 @@ async def run(args):
|
||||
gen_strings = read_multiple_fastas(args.fastas)
|
||||
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
|
||||
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
|
||||
await write_mlst_profiles_as_csv(mlst_profiles, args.out)
|
||||
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
|
||||
if len(failed) > 0:
|
||||
# Join outside the f-string: reusing double quotes and using a backslash
# inside a replacement field is a SyntaxError on Python < 3.12 (PEP 701).
# The printed text is unchanged.
print(f"A total of {len(failed)} IDs failed:\n" + "\n".join(failed))
|
||||
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
|
||||
|
||||
def run_asynchronously(args):
    """Synchronous entry point: drive the `run` coroutine with `asyncio.run`."""
    coroutine = run(args)
    asyncio.run(coroutine)
|
||||
|
@ -15,11 +15,15 @@ def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Alle
|
||||
return result_dict
|
||||
|
||||
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
|
||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
|
||||
failed = list()
|
||||
with open(handle, "w", newline='') as filehandle:
|
||||
header = None
|
||||
writer: Union[csv.DictWriter, None] = None
|
||||
async for name, mlst_profile in mlst_profiles_iterable:
|
||||
if mlst_profile is None:
|
||||
failed.append(name)
|
||||
continue
|
||||
if writer is None:
|
||||
header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
|
||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||
@ -30,4 +34,5 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
|
||||
"id": name,
|
||||
**dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
|
||||
}
|
||||
writer.writerow(rowdict=row_dictionary)
|
||||
writer.writerow(rowdict=row_dictionary)
|
||||
return failed
|
@ -46,17 +46,14 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
"designations": allele_request_dict
|
||||
}
|
||||
async with self._http_client.post(uri_path, json=request_json) as response:
|
||||
response_json = await response.json()
|
||||
response_json: dict = await response.json()
|
||||
if "exact_matches" not in response_json:
|
||||
raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
|
||||
schema_exact_matches: dict = response_json["exact_matches"]
|
||||
if "fields" not in response_json:
|
||||
schema_fields_returned = {
|
||||
"ST": "Unknown",
|
||||
"Clonal Complex": "Unknown"
|
||||
}
|
||||
else:
|
||||
schema_fields_returned: Mapping[str, str] = response_json["fields"]
|
||||
# Default to a fresh dict *instance*. Passing the `dict` type itself would be
# stored as the value, and the subsequent `.setdefault("ST", ...)` call on it
# would raise TypeError whenever the response has no "fields" key.
response_json.setdefault("fields", {})
|
||||
schema_fields_returned: dict[str, str] = response_json["fields"]
|
||||
schema_fields_returned.setdefault("ST", "unknown")
|
||||
schema_fields_returned.setdefault("clonal_complex", "unknown")
|
||||
allele_map: dict[str, list[Allele]] = defaultdict(list)
|
||||
for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
|
||||
for exact_match_allele in exact_match_alleles:
|
||||
@ -68,7 +65,7 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
||||
return await self.fetch_mlst_st(alleles)
|
||||
|
||||
|
||||
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[Union[tuple[str, MLSTProfile], tuple[str, None]], Any]:
|
||||
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
|
||||
async for named_string in namedStrings:
|
||||
try:
|
||||
yield (named_string.name, await self.profile_string(named_string.sequence))
|
||||
|
68196
tests/resources/12822.fasta
Normal file
68196
tests/resources/12822.fasta
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user