Now tracks failed profiling attempts

This commit is contained in:
Harrison Deng 2025-01-09 17:27:15 +00:00
parent 2843d0d592
commit 9589761ddd
5 changed files with 68214 additions and 13 deletions

View File

@ -12,7 +12,7 @@
// "forwardPorts": [], // "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created. // Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "pip3 install --user -r requirements.txt", "postCreateCommand": "pip3 install --user -r requirements.txt && pipx install . -e",
"customizations": { "customizations": {
"vscode": { "vscode": {
"extensions": [ "extensions": [

View File

@ -41,7 +41,10 @@ async def run(args):
gen_strings = read_multiple_fastas(args.fastas) gen_strings = read_multiple_fastas(args.fastas)
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler: async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings) mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings)
await write_mlst_profiles_as_csv(mlst_profiles, args.out) failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
if len(failed) > 0:
print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
def run_asynchronously(args): def run_asynchronously(args):
asyncio.run(run(args)) asyncio.run(run(args))

View File

@ -15,11 +15,15 @@ def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Alle
return result_dict return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]): async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
failed = list()
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
header = None header = None
writer: Union[csv.DictWriter, None] = None writer: Union[csv.DictWriter, None] = None
async for name, mlst_profile in mlst_profiles_iterable: async for name, mlst_profile in mlst_profiles_iterable:
if mlst_profile is None:
failed.append(name)
continue
if writer is None: if writer is None:
header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()] header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)
@ -31,3 +35,4 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
**dict_loci_alleles_variants_from_loci(mlst_profile.alleles) **dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
} }
writer.writerow(rowdict=row_dictionary) writer.writerow(rowdict=row_dictionary)
return failed

View File

@ -46,17 +46,14 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
"designations": allele_request_dict "designations": allele_request_dict
} }
async with self._http_client.post(uri_path, json=request_json) as response: async with self._http_client.post(uri_path, json=request_json) as response:
response_json = await response.json() response_json: dict = await response.json()
if "exact_matches" not in response_json: if "exact_matches" not in response_json:
raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id) raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
schema_exact_matches: dict = response_json["exact_matches"] schema_exact_matches: dict = response_json["exact_matches"]
if "fields" not in response_json: response_json.setdefault("fields", dict)
schema_fields_returned = { schema_fields_returned: dict[str, str] = response_json["fields"]
"ST": "Unknown", schema_fields_returned.setdefault("ST", "unknown")
"Clonal Complex": "Unknown" schema_fields_returned.setdefault("clonal_complex", "unknown")
}
else:
schema_fields_returned: Mapping[str, str] = response_json["fields"]
allele_map: dict[str, list[Allele]] = defaultdict(list) allele_map: dict[str, list[Allele]] = defaultdict(list)
for exact_match_loci, exact_match_alleles in schema_exact_matches.items(): for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
for exact_match_allele in exact_match_alleles: for exact_match_allele in exact_match_alleles:
@ -68,7 +65,7 @@ class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
return await self.fetch_mlst_st(alleles) return await self.fetch_mlst_st(alleles)
async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[Union[tuple[str, MLSTProfile], tuple[str, None]], Any]: async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
async for named_string in namedStrings: async for named_string in namedStrings:
try: try:
yield (named_string.name, await self.profile_string(named_string.sequence)) yield (named_string.name, await self.profile_string(named_string.sequence))

68196
tests/resources/12822.fasta Normal file

File diff suppressed because it is too large Load Diff