Moved to a general BIGSdb implementation

Updated tests. Removed ABIF UI for the time being. Began updating CLI.
2025-01-08 21:32:10 +00:00
parent 645357ac58
commit 42d0f56b18
20 changed files with 403 additions and 414 deletions
--- a/src/automlst/engine/local/abif.py
+++ b/src/automlst/engine/local/abif.py
@@ -1,11 +1,13 @@
 import asyncio
 from numbers import Number
 from os import path
-from typing import Any, AsyncGenerator, Collection, Sequence, Union
+from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
 from automlst.engine.data.genomics import NamedString, SangerTraceData
 from Bio.SeqRecord import SeqRecord
 from Bio import SeqIO, Align

+from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
+

 def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
    with open(seq_path, "rb") as seq_handle:
@@ -110,9 +112,15 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri
    aligner.mode = "local"
    alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
        0]  # take the best alignment
-    return NamedString(alignment_result.sequences[0].id, alignment_result.sequences[0].seq), NamedString(alignment_result.sequences[1].id, alignment_result.sequences[1].seq)
+    # TODO actually assemble the consensus sequence here
+    raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")


-async def reference_consensus_assembly(reference: NamedString, sanger_traces: Collection[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
+async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
+    if isinstance(reference, str):
+        reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
+    else:
+        reference_seq: NamedString  = reference
    for sanger_trace in sanger_traces:
-        yield (await asyncio.to_thread(_biopython_local_pairwise_alignment, reference, sanger_trace))[1]
+        yield NamedString("NA", "NA")
+        raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
--- a/src/automlst/engine/local/csv.py
+++ b/src/automlst/engine/local/csv.py
@@ -6,7 +6,7 @@ from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
 from automlst.engine.data.mlst import Allele, MLSTProfile


-def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
+def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
    result_dict: dict[str, list[str]] = {}
    for loci, alleles in alleles_map.items():
        result_dict[loci] = list()
@@ -15,17 +15,19 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
    return result_dict


-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
-    mlst_profiles = list(mlst_profiles_iterable)
-    header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()]
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
    with open(handle, "w", newline='') as filehandle:
-        writer = csv.DictWriter(filehandle, fieldnames=header)
-        writer.writeheader()
-        for mlst_profile in mlst_profiles:
+        header = None
+        writer: Union[csv.DictWriter, None] = None
+        async for name, mlst_profile in mlst_profiles_iterable:
+            if writer is None:
+                header = ["st", "clonal-complex", "id", *mlst_profile.alleles.keys()]
+                writer = csv.DictWriter(filehandle, fieldnames=header)
+                writer.writeheader()
            row_dictionary = {
                "st": mlst_profile.sequence_type,
                "clonal-complex": mlst_profile.clonal_complex,
-                **loci_alleles_variants_from_loci(mlst_profile.alleles)
+                "id": name,
+                **dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
            }
-
-            writer.writerow(rowdict=row_dictionary)
+            writer.writerow(rowdict=row_dictionary)
--- a/src/automlst/engine/local/fasta.py
+++ b/src/automlst/engine/local/fasta.py
@@ -1,6 +1,6 @@
 import asyncio
 from io import TextIOWrapper
-from typing import Any, AsyncGenerator, Generator, Sequence, Union
+from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
 from Bio import SeqIO

 from automlst.engine.data.genomics import NamedString
@@ -8,4 +8,9 @@ from automlst.engine.data.genomics import NamedString
 async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
    for fasta_sequence in await fasta_sequences:
-        yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
+        yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
+
+async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]:
+    for handle in handles:
+        async for named_seq in read_fasta(handle):
+            yield named_seq