Moved to a general BIGSdb implementation

Updated tests

Removed ABIF UI for the time being

Began updating CLI
This commit is contained in:
2025-01-08 21:32:10 +00:00
parent 645357ac58
commit 42d0f56b18
20 changed files with 403 additions and 414 deletions

View File

@@ -1,11 +1,13 @@
import asyncio
from numbers import Number
from os import path
from typing import Any, AsyncGenerator, Collection, Sequence, Union
from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
from automlst.engine.data.genomics import NamedString, SangerTraceData
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO, Align
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
with open(seq_path, "rb") as seq_handle:
@@ -110,9 +112,15 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri
aligner.mode = "local"
alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
0] # take the best alignment
return NamedString(alignment_result.sequences[0].id, alignment_result.sequences[0].seq), NamedString(alignment_result.sequences[1].id, alignment_result.sequences[1].seq)
# TODO actually assemble the consensus sequence here
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
async def reference_consensus_assembly(reference: NamedString, sanger_traces: Collection[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
if isinstance(reference, str):
reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
else:
reference_seq: NamedString = reference
for sanger_trace in sanger_traces:
yield (await asyncio.to_thread(_biopython_local_pairwise_alignment, reference, sanger_trace))[1]
yield NamedString("NA", "NA")
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")

View File

@@ -6,7 +6,7 @@ from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
from automlst.engine.data.mlst import Allele, MLSTProfile
def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
result_dict: dict[str, list[str]] = {}
for loci, alleles in alleles_map.items():
result_dict[loci] = list()
@@ -15,17 +15,19 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
    """Write named MLST profiles to a CSV file, one row per profile.

    Args:
        mlst_profiles_iterable: Async stream of ``(name, profile)`` pairs.
        handle: Path of the CSV file; it is opened in write mode, so any
            existing content is replaced.

    The columns are ``st``, ``clonal-complex``, ``id`` and then one column
    per locus of the *first* profile received. If the stream is empty, no
    header is written and the file is left empty.
    """
    with open(handle, "w", newline='') as filehandle:
        # The writer is created lazily because the locus columns are only
        # known once the first profile has been pulled from the stream.
        writer: Union[csv.DictWriter, None] = None
        async for name, mlst_profile in mlst_profiles_iterable:
            if writer is None:
                header = ["st", "clonal-complex", "id", *mlst_profile.alleles.keys()]
                writer = csv.DictWriter(filehandle, fieldnames=header)
                writer.writeheader()
            row_dictionary = {
                "st": mlst_profile.sequence_type,
                "clonal-complex": mlst_profile.clonal_complex,
                "id": name,
                **dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
            }
            # NOTE(review): later profiles are presumably expected to carry
            # the same loci as the first one, since the header is fixed by
            # then -- confirm against callers.
            writer.writerow(rowdict=row_dictionary)

View File

@@ -1,6 +1,6 @@
import asyncio
from io import TextIOWrapper
from typing import Any, AsyncGenerator, Generator, Sequence, Union
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
from Bio import SeqIO
from automlst.engine.data.genomics import NamedString
@@ -8,4 +8,9 @@ from automlst.engine.data.genomics import NamedString
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    """Yield every record of a FASTA source as a ``NamedString``.

    Args:
        handle: Path or open text handle, passed unchanged to
            ``SeqIO.parse`` with ``format="fasta"``.

    Yields:
        One ``NamedString`` per record, built from the record's ``id`` and
        the string form of its ``seq``.
    """
    # NOTE(review): SeqIO.parse is believed to return a lazy iterator, in
    # which case only its construction runs in the worker thread and the
    # actual parsing happens on the event loop in the loop below -- confirm.
    fasta_sequences = await asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
    for fasta_sequence in fasta_sequences:
        # Each record is emitted exactly once.
        yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]:
    """Chain the records of several FASTA sources into one async stream.

    The sources are consumed one after another, in the order given; each
    one is read through ``read_fasta`` and its records are passed on
    unchanged.
    """
    for fasta_source in handles:
        async for record in read_fasta(fasta_source):
            yield record