Copied over most code from another of my projects

This commit is contained in:
2025-01-08 15:14:06 +00:00
commit 02985d5e37
32 changed files with 59455 additions and 0 deletions

View File

@@ -0,0 +1,104 @@
import asyncio
from numbers import Number
from os import path
from typing import Sequence, Union
from automlst.engine.data.genomics import SangerTraceData
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
with open(seq_path, "rb") as seq_handle:
return SeqIO.read(seq_handle, "abi")
async def read_abif(seq_path: str) -> SangerTraceData:
ext = path.splitext(seq_path)[1]
if ext.lower() != ".ab1" and ext.lower() != "abi":
raise ValueError(
'seq_path must have file extension of "ab1", or "abi".')
biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
biopython_annotations = biopython_seq.annotations
# Lot of type ignoring since Biopython did not define their typing.
biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore
trace_data = SangerTraceData(
biopython_seq.seq,
biopython_abif_raw.get("APFN2"), # type: ignore
biopython_abif_raw.get("APrN1"), # type: ignore
biopython_abif_raw.get("APrV1"), # type: ignore
biopython_abif_raw.get("APrX1"), # type: ignore
biopython_abif_raw.get("APXV1"), # type: ignore
biopython_abif_raw.get("CMNT1"), # type: ignore
biopython_abif_raw.get("CpEP1"), # type: ignore
biopython_abif_raw.get("CTID1"), # type: ignore
biopython_abif_raw.get("CTNM1"), # type: ignore
biopython_abif_raw.get("CTTL1"), # type: ignore
biopython_abif_raw.get("DATA1"), # type: ignore
biopython_abif_raw.get("DATA2"), # type: ignore
biopython_abif_raw.get("DATA3"), # type: ignore
biopython_abif_raw.get("DATA4"), # type: ignore
biopython_abif_raw.get("DATA5"), # type: ignore
biopython_abif_raw.get("DATA6"), # type: ignore
biopython_abif_raw.get("DATA7"), # type: ignore
biopython_abif_raw.get("DATA8"), # type: ignore
biopython_abif_raw.get("DSam1"), # type: ignore
biopython_abif_raw.get("DyeN1"), # type: ignore
biopython_abif_raw.get("DyeN2"), # type: ignore
biopython_abif_raw.get("DyeN3"), # type: ignore
biopython_abif_raw.get("DyeN4"), # type: ignore
biopython_abif_raw.get("DyeW1"), # type: ignore
biopython_abif_raw.get("DyeW2"), # type: ignore
biopython_abif_raw.get("DyeW3"), # type: ignore
biopython_abif_raw.get("DyeW4"), # type: ignore
biopython_abif_raw.get("DySN1"), # type: ignore
biopython_abif_raw.get("EPVt1"), # type: ignore
biopython_abif_raw.get("EVNT1"), # type: ignore
biopython_abif_raw.get("EVNT2"), # type: ignore
biopython_abif_raw.get("EVNT3"), # type: ignore
biopython_abif_raw.get("EVNT4"), # type: ignore
biopython_abif_raw.get("FWO_1"), # type: ignore
biopython_abif_raw.get("GTyp1"), # type: ignore
biopython_abif_raw.get("InSc1"), # type: ignore
biopython_abif_raw.get("InVt1"), # type: ignore
biopython_abif_raw.get("LANE1"), # type: ignore
biopython_abif_raw.get("LIMS1"), # type: ignore
biopython_abif_raw.get("LNTD1"), # type: ignore
biopython_abif_raw.get("LsrP1"), # type: ignore
biopython_abif_raw.get("MCHN1"), # type: ignore
biopython_abif_raw.get("MODF1"), # type: ignore
biopython_abif_raw.get("MODL1"), # type: ignore
biopython_abif_raw.get("NAVG1"), # type: ignore
biopython_abif_raw.get("NLNE1"), # type: ignore
biopython_abif_raw.get("OfSc1"), # type: ignore
biopython_abif_raw.get("PDMF1"), # type: ignore
biopython_abif_raw.get("PXLB1"), # type: ignore
biopython_abif_raw.get("RGCm1"), # type: ignore
biopython_abif_raw.get("RGNm1"), # type: ignore
biopython_abif_raw.get("RMdV1"), # type: ignore
biopython_abif_raw.get("RMdX1"), # type: ignore
biopython_abif_raw.get("RMXV1"), # type: ignore
biopython_abif_raw.get("RPrN1"), # type: ignore
biopython_abif_raw.get("RPrV1"), # type: ignore
biopython_abif_raw.get("RUND1"), # type: ignore
biopython_abif_raw.get("RUND2"), # type: ignore
biopython_abif_raw.get("RUND3"), # type: ignore
biopython_abif_raw.get("RUND4"), # type: ignore
biopython_abif_raw.get("RunN1"), # type: ignore
biopython_abif_raw.get("RUNT1"), # type: ignore
biopython_abif_raw.get("RUNT2"), # type: ignore
biopython_abif_raw.get("RUNT3"), # type: ignore
biopython_abif_raw.get("RUNT4"), # type: ignore
biopython_abif_raw.get("Satd"), # type: ignore
biopython_abif_raw.get("Scal1"), # type: ignore
biopython_abif_raw.get("SCAN1"), # type: ignore
biopython_abif_raw.get("SMED1"), # type: ignore
biopython_abif_raw.get("SMLt"), # type: ignore
biopython_abif_raw.get("SMPL1"), # type: ignore
biopython_abif_raw.get("SVER1"), # type: ignore
biopython_abif_raw.get("SVER3"), # type: ignore
biopython_abif_raw.get("Tmpr1"), # type: ignore
biopython_abif_raw.get("TUBE"), # type: ignore
biopython_abif_raw.get("User") # type: ignore
)
return trace_data

View File

@@ -0,0 +1,31 @@
import csv
from io import TextIOWrapper
from os import PathLike
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
from automlst.engine.data.MLST import Allele, MLSTProfile
def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
result_dict: dict[str, list[str]] = {}
for loci, alleles in alleles_map.items():
result_dict[loci] = list()
for allele in alleles:
result_dict[loci].append(allele.allele_variant)
return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
mlst_profiles = list(mlst_profiles_iterable)
header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()]
with open(handle, "w", newline='') as filehandle:
writer = csv.DictWriter(filehandle, fieldnames=header)
writer.writeheader()
for mlst_profile in mlst_profiles:
row_dictionary = {
"st": mlst_profile.sequence_type,
"clonal-complex": mlst_profile.clonal_complex,
**loci_alleles_variants_from_loci(mlst_profile.alleles)
}
writer.writerow(rowdict=row_dictionary)

View File

@@ -0,0 +1,11 @@
import asyncio
from io import TextIOWrapper
from typing import Any, AsyncGenerator, Generator, Sequence, Union
from Bio import SeqIO
from automlst.engine.data.genomics import NamedString
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
for fasta_sequence in await fasta_sequences:
yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))