From 7ea7ead46aa787e3ae3a7e7c29822194424923aa Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Fri, 17 Jan 2025 14:27:25 +0000 Subject: [PATCH] Moved ABIF code to separate project --- src/automlst/engine/data/local/abif.py | 114 ------------------ .../engine/{ => data}/local/test_fasta.py | 2 +- .../remote/databases/test_bigsdb.py | 0 tests/automlst/engine/local/test_abif.py | 8 -- 4 files changed, 1 insertion(+), 123 deletions(-) delete mode 100644 src/automlst/engine/data/local/abif.py rename tests/automlst/engine/{ => data}/local/test_fasta.py (79%) rename tests/automlst/engine/{ => data}/remote/databases/test_bigsdb.py (100%) delete mode 100644 tests/automlst/engine/local/test_abif.py diff --git a/src/automlst/engine/data/local/abif.py b/src/automlst/engine/data/local/abif.py deleted file mode 100644 index c5ecb1d..0000000 --- a/src/automlst/engine/data/local/abif.py +++ /dev/null @@ -1,114 +0,0 @@ -import asyncio -from numbers import Number -from os import path -from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union -from automlst.engine.data.structures.genomics import NamedString, SangerTraceData -from Bio.SeqRecord import SeqRecord -from Bio import SeqIO, Align - - -def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord: - with open(seq_path, "rb") as seq_handle: - return SeqIO.read(seq_handle, "abi") - - -async def read_abif(seq_path: str) -> SangerTraceData: - ext = path.splitext(seq_path)[1] - if ext.lower() != ".ab1" and ext.lower() != "abi": - raise ValueError( - 'seq_path must have file extension of "ab1", or "abi".') - biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path) - biopython_annotations = biopython_seq.annotations - - # Lot of type ignoring since Biopython did not define their typing. - biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore - trace_data = SangerTraceData( - path.basename(seq_path), - biopython_seq.seq, - biopython_abif_raw.get("APFN2"), # type: ignore - biopython_abif_raw.get("APrN1"), # type: ignore - biopython_abif_raw.get("APrV1"), # type: ignore - biopython_abif_raw.get("APrX1"), # type: ignore - biopython_abif_raw.get("APXV1"), # type: ignore - biopython_abif_raw.get("CMNT1"), # type: ignore - biopython_abif_raw.get("CpEP1"), # type: ignore - biopython_abif_raw.get("CTID1"), # type: ignore - biopython_abif_raw.get("CTNM1"), # type: ignore - biopython_abif_raw.get("CTTL1"), # type: ignore - biopython_abif_raw.get("DATA1"), # type: ignore - biopython_abif_raw.get("DATA2"), # type: ignore - biopython_abif_raw.get("DATA3"), # type: ignore - biopython_abif_raw.get("DATA4"), # type: ignore - biopython_abif_raw.get("DATA5"), # type: ignore - biopython_abif_raw.get("DATA6"), # type: ignore - biopython_abif_raw.get("DATA7"), # type: ignore - biopython_abif_raw.get("DATA8"), # type: ignore - biopython_abif_raw.get("DSam1"), # type: ignore - biopython_abif_raw.get("DyeN1"), # type: ignore - biopython_abif_raw.get("DyeN2"), # type: ignore - biopython_abif_raw.get("DyeN3"), # type: ignore - biopython_abif_raw.get("DyeN4"), # type: ignore - biopython_abif_raw.get("DyeW1"), # type: ignore - biopython_abif_raw.get("DyeW2"), # type: ignore - biopython_abif_raw.get("DyeW3"), # type: ignore - biopython_abif_raw.get("DyeW4"), # type: ignore - biopython_abif_raw.get("DySN1"), # type: ignore - biopython_abif_raw.get("EPVt1"), # type: ignore - biopython_abif_raw.get("EVNT1"), # type: ignore - biopython_abif_raw.get("EVNT2"), # type: ignore - biopython_abif_raw.get("EVNT3"), # type: ignore - biopython_abif_raw.get("EVNT4"), # type: ignore - biopython_abif_raw.get("FWO_1"), # type: ignore - biopython_abif_raw.get("GTyp1"), # type: ignore - biopython_abif_raw.get("InSc1"), # type: ignore - biopython_abif_raw.get("InVt1"), # type: ignore - biopython_abif_raw.get("LANE1"), # type: ignore - biopython_abif_raw.get("LIMS1"), # type: ignore - biopython_abif_raw.get("LNTD1"), # type: ignore - biopython_abif_raw.get("LsrP1"), # type: ignore - biopython_abif_raw.get("MCHN1"), # type: ignore - biopython_abif_raw.get("MODF1"), # type: ignore - biopython_abif_raw.get("MODL1"), # type: ignore - biopython_abif_raw.get("NAVG1"), # type: ignore - biopython_abif_raw.get("NLNE1"), # type: ignore - biopython_abif_raw.get("OfSc1"), # type: ignore - biopython_abif_raw.get("PDMF1"), # type: ignore - biopython_abif_raw.get("PXLB1"), # type: ignore - biopython_abif_raw.get("RGCm1"), # type: ignore - biopython_abif_raw.get("RGNm1"), # type: ignore - biopython_abif_raw.get("RMdV1"), # type: ignore - biopython_abif_raw.get("RMdX1"), # type: ignore - biopython_abif_raw.get("RMXV1"), # type: ignore - biopython_abif_raw.get("RPrN1"), # type: ignore - biopython_abif_raw.get("RPrV1"), # type: ignore - biopython_abif_raw.get("RUND1"), # type: ignore - biopython_abif_raw.get("RUND2"), # type: ignore - biopython_abif_raw.get("RUND3"), # type: ignore - biopython_abif_raw.get("RUND4"), # type: ignore - biopython_abif_raw.get("RunN1"), # type: ignore - biopython_abif_raw.get("RUNT1"), # type: ignore - biopython_abif_raw.get("RUNT2"), # type: ignore - biopython_abif_raw.get("RUNT3"), # type: ignore - biopython_abif_raw.get("RUNT4"), # type: ignore - biopython_abif_raw.get("Satd"), # type: ignore - biopython_abif_raw.get("Scal1"), # type: ignore - biopython_abif_raw.get("SCAN1"), # type: ignore - biopython_abif_raw.get("SMED1"), # type: ignore - biopython_abif_raw.get("SMLt"), # type: ignore - biopython_abif_raw.get("SMPL1"), # type: ignore - biopython_abif_raw.get("SVER1"), # type: ignore - biopython_abif_raw.get("SVER3"), # type: ignore - biopython_abif_raw.get("Tmpr1"), # type: ignore - biopython_abif_raw.get("TUBE"), # type: ignore - biopython_abif_raw.get("User") # type: ignore - ) - return trace_data - - -def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]: - aligner = Align.PairwiseAligner(scoring="blastn") - aligner.mode = "local" - alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[ - 0] # take the best alignment - # TODO actually assemble the consensus sequence here - raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") diff --git a/tests/automlst/engine/local/test_fasta.py b/tests/automlst/engine/data/local/test_fasta.py similarity index 79% rename from tests/automlst/engine/local/test_fasta.py rename to tests/automlst/engine/data/local/test_fasta.py index cc18ff2..9065458 100644 --- a/tests/automlst/engine/local/test_fasta.py +++ b/tests/automlst/engine/data/local/test_fasta.py @@ -1,4 +1,4 @@ -from automlst.engine.local.fasta import read_fasta +from automlst.engine.data.local.fasta import read_fasta async def test_fasta_reader_not_none(): diff --git a/tests/automlst/engine/remote/databases/test_bigsdb.py b/tests/automlst/engine/data/remote/databases/test_bigsdb.py similarity index 100% rename from tests/automlst/engine/remote/databases/test_bigsdb.py rename to tests/automlst/engine/data/remote/databases/test_bigsdb.py diff --git a/tests/automlst/engine/local/test_abif.py b/tests/automlst/engine/local/test_abif.py deleted file mode 100644 index ff3f05a..0000000 --- a/tests/automlst/engine/local/test_abif.py +++ /dev/null @@ -1,8 +0,0 @@ -import os - -from automlst.engine.data.local.abif import read_abif - -async def test_load_sanger_sequence_has_data(): - assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1") - result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1") - assert result_data is not None