From a3c864b565fb245a0ea4dd2763b26a661a67ea34 Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Thu, 16 Jan 2025 21:54:52 +0000 Subject: [PATCH] Refactored code layout --- src/automlst/engine/data/local/__init__.py | 0 src/automlst/engine/{ => data}/local/abif.py | 14 +------------- src/automlst/engine/{ => data}/local/csv.py | 2 +- src/automlst/engine/{ => data}/local/fasta.py | 2 +- src/automlst/engine/data/remote/__init__.py | 0 .../engine/{ => data}/remote/databases/bigsdb.py | 4 ++-- src/automlst/engine/data/structures/__init__.py | 0 .../engine/data/{ => structures}/genomics.py | 0 src/automlst/engine/data/{ => structures}/mlst.py | 0 tests/automlst/engine/local/test_abif.py | 6 +----- .../engine/remote/databases/ncbi/test_genbank.py | 5 ----- .../engine/remote/databases/test_bigsdb.py | 6 +++--- 12 files changed, 9 insertions(+), 30 deletions(-) create mode 100644 src/automlst/engine/data/local/__init__.py rename src/automlst/engine/{ => data}/local/abif.py (89%) rename src/automlst/engine/{ => data}/local/csv.py (96%) rename src/automlst/engine/{ => data}/local/fasta.py (91%) create mode 100644 src/automlst/engine/data/remote/__init__.py rename src/automlst/engine/{ => data}/remote/databases/bigsdb.py (98%) create mode 100644 src/automlst/engine/data/structures/__init__.py rename src/automlst/engine/data/{ => structures}/genomics.py (100%) rename src/automlst/engine/data/{ => structures}/mlst.py (100%) delete mode 100644 tests/automlst/engine/remote/databases/ncbi/test_genbank.py diff --git a/src/automlst/engine/data/local/__init__.py b/src/automlst/engine/data/local/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/local/abif.py b/src/automlst/engine/data/local/abif.py similarity index 89% rename from src/automlst/engine/local/abif.py rename to src/automlst/engine/data/local/abif.py index 654705f..c5ecb1d 100644 --- a/src/automlst/engine/local/abif.py +++ b/src/automlst/engine/data/local/abif.py @@ -2,12 +2,10 @@ import asyncio from numbers import Number from os import path from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union -from automlst.engine.data.genomics import NamedString, SangerTraceData +from automlst.engine.data.structures.genomics import NamedString, SangerTraceData from Bio.SeqRecord import SeqRecord from Bio import SeqIO, Align -from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank - def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord: with open(seq_path, "rb") as seq_handle: @@ -114,13 +112,3 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri 0] # take the best alignment # TODO actually assemble the consensus sequence here raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") - - -async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]: - if isinstance(reference, str): - reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence) - else: - reference_seq: NamedString = reference - for sanger_trace in sanger_traces: - yield NamedString("NA", "NA") - raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") \ No newline at end of file diff --git a/src/automlst/engine/local/csv.py b/src/automlst/engine/data/local/csv.py similarity index 96% rename from src/automlst/engine/local/csv.py rename to src/automlst/engine/data/local/csv.py index 2a3bb1f..8bef3d0 100644 --- a/src/automlst/engine/local/csv.py +++ b/src/automlst/engine/data/local/csv.py @@ -3,7 +3,7 @@ from io import TextIOWrapper from os import PathLike from typing import AsyncIterable, Iterable, Mapping, Sequence, Union -from automlst.engine.data.mlst import Allele, MLSTProfile +from automlst.engine.data.structures.mlst import Allele, MLSTProfile def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]): diff --git a/src/automlst/engine/local/fasta.py b/src/automlst/engine/data/local/fasta.py similarity index 91% rename from src/automlst/engine/local/fasta.py rename to src/automlst/engine/data/local/fasta.py index 4fb9cb8..2637ddf 100644 --- a/src/automlst/engine/local/fasta.py +++ b/src/automlst/engine/data/local/fasta.py @@ -3,7 +3,7 @@ from io import TextIOWrapper from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union from Bio import SeqIO -from automlst.engine.data.genomics import NamedString +from automlst.engine.data.structures.genomics import NamedString async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]: fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta") diff --git a/src/automlst/engine/data/remote/__init__.py b/src/automlst/engine/data/remote/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/remote/databases/bigsdb.py b/src/automlst/engine/data/remote/databases/bigsdb.py similarity index 98% rename from src/automlst/engine/remote/databases/bigsdb.py rename to src/automlst/engine/data/remote/databases/bigsdb.py index 6cd877f..53a9d59 100644 --- a/src/automlst/engine/remote/databases/bigsdb.py +++ b/src/automlst/engine/data/remote/databases/bigsdb.py @@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It from aiohttp import ClientSession, ClientTimeout -from automlst.engine.data.genomics import NamedString -from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile +from automlst.engine.data.structures.genomics import NamedString +from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException class BIGSdbMLSTProfiler(AbstractAsyncContextManager): diff --git a/src/automlst/engine/data/structures/__init__.py b/src/automlst/engine/data/structures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/data/genomics.py b/src/automlst/engine/data/structures/genomics.py similarity index 100% rename from src/automlst/engine/data/genomics.py rename to src/automlst/engine/data/structures/genomics.py diff --git a/src/automlst/engine/data/mlst.py b/src/automlst/engine/data/structures/mlst.py similarity index 100% rename from src/automlst/engine/data/mlst.py rename to src/automlst/engine/data/structures/mlst.py diff --git a/tests/automlst/engine/local/test_abif.py b/tests/automlst/engine/local/test_abif.py index cc514e1..ff3f05a 100644 --- a/tests/automlst/engine/local/test_abif.py +++ b/tests/automlst/engine/local/test_abif.py @@ -1,12 +1,8 @@ import os -from automlst.engine.local.abif import read_abif, reference_consensus_assembly +from automlst.engine.data.local.abif import read_abif async def test_load_sanger_sequence_has_data(): assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1") result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1") assert result_data is not None - -async def test_consensus_assembly_with_ncbi(): - consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")]) - # TODO complete implementing this \ No newline at end of file diff --git a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py b/tests/automlst/engine/remote/databases/ncbi/test_genbank.py deleted file mode 100644 index 25c8fed..0000000 --- a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py +++ /dev/null @@ -1,5 +0,0 @@ -from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank - - -async def test_fetch_ncbi_genbank_with_id_works(): - assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0 \ No newline at end of file diff --git a/tests/automlst/engine/remote/databases/test_bigsdb.py b/tests/automlst/engine/remote/databases/test_bigsdb.py index f649281..0aa10de 100644 --- a/tests/automlst/engine/remote/databases/test_bigsdb.py +++ b/tests/automlst/engine/remote/databases/test_bigsdb.py @@ -3,10 +3,10 @@ import re from typing import Collection, Sequence, Union from Bio import SeqIO import pytest -from automlst.engine.data.genomics import NamedString -from automlst.engine.data.mlst import Allele, MLSTProfile +from automlst.engine.data.structures.genomics import NamedString +from automlst.engine.data.structures.mlst import Allele, MLSTProfile from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException -from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler +from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]): rand = random.Random(gene)