Refactored code layout

This commit is contained in:
Harrison Deng 2025-01-16 21:54:52 +00:00
parent bad7dfc3a8
commit a3c864b565
12 changed files with 9 additions and 30 deletions

View File

@@ -2,12 +2,10 @@ import asyncio
from numbers import Number from numbers import Number
from os import path from os import path
from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
from automlst.engine.data.genomics import NamedString, SangerTraceData from automlst.engine.data.structures.genomics import NamedString, SangerTraceData
from Bio.SeqRecord import SeqRecord from Bio.SeqRecord import SeqRecord
from Bio import SeqIO, Align from Bio import SeqIO, Align
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord: def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
with open(seq_path, "rb") as seq_handle: with open(seq_path, "rb") as seq_handle:
@@ -114,13 +112,3 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri
0] # take the best alignment 0] # take the best alignment
# TODO actually assemble the consensus sequence here # TODO actually assemble the consensus sequence here
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
if isinstance(reference, str):
reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
else:
reference_seq: NamedString = reference
for sanger_trace in sanger_traces:
yield NamedString("NA", "NA")
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")

View File

@@ -3,7 +3,7 @@ from io import TextIOWrapper
from os import PathLike from os import PathLike
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
from automlst.engine.data.mlst import Allele, MLSTProfile from automlst.engine.data.structures.mlst import Allele, MLSTProfile
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]): def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):

View File

@@ -3,7 +3,7 @@ from io import TextIOWrapper
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
from Bio import SeqIO from Bio import SeqIO
from automlst.engine.data.genomics import NamedString from automlst.engine.data.structures.genomics import NamedString
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]: async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta") fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")

View File

@@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It
from aiohttp import ClientSession, ClientTimeout from aiohttp import ClientSession, ClientTimeout
from automlst.engine.data.genomics import NamedString from automlst.engine.data.structures.genomics import NamedString
from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
class BIGSdbMLSTProfiler(AbstractAsyncContextManager): class BIGSdbMLSTProfiler(AbstractAsyncContextManager):

View File

@@ -1,12 +1,8 @@
import os import os
from automlst.engine.local.abif import read_abif, reference_consensus_assembly from automlst.engine.data.local.abif import read_abif
async def test_load_sanger_sequence_has_data(): async def test_load_sanger_sequence_has_data():
assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1") assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1") result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
assert result_data is not None assert result_data is not None
async def test_consensus_assembly_with_ncbi():
consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")])
# TODO complete implementing this

View File

@@ -1,5 +0,0 @@
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
async def test_fetch_ncbi_genbank_with_id_works():
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0

View File

@@ -3,10 +3,10 @@ import re
from typing import Collection, Sequence, Union from typing import Collection, Sequence, Union
from Bio import SeqIO from Bio import SeqIO
import pytest import pytest
from automlst.engine.data.genomics import NamedString from automlst.engine.data.structures.genomics import NamedString
from automlst.engine.data.mlst import Allele, MLSTProfile from automlst.engine.data.structures.mlst import Allele, MLSTProfile
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]): def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
rand = random.Random(gene) rand = random.Random(gene)