Refactored code layout

This commit is contained in:
Harrison Deng 2025-01-16 21:54:52 +00:00
parent bad7dfc3a8
commit a3c864b565
12 changed files with 9 additions and 30 deletions

View File

@@ -2,12 +2,10 @@ import asyncio
from numbers import Number
from os import path
from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
from automlst.engine.data.genomics import NamedString, SangerTraceData
from automlst.engine.data.structures.genomics import NamedString, SangerTraceData
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO, Align
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
with open(seq_path, "rb") as seq_handle:
@@ -114,13 +112,3 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri
0] # take the best alignment
# TODO actually assemble the consensus sequence here
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
if isinstance(reference, str):
reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
else:
reference_seq: NamedString = reference
for sanger_trace in sanger_traces:
yield NamedString("NA", "NA")
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")

View File

@@ -3,7 +3,7 @@ from io import TextIOWrapper
from os import PathLike
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
from automlst.engine.data.mlst import Allele, MLSTProfile
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):

View File

@@ -3,7 +3,7 @@ from io import TextIOWrapper
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
from Bio import SeqIO
from automlst.engine.data.genomics import NamedString
from automlst.engine.data.structures.genomics import NamedString
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")

View File

@@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It
from aiohttp import ClientSession, ClientTimeout
from automlst.engine.data.genomics import NamedString
from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
from automlst.engine.data.structures.genomics import NamedString
from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):

View File

@@ -1,12 +1,8 @@
import os
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
from automlst.engine.data.local.abif import read_abif
async def test_load_sanger_sequence_has_data():
assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
assert result_data is not None
async def test_consensus_assembly_with_ncbi():
consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")])
# TODO complete implementing this

View File

@@ -1,5 +0,0 @@
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
async def test_fetch_ncbi_genbank_with_id_works():
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0

View File

@@ -3,10 +3,10 @@ import re
from typing import Collection, Sequence, Union
from Bio import SeqIO
import pytest
from automlst.engine.data.genomics import NamedString
from automlst.engine.data.mlst import Allele, MLSTProfile
from automlst.engine.data.structures.genomics import NamedString
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
rand = random.Random(gene)