Merge branch 'develop'

2025-01-17 14:34:16 +00:00
parent 3fd3ef9f20 7ea7ead46a
commit cb22dfac9b
15 changed files with 28 additions and 183 deletions
--- a/19
+++ b/19
@@ -30,11 +30,26 @@ pipeline {
            }
        }
        stage("publish") {
+            parallel {
+                stage ("git.reslate.systems") {
                    environment {
-                CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
+                        TOKEN = credentials('git.reslate.systems')
                    }
                    steps {
-                sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                    }
+                }
+                stage ("test.pypi.org") {
+                    when {
+                        tag '*.*'
+                    }
+                    environment {
+                        TOKEN = credentials('test.pypi.org')
+                    }
+                    steps {
+                        sh returnStatus: true, script: 'python -m twine upload -r testpypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                    }
+                }
            }
        }
    }
--- a/src/automlst/engine/remote/databases/ncbi/init.py
+++ b/src/automlst/engine/remote/databases/ncbi/init.py
--- a/src/automlst/engine/data/local/csv.py
+++ b/src/automlst/engine/data/local/csv.py
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 from os import PathLike
 from typing import AsyncIterable, Iterable, Mapping, Sequence, Union

-from automlst.engine.data.mlst import Allele, MLSTProfile
+from automlst.engine.data.structures.mlst import Allele, MLSTProfile


 def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
--- a/src/automlst/engine/data/local/fasta.py
+++ b/src/automlst/engine/data/local/fasta.py
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
 from Bio import SeqIO

-from automlst.engine.data.genomics import NamedString
+from automlst.engine.data.structures.genomics import NamedString

 async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
--- a/src/automlst/engine/data/remote/init.py
+++ b/src/automlst/engine/data/remote/init.py
--- a/src/automlst/engine/data/remote/databases/bigsdb.py
+++ b/src/automlst/engine/data/remote/databases/bigsdb.py
@@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It

 from aiohttp import ClientSession, ClientTimeout

-from automlst.engine.data.genomics import NamedString
-from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
+from automlst.engine.data.structures.genomics import NamedString
+from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
 from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException

 class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
--- a/src/automlst/engine/data/structures/init.py
+++ b/src/automlst/engine/data/structures/init.py
--- a/src/automlst/engine/data/structures/genomics.py
+++ b/src/automlst/engine/data/structures/genomics.py
--- a/src/automlst/engine/data/structures/mlst.py
+++ b/src/automlst/engine/data/structures/mlst.py
--- a/src/automlst/engine/local/abif.py
+++ b/src/automlst/engine/local/abif.py
@@ -1,126 +0,0 @@
-import asyncio
-from numbers import Number
-from os import path
-from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
-from automlst.engine.data.genomics import NamedString, SangerTraceData
-from Bio.SeqRecord import SeqRecord
-from Bio import SeqIO, Align
-
-from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
-
-
-def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
-    with open(seq_path, "rb") as seq_handle:
-        return SeqIO.read(seq_handle, "abi")
-
-
-async def read_abif(seq_path: str) -> SangerTraceData:
-    ext = path.splitext(seq_path)[1]
-    if ext.lower() != ".ab1" and ext.lower() != "abi":
-        raise ValueError(
-            'seq_path must have file extension of "ab1", or "abi".')
-    biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
-    biopython_annotations = biopython_seq.annotations
-
-    # Lot of type ignoring since Biopython did not define their typing.
-    biopython_abif_raw = biopython_annotations["abif_raw"]  # type: ignore
-    trace_data = SangerTraceData(
-        path.basename(seq_path),
-        biopython_seq.seq,
-        biopython_abif_raw.get("APFN2"),  # type: ignore
-        biopython_abif_raw.get("APrN1"),  # type: ignore
-        biopython_abif_raw.get("APrV1"),  # type: ignore
-        biopython_abif_raw.get("APrX1"),  # type: ignore
-        biopython_abif_raw.get("APXV1"),  # type: ignore
-        biopython_abif_raw.get("CMNT1"),  # type: ignore
-        biopython_abif_raw.get("CpEP1"),  # type: ignore
-        biopython_abif_raw.get("CTID1"),  # type: ignore
-        biopython_abif_raw.get("CTNM1"),  # type: ignore
-        biopython_abif_raw.get("CTTL1"),  # type: ignore
-        biopython_abif_raw.get("DATA1"),  # type: ignore
-        biopython_abif_raw.get("DATA2"),  # type: ignore
-        biopython_abif_raw.get("DATA3"),  # type: ignore
-        biopython_abif_raw.get("DATA4"),  # type: ignore
-        biopython_abif_raw.get("DATA5"),  # type: ignore
-        biopython_abif_raw.get("DATA6"),  # type: ignore
-        biopython_abif_raw.get("DATA7"),  # type: ignore
-        biopython_abif_raw.get("DATA8"),  # type: ignore
-        biopython_abif_raw.get("DSam1"),  # type: ignore
-        biopython_abif_raw.get("DyeN1"),  # type: ignore
-        biopython_abif_raw.get("DyeN2"),  # type: ignore
-        biopython_abif_raw.get("DyeN3"),  # type: ignore
-        biopython_abif_raw.get("DyeN4"),  # type: ignore
-        biopython_abif_raw.get("DyeW1"),  # type: ignore
-        biopython_abif_raw.get("DyeW2"),  # type: ignore
-        biopython_abif_raw.get("DyeW3"),  # type: ignore
-        biopython_abif_raw.get("DyeW4"),  # type: ignore
-        biopython_abif_raw.get("DySN1"),  # type: ignore
-        biopython_abif_raw.get("EPVt1"),  # type: ignore
-        biopython_abif_raw.get("EVNT1"),  # type: ignore
-        biopython_abif_raw.get("EVNT2"),  # type: ignore
-        biopython_abif_raw.get("EVNT3"),  # type: ignore
-        biopython_abif_raw.get("EVNT4"),  # type: ignore
-        biopython_abif_raw.get("FWO_1"),  # type: ignore
-        biopython_abif_raw.get("GTyp1"),  # type: ignore
-        biopython_abif_raw.get("InSc1"),  # type: ignore
-        biopython_abif_raw.get("InVt1"),  # type: ignore
-        biopython_abif_raw.get("LANE1"),  # type: ignore
-        biopython_abif_raw.get("LIMS1"),  # type: ignore
-        biopython_abif_raw.get("LNTD1"),  # type: ignore
-        biopython_abif_raw.get("LsrP1"),  # type: ignore
-        biopython_abif_raw.get("MCHN1"),  # type: ignore
-        biopython_abif_raw.get("MODF1"),  # type: ignore
-        biopython_abif_raw.get("MODL1"),  # type: ignore
-        biopython_abif_raw.get("NAVG1"),  # type: ignore
-        biopython_abif_raw.get("NLNE1"),  # type: ignore
-        biopython_abif_raw.get("OfSc1"),  # type: ignore
-        biopython_abif_raw.get("PDMF1"),  # type: ignore
-        biopython_abif_raw.get("PXLB1"),  # type: ignore
-        biopython_abif_raw.get("RGCm1"),  # type: ignore
-        biopython_abif_raw.get("RGNm1"),  # type: ignore
-        biopython_abif_raw.get("RMdV1"),  # type: ignore
-        biopython_abif_raw.get("RMdX1"),  # type: ignore
-        biopython_abif_raw.get("RMXV1"),  # type: ignore
-        biopython_abif_raw.get("RPrN1"),  # type: ignore
-        biopython_abif_raw.get("RPrV1"),  # type: ignore
-        biopython_abif_raw.get("RUND1"),  # type: ignore
-        biopython_abif_raw.get("RUND2"),  # type: ignore
-        biopython_abif_raw.get("RUND3"),  # type: ignore
-        biopython_abif_raw.get("RUND4"),  # type: ignore
-        biopython_abif_raw.get("RunN1"),  # type: ignore
-        biopython_abif_raw.get("RUNT1"),  # type: ignore
-        biopython_abif_raw.get("RUNT2"),  # type: ignore
-        biopython_abif_raw.get("RUNT3"),  # type: ignore
-        biopython_abif_raw.get("RUNT4"),  # type: ignore
-        biopython_abif_raw.get("Satd"),  # type: ignore
-        biopython_abif_raw.get("Scal1"),  # type: ignore
-        biopython_abif_raw.get("SCAN1"),  # type: ignore
-        biopython_abif_raw.get("SMED1"),  # type: ignore
-        biopython_abif_raw.get("SMLt"),  # type: ignore
-        biopython_abif_raw.get("SMPL1"),  # type: ignore
-        biopython_abif_raw.get("SVER1"),  # type: ignore
-        biopython_abif_raw.get("SVER3"),  # type: ignore
-        biopython_abif_raw.get("Tmpr1"),  # type: ignore
-        biopython_abif_raw.get("TUBE"),  # type: ignore
-        biopython_abif_raw.get("User")  # type: ignore
-    )
-    return trace_data
-
-
-def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
-    aligner = Align.PairwiseAligner(scoring="blastn")
-    aligner.mode = "local"
-    alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
-        0]  # take the best alignment
-    # TODO actually assemble the consensus sequence here
-    raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
-
-
-async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
-    if isinstance(reference, str):
-        reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
-    else:
-        reference_seq: NamedString  = reference
-    for sanger_trace in sanger_traces:
-        yield NamedString("NA", "NA")
-        raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
--- a/src/automlst/engine/remote/databases/ncbi/genbank.py
+++ b/src/automlst/engine/remote/databases/ncbi/genbank.py
@@ -1,27 +0,0 @@
-import asyncio
-from Bio import Entrez
-from Bio import SeqIO
-
-# TODO Change this out for a more professional approach
-Entrez.email = "yunyangdeng@outlook.com"
-
-from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
-
-
-async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
-    with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
-        record = SeqIO.read(fetch_stream, "genbank")
-        sequence_features = list()
-        for feature in record.features:
-            start = int(feature.location.start)
-            end = int(feature.location.end)
-            qualifiers = feature.qualifiers
-            for qualifier_key in qualifiers:
-                qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
-            sequence_features.append(StringAnnotation(
-                type=feature.type,
-                start=start,
-                end=end+1,  # Position is exclusive
-                feature_properties=qualifiers
-            ))
-        return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
--- a/tests/automlst/engine/data/local/test_fasta.py
+++ b/tests/automlst/engine/data/local/test_fasta.py
@@ -1,4 +1,4 @@
-from automlst.engine.local.fasta import read_fasta
+from automlst.engine.data.local.fasta import read_fasta


 async def test_fasta_reader_not_none():
--- a/tests/automlst/engine/data/remote/databases/test_bigsdb.py
+++ b/tests/automlst/engine/data/remote/databases/test_bigsdb.py
@@ -3,10 +3,10 @@ import re
 from typing import Collection, Sequence, Union
 from Bio import SeqIO
 import pytest
-from automlst.engine.data.genomics import NamedString
-from automlst.engine.data.mlst import Allele, MLSTProfile
+from automlst.engine.data.structures.genomics import NamedString
+from automlst.engine.data.structures.mlst import Allele, MLSTProfile
 from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
-from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
+from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler

 def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
    rand = random.Random(gene)
--- a/tests/automlst/engine/local/test_abif.py
+++ b/tests/automlst/engine/local/test_abif.py
@@ -1,12 +0,0 @@
-import os
-
-from automlst.engine.local.abif import read_abif, reference_consensus_assembly
-
-async def test_load_sanger_sequence_has_data():
-    assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
-    result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
-    assert result_data is not None
-
-async def test_consensus_assembly_with_ncbi():
-    consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")])
-    # TODO complete implementing this
--- a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py
+++ b/tests/automlst/engine/remote/databases/ncbi/test_genbank.py
@@ -1,5 +0,0 @@
-from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
-
-
-async def test_fetch_ncbi_genbank_with_id_works():
-    assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0