Fixed issue where multiple allele identifiers were not returned in output.

Changed allele CSV output to non-lists
Publish to pypi if it has a release tag
2025-01-17 17:04:22 +00:00 · 2025-01-17 16:56:52 +00:00 · 2025-01-17 15:13:47 +00:00 · 2025-01-17 15:08:47 +00:00 · 2025-01-17 14:34:16 +00:00 · 2025-01-17 14:27:25 +00:00
18 changed files with 75 additions and 195 deletions
--- a/25
+++ b/25
@@ -30,11 +30,26 @@ pipeline {
            }
        }
        stage("publish") {
-            environment {
-                CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
-            }
-            steps {
-                sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+            parallel {
+                stage ("git.reslate.systems") {
+                    environment {
+                        TOKEN = credentials('git.reslate.systems')
+                    }
+                    steps {
+                        sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                    }
+                }
+                stage ("pypi.org") {
+                    when {
+                        tag '*.*'
+                    }
+                    environment {
+                        TOKEN = credentials('pypi.org')
+                    }
+                    steps {
+                        sh returnStatus: true, script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                    }
+                }
            }
        }
    }
--- a/README.md
+++ b/README.md
@@ -1,12 +1,25 @@
-# autoMLST
+# autoMLST.Engine

-A CLI/library for rapidly performing MLST typing via accessing pubMLST and InstitutPasteur MSLT databases.
+A python library implementing common BIGSdb MLST schemes and databases. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.

-# Components
+## Features

-## automlst.cli
+Briefly, this library can:
+- Import multiple `FASTA` files
+- Fetch the BIGSdb databases that are currently live and available
+- Fetch the available BIGSdb database schemas for a given MLST database
+- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
+- Retrieve MLST sequence type IDs based off a sequence
+- Output all results to a single CSV

-The command line interface, sets up very minimal and mostly makes calls to the library. Uses argparse and is split into two parts:
+Furthermore, this library is highly asynchronous: any potentially blocking operation, from parsing FASTAs to performing HTTP requests, is at least asynchronous, if not fully multithreaded.

- `automlst info`: Provides user information on available databases to pull from, and the schemas available.
- `automlst exactmatch`: Provides users the ability to request exact match results from a given database and schema
+## Usage
+
+This library can be installed through pip. Learn how to [set up and install pip first](https://pip.pypa.io/en/stable/installation/).
+
+Then, it's as easy as running `pip install automlst-engine` in any terminal that has pip in its path (any terminal where `pip --version` returns a valid version and install path).
+
+### CLI usage
+
+This is an independent Python library and thus does not have any form of direct user interface. One way of using it could be to create your own Python script that makes calls to this library's functions. Alternatively, you may use `automlst-cli`, a `Python` package that implements a CLI for calling this library.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "automlst.engine"
 dynamic = ["version"]
+readme = "README.md"

 dependencies = [
    "biopython",
--- a/src/automlst/engine/remote/databases/ncbi/init.py
+++ b/src/automlst/engine/remote/databases/ncbi/init.py
--- a/src/automlst/engine/data/local/csv.py
+++ b/src/automlst/engine/data/local/csv.py
@@ -1,9 +1,8 @@
 import csv
-from io import TextIOWrapper
 from os import PathLike
-from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
+from typing import AsyncIterable, Mapping, Sequence, Union

-from automlst.engine.data.mlst import Allele, MLSTProfile
+from automlst.engine.data.structures.mlst import Allele, MLSTProfile


 def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
@@ -11,10 +10,11 @@ def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Alle
    for loci, alleles in alleles_map.items():
        if len(alleles) == 1:
            result_dict[loci] = alleles[0].allele_variant
-        for allele in alleles:
+        else:
            result_locis = list()
-            result_locis.append(allele.allele_variant)
-            result_dict[loci] = result_locis
+            for allele in alleles:
+                result_locis.append(allele.allele_variant)
+                result_dict[loci] = result_locis
    return result_dict


--- a/src/automlst/engine/data/local/fasta.py
+++ b/src/automlst/engine/data/local/fasta.py
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
 from Bio import SeqIO

-from automlst.engine.data.genomics import NamedString
+from automlst.engine.data.structures.genomics import NamedString

 async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
--- a/src/automlst/engine/data/remote/init.py
+++ b/src/automlst/engine/data/remote/init.py
--- a/src/automlst/engine/data/remote/databases/bigsdb.py
+++ b/src/automlst/engine/data/remote/databases/bigsdb.py
@@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It

 from aiohttp import ClientSession, ClientTimeout

-from automlst.engine.data.genomics import NamedString
-from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
+from automlst.engine.data.structures.genomics import NamedString
+from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
 from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException

 class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
--- a/src/automlst/engine/data/structures/init.py
+++ b/src/automlst/engine/data/structures/init.py
--- a/src/automlst/engine/data/structures/genomics.py
+++ b/src/automlst/engine/data/structures/genomics.py
--- a/src/automlst/engine/data/structures/mlst.py
+++ b/src/automlst/engine/data/structures/mlst.py
--- a/src/automlst/engine/local/abif.py
+++ b/src/automlst/engine/local/abif.py
@@ -1,126 +0,0 @@
-import asyncio
-from numbers import Number
-from os import path
-from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
-from automlst.engine.data.genomics import NamedString, SangerTraceData
-from Bio.SeqRecord import SeqRecord
-from Bio import SeqIO, Align
-
-from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
-
-
-def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
-    with open(seq_path, "rb") as seq_handle:
-        return SeqIO.read(seq_handle, "abi")
-
-
-async def read_abif(seq_path: str) -> SangerTraceData:
-    ext = path.splitext(seq_path)[1]
-    if ext.lower() != ".ab1" and ext.lower() != "abi":
-        raise ValueError(
-            'seq_path must have file extension of "ab1", or "abi".')
-    biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
-    biopython_annotations = biopython_seq.annotations
-
-    # Lot of type ignoring since Biopython did not define their typing.
-    biopython_abif_raw = biopython_annotations["abif_raw"]  # type: ignore
-    trace_data = SangerTraceData(
-        path.basename(seq_path),
-        biopython_seq.seq,
-        biopython_abif_raw.get("APFN2"),  # type: ignore
-        biopython_abif_raw.get("APrN1"),  # type: ignore
-        biopython_abif_raw.get("APrV1"),  # type: ignore
-        biopython_abif_raw.get("APrX1"),  # type: ignore
-        biopython_abif_raw.get("APXV1"),  # type: ignore
-        biopython_abif_raw.get("CMNT1"),  # type: ignore
-        biopython_abif_raw.get("CpEP1"),  # type: ignore
-        biopython_abif_raw.get("CTID1"),  # type: ignore
-        biopython_abif_raw.get("CTNM1"),  # type: ignore
-        biopython_abif_raw.get("CTTL1"),  # type: ignore
-        biopython_abif_raw.get("DATA1"),  # type: ignore
-        biopython_abif_raw.get("DATA2"),  # type: ignore
-        biopython_abif_raw.get("DATA3"),  # type: ignore
-        biopython_abif_raw.get("DATA4"),  # type: ignore
-        biopython_abif_raw.get("DATA5"),  # type: ignore
-        biopython_abif_raw.get("DATA6"),  # type: ignore
-        biopython_abif_raw.get("DATA7"),  # type: ignore
-        biopython_abif_raw.get("DATA8"),  # type: ignore
-        biopython_abif_raw.get("DSam1"),  # type: ignore
-        biopython_abif_raw.get("DyeN1"),  # type: ignore
-        biopython_abif_raw.get("DyeN2"),  # type: ignore
-        biopython_abif_raw.get("DyeN3"),  # type: ignore
-        biopython_abif_raw.get("DyeN4"),  # type: ignore
-        biopython_abif_raw.get("DyeW1"),  # type: ignore
-        biopython_abif_raw.get("DyeW2"),  # type: ignore
-        biopython_abif_raw.get("DyeW3"),  # type: ignore
-        biopython_abif_raw.get("DyeW4"),  # type: ignore
-        biopython_abif_raw.get("DySN1"),  # type: ignore
-        biopython_abif_raw.get("EPVt1"),  # type: ignore
-        biopython_abif_raw.get("EVNT1"),  # type: ignore
-        biopython_abif_raw.get("EVNT2"),  # type: ignore
-        biopython_abif_raw.get("EVNT3"),  # type: ignore
-        biopython_abif_raw.get("EVNT4"),  # type: ignore
-        biopython_abif_raw.get("FWO_1"),  # type: ignore
-        biopython_abif_raw.get("GTyp1"),  # type: ignore
-        biopython_abif_raw.get("InSc1"),  # type: ignore
-        biopython_abif_raw.get("InVt1"),  # type: ignore
-        biopython_abif_raw.get("LANE1"),  # type: ignore
-        biopython_abif_raw.get("LIMS1"),  # type: ignore
-        biopython_abif_raw.get("LNTD1"),  # type: ignore
-        biopython_abif_raw.get("LsrP1"),  # type: ignore
-        biopython_abif_raw.get("MCHN1"),  # type: ignore
-        biopython_abif_raw.get("MODF1"),  # type: ignore
-        biopython_abif_raw.get("MODL1"),  # type: ignore
-        biopython_abif_raw.get("NAVG1"),  # type: ignore
-        biopython_abif_raw.get("NLNE1"),  # type: ignore
-        biopython_abif_raw.get("OfSc1"),  # type: ignore
-        biopython_abif_raw.get("PDMF1"),  # type: ignore
-        biopython_abif_raw.get("PXLB1"),  # type: ignore
-        biopython_abif_raw.get("RGCm1"),  # type: ignore
-        biopython_abif_raw.get("RGNm1"),  # type: ignore
-        biopython_abif_raw.get("RMdV1"),  # type: ignore
-        biopython_abif_raw.get("RMdX1"),  # type: ignore
-        biopython_abif_raw.get("RMXV1"),  # type: ignore
-        biopython_abif_raw.get("RPrN1"),  # type: ignore
-        biopython_abif_raw.get("RPrV1"),  # type: ignore
-        biopython_abif_raw.get("RUND1"),  # type: ignore
-        biopython_abif_raw.get("RUND2"),  # type: ignore
-        biopython_abif_raw.get("RUND3"),  # type: ignore
-        biopython_abif_raw.get("RUND4"),  # type: ignore
-        biopython_abif_raw.get("RunN1"),  # type: ignore
-        biopython_abif_raw.get("RUNT1"),  # type: ignore
-        biopython_abif_raw.get("RUNT2"),  # type: ignore
-        biopython_abif_raw.get("RUNT3"),  # type: ignore
-        biopython_abif_raw.get("RUNT4"),  # type: ignore
-        biopython_abif_raw.get("Satd"),  # type: ignore
-        biopython_abif_raw.get("Scal1"),  # type: ignore
-        biopython_abif_raw.get("SCAN1"),  # type: ignore
-        biopython_abif_raw.get("SMED1"),  # type: ignore
-        biopython_abif_raw.get("SMLt"),  # type: ignore
-        biopython_abif_raw.get("SMPL1"),  # type: ignore
-        biopython_abif_raw.get("SVER1"),  # type: ignore
-        biopython_abif_raw.get("SVER3"),  # type: ignore
-        biopython_abif_raw.get("Tmpr1"),  # type: ignore
-        biopython_abif_raw.get("TUBE"),  # type: ignore
-        biopython_abif_raw.get("User")  # type: ignore
-    )
-    return trace_data
-
-
-def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
-    aligner = Align.PairwiseAligner(scoring="blastn")
-    aligner.mode = "local"
-    alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
-        0]  # take the best alignment
-    # TODO actually assemble the consensus sequence here
-    raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
-
-
-async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
-    if isinstance(reference, str):
-        reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
-    else:
-        reference_seq: NamedString  = reference
-    for sanger_trace in sanger_traces:
-        yield NamedString("NA", "NA")
-        raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
--- a/src/automlst/engine/remote/databases/ncbi/genbank.py
+++ b/src/automlst/engine/remote/databases/ncbi/genbank.py
@@ -1,27 +0,0 @@
-import asyncio
-from Bio import Entrez
-from Bio import SeqIO
-
-# TODO Change this out for a more professional approach
-Entrez.email = "yunyangdeng@outlook.com"
-
-from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
-
-
-async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
-    with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
-        record = SeqIO.read(fetch_stream, "genbank")
-        sequence_features = list()
-        for feature in record.features:
-            start = int(feature.location.start)
-            end = int(feature.location.end)
-            qualifiers = feature.qualifiers
-            for qualifier_key in qualifiers:
-                qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
-            sequence_features.append(StringAnnotation(
-                type=feature.type,
-                start=start,
-                end=end+1,  # Position is exclusive
-                feature_properties=qualifiers
-            ))
-        return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
--- a/tests/automlst/engine/data/local/test_csv.py
+++ b/tests/automlst/engine/data/local/test_csv.py
@@ -0,0 +1,21 @@
+from automlst.engine.data.local.csv import dict_loci_alleles_variants_from_loci
+from automlst.engine.data.structures.mlst import Allele
+
+
+def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
+    alleles_map = {
+        "adk": [Allele("adk", "1", None)]
+    }
+    results = dict_loci_alleles_variants_from_loci(alleles_map)
+    for loci, variant in results.items():
+        assert isinstance(variant, str)
+        assert variant == "1"
+
+def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
+    alleles_map = {
+        "adk": [Allele("adk", "1", None), Allele("adk", "2", None)]
+    }
+    results = dict_loci_alleles_variants_from_loci(alleles_map)
+    for loci, variant in results.items():
+        assert isinstance(variant, list)
+        assert len(variant) == 2
--- a/tests/automlst/engine/data/local/test_fasta.py
+++ b/tests/automlst/engine/data/local/test_fasta.py
@@ -1,4 +1,4 @@
-from automlst.engine.local.fasta import read_fasta
+from automlst.engine.data.local.fasta import read_fasta


 async def test_fasta_reader_not_none():
--- a/tests/automlst/engine/data/remote/databases/test_bigsdb.py
+++ b/tests/automlst/engine/data/remote/databases/test_bigsdb.py
@@ -3,10 +3,10 @@ import re
 from typing import Collection, Sequence, Union
 from Bio import SeqIO
 import pytest
-from automlst.engine.data.genomics import NamedString
-from automlst.engine.data.mlst import Allele, MLSTProfile
+from automlst.engine.data.structures.genomics import NamedString
+from automlst.engine.data.structures.mlst import Allele, MLSTProfile
 from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
-from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
+from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler

 def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
    rand = random.Random(gene)
--- a/tests/automlst/engine/local/test_abif.py
+++ b/tests/automlst/engine/local/test_abif.py
@@ -1,12 +0,0 @@
-import os
-
-from automlst.engine.local.abif import read_abif, reference_consensus_assembly
-
-async def test_load_sanger_sequence_has_data():
-    assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
-    result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
-    assert result_data is not None
-
-async def test_consensus_assembly_with_ncbi():
-    consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")])
-    # TODO complete implementing this
--- a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py
+++ b/tests/automlst/engine/remote/databases/ncbi/test_genbank.py
@@ -1,5 +0,0 @@
-from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
-
-
-async def test_fetch_ncbi_genbank_with_id_works():
-    assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0
Author	SHA1	Message	Date
Harrison Deng	b89f24a3fa	Fixed issue where multiple allele identifiers were not returned in output.	2025-01-17 17:04:22 +00:00
Harrison Deng	e7c8c5bcf9	Changed allele CSV output to non-lists	2025-01-17 16:56:52 +00:00
Harrison Deng	ab44dfaa48	Publish to pypi if it has a release tag	2025-01-17 15:13:47 +00:00
Harrison Deng	611b956d88	Elaborated on the README.md and added references to it in pyproject.toml	2025-01-17 15:08:47 +00:00
Harrison Deng	cb22dfac9b	Merge branch 'develop'	2025-01-17 14:34:16 +00:00
Harrison Deng	7ea7ead46a	Moved ABIF code to separate project	2025-01-17 14:27:25 +00:00
Harrison Deng	a3c864b565	Refactored code layout	2025-01-16 21:54:52 +00:00
Harrison Deng	bad7dfc3a8	Changing all publishing steps to use API keys	2025-01-16 21:29:20 +00:00
Harrison Deng	4fe0f0f287	Added stage for publishing to test.pypi.org when the tag a version number.	2025-01-16 21:22:49 +00:00