From 4fe0f0f287bc35729e25109d187bafea1187a277 Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Thu, 16 Jan 2025 21:22:49 +0000 Subject: [PATCH 1/4] Added stage for publishing to test.pypi.org when the tag is a version number. --- Jenkinsfile | 25 +++++++++++++---- .../engine/remote/databases/ncbi/__init__.py | 0 .../engine/remote/databases/ncbi/genbank.py | 27 ------------------- 3 files changed, 20 insertions(+), 32 deletions(-) delete mode 100644 src/automlst/engine/remote/databases/ncbi/__init__.py delete mode 100644 src/automlst/engine/remote/databases/ncbi/genbank.py diff --git a/Jenkinsfile b/Jenkinsfile index 8de8402..88a9444 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -30,11 +30,26 @@ pipeline { } } stage("publish") { - environment { - CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311') - } - steps { - sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*' + parallel { + stage ("git.reslate.systems") { + environment { + CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311') + } + steps { + sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*' + } + } + stage ("test.pypi.org") { + when { + tag '*.*' + } + environment { + TOKEN = credentials('test.pypi.org') + } + steps { + sh returnStatus: true, script: 'python -m twine upload -r testpypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*' + } + } } } } diff --git a/src/automlst/engine/remote/databases/ncbi/__init__.py b/src/automlst/engine/remote/databases/ncbi/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/automlst/engine/remote/databases/ncbi/genbank.py 
b/src/automlst/engine/remote/databases/ncbi/genbank.py deleted file mode 100644 index a9415ee..0000000 --- a/src/automlst/engine/remote/databases/ncbi/genbank.py +++ /dev/null @@ -1,27 +0,0 @@ -import asyncio -from Bio import Entrez -from Bio import SeqIO - -# TODO Change this out for a more professional approach -Entrez.email = "yunyangdeng@outlook.com" - -from automlst.engine.data.genomics import AnnotatedString, StringAnnotation - - -async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString: - with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream: - record = SeqIO.read(fetch_stream, "genbank") - sequence_features = list() - for feature in record.features: - start = int(feature.location.start) - end = int(feature.location.end) - qualifiers = feature.qualifiers - for qualifier_key in qualifiers: - qualifiers[qualifier_key] = set(qualifiers[qualifier_key]) - sequence_features.append(StringAnnotation( - type=feature.type, - start=start, - end=end+1, # Position is exclusive - feature_properties=qualifiers - )) - return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features) \ No newline at end of file From bad7dfc3a892b3c2d8e3149bb828eeb83aaca8bf Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Thu, 16 Jan 2025 21:29:20 +0000 Subject: [PATCH 2/4] Changing all publishing steps to use API keys --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 88a9444..59bb395 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -33,10 +33,10 @@ pipeline { parallel { stage ("git.reslate.systems") { environment { - CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311') + TOKEN = credentials('git.reslate.systems') } steps { - sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive 
--disable-progress-bar --verbose dist/*' + sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*' } } stage ("test.pypi.org") { From a3c864b565fb245a0ea4dd2763b26a661a67ea34 Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Thu, 16 Jan 2025 21:54:52 +0000 Subject: [PATCH 3/4] Refactored code layout --- src/automlst/engine/data/local/__init__.py | 0 src/automlst/engine/{ => data}/local/abif.py | 14 +------------- src/automlst/engine/{ => data}/local/csv.py | 2 +- src/automlst/engine/{ => data}/local/fasta.py | 2 +- src/automlst/engine/data/remote/__init__.py | 0 .../engine/{ => data}/remote/databases/bigsdb.py | 4 ++-- src/automlst/engine/data/structures/__init__.py | 0 .../engine/data/{ => structures}/genomics.py | 0 src/automlst/engine/data/{ => structures}/mlst.py | 0 tests/automlst/engine/local/test_abif.py | 6 +----- .../engine/remote/databases/ncbi/test_genbank.py | 5 ----- .../engine/remote/databases/test_bigsdb.py | 6 +++--- 12 files changed, 9 insertions(+), 30 deletions(-) create mode 100644 src/automlst/engine/data/local/__init__.py rename src/automlst/engine/{ => data}/local/abif.py (89%) rename src/automlst/engine/{ => data}/local/csv.py (96%) rename src/automlst/engine/{ => data}/local/fasta.py (91%) create mode 100644 src/automlst/engine/data/remote/__init__.py rename src/automlst/engine/{ => data}/remote/databases/bigsdb.py (98%) create mode 100644 src/automlst/engine/data/structures/__init__.py rename src/automlst/engine/data/{ => structures}/genomics.py (100%) rename src/automlst/engine/data/{ => structures}/mlst.py (100%) delete mode 100644 tests/automlst/engine/remote/databases/ncbi/test_genbank.py diff --git a/src/automlst/engine/data/local/__init__.py b/src/automlst/engine/data/local/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/local/abif.py 
b/src/automlst/engine/data/local/abif.py similarity index 89% rename from src/automlst/engine/local/abif.py rename to src/automlst/engine/data/local/abif.py index 654705f..c5ecb1d 100644 --- a/src/automlst/engine/local/abif.py +++ b/src/automlst/engine/data/local/abif.py @@ -2,12 +2,10 @@ import asyncio from numbers import Number from os import path from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union -from automlst.engine.data.genomics import NamedString, SangerTraceData +from automlst.engine.data.structures.genomics import NamedString, SangerTraceData from Bio.SeqRecord import SeqRecord from Bio import SeqIO, Align -from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank - def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord: with open(seq_path, "rb") as seq_handle: @@ -114,13 +112,3 @@ def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedStri 0] # take the best alignment # TODO actually assemble the consensus sequence here raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") - - -async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]: - if isinstance(reference, str): - reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence) - else: - reference_seq: NamedString = reference - for sanger_trace in sanger_traces: - yield NamedString("NA", "NA") - raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") \ No newline at end of file diff --git a/src/automlst/engine/local/csv.py b/src/automlst/engine/data/local/csv.py similarity index 96% rename from src/automlst/engine/local/csv.py rename to src/automlst/engine/data/local/csv.py index 2a3bb1f..8bef3d0 100644 --- a/src/automlst/engine/local/csv.py +++ b/src/automlst/engine/data/local/csv.py @@ -3,7 +3,7 @@ from 
io import TextIOWrapper from os import PathLike from typing import AsyncIterable, Iterable, Mapping, Sequence, Union -from automlst.engine.data.mlst import Allele, MLSTProfile +from automlst.engine.data.structures.mlst import Allele, MLSTProfile def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]): diff --git a/src/automlst/engine/local/fasta.py b/src/automlst/engine/data/local/fasta.py similarity index 91% rename from src/automlst/engine/local/fasta.py rename to src/automlst/engine/data/local/fasta.py index 4fb9cb8..2637ddf 100644 --- a/src/automlst/engine/local/fasta.py +++ b/src/automlst/engine/data/local/fasta.py @@ -3,7 +3,7 @@ from io import TextIOWrapper from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union from Bio import SeqIO -from automlst.engine.data.genomics import NamedString +from automlst.engine.data.structures.genomics import NamedString async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]: fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta") diff --git a/src/automlst/engine/data/remote/__init__.py b/src/automlst/engine/data/remote/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/remote/databases/bigsdb.py b/src/automlst/engine/data/remote/databases/bigsdb.py similarity index 98% rename from src/automlst/engine/remote/databases/bigsdb.py rename to src/automlst/engine/data/remote/databases/bigsdb.py index 6cd877f..53a9d59 100644 --- a/src/automlst/engine/remote/databases/bigsdb.py +++ b/src/automlst/engine/data/remote/databases/bigsdb.py @@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It from aiohttp import ClientSession, ClientTimeout -from automlst.engine.data.genomics import NamedString -from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile +from automlst.engine.data.structures.genomics import NamedString +from 
automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException class BIGSdbMLSTProfiler(AbstractAsyncContextManager): diff --git a/src/automlst/engine/data/structures/__init__.py b/src/automlst/engine/data/structures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automlst/engine/data/genomics.py b/src/automlst/engine/data/structures/genomics.py similarity index 100% rename from src/automlst/engine/data/genomics.py rename to src/automlst/engine/data/structures/genomics.py diff --git a/src/automlst/engine/data/mlst.py b/src/automlst/engine/data/structures/mlst.py similarity index 100% rename from src/automlst/engine/data/mlst.py rename to src/automlst/engine/data/structures/mlst.py diff --git a/tests/automlst/engine/local/test_abif.py b/tests/automlst/engine/local/test_abif.py index cc514e1..ff3f05a 100644 --- a/tests/automlst/engine/local/test_abif.py +++ b/tests/automlst/engine/local/test_abif.py @@ -1,12 +1,8 @@ import os -from automlst.engine.local.abif import read_abif, reference_consensus_assembly +from automlst.engine.data.local.abif import read_abif async def test_load_sanger_sequence_has_data(): assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1") result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1") assert result_data is not None - -async def test_consensus_assembly_with_ncbi(): - consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")]) - # TODO complete implementing this \ No newline at end of file diff --git a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py b/tests/automlst/engine/remote/databases/ncbi/test_genbank.py deleted file mode 100644 index 25c8fed..0000000 --- 
a/tests/automlst/engine/remote/databases/ncbi/test_genbank.py +++ /dev/null @@ -1,5 +0,0 @@ -from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank - - -async def test_fetch_ncbi_genbank_with_id_works(): - assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0 \ No newline at end of file diff --git a/tests/automlst/engine/remote/databases/test_bigsdb.py b/tests/automlst/engine/remote/databases/test_bigsdb.py index f649281..0aa10de 100644 --- a/tests/automlst/engine/remote/databases/test_bigsdb.py +++ b/tests/automlst/engine/remote/databases/test_bigsdb.py @@ -3,10 +3,10 @@ import re from typing import Collection, Sequence, Union from Bio import SeqIO import pytest -from automlst.engine.data.genomics import NamedString -from automlst.engine.data.mlst import Allele, MLSTProfile +from automlst.engine.data.structures.genomics import NamedString +from automlst.engine.data.structures.mlst import Allele, MLSTProfile from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException -from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler +from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]): rand = random.Random(gene) From 7ea7ead46aa787e3ae3a7e7c29822194424923aa Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Fri, 17 Jan 2025 14:27:25 +0000 Subject: [PATCH 4/4] Moved ABIF code to separate project --- src/automlst/engine/data/local/abif.py | 114 ------------------ .../engine/{ => data}/local/test_fasta.py | 2 +- .../remote/databases/test_bigsdb.py | 0 tests/automlst/engine/local/test_abif.py | 8 -- 4 files changed, 1 insertion(+), 123 deletions(-) delete mode 100644 src/automlst/engine/data/local/abif.py rename tests/automlst/engine/{ => data}/local/test_fasta.py (79%) rename tests/automlst/engine/{ => 
data}/remote/databases/test_bigsdb.py (100%) delete mode 100644 tests/automlst/engine/local/test_abif.py diff --git a/src/automlst/engine/data/local/abif.py b/src/automlst/engine/data/local/abif.py deleted file mode 100644 index c5ecb1d..0000000 --- a/src/automlst/engine/data/local/abif.py +++ /dev/null @@ -1,114 +0,0 @@ -import asyncio -from numbers import Number -from os import path -from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union -from automlst.engine.data.structures.genomics import NamedString, SangerTraceData -from Bio.SeqRecord import SeqRecord -from Bio import SeqIO, Align - - -def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord: - with open(seq_path, "rb") as seq_handle: - return SeqIO.read(seq_handle, "abi") - - -async def read_abif(seq_path: str) -> SangerTraceData: - ext = path.splitext(seq_path)[1] - if ext.lower() != ".ab1" and ext.lower() != "abi": - raise ValueError( - 'seq_path must have file extension of "ab1", or "abi".') - biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path) - biopython_annotations = biopython_seq.annotations - - # Lot of type ignoring since Biopython did not define their typing. 
- biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore - trace_data = SangerTraceData( - path.basename(seq_path), - biopython_seq.seq, - biopython_abif_raw.get("APFN2"), # type: ignore - biopython_abif_raw.get("APrN1"), # type: ignore - biopython_abif_raw.get("APrV1"), # type: ignore - biopython_abif_raw.get("APrX1"), # type: ignore - biopython_abif_raw.get("APXV1"), # type: ignore - biopython_abif_raw.get("CMNT1"), # type: ignore - biopython_abif_raw.get("CpEP1"), # type: ignore - biopython_abif_raw.get("CTID1"), # type: ignore - biopython_abif_raw.get("CTNM1"), # type: ignore - biopython_abif_raw.get("CTTL1"), # type: ignore - biopython_abif_raw.get("DATA1"), # type: ignore - biopython_abif_raw.get("DATA2"), # type: ignore - biopython_abif_raw.get("DATA3"), # type: ignore - biopython_abif_raw.get("DATA4"), # type: ignore - biopython_abif_raw.get("DATA5"), # type: ignore - biopython_abif_raw.get("DATA6"), # type: ignore - biopython_abif_raw.get("DATA7"), # type: ignore - biopython_abif_raw.get("DATA8"), # type: ignore - biopython_abif_raw.get("DSam1"), # type: ignore - biopython_abif_raw.get("DyeN1"), # type: ignore - biopython_abif_raw.get("DyeN2"), # type: ignore - biopython_abif_raw.get("DyeN3"), # type: ignore - biopython_abif_raw.get("DyeN4"), # type: ignore - biopython_abif_raw.get("DyeW1"), # type: ignore - biopython_abif_raw.get("DyeW2"), # type: ignore - biopython_abif_raw.get("DyeW3"), # type: ignore - biopython_abif_raw.get("DyeW4"), # type: ignore - biopython_abif_raw.get("DySN1"), # type: ignore - biopython_abif_raw.get("EPVt1"), # type: ignore - biopython_abif_raw.get("EVNT1"), # type: ignore - biopython_abif_raw.get("EVNT2"), # type: ignore - biopython_abif_raw.get("EVNT3"), # type: ignore - biopython_abif_raw.get("EVNT4"), # type: ignore - biopython_abif_raw.get("FWO_1"), # type: ignore - biopython_abif_raw.get("GTyp1"), # type: ignore - biopython_abif_raw.get("InSc1"), # type: ignore - biopython_abif_raw.get("InVt1"), # type: 
ignore - biopython_abif_raw.get("LANE1"), # type: ignore - biopython_abif_raw.get("LIMS1"), # type: ignore - biopython_abif_raw.get("LNTD1"), # type: ignore - biopython_abif_raw.get("LsrP1"), # type: ignore - biopython_abif_raw.get("MCHN1"), # type: ignore - biopython_abif_raw.get("MODF1"), # type: ignore - biopython_abif_raw.get("MODL1"), # type: ignore - biopython_abif_raw.get("NAVG1"), # type: ignore - biopython_abif_raw.get("NLNE1"), # type: ignore - biopython_abif_raw.get("OfSc1"), # type: ignore - biopython_abif_raw.get("PDMF1"), # type: ignore - biopython_abif_raw.get("PXLB1"), # type: ignore - biopython_abif_raw.get("RGCm1"), # type: ignore - biopython_abif_raw.get("RGNm1"), # type: ignore - biopython_abif_raw.get("RMdV1"), # type: ignore - biopython_abif_raw.get("RMdX1"), # type: ignore - biopython_abif_raw.get("RMXV1"), # type: ignore - biopython_abif_raw.get("RPrN1"), # type: ignore - biopython_abif_raw.get("RPrV1"), # type: ignore - biopython_abif_raw.get("RUND1"), # type: ignore - biopython_abif_raw.get("RUND2"), # type: ignore - biopython_abif_raw.get("RUND3"), # type: ignore - biopython_abif_raw.get("RUND4"), # type: ignore - biopython_abif_raw.get("RunN1"), # type: ignore - biopython_abif_raw.get("RUNT1"), # type: ignore - biopython_abif_raw.get("RUNT2"), # type: ignore - biopython_abif_raw.get("RUNT3"), # type: ignore - biopython_abif_raw.get("RUNT4"), # type: ignore - biopython_abif_raw.get("Satd"), # type: ignore - biopython_abif_raw.get("Scal1"), # type: ignore - biopython_abif_raw.get("SCAN1"), # type: ignore - biopython_abif_raw.get("SMED1"), # type: ignore - biopython_abif_raw.get("SMLt"), # type: ignore - biopython_abif_raw.get("SMPL1"), # type: ignore - biopython_abif_raw.get("SVER1"), # type: ignore - biopython_abif_raw.get("SVER3"), # type: ignore - biopython_abif_raw.get("Tmpr1"), # type: ignore - biopython_abif_raw.get("TUBE"), # type: ignore - biopython_abif_raw.get("User") # type: ignore - ) - return trace_data - - -def 
_biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]: - aligner = Align.PairwiseAligner(scoring="blastn") - aligner.mode = "local" - alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[ - 0] # take the best alignment - # TODO actually assemble the consensus sequence here - raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.") diff --git a/tests/automlst/engine/local/test_fasta.py b/tests/automlst/engine/data/local/test_fasta.py similarity index 79% rename from tests/automlst/engine/local/test_fasta.py rename to tests/automlst/engine/data/local/test_fasta.py index cc18ff2..9065458 100644 --- a/tests/automlst/engine/local/test_fasta.py +++ b/tests/automlst/engine/data/local/test_fasta.py @@ -1,4 +1,4 @@ -from automlst.engine.local.fasta import read_fasta +from automlst.engine.data.local.fasta import read_fasta async def test_fasta_reader_not_none(): diff --git a/tests/automlst/engine/remote/databases/test_bigsdb.py b/tests/automlst/engine/data/remote/databases/test_bigsdb.py similarity index 100% rename from tests/automlst/engine/remote/databases/test_bigsdb.py rename to tests/automlst/engine/data/remote/databases/test_bigsdb.py diff --git a/tests/automlst/engine/local/test_abif.py b/tests/automlst/engine/local/test_abif.py deleted file mode 100644 index ff3f05a..0000000 --- a/tests/automlst/engine/local/test_abif.py +++ /dev/null @@ -1,8 +0,0 @@ -import os - -from automlst.engine.data.local.abif import read_abif - -async def test_load_sanger_sequence_has_data(): - assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1") - result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1") - assert result_data is not None