Merge branch 'develop'
This commit is contained in:
		
							
								
								
									
										25
									
								
								Jenkinsfile
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										25
									
								
								Jenkinsfile
									
									
									
									
										vendored
									
									
								
							@@ -30,11 +30,26 @@ pipeline {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        stage("publish") {
 | 
			
		||||
            environment {
 | 
			
		||||
                CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
 | 
			
		||||
            }
 | 
			
		||||
            steps {
 | 
			
		||||
                sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
 | 
			
		||||
            parallel {
 | 
			
		||||
                stage ("git.reslate.systems") {
 | 
			
		||||
                    environment {
 | 
			
		||||
                        TOKEN = credentials('git.reslate.systems')
 | 
			
		||||
                    }
 | 
			
		||||
                    steps {
 | 
			
		||||
                        sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                stage ("test.pypi.org") {
 | 
			
		||||
                    when {
 | 
			
		||||
                        tag '*.*'
 | 
			
		||||
                    }
 | 
			
		||||
                    environment {
 | 
			
		||||
                        TOKEN = credentials('test.pypi.org')
 | 
			
		||||
                    }
 | 
			
		||||
                    steps {
 | 
			
		||||
                        sh returnStatus: true, script: 'python -m twine upload -r testpypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 | 
			
		||||
from os import PathLike
 | 
			
		||||
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
 | 
			
		||||
 | 
			
		||||
from automlst.engine.data.mlst import Allele, MLSTProfile
 | 
			
		||||
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
 | 
			
		||||
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 | 
			
		||||
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
 | 
			
		||||
from Bio import SeqIO
 | 
			
		||||
 | 
			
		||||
from automlst.engine.data.genomics import NamedString
 | 
			
		||||
from automlst.engine.data.structures.genomics import NamedString
 | 
			
		||||
 | 
			
		||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
 | 
			
		||||
    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
 | 
			
		||||
							
								
								
									
										0
									
								
								src/automlst/engine/data/remote/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/automlst/engine/data/remote/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It
 | 
			
		||||
 | 
			
		||||
from aiohttp import ClientSession, ClientTimeout
 | 
			
		||||
 | 
			
		||||
from automlst.engine.data.genomics import NamedString
 | 
			
		||||
from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
 | 
			
		||||
from automlst.engine.data.structures.genomics import NamedString
 | 
			
		||||
from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
 | 
			
		||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
 | 
			
		||||
 | 
			
		||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
 | 
			
		||||
							
								
								
									
										0
									
								
								src/automlst/engine/data/structures/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/automlst/engine/data/structures/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -1,126 +0,0 @@
 | 
			
		||||
import asyncio
 | 
			
		||||
from numbers import Number
 | 
			
		||||
from os import path
 | 
			
		||||
from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
 | 
			
		||||
from automlst.engine.data.genomics import NamedString, SangerTraceData
 | 
			
		||||
from Bio.SeqRecord import SeqRecord
 | 
			
		||||
from Bio import SeqIO, Align
 | 
			
		||||
 | 
			
		||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
    """Parse the ABIF trace file at ``seq_path`` into a Biopython record.

    The file is opened in binary mode and passed to ``SeqIO.read`` using the
    ``"abi"`` format. The handle is closed once parsing has finished.
    """
    with open(seq_path, "rb") as abif_file:
        record = SeqIO.read(abif_file, "abi")
    return record
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def read_abif(seq_path: str) -> SangerTraceData:
    """Load a Sanger trace (ABIF) file into a ``SangerTraceData``.

    The file is parsed by Biopython in a worker thread. The resulting
    ``SangerTraceData`` is built positionally from the file's base name, the
    called sequence, and the raw ABIF tag values listed in ``abif_tags``
    (in that order). Tags missing from the file are passed as ``None``.

    Args:
        seq_path: Path to the trace file; must end in ``.ab1`` or ``.abi``
            (case-insensitive).

    Returns:
        The trace data extracted from the file.

    Raises:
        ValueError: If ``seq_path`` does not have an accepted extension.
    """
    extension = path.splitext(seq_path)[1].lower()
    # splitext keeps the leading dot, so both accepted suffixes need it too.
    if extension not in (".ab1", ".abi"):
        raise ValueError(
            'seq_path must have file extension of "ab1", or "abi".')

    # Order matters: the values are handed to SangerTraceData positionally.
    abif_tags = (
        "APFN2", "APrN1", "APrV1", "APrX1", "APXV1",
        "CMNT1", "CpEP1", "CTID1", "CTNM1", "CTTL1",
        "DATA1", "DATA2", "DATA3", "DATA4",
        "DATA5", "DATA6", "DATA7", "DATA8",
        "DSam1",
        "DyeN1", "DyeN2", "DyeN3", "DyeN4",
        "DyeW1", "DyeW2", "DyeW3", "DyeW4",
        "DySN1", "EPVt1",
        "EVNT1", "EVNT2", "EVNT3", "EVNT4",
        "FWO_1", "GTyp1", "InSc1", "InVt1",
        "LANE1", "LIMS1", "LNTD1", "LsrP1",
        "MCHN1", "MODF1", "MODL1", "NAVG1", "NLNE1",
        "OfSc1", "PDMF1", "PXLB1",
        "RGCm1", "RGNm1", "RMdV1", "RMdX1", "RMXV1",
        "RPrN1", "RPrV1",
        "RUND1", "RUND2", "RUND3", "RUND4",
        "RunN1",
        "RUNT1", "RUNT2", "RUNT3", "RUNT4",
        "Satd", "Scal1", "SCAN1", "SMED1", "SMLt", "SMPL1",
        "SVER1", "SVER3", "Tmpr1", "TUBE", "User",
    )

    biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)

    # Lot of type ignoring since Biopython did not define their typing.
    biopython_abif_raw = biopython_seq.annotations["abif_raw"]  # type: ignore
    return SangerTraceData(
        path.basename(seq_path),
        biopython_seq.seq,
        *(biopython_abif_raw.get(tag) for tag in abif_tags),  # type: ignore
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
    """Placeholder for locally aligning ``query`` against ``reference``.

    A local-mode ``PairwiseAligner`` with ``blastn`` scoring is run over the
    two sequences, but nothing is built from the result yet: the function
    always finishes by raising ``NotImplementedError``.
    """
    local_aligner = Align.PairwiseAligner(scoring="blastn")
    local_aligner.mode = "local"
    candidates = sorted(local_aligner.align(reference.sequence, query.sequence))
    # NOTE(review): sorting ascending and taking index 0 may not select the
    # intended alignment, and materialises every candidate -- confirm.
    top_alignment = candidates[0]  # take the best alignment
    # TODO actually assemble the consensus sequence here
    raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
    """Unfinished consensus assembly of Sanger traces against a reference.

    When ``reference`` is a string it is passed to ``fetch_ncbi_genbank`` and
    the fetched sequence is wrapped in a ``NamedString`` named after that
    string; otherwise ``reference`` is used as given. For the first trace a
    placeholder ``NamedString("NA", "NA")`` is yielded, after which
    ``NotImplementedError`` is raised on the next step of the generator.
    """
    reference_seq: NamedString
    if not isinstance(reference, str):
        reference_seq = reference
    else:
        genbank_record = await fetch_ncbi_genbank(reference)
        reference_seq = NamedString(name=reference, sequence=genbank_record.sequence)
    for _trace in sanger_traces:
        yield NamedString("NA", "NA")
        raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
 | 
			
		||||
@@ -1,27 +0,0 @@
 | 
			
		||||
import asyncio
 | 
			
		||||
from Bio import Entrez
 | 
			
		||||
from Bio import SeqIO
 | 
			
		||||
 | 
			
		||||
# TODO Change this out for a more professional approach
 | 
			
		||||
Entrez.email = "yunyangdeng@outlook.com"
 | 
			
		||||
 | 
			
		||||
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Fetch a GenBank nucleotide record and convert it to an ``AnnotatedString``.

    The record is requested through ``Entrez.efetch`` (``db="nucleotide"``,
    GenBank text format) and parsed with ``SeqIO.read``; both calls run in
    worker threads. Every feature of the record becomes a ``StringAnnotation``
    carrying the feature type, its start/end and its qualifiers, with each
    qualifier's values converted to a ``set``.

    Args:
        genbank_id: Identifier passed to Entrez as ``id``; also used as the
            name of the returned ``AnnotatedString``.

    Returns:
        The record's sequence as a string together with its annotations.
    """
    fetch_stream = await asyncio.to_thread(
        Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")
    with fetch_stream:
        # Parsing reads from the response handle, which may block, so it is
        # kept off the event loop as well.
        record = await asyncio.to_thread(SeqIO.read, fetch_stream, "genbank")

    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # Biopython locations follow Python slice semantics, so ``end`` is
            # already exclusive; adding 1 would overshoot by one position.
            end=int(feature.location.end),
            # Build a fresh mapping rather than rewriting the record's own
            # qualifiers in place.
            feature_properties={
                key: set(values) for key, values in feature.qualifiers.items()
            },
        )
        for feature in record.features
    ]
    return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
from automlst.engine.local.fasta import read_fasta
 | 
			
		||||
from automlst.engine.data.local.fasta import read_fasta
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def test_fasta_reader_not_none():
 | 
			
		||||
@@ -3,10 +3,10 @@ import re
 | 
			
		||||
from typing import Collection, Sequence, Union
 | 
			
		||||
from Bio import SeqIO
 | 
			
		||||
import pytest
 | 
			
		||||
from automlst.engine.data.genomics import NamedString
 | 
			
		||||
from automlst.engine.data.mlst import Allele, MLSTProfile
 | 
			
		||||
from automlst.engine.data.structures.genomics import NamedString
 | 
			
		||||
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
 | 
			
		||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
 | 
			
		||||
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
 | 
			
		||||
from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
 | 
			
		||||
 | 
			
		||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
 | 
			
		||||
    rand = random.Random(gene)
 | 
			
		||||
@@ -1,12 +0,0 @@
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
 | 
			
		||||
 | 
			
		||||
async def test_load_sanger_sequence_has_data():
    """The bundled forward trace exists on disk and loads to a non-None result."""
    trace_path = "tests/resources/1I1_F_P1815443_047.ab1"
    assert os.path.exists(trace_path)
    loaded_trace = await read_abif(trace_path)
    assert loaded_trace is not None
 | 
			
		||||
 | 
			
		||||
async def test_consensus_assembly_with_ncbi():
    """Build the consensus generator from the forward and reverse traces.

    NOTE(review): the generator is created but never iterated and nothing is
    asserted, so this currently only checks that both traces load.
    """
    forward_trace = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
    reverse_trace = await read_abif("tests/resources/1I1_R_P1815443_094.ab1")
    consensus = reference_consensus_assembly("ON685494.1", [forward_trace, reverse_trace])
    # TODO complete implementing this
 | 
			
		||||
@@ -1,5 +0,0 @@
 | 
			
		||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def test_fetch_ncbi_genbank_with_id_works():
    """Fetching accession CP011448.1 yields a non-empty sequence."""
    fetched_record = await fetch_ncbi_genbank("CP011448.1")
    assert len(fetched_record.sequence) > 0
 | 
			
		||||
		Reference in New Issue
	
	Block a user