Merge branch 'develop'
This commit is contained in:
commit
cb22dfac9b
25
Jenkinsfile
vendored
25
Jenkinsfile
vendored
@ -30,11 +30,26 @@ pipeline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("publish") {
|
stage("publish") {
|
||||||
environment {
|
parallel {
|
||||||
CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
|
stage ("git.reslate.systems") {
|
||||||
}
|
environment {
|
||||||
steps {
|
TOKEN = credentials('git.reslate.systems')
|
||||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
}
|
||||||
|
steps {
|
||||||
|
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage ("test.pypi.org") {
|
||||||
|
when {
|
||||||
|
tag '*.*'
|
||||||
|
}
|
||||||
|
environment {
|
||||||
|
TOKEN = credentials('test.pypi.org')
|
||||||
|
}
|
||||||
|
steps {
|
||||||
|
sh returnStatus: true, script: 'python -m twine upload -r testpypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ from io import TextIOWrapper
|
|||||||
from os import PathLike
|
from os import PathLike
|
||||||
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
|
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
|
||||||
|
|
||||||
from automlst.engine.data.mlst import Allele, MLSTProfile
|
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
|
||||||
|
|
||||||
|
|
||||||
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
|
def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
|
@ -3,7 +3,7 @@ from io import TextIOWrapper
|
|||||||
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
|
from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
|
|
||||||
from automlst.engine.data.genomics import NamedString
|
from automlst.engine.data.structures.genomics import NamedString
|
||||||
|
|
||||||
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
|
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
|
||||||
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
0
src/automlst/engine/data/remote/__init__.py
Normal file
0
src/automlst/engine/data/remote/__init__.py
Normal file
@ -5,8 +5,8 @@ from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, It
|
|||||||
|
|
||||||
from aiohttp import ClientSession, ClientTimeout
|
from aiohttp import ClientSession, ClientTimeout
|
||||||
|
|
||||||
from automlst.engine.data.genomics import NamedString
|
from automlst.engine.data.structures.genomics import NamedString
|
||||||
from automlst.engine.data.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
|
from automlst.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
|
||||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
|
||||||
|
|
||||||
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
|
0
src/automlst/engine/data/structures/__init__.py
Normal file
0
src/automlst/engine/data/structures/__init__.py
Normal file
@ -1,126 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from numbers import Number
|
|
||||||
from os import path
|
|
||||||
from typing import Any, AsyncGenerator, Collection, Iterable, Sequence, Union
|
|
||||||
from automlst.engine.data.genomics import NamedString, SangerTraceData
|
|
||||||
from Bio.SeqRecord import SeqRecord
|
|
||||||
from Bio import SeqIO, Align
|
|
||||||
|
|
||||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
|
||||||
|
|
||||||
|
|
||||||
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
|
|
||||||
with open(seq_path, "rb") as seq_handle:
|
|
||||||
return SeqIO.read(seq_handle, "abi")
|
|
||||||
|
|
||||||
|
|
||||||
async def read_abif(seq_path: str) -> SangerTraceData:
|
|
||||||
ext = path.splitext(seq_path)[1]
|
|
||||||
if ext.lower() != ".ab1" and ext.lower() != "abi":
|
|
||||||
raise ValueError(
|
|
||||||
'seq_path must have file extension of "ab1", or "abi".')
|
|
||||||
biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
|
|
||||||
biopython_annotations = biopython_seq.annotations
|
|
||||||
|
|
||||||
# Lot of type ignoring since Biopython did not define their typing.
|
|
||||||
biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore
|
|
||||||
trace_data = SangerTraceData(
|
|
||||||
path.basename(seq_path),
|
|
||||||
biopython_seq.seq,
|
|
||||||
biopython_abif_raw.get("APFN2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("APrN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("APrV1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("APrX1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("APXV1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("CMNT1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("CpEP1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("CTID1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("CTNM1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("CTTL1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA5"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA6"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA7"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DATA8"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DSam1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeN2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeN3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeN4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeW1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeW2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeW3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DyeW4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("DySN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("EPVt1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("EVNT1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("EVNT2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("EVNT3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("EVNT4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("FWO_1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("GTyp1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("InSc1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("InVt1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("LANE1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("LIMS1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("LNTD1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("LsrP1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("MCHN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("MODF1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("MODL1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("NAVG1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("NLNE1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("OfSc1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("PDMF1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("PXLB1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RGCm1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RGNm1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RMdV1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RMdX1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RMXV1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RPrN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RPrV1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUND1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUND2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUND3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUND4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RunN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUNT1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUNT2"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUNT3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("RUNT4"), # type: ignore
|
|
||||||
biopython_abif_raw.get("Satd"), # type: ignore
|
|
||||||
biopython_abif_raw.get("Scal1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SCAN1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SMED1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SMLt"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SMPL1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SVER1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("SVER3"), # type: ignore
|
|
||||||
biopython_abif_raw.get("Tmpr1"), # type: ignore
|
|
||||||
biopython_abif_raw.get("TUBE"), # type: ignore
|
|
||||||
biopython_abif_raw.get("User") # type: ignore
|
|
||||||
)
|
|
||||||
return trace_data
|
|
||||||
|
|
||||||
|
|
||||||
def _biopython_local_pairwise_alignment(reference: NamedString, query: NamedString) -> tuple[NamedString, NamedString]:
|
|
||||||
aligner = Align.PairwiseAligner(scoring="blastn")
|
|
||||||
aligner.mode = "local"
|
|
||||||
alignment_result = sorted(aligner.align(reference.sequence, query.sequence))[
|
|
||||||
0] # take the best alignment
|
|
||||||
# TODO actually assemble the consensus sequence here
|
|
||||||
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
|
|
||||||
|
|
||||||
|
|
||||||
async def reference_consensus_assembly(reference: Union[NamedString, str], sanger_traces: Iterable[SangerTraceData]) -> AsyncGenerator[NamedString, Any]:
|
|
||||||
if isinstance(reference, str):
|
|
||||||
reference_seq = NamedString(name=reference, sequence=(await fetch_ncbi_genbank(reference)).sequence)
|
|
||||||
else:
|
|
||||||
reference_seq: NamedString = reference
|
|
||||||
for sanger_trace in sanger_traces:
|
|
||||||
yield NamedString("NA", "NA")
|
|
||||||
raise NotImplementedError("Pairwise alignment unto reference consensus assembly function not ready.")
|
|
@ -1,27 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from Bio import Entrez
|
|
||||||
from Bio import SeqIO
|
|
||||||
|
|
||||||
# TODO Change this out for a more professional approach
|
|
||||||
Entrez.email = "yunyangdeng@outlook.com"
|
|
||||||
|
|
||||||
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
|
||||||
with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
|
|
||||||
record = SeqIO.read(fetch_stream, "genbank")
|
|
||||||
sequence_features = list()
|
|
||||||
for feature in record.features:
|
|
||||||
start = int(feature.location.start)
|
|
||||||
end = int(feature.location.end)
|
|
||||||
qualifiers = feature.qualifiers
|
|
||||||
for qualifier_key in qualifiers:
|
|
||||||
qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
|
|
||||||
sequence_features.append(StringAnnotation(
|
|
||||||
type=feature.type,
|
|
||||||
start=start,
|
|
||||||
end=end+1, # Position is exclusive
|
|
||||||
feature_properties=qualifiers
|
|
||||||
))
|
|
||||||
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
|
@ -1,4 +1,4 @@
|
|||||||
from automlst.engine.local.fasta import read_fasta
|
from automlst.engine.data.local.fasta import read_fasta
|
||||||
|
|
||||||
|
|
||||||
async def test_fasta_reader_not_none():
|
async def test_fasta_reader_not_none():
|
@ -3,10 +3,10 @@ import re
|
|||||||
from typing import Collection, Sequence, Union
|
from typing import Collection, Sequence, Union
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
import pytest
|
import pytest
|
||||||
from automlst.engine.data.genomics import NamedString
|
from automlst.engine.data.structures.genomics import NamedString
|
||||||
from automlst.engine.data.mlst import Allele, MLSTProfile
|
from automlst.engine.data.structures.mlst import Allele, MLSTProfile
|
||||||
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
from automlst.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||||
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
|
from automlst.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler
|
||||||
|
|
||||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
||||||
rand = random.Random(gene)
|
rand = random.Random(gene)
|
@ -1,12 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
|
|
||||||
|
|
||||||
async def test_load_sanger_sequence_has_data():
|
|
||||||
assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
|
|
||||||
result_data = await read_abif("tests/resources/1I1_F_P1815443_047.ab1")
|
|
||||||
assert result_data is not None
|
|
||||||
|
|
||||||
async def test_consensus_assembly_with_ncbi():
|
|
||||||
consensus = reference_consensus_assembly("ON685494.1", [await read_abif("tests/resources/1I1_F_P1815443_047.ab1"), await read_abif("tests/resources/1I1_R_P1815443_094.ab1")])
|
|
||||||
# TODO complete implementing this
|
|
@ -1,5 +0,0 @@
|
|||||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
|
||||||
|
|
||||||
|
|
||||||
async def test_fetch_ncbi_genbank_with_id_works():
|
|
||||||
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0
|
|
Loading…
x
Reference in New Issue
Block a user