Merge branch 'features/improved-oop-architecture' into features/non-exact-notation
This commit is contained in:
commit
36bca1b70d
@ -1,70 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from concurrent.futures import Future, ThreadPoolExecutor
|
|
||||||
from contextlib import AbstractContextManager
|
|
||||||
from typing import Any, Set, Union
|
|
||||||
from Bio.Align import PairwiseAligner
|
|
||||||
from queue import Queue
|
|
||||||
|
|
||||||
from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment
|
|
||||||
|
|
||||||
class AsyncBiopythonPairwiseAlignmentEngine(AbstractContextManager):
    """Run pairwise alignments on a thread pool and hand results to async code.

    Use as a context manager: the thread pool is created on ``__enter__`` and
    shut down on ``__exit__``. Submit jobs with :meth:`align`, then either
    iterate the engine with ``async for`` or call :meth:`next_completed`
    until it returns ``None``.

    The engine is written for a single consumer: ``align`` and
    ``next_completed`` are expected to be called from the same thread, and
    only one ``next_completed`` call should be in flight at a time.
    """

    def __enter__(self):
        self._thread_pool = ThreadPoolExecutor(self._max_threads, thread_name_prefix="async-pairwise-alignment")
        return self

    def __init__(self, aligner: "PairwiseAligner", max_threads: int = 4):
        """Store the aligner and the worker-thread limit.

        :param aligner: aligner whose ``align(reference, query)`` is called
            for every submitted job.
        :param max_threads: maximum number of worker threads in the pool.
        """
        self._max_threads = max_threads
        self._aligner = aligner
        # Futures that were submitted but whose result has not yet been
        # handed out by next_completed(). Only touched by the consumer side.
        self._work_left: Set[Future] = set()
        # Futures that have finished (or were cancelled), in completion order.
        self._work_complete: Queue[Future] = Queue()

    def align(self, reference: str, query: str, **associated_data):
        """Queue an alignment of ``query`` against ``reference``.

        Any extra keyword arguments are carried through untouched and
        returned alongside the alignment result.
        """
        work = self._thread_pool.submit(
            self.work, reference, query, **associated_data)
        # Register the future BEFORE attaching the callback: if the job has
        # already finished, add_done_callback() runs the callback immediately.
        self._work_left.add(work)
        work.add_done_callback(self._on_complete)

    def _on_complete(self, future: Future):
        """Done-callback: publish a finished future to the completion queue."""
        self._work_complete.put(future)

    def work(self, reference, query, **associated_data):
        """Align ``query`` to ``reference`` and summarise the first alignment.

        :returns: a ``(PairwiseAlignment, associated_data)`` tuple.
        """
        alignments = self._aligner.align(reference, query)
        # Only the first alignment returned by the aligner is reported.
        top_alignment = alignments[0]
        top_alignment_stats = top_alignment.counts()
        top_alignment_score = top_alignment.score  # type: ignore
        stats = AlignmentStats(
            percent_identity=top_alignment_stats.identities / top_alignment.length,
            mismatches=top_alignment_stats.mismatches,
            gaps=top_alignment_stats.gaps,
            match_metric=top_alignment_score
        )
        alignment = PairwiseAlignment(
            top_alignment.sequences[0],
            top_alignment.sequences[1],
            tuple(top_alignment.indices[0]),
            tuple(top_alignment.indices[1]),
            stats)
        return alignment, associated_data

    async def next_completed(self) -> Union[tuple["PairwiseAlignment", dict[str, Any]], None]:
        """Wait for the next finished alignment.

        :returns: the ``(alignment, associated_data)`` tuple of the next job
            to finish, or ``None`` once every submitted job has been
            consumed.
        :raises Exception: whatever the job itself raised is re-raised here.
        """
        if not self._work_left:
            # Nothing outstanding: every submitted job was already handed out.
            return None
        # Queue.get() blocks, so wait for it on a helper thread instead of
        # stalling the event loop.
        finished = await asyncio.to_thread(self._work_complete.get)
        self._work_left.discard(finished)
        return await asyncio.wrap_future(finished)

    def __exit__(self, exc_type, exc_value, traceback):
        self.shutdown()

    def __aiter__(self):
        return self

    async def __anext__(self):
        result = await self.next_completed()
        if result is None:
            raise StopAsyncIteration
        return result

    def shutdown(self):
        """Cancel jobs that have not started and wait for running ones."""
        self._thread_pool.shutdown(wait=True, cancel_futures=True)
|
|
@ -1,26 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from contextlib import AbstractAsyncContextManager
|
|
||||||
import tempfile
|
|
||||||
from typing import Iterable, Union
|
|
||||||
from Bio import Entrez
|
|
||||||
from Bio import SeqIO
|
|
||||||
|
|
||||||
from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation
|
|
||||||
|
|
||||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Download a GenBank nucleotide record from NCBI and convert it.

    :param genbank_id: identifier passed to ``Entrez.efetch`` as ``id``.
    :returns: an ``AnnotatedString`` named after ``genbank_id`` holding the
        record's sequence and one ``StringAnnotation`` per record feature.
    """

    def _download_record():
        # Both opening the stream and parsing it read from the network, so
        # the whole download runs on a worker thread.
        with Entrez.efetch(db="nucleotide", id=genbank_id, rettype="gb", retmode="text") as fetch_stream:
            return SeqIO.read(fetch_stream, "genbank")

    record = await asyncio.to_thread(_download_record)
    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # NOTE(review): Biopython feature locations appear to use an
            # already-exclusive end; confirm the +1 matches what
            # StringAnnotation expects.
            end=int(feature.location.end) + 1,  # Position is exclusive
            # Qualifier values are converted to sets without mutating the record.
            feature_properties={
                qualifier_key: set(qualifier_values)
                for qualifier_key, qualifier_values in feature.qualifiers.items()
            }
        )
        for feature in record.features
    ]
    return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
|
@ -1,42 +0,0 @@
|
|||||||
from Bio import SeqIO
|
|
||||||
from Bio.Align import PairwiseAligner
|
|
||||||
from pytest import mark
|
|
||||||
from pytest import fixture
|
|
||||||
from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
|
|
||||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
|
||||||
|
|
||||||
@fixture
def tohamaI_bpertussis_adk():
    """Sequence of the single record in ``tohama_I_bpertussis_adk.fasta``, as a string."""
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis_adk.fasta", format="fasta")
    return str(fasta_record.seq)
|
|
||||||
|
|
||||||
@fixture
def tohamaI_bpertussis_genome():
    """Sequence of the single record in ``tohama_I_bpertussis.fasta``, as a string."""
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", format="fasta")
    return str(fasta_record.seq)
|
|
||||||
|
|
||||||
@fixture
def fdaargos_1560_hinfluenza_adk():
    """Sequence of the single record in ``fdaargos_1560_hinfluenza_adk.fasta``, as a string."""
    fasta_record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza_adk.fasta", format="fasta")
    return str(fasta_record.seq)
|
|
||||||
|
|
||||||
@fixture
def fdaargos_1560_hinfluenza_genome():
    """Sequence of the single record in ``fdaargos_1560_hinfluenza.fasta``, as a string."""
    fasta_record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", format="fasta")
    return str(fasta_record.seq)
|
|
||||||
|
|
||||||
|
|
||||||
@fixture(params=[1, 2])
def dummy_engine(request):
    """Yield an entered alignment engine, once per parametrized thread count.

    The engine wraps a ``"blastn"`` aligner switched to local mode and is
    closed again when the test finishes.
    """
    thread_count = request.param
    local_aligner = PairwiseAligner("blastn")
    local_aligner.mode = "local"
    with AsyncBiopythonPairwiseAlignmentEngine(local_aligner, thread_count) as running_engine:
        yield running_engine
|
|
||||||
|
|
||||||
class TestAsyncPairwiseAlignmentEngine:
    """Smoke tests for submitting alignments and consuming them asynchronously.

    Each test counts the results it receives so that an iterator which ends
    without yielding anything fails the test instead of passing vacuously.
    """

    async def test_single_alignment_no_errors_single_alignment(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        completed = 0
        async for alignment, _ in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        # One job submitted, so exactly one result must come back.
        assert completed == 1

    async def test_single_alignment_no_errors_multiple(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk, fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        dummy_engine.align(fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk)
        completed = 0
        async for alignment, _ in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        # Two jobs submitted, so exactly two results must come back.
        assert completed == 2
|
|
@ -61,12 +61,12 @@ hinfluenzae_fdaargos_profile = MLSTProfile((
|
|||||||
), "3", "ST-3 complex")
|
), "3", "ST-3 complex")
|
||||||
|
|
||||||
hinfluenzae_fdaargos_bad_profile = MLSTProfile((
|
hinfluenzae_fdaargos_bad_profile = MLSTProfile((
|
||||||
Allele("adk", "1", None),
|
Allele("adk", "3", None),
|
||||||
Allele("atpG", "1", None),
|
Allele("atpG", "121", None),
|
||||||
Allele("frdB", "1", None),
|
Allele("frdB", "6", None),
|
||||||
Allele("fucK", "1", None),
|
Allele("fucK", "5", None),
|
||||||
Allele("mdh", "1", None),
|
Allele("mdh", "12", None),
|
||||||
Allele("pgi", "1", None),
|
Allele("pgi", "4", None),
|
||||||
Allele("recA", "5", None)
|
Allele("recA", "5", None)
|
||||||
), "3", "ST-3 complex")
|
), "3", "ST-3 complex")
|
||||||
|
|
||||||
@ -76,7 +76,7 @@ hinfluenzae_fdaargos_fragmented_sequence = tuple(SeqIO.parse("tests/resources/to
|
|||||||
|
|
||||||
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||||
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||||
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_hinfluenzae_seqdef", 1, "fdaargos_1560_hinfluenza.fasta", "fdaargos_1560_hinfluenza_features.fasta", hinfluenzae_fdaargos_profile, hinfluenzae_fdaargos_bad_profile),
|
(False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "fdaargos_1560_hinfluenza.fasta", "fdaargos_1560_hinfluenza_features.fasta", hinfluenzae_fdaargos_profile, hinfluenzae_fdaargos_bad_profile),
|
||||||
])
|
])
|
||||||
class TestBIGSdbMLSTProfiler:
|
class TestBIGSdbMLSTProfiler:
|
||||||
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user