Removed features that are being worked on in a separate branch
This commit is contained in:
parent
f76bf86ef6
commit
09a693b696
@ -1,70 +0,0 @@
|
||||
import asyncio
|
||||
from concurrent.futures import Future, ThreadPoolExecutor
|
||||
from contextlib import AbstractContextManager
|
||||
from typing import Any, Set, Union
|
||||
from Bio.Align import PairwiseAligner
|
||||
from queue import Queue
|
||||
|
||||
from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment
|
||||
|
||||
class AsyncBiopythonPairwiseAlignmentEngine(AbstractContextManager):
    """Run pairwise alignments on a thread pool and consume results asynchronously.

    Use the engine as a context manager: entering creates the thread pool and
    exiting shuts it down. Alignments are queued with :meth:`align` and the
    results are retrieved in completion order, either one at a time with
    :meth:`next_completed` or with ``async for`` over the engine itself.

    ``align`` and the result-consuming coroutines are expected to be called
    from the same thread (the one running the event loop); only the alignment
    work itself runs on pool threads.
    """

    def __init__(self, aligner: "PairwiseAligner", max_threads: int = 4):
        """Store the aligner and the pool size; the pool is created on ``__enter__``.

        Args:
            aligner: Aligner whose ``align(reference, query)`` is called for
                every submitted pair.
            max_threads: Maximum number of worker threads in the pool.
        """
        self._max_threads = max_threads
        self._aligner = aligner
        # Futures that were submitted but whose done-callback has not finished.
        self._work_left: Set[Future] = set()
        # Finished futures waiting to be handed to the consumer.
        self._work_complete: Queue[Future] = Queue()

    def __enter__(self):
        """Create the worker thread pool and return the engine."""
        self._thread_pool = ThreadPoolExecutor(
            self._max_threads, thread_name_prefix="async-pairwise-alignment")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the thread pool down; exceptions are not suppressed."""
        self.shutdown()

    def align(self, reference: str, query: str, **associated_data):
        """Queue one alignment of ``query`` against ``reference``.

        Args:
            reference: Target sequence.
            query: Sequence to align against the reference.
            **associated_data: Arbitrary keyword data returned unchanged
                alongside the alignment result.
        """
        work = self._thread_pool.submit(
            self.work, reference, query, **associated_data)
        # Track the future BEFORE registering the callback: if the work has
        # already finished, add_done_callback runs the callback immediately,
        # and the future must already be in the pending set at that point.
        self._work_left.add(work)
        work.add_done_callback(self._on_complete)

    def _on_complete(self, future: Future):
        """Move a finished future from the pending set to the result queue."""
        # Publish first, then un-track, so the future is always visible in at
        # least one of the two containers until it has been consumed.
        self._work_complete.put(future)
        self._work_left.discard(future)

    def work(self, reference, query, **associated_data):
        """Align one pair and summarise the first alignment returned.

        Returns:
            A ``(PairwiseAlignment, associated_data)`` tuple, where
            ``associated_data`` is the dict of keyword arguments passed in.
        """
        alignments = self._aligner.align(reference, query)
        top_alignment = alignments[0]
        top_alignment_stats = top_alignment.counts()
        top_alignment_gaps = top_alignment_stats.gaps
        top_alignment_identities = top_alignment_stats.identities
        top_alignment_mismatches = top_alignment_stats.mismatches
        top_alignment_score = top_alignment.score  # type: ignore
        return PairwiseAlignment(
            top_alignment.sequences[0],
            top_alignment.sequences[1],
            tuple(top_alignment.indices[0]),
            tuple(top_alignment.indices[1]),
            AlignmentStats(
                percent_identity=top_alignment_identities / top_alignment.length,
                mismatches=top_alignment_mismatches,
                gaps=top_alignment_gaps,
                match_metric=top_alignment_score
            )), associated_data

    async def next_completed(self) -> Union[tuple["PairwiseAlignment", dict[str, Any]], None]:
        """Return the next finished result, or ``None`` when nothing is left.

        Waits (without blocking the event loop) while submitted work is still
        running. An exception raised by the alignment work is re-raised here.
        """
        # Check the pending set first: once it is observed empty, every
        # callback has already published its future, so the queue check that
        # follows cannot miss a result.
        if not self._work_left and self._work_complete.empty():
            return None
        # Queue.get blocks, so wait for it on a helper thread instead of
        # stalling the event loop.
        finished = await asyncio.to_thread(self._work_complete.get)
        return await asyncio.wrap_future(finished)

    def __aiter__(self):
        """Return the engine itself as the asynchronous iterator."""
        return self

    async def __anext__(self):
        """Yield the next finished result; stop once all work is consumed."""
        result = await self.next_completed()
        if result is None:
            raise StopAsyncIteration
        return result

    def shutdown(self):
        """Cancel work that has not started and wait for running work to end."""
        self._thread_pool.shutdown(wait=True, cancel_futures=True)
|
@ -1,26 +0,0 @@
|
||||
import asyncio
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
import tempfile
|
||||
from typing import Iterable, Union
|
||||
from Bio import Entrez
|
||||
from Bio import SeqIO
|
||||
|
||||
from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation
|
||||
|
||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Fetch a GenBank record from NCBI and convert it to an AnnotatedString.

    Args:
        genbank_id: Identifier passed to Entrez ``efetch`` as ``id`` (queried
            against the ``nucleotide`` database) and reused as the result name.

    Returns:
        An AnnotatedString holding the record's sequence as ``str`` and one
        StringAnnotation per feature of the record.
    """
    # Entrez.efetch is a blocking call, so it is issued from a worker thread.
    handle = await asyncio.to_thread(
        Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")
    with handle as fetch_stream:
        record = SeqIO.read(fetch_stream, "genbank")

    annotations = []
    for feature in record.features:
        properties = feature.qualifiers
        # Replace each qualifier's values with a set, in place on the mapping.
        for key, values in list(properties.items()):
            properties[key] = set(values)
        feature_start = int(feature.location.start)
        feature_end = int(feature.location.end)
        # NOTE(review): Biopython location ends already look exclusive, so the
        # +1 may overshoot by one -- confirm against what StringAnnotation expects.
        annotation = StringAnnotation(
            type=feature.type,
            start=feature_start,
            end=feature_end + 1,
            feature_properties=properties
        )
        annotations.append(annotation)
    return AnnotatedString(
        name=genbank_id, sequence=str(record.seq), annotations=annotations)
|
@ -1,42 +0,0 @@
|
||||
from Bio import SeqIO
|
||||
from Bio.Align import PairwiseAligner
|
||||
from pytest import mark
|
||||
from pytest import fixture
|
||||
from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine
|
||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||
|
||||
@fixture
def tohamaI_bpertussis_adk():
    """Return the sequence of the single FASTA record in the resource file as ``str``."""
    record = SeqIO.read("tests/resources/tohama_I_bpertussis_adk.fasta", format="fasta")
    return str(record.seq)
|
||||
|
||||
@fixture
def tohamaI_bpertussis_genome():
    """Return the sequence of the single FASTA record in the resource file as ``str``."""
    record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", format="fasta")
    return str(record.seq)
|
||||
|
||||
@fixture
def fdaargos_1560_hinfluenza_adk():
    """Return the sequence of the single FASTA record in the resource file as ``str``."""
    record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza_adk.fasta", format="fasta")
    return str(record.seq)
|
||||
|
||||
@fixture
def fdaargos_1560_hinfluenza_genome():
    """Return the sequence of the single FASTA record in the resource file as ``str``."""
    record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", format="fasta")
    return str(record.seq)
|
||||
|
||||
|
||||
@fixture(params=[1, 2])
def dummy_engine(request):
    """Yield an entered alignment engine, once for each parametrized thread count.

    The aligner is built from the "blastn" preset and switched to local mode;
    the engine's context is exited when the test finishes.
    """
    local_aligner = PairwiseAligner("blastn")
    local_aligner.mode = "local"
    engine_context = AsyncBiopythonPairwiseAlignmentEngine(local_aligner, request.param)
    with engine_context as engine:
        yield engine
|
||||
|
||||
class TestAsyncPairwiseAlignmentEngine:
    """Smoke tests for queuing alignments and iterating over the results.

    NOTE(review): the test coroutines carry no asyncio marker; this presumably
    relies on the async pytest plugin being configured to run unmarked
    coroutines -- confirm against the project's pytest configuration.
    """

    async def test_single_alignment_no_errors_single_alignment(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        """One queued alignment yields exactly one PairwiseAlignment."""
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        completed = 0
        async for alignment, additional_information in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        # Without this check the test passes even if nothing is ever yielded.
        assert completed == 1

    async def test_single_alignment_no_errors_multiple(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk, fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        """Two queued alignments yield exactly two PairwiseAlignment results."""
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        dummy_engine.align(fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk)
        completed = 0
        async for alignment, additional_information in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        # Without this check the test passes even if nothing is ever yielded.
        assert completed == 2
|
Loading…
x
Reference in New Issue
Block a user