Merge branch 'features/improved-oop-architecture' into features/non-exact-notation
This commit is contained in:
		| @@ -1,70 +0,0 @@ | |||||||
| import asyncio |  | ||||||
| from concurrent.futures import Future, ThreadPoolExecutor |  | ||||||
| from contextlib import AbstractContextManager |  | ||||||
| from typing import Any, Set, Union |  | ||||||
| from Bio.Align import PairwiseAligner |  | ||||||
| from queue import Queue |  | ||||||
|  |  | ||||||
| from autobigs.engine.structures.alignment import AlignmentStats, PairwiseAlignment |  | ||||||
|  |  | ||||||
class AsyncBiopythonPairwiseAlignmentEngine(AbstractContextManager):
    """Run pairwise alignments on a thread pool and consume the results asynchronously.

    Usage: enter the engine as a context manager (this creates the thread
    pool), queue jobs with :meth:`align`, then either ``await
    next_completed()`` repeatedly or iterate with ``async for``. Iteration
    ends once every submitted job has been handed back.

    Results are produced by worker threads and are expected to be consumed
    by a single consumer; completion order is not guaranteed to match
    submission order.
    """

    def __init__(self, aligner: "PairwiseAligner", max_threads: int = 4):
        """Store the aligner and pool size; the pool itself is created in ``__enter__``.

        Args:
            aligner: Object whose ``align(reference, query)`` is called for each job.
            max_threads: Maximum number of worker threads in the pool.
        """
        self._max_threads = max_threads
        self._aligner = aligner
        # Futures that were submitted and have not finished yet.
        self._work_left: Set[Future] = set()
        # Finished futures that have not been handed to the consumer yet.
        self._work_complete: Queue[Future] = Queue()

    def __enter__(self):
        """Create the worker thread pool and return the engine itself."""
        self._thread_pool = ThreadPoolExecutor(self._max_threads, thread_name_prefix="async-pairwise-alignment")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the thread pool down; exceptions from the ``with`` body are not suppressed."""
        self.shutdown()

    def align(self, reference: str, query: str, **associated_data):
        """Queue one alignment of ``query`` against ``reference``.

        Args:
            reference: Reference sequence passed to the aligner.
            query: Query sequence passed to the aligner.
            **associated_data: Arbitrary keyword data returned untouched
                alongside the alignment result.
        """
        work = self._thread_pool.submit(
            self.work, reference, query, **associated_data)
        # Track the future BEFORE attaching the callback: if the job has
        # already finished, the callback runs immediately and must find the
        # future in the pending set.
        self._work_left.add(work)
        work.add_done_callback(self._on_complete)

    def _on_complete(self, future: Future):
        """Move a finished future from the pending set to the completed queue."""
        # Enqueue first, then un-track. A consumer that observes an empty
        # pending set is therefore guaranteed to find every finished future
        # already in the queue.
        self._work_complete.put(future)
        self._work_left.discard(future)

    def work(self, reference, query, **associated_data):
        """Align ``query`` to ``reference`` and summarise the first alignment returned.

        Returns:
            A ``(PairwiseAlignment, associated_data)`` tuple, where
            ``associated_data`` is the dict of extra keyword arguments.

        Raises:
            IndexError: If the aligner returns no alignments.
        """
        alignments = self._aligner.align(reference, query)
        top_alignment = alignments[0]
        top_alignment_stats = top_alignment.counts()
        top_alignment_score = top_alignment.score # type: ignore
        return PairwiseAlignment(
            top_alignment.sequences[0],
            top_alignment.sequences[1],
            tuple(top_alignment.indices[0]),
            tuple(top_alignment.indices[1]),
            AlignmentStats(
                percent_identity=top_alignment_stats.identities/top_alignment.length,
                mismatches=top_alignment_stats.mismatches,
                gaps=top_alignment_stats.gaps,
                match_metric=top_alignment_score
            )), associated_data

    async def next_completed(self) -> Union[tuple["PairwiseAlignment", dict[str, Any]], None]:
        """Return the next finished result, or ``None`` when nothing is left.

        Waits (without blocking the event loop) while jobs are still running.
        Any exception raised inside the job is re-raised here.
        """
        # Check the pending set first and the queue second. Because
        # `_on_complete` enqueues before un-tracking, "no pending work" plus
        # "empty queue" really does mean everything has been consumed.
        if not self._work_left and self._work_complete.empty():
            return None
        # Queue.get() blocks, so wait for it on a helper thread rather than
        # stalling the event loop.
        finished = await asyncio.to_thread(self._work_complete.get)
        return await asyncio.wrap_future(finished)

    def __aiter__(self):
        """Return the engine itself as the asynchronous iterator."""
        return self

    async def __anext__(self):
        """Yield the next completed result; stop once all submitted work is consumed."""
        result = await self.next_completed()
        if result is None:
            raise StopAsyncIteration
        return result

    def shutdown(self):
        """Cancel jobs that have not started and wait for running ones to finish."""
        self._thread_pool.shutdown(wait=True, cancel_futures=True)
| @@ -1,26 +0,0 @@ | |||||||
| import asyncio |  | ||||||
| from contextlib import AbstractAsyncContextManager |  | ||||||
| import tempfile |  | ||||||
| from typing import Iterable, Union |  | ||||||
| from Bio import Entrez |  | ||||||
| from Bio import SeqIO |  | ||||||
|  |  | ||||||
| from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation |  | ||||||
|  |  | ||||||
def _read_genbank_record(genbank_id: str):
    """Blocking helper: download one GenBank entry from NCBI and parse it."""
    with Entrez.efetch(db="nucleotide", id=genbank_id, rettype="gb", retmode="text") as fetch_stream:
        return SeqIO.read(fetch_stream, "genbank")


async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Fetch a GenBank nucleotide record and convert it to an ``AnnotatedString``.

    Args:
        genbank_id: Identifier passed to ``Entrez.efetch`` as ``id``; it is
            also used as the name of the returned ``AnnotatedString``.

    Returns:
        An ``AnnotatedString`` holding the record's sequence and one
        ``StringAnnotation`` per feature of the record.
    """
    # Both the request and the parse read from the network stream, so the
    # whole blocking step runs in a worker thread, not just the request.
    record = await asyncio.to_thread(_read_genbank_record, genbank_id)
    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # NOTE(review): the original adds 1 "because the position is
            # exclusive". Biopython documents location.end as already
            # exclusive, so this may overshoot by one; confirm against the
            # StringAnnotation contract before changing.
            end=int(feature.location.end) + 1,
            # Qualifier values become sets; a fresh dict is built so the
            # parsed record itself is left unmodified.
            feature_properties={key: set(values) for key, values in feature.qualifiers.items()},
        )
        for feature in record.features
    ]
    return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
| @@ -1,42 +0,0 @@ | |||||||
| from Bio import SeqIO |  | ||||||
| from Bio.Align import PairwiseAligner |  | ||||||
| from pytest import mark |  | ||||||
| from pytest import fixture |  | ||||||
| from autobigs.engine.analysis.aligners import AsyncBiopythonPairwiseAlignmentEngine |  | ||||||
| from autobigs.engine.structures.alignment import PairwiseAlignment |  | ||||||
|  |  | ||||||
@fixture
def tohamaI_bpertussis_adk():
    """Return the sequence of the single FASTA record in tohama_I_bpertussis_adk.fasta as a string."""
    record = SeqIO.read("tests/resources/tohama_I_bpertussis_adk.fasta", format="fasta")
    return str(record.seq)
|  |  | ||||||
@fixture
def tohamaI_bpertussis_genome():
    """Return the sequence of the single FASTA record in tohama_I_bpertussis.fasta as a string."""
    record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", format="fasta")
    return str(record.seq)
|  |  | ||||||
@fixture
def fdaargos_1560_hinfluenza_adk():
    """Return the sequence of the single FASTA record in fdaargos_1560_hinfluenza_adk.fasta as a string."""
    record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza_adk.fasta", format="fasta")
    return str(record.seq)
|  |  | ||||||
@fixture
def fdaargos_1560_hinfluenza_genome():
    """Return the sequence of the single FASTA record in fdaargos_1560_hinfluenza.fasta as a string."""
    record = SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", format="fasta")
    return str(record.seq)
|  |  | ||||||
|  |  | ||||||
@fixture(params=[1, 2])
def dummy_engine(request):
    """Yield an entered alignment engine, parametrized over 1 and 2 worker threads.

    The engine wraps a ``PairwiseAligner("blastn")`` switched to local mode
    and is exited again once the test using it has finished.
    """
    local_aligner = PairwiseAligner("blastn")
    local_aligner.mode = "local"
    engine_context = AsyncBiopythonPairwiseAlignmentEngine(local_aligner, request.param)
    with engine_context as engine:
        yield engine
|  |  | ||||||
class TestAsyncPairwiseAlignmentEngine:
    """Smoke tests for submitting alignments and iterating over the results.

    Each test counts the yielded results as well as checking their type:
    an ``async for`` that yields nothing would otherwise pass without
    executing a single assertion.
    """

    async def test_single_alignment_no_errors_single_alignment(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        """One submitted alignment must come back exactly once as a PairwiseAlignment."""
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        completed = 0
        async for alignment, additional_information in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        assert completed == 1

    async def test_single_alignment_no_errors_multiple(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk, fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk, dummy_engine: AsyncBiopythonPairwiseAlignmentEngine):
        """Two submitted alignments must both come back as PairwiseAlignment objects."""
        dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
        dummy_engine.align(fdaargos_1560_hinfluenza_genome, fdaargos_1560_hinfluenza_adk)
        completed = 0
        async for alignment, additional_information in dummy_engine:
            assert isinstance(alignment, PairwiseAlignment)
            completed += 1
        assert completed == 2
| @@ -61,12 +61,12 @@ hinfluenzae_fdaargos_profile = MLSTProfile(( | |||||||
|     ), "3", "ST-3 complex") |     ), "3", "ST-3 complex") | ||||||
|  |  | ||||||
| hinfluenzae_fdaargos_bad_profile = MLSTProfile(( | hinfluenzae_fdaargos_bad_profile = MLSTProfile(( | ||||||
|         Allele("adk", "1", None), |         Allele("adk", "3", None), | ||||||
|         Allele("atpG", "1", None), |         Allele("atpG", "121", None), | ||||||
|         Allele("frdB", "1", None), |         Allele("frdB", "6", None), | ||||||
|         Allele("fucK", "1", None), |         Allele("fucK", "5", None), | ||||||
|         Allele("mdh", "1", None), |         Allele("mdh", "12", None), | ||||||
|         Allele("pgi", "1", None), |         Allele("pgi", "4", None), | ||||||
|         Allele("recA", "5", None) |         Allele("recA", "5", None) | ||||||
|     ), "3", "ST-3 complex") |     ), "3", "ST-3 complex") | ||||||
|  |  | ||||||
| @@ -76,7 +76,7 @@ hinfluenzae_fdaargos_fragmented_sequence = tuple(SeqIO.parse("tests/resources/to | |||||||
|  |  | ||||||
| @pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [ | @pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [ | ||||||
|     (False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile), |     (False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile), | ||||||
|     (False, "https://bigsdb.pasteur.fr/api", "pubmlst_hinfluenzae_seqdef", 1, "fdaargos_1560_hinfluenza.fasta", "fdaargos_1560_hinfluenza_features.fasta", hinfluenzae_fdaargos_profile, hinfluenzae_fdaargos_bad_profile), |     (False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "fdaargos_1560_hinfluenza.fasta", "fdaargos_1560_hinfluenza_features.fasta", hinfluenzae_fdaargos_profile, hinfluenzae_fdaargos_bad_profile), | ||||||
| ]) | ]) | ||||||
| class TestBIGSdbMLSTProfiler: | class TestBIGSdbMLSTProfiler: | ||||||
|     async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile): |     async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user