From d0318536b21ae7e73fbdd8baf96c3c7ac660657b Mon Sep 17 00:00:00 2001 From: Harrison Deng Date: Fri, 14 Feb 2025 14:35:53 +0000 Subject: [PATCH] Changed FASTA reading to group based on file for merging partial targets --- src/autobigs/engine/reading.py | 11 ++++++----- tests/autobigs/engine/test_reading.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/autobigs/engine/reading.py b/src/autobigs/engine/reading.py index 606cc74..6618427 100644 --- a/src/autobigs/engine/reading.py +++ b/src/autobigs/engine/reading.py @@ -5,12 +5,13 @@ from Bio import SeqIO from autobigs.engine.structures.genomics import NamedString -async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]: +async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]: fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta") + results = [] for fasta_sequence in await fasta_sequences: - yield NamedString(fasta_sequence.id, str(fasta_sequence.seq)) + results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq))) + return results -async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]: +async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]: for handle in handles: - async for named_seq in read_fasta(handle): - yield named_seq \ No newline at end of file + yield await read_fasta(handle) \ No newline at end of file diff --git a/tests/autobigs/engine/test_reading.py b/tests/autobigs/engine/test_reading.py index dc7e51c..0e9eed8 100644 --- a/tests/autobigs/engine/test_reading.py +++ b/tests/autobigs/engine/test_reading.py @@ -2,6 +2,6 @@ from autobigs.engine.reading import read_fasta async def test_fasta_reader_not_none(): - named_strings = read_fasta("tests/resources/tohama_I_bpertussis.fasta") - async for named_string in named_strings: + named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta") + for named_string in named_strings: assert named_string.name == "BX470248.1"