4 Commits

Author SHA1 Message Date
5a03c7e8d8 Multiple string profiling now respects grouped queries (for non-WGS)
All checks were successful
automlst.engine/pipeline/head This commit looks good
2025-02-18 15:34:18 +00:00
ddf9cde175 Added a license text to pyproject.toml 2025-02-14 20:47:06 +00:00
2e8cdd8da9 Updated URL links
All checks were successful
automlst.engine/pipeline/head This commit looks good
autoBIGS.engine/pipeline/tag This commit looks good
2025-02-14 20:37:13 +00:00
d0318536b2 Changed FASTA reading to group based on file for merging partial targets 2025-02-14 14:35:53 +00:00
4 changed files with 22 additions and 16 deletions

View File

@@ -13,11 +13,12 @@ dependencies = [
] ]
requires-python = ">=3.12" requires-python = ">=3.12"
description = "A library to rapidly fetch fetch MLST profiles given sequences for various diseases." description = "A library to rapidly fetch fetch MLST profiles given sequences for various diseases."
license = {text = "GPL-3.0-or-later"}
[project.urls] [project.urls]
Homepage = "https://github.com/RealYHD/autoBIGS.engine" Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
Source = "https://github.com/RealYHD/autoBIGS.engine" Source = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
Issues = "https://github.com/RealYHD/autoBIGS.engine/issues" Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine/issues"
[tool.setuptools_scm] [tool.setuptools_scm]

View File

@@ -124,13 +124,17 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]: async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
async for named_strings in query_named_string_groups: async for named_strings in query_named_string_groups:
names: list[str] = list()
sequences: list[str] = list()
for named_string in named_strings: for named_string in named_strings:
names.append(named_string.name)
sequences.append(named_string.sequence)
try: try:
yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence]))) yield NamedMLSTProfile("-".join(names), (await self.profile_string(sequences)))
except NoBIGSdbMatchesException as e: except NoBIGSdbMatchesException as e:
if stop_on_fail: if stop_on_fail:
raise e raise e
yield NamedMLSTProfile(named_string.name, None) yield NamedMLSTProfile("-".join(names), None)
async def close(self): async def close(self):
await self._http_client.close() await self._http_client.close()

View File

@@ -5,12 +5,13 @@ from Bio import SeqIO
from autobigs.engine.structures.genomics import NamedString from autobigs.engine.structures.genomics import NamedString
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]: async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]:
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta") fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
results = []
for fasta_sequence in await fasta_sequences: for fasta_sequence in await fasta_sequences:
yield NamedString(fasta_sequence.id, str(fasta_sequence.seq)) results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq)))
return results
async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]: async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
for handle in handles: for handle in handles:
async for named_seq in read_fasta(handle): yield await read_fasta(handle)
yield named_seq

View File

@@ -2,6 +2,6 @@ from autobigs.engine.reading import read_fasta
async def test_fasta_reader_not_none(): async def test_fasta_reader_not_none():
named_strings = read_fasta("tests/resources/tohama_I_bpertussis.fasta") named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
async for named_string in named_strings: for named_string in named_strings:
assert named_string.name == "BX470248.1" assert named_string.name == "BX470248.1"