Harrison Deng f3d152b5fa
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
CLI CSV now outputs original FASTA record ID
2025-01-03 23:09:02 +00:00

21 lines
1.0 KiB
Python

from os import path
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence
from nsbdiagnosistoolkit.engine.data.MLST import MLSTProfile
from nsbdiagnosistoolkit.engine.data.genomics import NamedString
from nsbdiagnosistoolkit.engine.local.abif import read_abif
from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
async def read_all_fastas(fastas: Iterable[str]) -> AsyncGenerator[NamedString, Any]:
    """Yield the records of several FASTA files as one continuous stream.

    Each path in ``fastas`` is handed to ``read_fasta`` in turn, and every
    item that reader produces is re-yielded unchanged. A file is only
    opened once the previous one has been fully consumed, so records come
    out grouped by file, in the order the paths were given.

    Args:
        fastas: Paths of the FASTA files to read.

    Yields:
        Each record produced by ``read_fasta``, in file order.
    """
    for location in fastas:
        # One reader per file; it is exhausted before moving to the next path.
        records = read_fasta(location)
        async for record in records:
            yield record
async def profile_all_genetic_strings(strings: AsyncIterable[NamedString], database_name: str) -> Sequence[tuple[str, MLSTProfile]]:
    """Profile every named sequence and collect the results by name.

    A single ``InstitutPasteurProfiler`` is opened for ``database_name`` and
    reused for all inputs. Sequences are profiled one after another: each
    ``profile_string`` call is awaited before the next item is pulled from
    ``strings``.

    Args:
        strings: Asynchronous stream of named sequences to profile.
        database_name: Name of the database passed to the profiler.

    Returns:
        ``(name, profile)`` pairs, in the order the inputs were received.
    """
    profiles: list[tuple[str, MLSTProfile]] = []
    async with InstitutPasteurProfiler(database_name=database_name) as profiler:
        async for entry in strings:
            label = entry.name
            profile = await profiler.profile_string(entry.sequence)
            profiles.append((label, profile))
    # Returned after the profiler context has been exited.
    return profiles