Compare commits

...

4 Commits

- 11f97e89d5  Added setuptools-scm  (2025-01-03 23:21:57 +00:00)
- f3d152b5fa  CLI CSV now outputs original FASTA record ID  (2025-01-03 23:09:02 +00:00)
- f5608b33f9  Removed generated file  (2025-01-03 22:51:38 +00:00)
- 18b003e0e2  No more error when output folder already exists  (2025-01-03 22:49:00 +00:00)

All checks were successful on every commit (NSBDiagnosisToolkit/pipeline/head: This commit looks good).
7 changed files with 21 additions and 21 deletions

.gitignore

@@ -356,4 +356,4 @@ package
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
-src/output.csv
+output

.vscode/launch.json

@@ -6,7 +6,7 @@
     "configurations": [
         {
-            "name": "Python Debugger: Current File with Arguments",
+            "name": "CLI ipdbmlst",
             "type": "debugpy",
             "request": "launch",
             "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",

Deleted generated CSV file

@@ -1,2 +0,0 @@
-st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
-1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']

pyproject.toml

@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools >= 61.0"]
+requires = ["setuptools>=64", "setuptools-scm>=8"]
 build-backend = "setuptools.build_meta"

 [project]
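The [build-system] switch only makes setuptools-scm available at build time; for it to actually derive the package version from Git tags, pyproject.toml normally also lists "version" under dynamic and adds a [tool.setuptools_scm] table, neither of which is visible in this diff. A minimal sketch of reading that derived version at runtime, assuming the distribution is installed under the name nsbdiagnosistoolkit:

    # Hedged sketch: the distribution name "nsbdiagnosistoolkit" and the dynamic-version
    # setup in pyproject.toml are assumptions; only the setuptools-scm requirement is
    # shown in the diff above.
    from importlib.metadata import PackageNotFoundError, version

    try:
        __version__ = version("nsbdiagnosistoolkit")
    except PackageNotFoundError:
        # Running from a source checkout that has not been installed yet.
        __version__ = "0.0.0+unknown"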

Aggregator module (FASTA reading and MLST profiling)

@@ -7,17 +7,15 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
 from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler

-async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]:
+async def read_all_fastas(fastas: Iterable[str]) -> AsyncGenerator[NamedString, Any]:
     for fasta_path in fastas:
         async for fasta in read_fasta(fasta_path):
-            yield fasta.sequence
-    for abif_path in abifs:
-        abif_data = await read_abif(abif_path)
-        yield "".join(abif_data.sequence)
+            yield fasta

-async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
+async def profile_all_genetic_strings(strings: AsyncIterable[NamedString], database_name: str) -> Sequence[tuple[str, MLSTProfile]]:
     profiles = list()
     async with InstitutPasteurProfiler(database_name=database_name) as profiler:
-        async for string in strings:
-            profiles.append(await profiler.profile_string(string))
+        async for named_string in strings:
+            profiles.append((named_string.name, await profiler.profile_string(named_string.sequence)))
     return profiles
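Taken together, these two functions now carry whole FASTA records through the pipeline instead of bare sequence strings, which is what lets the CSV keep the original record ID. A rough usage sketch; the aggregator import path, input file name, and database name are illustrative assumptions, and NamedString is only assumed to expose .name and .sequence as the diff implies:

    import asyncio

    from nsbdiagnosistoolkit.cli import aggregator  # assumed module path

    async def demo() -> None:
        # read_all_fastas yields NamedString records, so each profile can be
        # paired with the record it came from.
        named_strings = aggregator.read_all_fastas(["example.fasta"])  # hypothetical input file
        results = await aggregator.profile_all_genetic_strings(
            named_strings, "pubmlst_example_db")  # hypothetical database name
        for name, profile in results:
            print(name, profile.sequence_type, profile.clonal_complex)

    asyncio.run(demo())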

CLI entry point

@@ -57,14 +57,17 @@ parser.add_argument(
 def cli():
     args = parser.parse_args()
-    gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs)
-    os.makedirs(args.out)
+    gen_strings = aggregator.read_all_fastas(args.fastas)
+    os.makedirs(args.out, exist_ok=True)
     if args.institut_pasteur_db is not None:
         mlst_profiles = aggregator.profile_all_genetic_strings(
             gen_strings, args.institut_pasteur_db)
         asyncio.run(write_mlst_profiles_as_csv(
-            asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv"))))
+            asyncio.run(mlst_profiles),
+            str(path.join(args.out, "MLST_" + args.run_name + ".csv")
+        )
+        ))

 if __name__ == "__main__":
-    cli()
+    cli()
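The exist_ok=True flag is what makes re-running the CLI into an existing output folder safe; a standalone illustration of the behaviour being fixed:

    import os
    import tempfile

    out = os.path.join(tempfile.mkdtemp(), "results")
    os.makedirs(out, exist_ok=True)  # first run: creates the folder
    os.makedirs(out, exist_ok=True)  # second run: silently succeeds
    # With the old call, os.makedirs(out), the second run would raise FileExistsError.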

MLST CSV writer module

@@ -1,7 +1,7 @@
 import csv
 from io import TextIOWrapper
 from os import PathLike
-from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
+from typing import AsyncIterable, Iterable, Mapping, Sequence, Tuple, Union

 from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile

@@ -15,16 +15,17 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
     return result_dict

-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
     mlst_profiles = list(mlst_profiles_iterable)
-    header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()]
+    header = ["name", "st", "clonal-complex", *mlst_profiles[0][1].alleles.keys()]
     with open(handle, "w", newline='') as filehandle:
         writer = csv.DictWriter(filehandle, fieldnames=header)
         writer.writeheader()
-        for mlst_profile in mlst_profiles:
+        for name, mlst_profile in mlst_profiles:
             row_dictionary = {
                 "st": mlst_profile.sequence_type,
                 "clonal-complex": mlst_profile.clonal_complex,
+                "name": name,
                 **loci_alleles_variants_from_loci(mlst_profile.alleles)
             }
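With the extra column, each CSV row is now keyed by the original FASTA record ID. A small sketch of the resulting layout, using the locus names from the deleted generated file above; the record name "contig_1" and the plain allele values are made up for illustration:

    import csv
    import io

    header = ["name", "st", "clonal-complex", "fumC", "pepA", "icd", "adk", "pgm", "tyrB", "glyA"]
    row = {
        "name": "contig_1",  # hypothetical FASTA record ID
        "st": "1",
        "clonal-complex": "ST-2 complex",
        "fumC": "1", "pepA": "1", "icd": "1", "adk": "1", "pgm": "1", "tyrB": "1", "glyA": "1",
    }

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=header)
    writer.writeheader()
    writer.writerow(row)
    print(buffer.getvalue())
    # name,st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
    # contig_1,1,ST-2 complex,1,1,1,1,1,1,1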