Compare commits

...

7 Commits

Author SHA1 Message Date
11f97e89d5 Added setuptools-scm
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 23:21:57 +00:00
f3d152b5fa CLI CSV now outputs original FASTA record ID
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 23:09:02 +00:00
f5608b33f9 Removed generated file
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 22:51:38 +00:00
18b003e0e2 No more error when output folder already exists
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 22:49:00 +00:00
afbd883e3e Updated credentials ID
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 21:30:02 +00:00
cb8adac970 Added twine to requirements
Some checks failed
NSBDiagnosisToolkit/pipeline/head There was a failure building this commit
2025-01-03 20:54:49 +00:00
fe1112ed91 Added a README.md 2025-01-03 20:52:57 +00:00
10 changed files with 37 additions and 23 deletions

2
.gitignore vendored
View File

@@ -356,4 +356,4 @@ package
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option) # Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
src/output.csv output

2
.vscode/launch.json vendored
View File

@@ -6,7 +6,7 @@
"configurations": [ "configurations": [
{ {
"name": "Python Debugger: Current File with Arguments", "name": "CLI ipdbmlst",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py", "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",

2
Jenkinsfile vendored
View File

@@ -37,7 +37,7 @@ pipeline {
} }
stage("publish") { stage("publish") {
environment { environment {
CREDS = credentials('git.reslate.systems/ydeng') CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
} }
when { when {
branch '**/main' branch '**/main'

13
README.md Normal file
View File

@@ -0,0 +1,13 @@
# NSBDiagnosis Toolkit
A software suite automating the diagnostic steps for Sanger trace files (ABIFs) and FASTAs. Namely, this tool is (or will be) capable of:
- [ ] Improved consensus conflict resolution via a peak strength metric
- [ ] Automatic reporting on variations from a reference file
- [x] Automatic annotation based on pre-existing GenBank data from NCBI
- [x] Automatic pulling GenBank Data
- [x] Automatic querying of Institut Pasteur's MLST databases
- [x] Automatic export of spreadsheet file in the form of a CSV
- [x] User-friendly, automatable (Galaxy-compatible) command-line interface
- [ ] Interactive Web UI

View File

@@ -1,2 +0,0 @@
st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']
1 st clonal-complex fumC pepA icd adk pgm tyrB glyA
2 1 ST-2 complex ['1'] ['1'] ['1'] ['1'] ['1'] ['1'] ['1']

View File

@@ -1,5 +1,5 @@
[build-system] [build-system]
requires = ["setuptools >= 61.0"] requires = ["setuptools>=64", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]

View File

@@ -3,3 +3,4 @@ biopython
pytest pytest
pytest-asyncio pytest-asyncio
build build
twine

View File

@@ -7,17 +7,15 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]: async def read_all_fastas(fastas: Iterable[str]) -> AsyncGenerator[NamedString, Any]:
for fasta_path in fastas: for fasta_path in fastas:
async for fasta in read_fasta(fasta_path): async for fasta in read_fasta(fasta_path):
yield fasta.sequence yield fasta
for abif_path in abifs:
abif_data = await read_abif(abif_path)
yield "".join(abif_data.sequence)
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
async def profile_all_genetic_strings(strings: AsyncIterable[NamedString], database_name: str) -> Sequence[tuple[str, MLSTProfile]]:
profiles = list() profiles = list()
async with InstitutPasteurProfiler(database_name=database_name) as profiler: async with InstitutPasteurProfiler(database_name=database_name) as profiler:
async for string in strings: async for named_string in strings:
profiles.append(await profiler.profile_string(string)) profiles.append((named_string.name, await profiler.profile_string(named_string.sequence)))
return profiles return profiles

View File

@@ -57,13 +57,16 @@ parser.add_argument(
def cli(): def cli():
args = parser.parse_args() args = parser.parse_args()
gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs) gen_strings = aggregator.read_all_fastas(args.fastas)
os.makedirs(args.out) os.makedirs(args.out, exist_ok=True)
if args.institut_pasteur_db is not None: if args.institut_pasteur_db is not None:
mlst_profiles = aggregator.profile_all_genetic_strings( mlst_profiles = aggregator.profile_all_genetic_strings(
gen_strings, args.institut_pasteur_db) gen_strings, args.institut_pasteur_db)
asyncio.run(write_mlst_profiles_as_csv( asyncio.run(write_mlst_profiles_as_csv(
asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv")))) asyncio.run(mlst_profiles),
str(path.join(args.out, "MLST_" + args.run_name + ".csv")
)
))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,7 +1,7 @@
import csv import csv
from io import TextIOWrapper from io import TextIOWrapper
from os import PathLike from os import PathLike
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union from typing import AsyncIterable, Iterable, Mapping, Sequence, Tuple, Union
from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile
@@ -15,16 +15,17 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
return result_dict return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]): async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
mlst_profiles = list(mlst_profiles_iterable) mlst_profiles = list(mlst_profiles_iterable)
header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()] header = ["name", "st", "clonal-complex", *mlst_profiles[0][1].alleles.keys()]
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)
writer.writeheader() writer.writeheader()
for mlst_profile in mlst_profiles: for name, mlst_profile in mlst_profiles:
row_dictionary = { row_dictionary = {
"st": mlst_profile.sequence_type, "st": mlst_profile.sequence_type,
"clonal-complex": mlst_profile.clonal_complex, "clonal-complex": mlst_profile.clonal_complex,
"name": name,
**loci_alleles_variants_from_loci(mlst_profile.alleles) **loci_alleles_variants_from_loci(mlst_profile.alleles)
} }