Compare commits

...

7 Commits

Author SHA1 Message Date
11f97e89d5 Added setuptools-scm
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 23:21:57 +00:00
f3d152b5fa CLI CSV now outputs original FASTA record ID
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 23:09:02 +00:00
f5608b33f9 Removed generated file
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 22:51:38 +00:00
18b003e0e2 No more error when output folder already exists
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 22:49:00 +00:00
afbd883e3e Updated credentials ID
All checks were successful
NSBDiagnosisToolkit/pipeline/head This commit looks good
2025-01-03 21:30:02 +00:00
cb8adac970 Added twine to requirements
Some checks failed
NSBDiagnosisToolkit/pipeline/head There was a failure building this commit
2025-01-03 20:54:49 +00:00
fe1112ed91 Added a README.md 2025-01-03 20:52:57 +00:00
10 changed files with 37 additions and 23 deletions

2
.gitignore vendored
View File

@@ -356,4 +356,4 @@ package
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option) # Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
src/output.csv output

2
.vscode/launch.json vendored
View File

@@ -6,7 +6,7 @@
"configurations": [ "configurations": [
{ {
"name": "Python Debugger: Current File with Arguments", "name": "CLI ipdbmlst",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py", "program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",

2
Jenkinsfile vendored
View File

@@ -37,7 +37,7 @@ pipeline {
} }
stage("publish") { stage("publish") {
environment { environment {
CREDS = credentials('git.reslate.systems/ydeng') CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
} }
when { when {
branch '**/main' branch '**/main'

13
README.md Normal file
View File

@@ -0,0 +1,13 @@
# NSBDiagnosis Toolkit
A software suite automating the diagnostic steps for Sanger trace files (ABIFs) and FASTAs. Namely, this tool is (or will be) capable of:
- [ ] Improved consensus conflict resolution via a peak strength metric
- [ ] Automatic reporting on variations from a reference file
- [x] Automatic annotation based on pre-existing GenBank data from NCBI
- [x] Automatic pulling GenBank Data
- [x] Automatic querying of Institut Pasteur's MLST databases
- [x] Automatic export of spreadsheet file in the form of a CSV
- [x] User-friendly, automatable (Galaxy-compatible) command-line interface
- [ ] Interactive Web UI

View File

@@ -1,2 +0,0 @@
st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']
1 st clonal-complex fumC pepA icd adk pgm tyrB glyA
2 1 ST-2 complex ['1'] ['1'] ['1'] ['1'] ['1'] ['1'] ['1']

View File

@@ -1,5 +1,5 @@
[build-system] [build-system]
requires = ["setuptools >= 61.0"] requires = ["setuptools>=64", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]

View File

@@ -3,3 +3,4 @@ biopython
pytest pytest
pytest-asyncio pytest-asyncio
build build
twine

View File

@@ -7,17 +7,15 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]: async def read_all_fastas(fastas: Iterable[str]) -> AsyncGenerator[NamedString, Any]:
for fasta_path in fastas: for fasta_path in fastas:
async for fasta in read_fasta(fasta_path): async for fasta in read_fasta(fasta_path):
yield fasta.sequence yield fasta
for abif_path in abifs:
abif_data = await read_abif(abif_path)
yield "".join(abif_data.sequence)
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
async def profile_all_genetic_strings(strings: AsyncIterable[NamedString], database_name: str) -> Sequence[tuple[str, MLSTProfile]]:
profiles = list() profiles = list()
async with InstitutPasteurProfiler(database_name=database_name) as profiler: async with InstitutPasteurProfiler(database_name=database_name) as profiler:
async for string in strings: async for named_string in strings:
profiles.append(await profiler.profile_string(string)) profiles.append((named_string.name, await profiler.profile_string(named_string.sequence)))
return profiles return profiles

View File

@@ -57,13 +57,16 @@ parser.add_argument(
def cli(): def cli():
args = parser.parse_args() args = parser.parse_args()
gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs) gen_strings = aggregator.read_all_fastas(args.fastas)
os.makedirs(args.out) os.makedirs(args.out, exist_ok=True)
if args.institut_pasteur_db is not None: if args.institut_pasteur_db is not None:
mlst_profiles = aggregator.profile_all_genetic_strings( mlst_profiles = aggregator.profile_all_genetic_strings(
gen_strings, args.institut_pasteur_db) gen_strings, args.institut_pasteur_db)
asyncio.run(write_mlst_profiles_as_csv( asyncio.run(write_mlst_profiles_as_csv(
asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv")))) asyncio.run(mlst_profiles),
str(path.join(args.out, "MLST_" + args.run_name + ".csv")
)
))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,7 +1,7 @@
import csv import csv
from io import TextIOWrapper from io import TextIOWrapper
from os import PathLike from os import PathLike
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union from typing import AsyncIterable, Iterable, Mapping, Sequence, Tuple, Union
from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile
@@ -15,16 +15,17 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
return result_dict return result_dict
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]): async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
mlst_profiles = list(mlst_profiles_iterable) mlst_profiles = list(mlst_profiles_iterable)
header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()] header = ["name", "st", "clonal-complex", *mlst_profiles[0][1].alleles.keys()]
with open(handle, "w", newline='') as filehandle: with open(handle, "w", newline='') as filehandle:
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)
writer.writeheader() writer.writeheader()
for mlst_profile in mlst_profiles: for name, mlst_profile in mlst_profiles:
row_dictionary = { row_dictionary = {
"st": mlst_profile.sequence_type, "st": mlst_profile.sequence_type,
"clonal-complex": mlst_profile.clonal_complex, "clonal-complex": mlst_profile.clonal_complex,
"name": name,
**loci_alleles_variants_from_loci(mlst_profile.alleles) **loci_alleles_variants_from_loci(mlst_profile.alleles)
} }