Compare commits
7 Commits
0a37bb5176
...
develop
Author | SHA1 | Date | |
---|---|---|---|
11f97e89d5 | |||
f3d152b5fa | |||
f5608b33f9 | |||
18b003e0e2 | |||
afbd883e3e | |||
cb8adac970 | |||
fe1112ed91 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -356,4 +356,4 @@ package
|
|||||||
|
|
||||||
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
src/output.csv
|
output
|
||||||
|
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -6,7 +6,7 @@
|
|||||||
"configurations": [
|
"configurations": [
|
||||||
|
|
||||||
{
|
{
|
||||||
"name": "Python Debugger: Current File with Arguments",
|
"name": "CLI ipdbmlst",
|
||||||
"type": "debugpy",
|
"type": "debugpy",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",
|
"program": "${workspaceFolder}/src/nsbdiagnosistoolkit/cli/root.py",
|
||||||
|
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@@ -37,7 +37,7 @@ pipeline {
|
|||||||
}
|
}
|
||||||
stage("publish") {
|
stage("publish") {
|
||||||
environment {
|
environment {
|
||||||
CREDS = credentials('git.reslate.systems/ydeng')
|
CREDS = credentials('4d6f64be-d26d-4f95-8de3-b6a9b0beb311')
|
||||||
}
|
}
|
||||||
when {
|
when {
|
||||||
branch '**/main'
|
branch '**/main'
|
||||||
|
13
README.md
Normal file
13
README.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# NSBDiagnosis Toolkit
|
||||||
|
|
||||||
|
A software suite that automates diagnostic steps starting from Sanger trace files (ABIFs) and FASTA files. Specifically, this tool is (or will be) capable of:
|
||||||
|
|
||||||
|
- [ ] Improved consensus conflict resolution via a peak strength metric
|
||||||
|
- [ ] Automatic reporting on variations from a reference file
|
||||||
|
- [x] Automatic annotation based on pre-existing GenBank data from NCBI
|
||||||
|
- [x] Automatic retrieval of GenBank data
|
||||||
|
- [x] Automatic querying of Institut Pasteur's MLST databases
|
||||||
|
- [x] Automatic export of results as a CSV spreadsheet file
|
||||||
|
- [x] User-friendly and automatable (Galaxy-compatible) command-line interface
|
||||||
|
- [ ] Interactive Web UI
|
||||||
|
|
@@ -1,2 +0,0 @@
|
|||||||
st,clonal-complex,fumC,pepA,icd,adk,pgm,tyrB,glyA
|
|
||||||
1,ST-2 complex,['1'],['1'],['1'],['1'],['1'],['1'],['1']
|
|
|
@@ -1,5 +1,5 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools >= 61.0"]
|
requires = ["setuptools>=64", "setuptools-scm>=8"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
|
@@ -2,4 +2,5 @@ aiohttp[speedups]
|
|||||||
biopython
|
biopython
|
||||||
pytest
|
pytest
|
||||||
pytest-asyncio
|
pytest-asyncio
|
||||||
build
|
build
|
||||||
|
twine
|
@@ -7,17 +7,15 @@ from nsbdiagnosistoolkit.engine.local.fasta import read_fasta
|
|||||||
from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
|
from nsbdiagnosistoolkit.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
|
||||||
|
|
||||||
|
|
||||||
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]:
|
async def read_all_fastas(fastas: Iterable[str]) -> AsyncGenerator[NamedString, Any]:
|
||||||
for fasta_path in fastas:
|
for fasta_path in fastas:
|
||||||
async for fasta in read_fasta(fasta_path):
|
async for fasta in read_fasta(fasta_path):
|
||||||
yield fasta.sequence
|
yield fasta
|
||||||
for abif_path in abifs:
|
|
||||||
abif_data = await read_abif(abif_path)
|
|
||||||
yield "".join(abif_data.sequence)
|
|
||||||
|
|
||||||
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
|
|
||||||
|
async def profile_all_genetic_strings(strings: AsyncIterable[NamedString], database_name: str) -> Sequence[tuple[str, MLSTProfile]]:
|
||||||
profiles = list()
|
profiles = list()
|
||||||
async with InstitutPasteurProfiler(database_name=database_name) as profiler:
|
async with InstitutPasteurProfiler(database_name=database_name) as profiler:
|
||||||
async for string in strings:
|
async for named_string in strings:
|
||||||
profiles.append(await profiler.profile_string(string))
|
profiles.append((named_string.name, await profiler.profile_string(named_string.sequence)))
|
||||||
return profiles
|
return profiles
|
@@ -57,14 +57,17 @@ parser.add_argument(
|
|||||||
|
|
||||||
def cli():
|
def cli():
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
gen_strings = aggregator.aggregate_sequences(args.fastas, args.abifs)
|
gen_strings = aggregator.read_all_fastas(args.fastas)
|
||||||
os.makedirs(args.out)
|
os.makedirs(args.out, exist_ok=True)
|
||||||
if args.institut_pasteur_db is not None:
|
if args.institut_pasteur_db is not None:
|
||||||
mlst_profiles = aggregator.profile_all_genetic_strings(
|
mlst_profiles = aggregator.profile_all_genetic_strings(
|
||||||
gen_strings, args.institut_pasteur_db)
|
gen_strings, args.institut_pasteur_db)
|
||||||
asyncio.run(write_mlst_profiles_as_csv(
|
asyncio.run(write_mlst_profiles_as_csv(
|
||||||
asyncio.run(mlst_profiles), str(path.join(args.out, "MLST_" + args.run_name + ".csv"))))
|
asyncio.run(mlst_profiles),
|
||||||
|
str(path.join(args.out, "MLST_" + args.run_name + ".csv")
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli()
|
cli()
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
import csv
|
import csv
|
||||||
from io import TextIOWrapper
|
from io import TextIOWrapper
|
||||||
from os import PathLike
|
from os import PathLike
|
||||||
from typing import AsyncIterable, Iterable, Mapping, Sequence, Union
|
from typing import AsyncIterable, Iterable, Mapping, Sequence, Tuple, Union
|
||||||
|
|
||||||
from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile
|
from nsbdiagnosistoolkit.engine.data.MLST import Allele, MLSTProfile
|
||||||
|
|
||||||
@@ -15,16 +15,17 @@ def loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]])
|
|||||||
return result_dict
|
return result_dict
|
||||||
|
|
||||||
|
|
||||||
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[MLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
|
async def write_mlst_profiles_as_csv(mlst_profiles_iterable: Iterable[tuple[str, MLSTProfile]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]):
|
||||||
mlst_profiles = list(mlst_profiles_iterable)
|
mlst_profiles = list(mlst_profiles_iterable)
|
||||||
header = ["st", "clonal-complex", *mlst_profiles[0].alleles.keys()]
|
header = ["name", "st", "clonal-complex", *mlst_profiles[0][1].alleles.keys()]
|
||||||
with open(handle, "w", newline='') as filehandle:
|
with open(handle, "w", newline='') as filehandle:
|
||||||
writer = csv.DictWriter(filehandle, fieldnames=header)
|
writer = csv.DictWriter(filehandle, fieldnames=header)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
for mlst_profile in mlst_profiles:
|
for name, mlst_profile in mlst_profiles:
|
||||||
row_dictionary = {
|
row_dictionary = {
|
||||||
"st": mlst_profile.sequence_type,
|
"st": mlst_profile.sequence_type,
|
||||||
"clonal-complex": mlst_profile.clonal_complex,
|
"clonal-complex": mlst_profile.clonal_complex,
|
||||||
|
"name": name,
|
||||||
**loci_alleles_variants_from_loci(mlst_profile.alleles)
|
**loci_alleles_variants_from_loci(mlst_profile.alleles)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user