Moved to a general BIGSdb implementation

Updated tests

Removed ABIF UI for the time being

Began updating CLI
This commit is contained in:
2025-01-08 21:32:10 +00:00
parent 645357ac58
commit 42d0f56b18
20 changed files with 403 additions and 414 deletions

View File

View File

@@ -1,23 +0,0 @@
from os import path
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence
from automlst.engine.data.mlst import MLSTProfile
from automlst.engine.data.genomics import NamedString
from automlst.engine.local.abif import read_abif
from automlst.engine.local.fasta import read_fasta
from automlst.engine.remote.databases.institutpasteur.mlst import InstitutPasteurProfiler
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]:
    """Yield the sequences found in the given FASTA and ABIF files.

    FASTA paths are handled first, in the order given; every record read from
    a file contributes its ``sequence`` attribute. ABIF paths follow; the
    ``sequence`` attribute of each file read is joined into a single string
    before it is yielded.
    """
    for fasta_file in fastas:
        records = read_fasta(fasta_file)
        async for record in records:
            yield record.sequence

    for abif_file in abifs:
        trace = await read_abif(abif_file)
        joined = "".join(trace.sequence)
        yield joined
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
    """Profile every string from ``strings`` and return the results as a list.

    A single ``InstitutPasteurProfiler`` is opened for ``database_name`` and
    reused for all strings; results keep the order in which the strings were
    produced.
    """
    async with InstitutPasteurProfiler(database_name=database_name) as profiler:
        collected = [
            await profiler.profile_string(sequence)
            async for sequence in strings
        ]
    return collected

43
src/automlst/cli/info.py Normal file
View File

@@ -0,0 +1,43 @@
import asyncio
from automlst.cli import program
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
# Subcommand parser for BIGSdb discovery queries. It is registered on the
# shared subparsers object under this module's ``__name__``.
parser = program.subparsers.add_parser(__name__)

parser.add_argument(
    "--retrieve-bigsdbs", "-l",
    # ``store_true`` takes no value, so argparse rejects a ``type=`` keyword
    # for it with a TypeError; the stored flag is already a bool.
    action="store_true",
    dest="list_dbs",
    required=False,
    default=False,
    help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)."
)

parser.add_argument(
    "--retrieve-bigsdb-schemas", "-lschemas",
    # One or more database names per occurrence; repeated occurrences are
    # accumulated into the same list.
    nargs="+",
    action="extend",
    dest="list_bigsdb_schemas",
    required=False,
    default=[],
    type=str,
    help="Lists the known schema IDs for a given BIGSdb sequence definition database name"
)
async def run(args):
    """Print the BIGSdb information requested through ``args``.

    If ``args.list_dbs`` is set, the keys of the known sequence definition
    databases are printed as one comma-separated line. Then, for every name
    in ``args.list_bigsdb_schemas``, each entry of the schema mapping returned
    for that name is printed on its own line as ``key: value``.
    """
    async with BIGSdbIndex() as index:
        if args.list_dbs:
            seqdef_dbs = await index.get_known_seqdef_dbs(force=False)
            db_names = seqdef_dbs.keys()
            print(", ".join(db_names))

        for seqdef_db_name in args.list_bigsdb_schemas:
            schema_table = await index.get_schemas_for_seqdefdb(seqdef_db_name)
            for description, identifier in schema_table.items():
                print(f"{description}: {identifier}")
def run_asynchronously(args):
    """Synchronous entry point: drive the ``run`` coroutine to completion."""
    pending = run(args)
    asyncio.run(pending)


# Register the synchronous wrapper as this subcommand's handler.
parser.set_defaults(func=run_asynchronously)

View File

@@ -0,0 +1,55 @@
import asyncio
import datetime
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence, Union
from automlst.cli import program
from automlst.engine.data.genomics import NamedString
from automlst.engine.data.mlst import MLSTProfile
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
from automlst.engine.local.csv import write_mlst_profiles_as_csv
from automlst.engine.local.fasta import read_fasta, read_multiple_fastas
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BigSDBMLSTProfiler
# Subcommand parser for MLST typing of FASTA input. It is registered on the
# shared subparsers object under this module's ``__name__``.
parser = program.subparsers.add_parser(__name__)

parser.add_argument(
    "--fasta", "-fa", "-fst",
    # One or more paths per occurrence; repeated occurrences are accumulated
    # into the same list.
    nargs="+",
    action='extend',
    dest="fastas",
    required=False,
    default=[],
    type=str,
    help="The FASTA files to process. Multiple can be listed."
)

parser.add_argument(
    "seqdefdb",
    help="The BIGSdb seqdef database to use for typing."
)

parser.add_argument(
    "schema",
    type=int,
    help="The BIGSdb seqdef database schema ID (integer) to use for typing."
)

parser.add_argument(
    "out",
    # A positional only falls back to its default when it may be omitted;
    # without ``nargs="?"`` the timestamped default below is never used.
    nargs="?",
    # NOTE: the timestamp is computed once, when this module is imported.
    default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}',
    help="The output CSV name (.csv will be appended)."
)
async def run(args):
    """Type the FASTA inputs named in ``args`` and write the result as CSV.

    The sequences from ``args.fastas`` are passed to a profiler built for
    ``args.seqdefdb`` / ``args.schema``, and the resulting profiles are handed
    to ``write_mlst_profiles_as_csv`` together with ``args.out``.
    """
    async with BIGSdbIndex() as index:
        sequences = read_multiple_fastas(args.fastas)
        profiler_context = await index.build_profiler_from_seqdefdb(
            args.seqdefdb, args.schema
        )
        # The profiles are written while the profiler is still open.
        async with profiler_context as profiler:
            profiles = profiler.profile_multiple_strings(sequences)
            await write_mlst_profiles_as_csv(profiles, args.out)
def run_asynchronously(args):
    """Synchronous entry point: drive the ``run`` coroutine to completion."""
    pending = run(args)
    asyncio.run(pending)


# Register the synchronous wrapper as this subcommand's handler.
parser.set_defaults(func=run_asynchronously)

View File

@@ -0,0 +1,22 @@
import argparse
import asyncio
import datetime
from os import path
import os
from automlst.engine.data.genomics import NamedString
from automlst.engine.local.abif import read_abif
from automlst.engine.local.csv import write_mlst_profiles_as_csv
from automlst.engine.local.fasta import read_fasta
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
# Root parser of the command-line program; subcommands attach their own
# parsers through ``subparsers``.
root_parser = argparse.ArgumentParser()
# A subcommand is mandatory. ``dest`` gives the subparsers action a name:
# without one, older argparse releases raise a TypeError while building the
# "missing subcommand" error instead of printing a usage message. The chosen
# subcommand name is additionally stored as ``args.command``.
subparsers = root_parser.add_subparsers(dest="command", required=True)
def run():
    """Parse the command line and invoke the handler stored as ``func``."""
    namespace = root_parser.parse_args()
    handler = namespace.func
    handler(namespace)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    run()

View File

@@ -1,86 +0,0 @@
import argparse
import asyncio
import datetime
from os import path
import os
from automlst.cli import aggregated
from automlst.engine.data.genomics import NamedString
from automlst.engine.local.abif import read_abif
from automlst.engine.local.csv import write_mlst_profiles_as_csv
from automlst.engine.local.fasta import read_fasta
# Command-line parser for a single run over FASTA and/or ABIF inputs.
parser = argparse.ArgumentParser()

parser.add_argument(
    "--run-name", "-name",
    dest="run_name",
    required=False,
    # NOTE: the timestamp is computed once, when this module is imported.
    default=datetime.datetime.now().strftime(r"%Y%m%d%H%M%S"),
    type=str,
    help="The name of the run. Will use a date and time string if not provided."
)

parser.add_argument(
    "--fasta", "-fa", "-fst",
    nargs="+",
    action='extend',
    dest="fastas",
    required=False,
    default=[],
    type=str,
    help="The FASTA files to process. Multiple can be listed."
)

parser.add_argument(
    "--abif", "-abi", "-ab1",
    # ``extend`` needs a list-producing ``nargs``: without it the single
    # string value is extended character by character and a second path is
    # rejected as an unrecognized argument.
    nargs="+",
    action='extend',
    dest="abifs",
    required=False,
    default=[],
    type=str,
    help="The ABIF files to process. Multiple can be listed."
)

parser.add_argument(
    "--ncbi-assembly-reference", "-refncbi",
    dest="ncbi_assembly_reference",
    required=False,
    default=None,
    type=str,
    help="The NCBI GenBank accession ID for the consensus assembly. Either this argument, or the path equivalent must be given if ABIF files are used."
)

parser.add_argument(
    "--assembly-reference", "-ref",
    dest="assembly_reference",
    required=False,
    default=None,
    type=str,
    help="The path to the FASTA sequence to be used as a reference for consensus building."
)

parser.add_argument(
    "--institut-pasteur-mlst",
    "-ipdbmlst",
    dest="institut_pasteur_db",
    required=False,
    default=None,
    type=str,
    help="The Institut Pasteur MLST database to use."
)

parser.add_argument(
    "out",
    # A positional only falls back to its default when it may be omitted;
    # without ``nargs="?"`` the default below is never used.
    nargs="?",
    default="./.",
    help="The output folder. Files will be named by the provided (or default) run name. Outputs will be automatically generated depending on which arguments are used."
)
def run():
    """Parse the command line, create the output folder and run the profiling.

    The output folder is always created. MLST profiling is performed, and its
    CSV written to ``MLST_<run name>.csv`` inside that folder, only when an
    Institut Pasteur database was given.
    """
    args = parser.parse_args()
    sequences = aggregated.aggregate_sequences(args.fastas, args.abifs)
    os.makedirs(args.out, exist_ok=True)

    if args.institut_pasteur_db is None:
        return

    profiling = aggregated.profile_all_genetic_strings(
        sequences, args.institut_pasteur_db)
    # Profiling and CSV writing each run in their own event loop, one after
    # the other.
    profiles = asyncio.run(profiling)
    csv_path = str(path.join(args.out, "MLST_" + args.run_name + ".csv"))
    asyncio.run(write_mlst_profiles_as_csv(profiles, csv_path))


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    run()