Moved to a general BIGSdb implementation
Updated tests. Removed ABIF UI for the time being. Began updating CLI.
This commit is contained in:
0
src/automlst/cli/__init__.py
Normal file
0
src/automlst/cli/__init__.py
Normal file
@@ -1,23 +0,0 @@
|
||||
from os import path
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence
|
||||
from automlst.engine.data.mlst import MLSTProfile
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.local.abif import read_abif
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
from automlst.engine.remote.databases.institutpasteur.mlst import InstitutPasteurProfiler
|
||||
|
||||
|
||||
async def aggregate_sequences(fastas: Iterable[str], abifs: Iterable[str]) -> AsyncGenerator[str, Any]:
    """Yield sequence strings from the FASTA inputs first, then the ABIF inputs.

    Args:
        fastas: Paths passed one at a time to ``read_fasta``; the ``sequence``
            attribute of every record it produces is yielded.
        abifs: Paths passed one at a time to ``read_abif``; the items of each
            result's ``sequence`` are joined into a single string and yielded.

    Yields:
        One string per FASTA record, followed by one string per ABIF file.
    """
    for fasta_file in fastas:
        async for record in read_fasta(fasta_file):
            yield record.sequence

    for abif_file in abifs:
        trace = await read_abif(abif_file)
        # ``sequence`` is joined item by item into one string.
        yield "".join(trace.sequence)
|
||||
|
||||
async def profile_all_genetic_strings(strings: AsyncIterable[str], database_name: str) -> Sequence[MLSTProfile]:
    """Profile every string from ``strings`` with one shared profiler.

    Args:
        strings: Asynchronous iterable of strings to profile.
        database_name: Passed to ``InstitutPasteurProfiler`` as
            ``database_name``.

    Returns:
        The results of ``profiler.profile_string`` in input order.
    """
    collected = []
    # A single profiler context is reused for all strings.
    async with InstitutPasteurProfiler(database_name=database_name) as profiler:
        async for genetic_string in strings:
            profile = await profiler.profile_string(genetic_string)
            collected.append(profile)
    return collected
|
43
src/automlst/cli/info.py
Normal file
43
src/automlst/cli/info.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import asyncio
|
||||
from automlst.cli import program
|
||||
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
|
||||
|
||||
|
||||
# Register this module as a sub-command of the shared CLI parser; the
# sub-command is named after this module's ``__name__``.
parser = program.subparsers.add_parser(__name__)

# Boolean switch. ``store_true`` takes no value and already produces a bool,
# so it must not be given ``type=`` (argparse raises TypeError if it is).
parser.add_argument(
    "--retrieve-bigsdbs", "-l",
    action="store_true",
    dest="list_dbs",
    required=False,
    default=False,
    help="Lists all known BIGSdb MLST databases (fetched from known APIs and cached)."
)

# One or more seqdef database names; repeated uses of the flag are merged
# into a single list by the ``extend`` action.
parser.add_argument(
    "--retrieve-bigsdb-schemas", "-lschemas",
    nargs="+",
    action="extend",
    dest="list_bigsdb_schemas",
    required=False,
    default=[],
    type=str,
    help="Lists the known schema IDs for a given BIGSdb sequence definition database name"
)
|
||||
|
||||
async def run(args):
    """Print the information requested through the parsed ``args``.

    Reads ``args.list_dbs`` and ``args.list_bigsdb_schemas``. When
    ``list_dbs`` is truthy, the keys returned by
    ``get_known_seqdef_dbs(force=False)`` are printed on one comma-separated
    line. For every name in ``list_bigsdb_schemas``, each entry returned by
    ``get_schemas_for_seqdefdb`` is printed as ``"<key>: <value>"``.
    """
    async with BIGSdbIndex() as index:
        if args.list_dbs:
            seqdef_dbs = await index.get_known_seqdef_dbs(force=False)
            db_names = seqdef_dbs.keys()
            print(", ".join(db_names))

        for seqdef_db_name in args.list_bigsdb_schemas:
            schema_listing = await index.get_schemas_for_seqdefdb(seqdef_db_name)
            for schema_desc, schema_id in schema_listing.items():
                print(f"{schema_desc}: {schema_id}")
|
||||
|
||||
def run_asynchronously(args):
    """Synchronous wrapper: drive the ``run`` coroutine with ``asyncio.run``."""
    coroutine = run(args)
    asyncio.run(coroutine)


# Store the wrapper as the ``func`` default on this sub-command's parser.
parser.set_defaults(func=run_asynchronously)
|
55
src/automlst/cli/profile.py
Normal file
55
src/automlst/cli/profile.py
Normal file
@@ -0,0 +1,55 @@
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Sequence, Union
|
||||
from automlst.cli import program
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.data.mlst import MLSTProfile
|
||||
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
|
||||
from automlst.engine.local.csv import write_mlst_profiles_as_csv
|
||||
from automlst.engine.local.fasta import read_fasta, read_multiple_fastas
|
||||
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BigSDBMLSTProfiler
|
||||
|
||||
|
||||
# Register this module as a sub-command of the shared CLI parser; the
# sub-command is named after this module's ``__name__``.
parser = program.subparsers.add_parser(__name__)

# One or more FASTA paths; repeated uses of the flag are merged into a
# single list by the ``extend`` action.
parser.add_argument(
    "--fasta", "-fa", "-fst",
    nargs="+",
    action='extend',
    dest="fastas",
    required=False,
    default=[],
    type=str,
    help="The FASTA files to process. Multiple can be listed."
)

parser.add_argument(
    "seqdefdb",
    help="The BIGSdb seqdef database to use for typing."
)

parser.add_argument(
    "schema",
    type=int,
    help="The BIGSdb seqdef database schema ID (integer) to use for typing."
)

# ``nargs="?"`` makes this positional optional; without it argparse always
# requires a value and the timestamp default below could never be used.
# The default is computed once, when this module is imported.
parser.add_argument(
    "out",
    nargs="?",
    default=f'./{datetime.datetime.now().strftime(r"%Y%m%d%H%M%S")}',
    help="The output CSV name (.csv will be appended)."
)
|
||||
|
||||
|
||||
async def run(args):
    """Profile the FASTA inputs and write the result as CSV.

    Reads ``args.fastas``, ``args.seqdefdb``, ``args.schema`` and
    ``args.out``. A profiler is built from the seqdef database name and
    schema ID, applied to the output of ``read_multiple_fastas``, and the
    profiles are handed to ``write_mlst_profiles_as_csv`` together with
    ``args.out``.
    """
    async with BIGSdbIndex() as index:
        sequences = read_multiple_fastas(args.fastas)
        profiler_context = await index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema)
        async with profiler_context as profiler:
            profiles = profiler.profile_multiple_strings(sequences)
            await write_mlst_profiles_as_csv(profiles, args.out)
|
||||
|
||||
def run_asynchronously(args):
    """Synchronous wrapper: drive the ``run`` coroutine with ``asyncio.run``."""
    coroutine = run(args)
    asyncio.run(coroutine)


# Store the wrapper as the ``func`` default on this sub-command's parser.
parser.set_defaults(func=run_asynchronously)
|
22
src/automlst/cli/program.py
Normal file
22
src/automlst/cli/program.py
Normal file
@@ -0,0 +1,22 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
from os import path
|
||||
import os
|
||||
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.local.abif import read_abif
|
||||
from automlst.engine.local.csv import write_mlst_profiles_as_csv
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex
|
||||
|
||||
# Root parser of the command line interface.
root_parser = argparse.ArgumentParser()

# Container that sub-command modules attach their own parsers to. ``dest`` is
# set explicitly: a required sub-parser group without a ``dest`` can make
# argparse fail with a TypeError rather than a usage error on some Python
# versions when no sub-command is given. The chosen name is stored on the
# parsed namespace as ``subcommand``.
subparsers = root_parser.add_subparsers(required=True, dest="subcommand")
|
||||
|
||||
def run():
    """Parse the command line and call the handler stored on the namespace.

    The parsed namespace's ``func`` attribute is called with the namespace
    itself as its only argument.
    """
    parsed = root_parser.parse_args()
    handler = parsed.func
    handler(parsed)


if __name__ == "__main__":
    run()
|
@@ -1,86 +0,0 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
from os import path
|
||||
import os
|
||||
|
||||
from automlst.cli import aggregated
|
||||
from automlst.engine.data.genomics import NamedString
|
||||
from automlst.engine.local.abif import read_abif
|
||||
from automlst.engine.local.csv import write_mlst_profiles_as_csv
|
||||
from automlst.engine.local.fasta import read_fasta
|
||||
|
||||
|
||||
# Command line definition for the aggregated run.
parser = argparse.ArgumentParser()

# The default run name is a timestamp computed once, at import time.
parser.add_argument(
    "--run-name", "-name",
    dest="run_name",
    required=False,
    default=datetime.datetime.now().strftime(r"%Y%m%d%H%M%S"),
    type=str,
    help="The name of the run. Will use a date and time string if not provided."
)

parser.add_argument(
    "--fasta", "-fa", "-fst",
    nargs="+",
    action='extend',
    dest="fastas",
    required=False,
    default=[],
    type=str,
    help="The FASTA files to process. Multiple can be listed."
)

# ``extend`` needs a list-valued argument: without ``nargs`` the single path
# string is itself iterated and the destination list is extended with its
# individual characters. ``nargs="+"`` mirrors the ``--fasta`` option.
parser.add_argument(
    "--abif", "-abi", "-ab1",
    nargs="+",
    action='extend',
    dest="abifs",
    required=False,
    default=[],
    type=str,
    help="The ABIF files to process. Multiple can be listed."
)

parser.add_argument(
    "--ncbi-assembly-reference", "-refncbi",
    dest="ncbi_assembly_reference",
    required=False,
    default=None,
    type=str,
    help="The NCBI GenBank accession ID for the consensus assembly. Either this argument, or the path equivalent must be given if ABIF files are used."
)

parser.add_argument(
    "--assembly-reference", "-ref",
    dest="assembly_reference",
    required=False,
    default=None,
    type=str,
    help="The path to the FASTA sequence to be used as a reference for consensus building."
)

parser.add_argument(
    "--institut-pasteur-mlst",
    "-ipdbmlst",
    dest="institut_pasteur_db",
    required=False,
    default=None,
    type=str,
    help="The Institut Pasteur MLST database to use."
)

# ``nargs="?"`` makes this positional optional; without it argparse always
# requires a value and the declared default could never be used.
parser.add_argument(
    "out",
    nargs="?",
    default="./.",
    help="The output folder. Files will be named by the provided (or default) run name. Outputs will be automatically generated depending on which arguments are used."
)
|
||||
|
||||
|
||||
def run():
    """Parse the command line, create the output folder and write MLST results.

    The output directory ``args.out`` is always created. Profiling and CSV
    output only happen when ``args.institut_pasteur_db`` is given; the CSV is
    written to ``MLST_<run name>.csv`` inside the output directory.
    """
    args = parser.parse_args()
    sequences = aggregated.aggregate_sequences(args.fastas, args.abifs)
    os.makedirs(args.out, exist_ok=True)

    if args.institut_pasteur_db is None:
        return

    profiling = aggregated.profile_all_genetic_strings(
        sequences, args.institut_pasteur_db)
    # Profiling and CSV writing are run as two separate ``asyncio.run`` calls.
    profiles = asyncio.run(profiling)
    csv_path = str(path.join(args.out, "MLST_" + args.run_name + ".csv"))
    asyncio.run(write_mlst_profiles_as_csv(profiles, csv_path))


if __name__ == "__main__":
    run()
|
Reference in New Issue
Block a user