Compare commits
11 Commits
0.4.2
...
af9c8c70b8
Author | SHA1 | Date | |
---|---|---|---|
af9c8c70b8 | |||
319edf36af | |||
43a17d698b | |||
e2f19acd5a | |||
1f6023b06b | |||
9100f83390 | |||
419aa36e9d | |||
ca28068477 | |||
32dcfd99f8 | |||
4eca35a556 | |||
81d63bc54d |
@@ -12,7 +12,7 @@
|
|||||||
// "forwardPorts": [],
|
// "forwardPorts": [],
|
||||||
|
|
||||||
// Use 'postCreateCommand' to run commands after the container is created.
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
"postCreateCommand": "pip3 install --user -r requirements.txt",
|
"postCreateCommand": "pip3 install --user -r requirements.txt && pip install -e .",
|
||||||
"customizations": {
|
"customizations": {
|
||||||
"vscode": {
|
"vscode": {
|
||||||
"extensions": [
|
"extensions": [
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -212,3 +212,4 @@ pyrightconfig.json
|
|||||||
|
|
||||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
|
out.csv
|
||||||
|
6
Jenkinsfile
vendored
6
Jenkinsfile
vendored
@@ -33,10 +33,10 @@ pipeline {
|
|||||||
parallel {
|
parallel {
|
||||||
stage ("git.reslate.systems") {
|
stage ("git.reslate.systems") {
|
||||||
environment {
|
environment {
|
||||||
TOKEN = credentials('git.reslate.systems')
|
CREDS = credentials('username-password-rs-git')
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
sh script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage ("pypi.org") {
|
stage ("pypi.org") {
|
||||||
@@ -47,7 +47,7 @@ pipeline {
|
|||||||
TOKEN = credentials('pypi.org')
|
TOKEN = credentials('pypi.org')
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
sh returnStatus: true, script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
sh script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
17
README.md
17
README.md
@@ -12,6 +12,7 @@ This CLI is capable of exactly what [autoBIGS.engine](https://pypi.org/project/a
|
|||||||
- Fetch the available BIGSdb database schemas for a given MLST database
|
- Fetch the available BIGSdb database schemas for a given MLST database
|
||||||
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
||||||
- Retrieve MLST sequence type IDs based off a sequence
|
- Retrieve MLST sequence type IDs based off a sequence
|
||||||
|
- Inexact matches are annotated with an asterisk (\*)
|
||||||
- Output all results to a single CSV
|
- Output all results to a single CSV
|
||||||
|
|
||||||
## Planned Features for CLI
|
## Planned Features for CLI
|
||||||
@@ -40,6 +41,18 @@ Let's say you have a fasta called `seq.fasta` which contains several sequences.
|
|||||||
|
|
||||||
3. Then, run `autobigs st -h` and familiarize yourself with the parameters needed for sequence typing.
|
3. Then, run `autobigs st -h` and familiarize yourself with the parameters needed for sequence typing.
|
||||||
|
|
||||||
4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and/or `--exact` to only retrieve exact sequence types, and/or `--stop-on-fail` to stop typing if one of your sequences fail to retrieve any type.
|
4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and `--stop-on-fail` to stop typing if one of your sequences fails to retrieve any type.
|
||||||
|
|
||||||
5. Sit tight, and wait. The `output.csv` will contain your results once completed.
|
5. Sit tight, and wait. The `output.csv` will contain your results once completed.
|
||||||
|
|
||||||
|
## Versioning
|
||||||
|
|
||||||
|
The autoBIGS project follows [semantic versioning](https://semver.org/) where the three numbers may be interpreted as MAJOR.MINOR.PATCH.
|
||||||
|
|
||||||
|
Regarding major version 0 ([spec item 4](https://semver.org/#spec-item-4)), the project adapts the semantic versioning definition as follows:
|
||||||
|
|
||||||
|
1. Given x.Y.z, Y is only incremented when a backwards incompatible change is made.
|
||||||
|
|
||||||
|
2. Given x.y.Z, Z is only incremented when a backwards compatible change is made.
|
||||||
|
|
||||||
|
Versions of autoBIGS items with a major version number of 0 will introduce numerous changes and patches. As such, changes between such versions should be considered highly variable.
|
@@ -8,7 +8,7 @@ dynamic = ["version"]
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = {file = "LICENSE"}
|
license = {file = "LICENSE"}
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autoBIGS-engine"
|
"autoBIGS-engine==0.12.*"
|
||||||
]
|
]
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
description = "A CLI tool to rapidly fetch fetch MLST profiles given sequences for various diseases."
|
description = "A CLI tool to rapidly fetch fetch MLST profiles given sequences for various diseases."
|
||||||
|
@@ -1,6 +1,8 @@
|
|||||||
from argparse import ArgumentParser, Namespace
|
from argparse import ArgumentParser, Namespace
|
||||||
import asyncio
|
import asyncio
|
||||||
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
|
import csv
|
||||||
|
from os import path
|
||||||
|
from autobigs.engine.analysis.bigsdb import BIGSdbIndex
|
||||||
|
|
||||||
def setup_parser(parser: ArgumentParser):
|
def setup_parser(parser: ArgumentParser):
|
||||||
parser.description = "Fetches the latest BIGSdb MLST database definitions."
|
parser.description = "Fetches the latest BIGSdb MLST database definitions."
|
||||||
@@ -24,6 +26,14 @@ def setup_parser(parser: ArgumentParser):
|
|||||||
help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
|
help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--csv-prefix", "-o",
|
||||||
|
dest="csv_output",
|
||||||
|
required=False,
|
||||||
|
default=None,
|
||||||
|
help="Output list as CSV at a given path. A suffix is added depending on the action taken."
|
||||||
|
)
|
||||||
|
|
||||||
parser.set_defaults(run=run_asynchronously)
|
parser.set_defaults(run=run_asynchronously)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@@ -31,15 +41,31 @@ async def run(args: Namespace):
|
|||||||
async with BIGSdbIndex() as bigsdb_index:
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
if args.list_dbs:
|
if args.list_dbs:
|
||||||
known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
|
known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
|
||||||
print("\n".join(known_seqdef_dbs.keys()))
|
sorted_seqdef_dbs = [(name, source) for name, source in sorted(known_seqdef_dbs.items())]
|
||||||
|
print("The following are all known BIGS database names, and their source (sorted alphabetically):")
|
||||||
|
print("\n".join(["{0}: {1}".format(name, source) for name, source in sorted_seqdef_dbs]))
|
||||||
|
if args.csv_output:
|
||||||
|
dbs_csv_path = path.splitext(args.csv_output)[0] + "_" + "dbs.csv"
|
||||||
|
with open(dbs_csv_path, "w") as csv_out_handle:
|
||||||
|
writer = csv.writer(csv_out_handle)
|
||||||
|
writer.writerow(("BIGSdb Names", "Source"))
|
||||||
|
writer.writerows(sorted_seqdef_dbs)
|
||||||
|
print("\nDatabase output written to {0}".format(dbs_csv_path))
|
||||||
|
|
||||||
for bigsdb_schema_name in args.list_bigsdb_schemas:
|
for bigsdb_schema_name in args.list_bigsdb_schemas:
|
||||||
schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
|
schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
|
||||||
for schema_desc, schema_id in schemas.items():
|
sorted_schemas = [(name, id) for name, id in sorted(schemas.items())]
|
||||||
print(f"{schema_desc}: {schema_id}")
|
print("The following are the known schemas for \"{0}\", and their associated IDs:".format(bigsdb_schema_name))
|
||||||
|
print("\n".join(["{0}: {1}".format(name, id) for name, id in sorted_schemas]))
|
||||||
|
if args.csv_output:
|
||||||
|
schema_csv_path = path.splitext(args.csv_output)[0] + "_" + "schemas.csv"
|
||||||
|
with open(schema_csv_path, "w") as csv_out_handle:
|
||||||
|
writer = csv.writer(csv_out_handle)
|
||||||
|
writer.writerow(("Name", "ID"))
|
||||||
|
writer.writerows(sorted_schemas)
|
||||||
|
print("\nSchema list output written to {0}".format(schema_csv_path))
|
||||||
if not (args.list_dbs or len(args.list_bigsdb_schemas) > 0):
|
if not (args.list_dbs or len(args.list_bigsdb_schemas) > 0):
|
||||||
print("Nothing to do. Try specifying \"-l\".")
|
print("Nothing to do. Try specifying \"-l\" for a list of known databases, or \"-h\" for more information.")
|
||||||
|
|
||||||
def run_asynchronously(args: Namespace):
|
def run_asynchronously(args: Namespace):
|
||||||
asyncio.run(run(args))
|
asyncio.run(run(args))
|
||||||
|
@@ -10,7 +10,7 @@ import importlib
|
|||||||
root_parser = argparse.ArgumentParser(epilog='Use "%(prog)s info -h" to learn how to get available MLST databases, and their available schemas.'
|
root_parser = argparse.ArgumentParser(epilog='Use "%(prog)s info -h" to learn how to get available MLST databases, and their available schemas.'
|
||||||
+ ' Once that is done, use "%(prog)s st -h" to learn how to retrieve MLST profiles.'
|
+ ' Once that is done, use "%(prog)s st -h" to learn how to retrieve MLST profiles.'
|
||||||
)
|
)
|
||||||
subparsers = root_parser.add_subparsers(required=True)
|
subparsers = root_parser.add_subparsers(required=False)
|
||||||
|
|
||||||
info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
|
info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
|
||||||
st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
|
st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
|
||||||
@@ -33,6 +33,8 @@ def run():
|
|||||||
metadata.version("autoBIGS-engine")}.')
|
metadata.version("autoBIGS-engine")}.')
|
||||||
if hasattr(args, "run"):
|
if hasattr(args, "run"):
|
||||||
args.run(args)
|
args.run(args)
|
||||||
|
elif not args.version:
|
||||||
|
root_parser.print_usage()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@@ -2,9 +2,9 @@
|
|||||||
from argparse import ArgumentParser, Namespace
|
from argparse import ArgumentParser, Namespace
|
||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
from autobigs.engine.data.local.csv import write_mlst_profiles_as_csv
|
from autobigs.engine.writing import write_mlst_profiles_as_csv
|
||||||
from autobigs.engine.data.local.fasta import read_multiple_fastas
|
from autobigs.engine.reading import read_multiple_fastas
|
||||||
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
|
from autobigs.engine.analysis.bigsdb import BIGSdbIndex
|
||||||
|
|
||||||
|
|
||||||
def setup_parser(parser: ArgumentParser):
|
def setup_parser(parser: ArgumentParser):
|
||||||
@@ -35,15 +35,6 @@ def setup_parser(parser: ArgumentParser):
|
|||||||
help="The output CSV name (.csv will be appended)."
|
help="The output CSV name (.csv will be appended)."
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--exact", "-ex",
|
|
||||||
action="store_true",
|
|
||||||
dest="exact",
|
|
||||||
required=False,
|
|
||||||
default=False,
|
|
||||||
help="Should run exact matching rather than returning all similar ones"
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--stop-on-fail", "-sof",
|
"--stop-on-fail", "-sof",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -58,11 +49,11 @@ def setup_parser(parser: ArgumentParser):
|
|||||||
async def run(args: Namespace):
|
async def run(args: Namespace):
|
||||||
async with BIGSdbIndex() as bigsdb_index:
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
gen_strings = read_multiple_fastas(args.fastas)
|
gen_strings = read_multiple_fastas(args.fastas)
|
||||||
async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
|
async with await bigsdb_index.build_profiler_from_seqdefdb(False, args.seqdefdb, args.schema) as mlst_profiler:
|
||||||
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
|
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, args.stop_on_fail)
|
||||||
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
|
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
|
||||||
if len(failed) > 0:
|
if len(failed) > 0:
|
||||||
print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
|
print(f"A total of {len(failed)} IDs failed (no profile found):\n{"\n".join(failed)}")
|
||||||
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
|
print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")
|
||||||
|
|
||||||
def run_asynchronously(args):
|
def run_asynchronously(args):
|
||||||
|
Reference in New Issue
Block a user