19 Commits

SHA1 Message Date
19b23539b3 Added bioconda and personal conda repos to channels
Some checks reported errors
automlst.cli/pipeline/head Something is wrong with the build of this commit
2025-02-26 15:22:32 +00:00
dbd8238cef Added recipe patching script
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 14:14:02 +00:00
4b0fac0801 Added grayskull and curl to environment.yml
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:51:20 +00:00
d78ae19c4f Re-added pytest-cov to conda environment.yml
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:40:20 +00:00
6b8376c470 Added publishing to personal git repo
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:33:07 +00:00
a4d8de7cc6 Changing CSV argument to --csv or -o
All checks were successful
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag This commit looks good
2025-02-19 19:57:15 +00:00
5ef5b6ac08 Updated pyproject.toml to use license text and updated repo
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-19 16:26:59 +00:00
3aa2916324 Updated pipeline to not publish to system repo if it's a tagged version
All checks were successful
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag This commit looks good
2025-02-19 16:02:31 +00:00
af9c8c70b8 Stop on fail argument now works
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-19 15:50:18 +00:00
319edf36af Added option to output database and schemas lists to CSV 2025-02-19 15:01:57 +00:00
43a17d698b Updated readme to reflect recent changes and discuss versioning
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-18 19:16:39 +00:00
e2f19acd5a Updated CLI to follow APIs specified by the 0.12.0 engine
Some checks failed
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag There was a failure building this commit
2025-02-18 16:33:08 +00:00
1f6023b06b Bumped engine version requirement to version 0.11.0 2025-02-18 14:53:21 +00:00
9100f83390 Removed unused file from repository 2025-02-13 21:56:38 +00:00
419aa36e9d Removed unused parameter for typing 2025-02-13 21:56:20 +00:00
ca28068477 Made database name list predictable (sorted) 2025-02-13 21:56:06 +00:00
32dcfd99f8 Updated to reflect changes in Engine 0.10.* 2025-02-12 21:54:34 +00:00
4eca35a556 Fixed --version not showing 2025-02-11 16:56:04 +00:00
81d63bc54d Updated private git credentials and now fails CI publishing steps 2025-01-24 21:01:14 +00:00
12 changed files with 228 additions and 42 deletions

.devcontainer/Dockerfile (new file, 16 lines)

@@ -0,0 +1,16 @@
FROM mcr.microsoft.com/devcontainers/miniconda:1-3
# Copy environment.yml (if found) to a temp location so we update the environment. Also
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
&& rm -rf /tmp/conda-tmp
# [Optional] Uncomment to install a different version of Python than the default
# RUN conda install -y python=3.6 \
# && pip install --no-cache-dir pipx \
# && pipx reinstall-all
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

.devcontainer/devcontainer.json

@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/miniconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Miniconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    },
     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,7 +14,9 @@
     // "forwardPorts": [],
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
+    "postCreateCommand": "pip install -e .",
+    // Configure tool-specific properties.
     "customizations": {
         "vscode": {
             "extensions": [
@@ -20,8 +24,6 @@
             ]
         }
     },
-    // Configure tool-specific properties.
-    // "customizations": {},
     // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
     // "remoteUser": "root"

.devcontainer/noop.txt (new file, 3 lines)

@@ -0,0 +1,3 @@
This file is copied into the container along with environment.yml* from the
parent folder. This is done to prevent the Dockerfile COPY instruction from
failing if no environment.yml is found.

.gitignore (1 line changed)

@@ -212,3 +212,4 @@ pyrightconfig.json
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
+out.csv

Jenkinsfile (27 lines changed)

@@ -2,14 +2,16 @@ pipeline {
     agent {
         kubernetes {
             cloud 'rsys-devel'
-            defaultContainer 'pip'
-            inheritFrom 'pip'
+            defaultContainer 'miniforge3'
+            inheritFrom 'miniforge'
         }
     }
     stages {
         stage("install") {
             steps {
-                sh 'python -m pip install -r requirements.txt'
+                sh 'conda config --add channels bioconda'
+                sh 'conda config --add channels https://git.reslate.systems/api/packages/ydeng/conda'
+                sh 'conda env update -n base -f environment.yml'
             }
         }
         stage("unit tests") {
@@ -22,32 +24,41 @@ pipeline {
         stage("build") {
             steps {
                 sh "python -m build"
+                sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+                sh "python scripts/patch_recipe.py"
+                sh 'conda build autobigs-cli -c bioconda --output-folder conda-bld --verify'
             }
         }
         stage("archive") {
             steps {
-                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
             }
         }
         stage("publish") {
             parallel {
                 stage ("git.reslate.systems") {
+                    when {
+                        not {
+                            tag '*.*.*'
+                        }
+                    }
                     environment {
-                        TOKEN = credentials('git.reslate.systems')
+                        CREDS = credentials('username-password-rs-git')
                     }
                     steps {
-                        sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
                    }
                }
                 stage ("pypi.org") {
                     when {
-                        tag '*.*'
+                        tag '*.*.*'
                     }
                     environment {
                         TOKEN = credentials('pypi.org')
                     }
                     steps {
-                        sh returnStatus: true, script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
                     }
                 }
             }

README.md

@@ -12,6 +12,7 @@ This CLI is capable of exactly what [autoBIGS.engine](https://pypi.org/project/a
 - Fetch the available BIGSdb database schemas for a given MLST database
 - Retrieve exact/non-exact MLST allele variant IDs based off a sequence
 - Retrieve MLST sequence type IDs based off a sequence
+  - Inexact matches are annotated with an asterisk (\*)
 - Output all results to a single CSV

 ## Planned Features for CLI
@@ -40,6 +41,18 @@ Let's say you have a fasta called `seq.fasta` which contains several sequences.
 3. Then, run `autobigs st -h` and familiarize yourself with the parameters needed for sequence typing.
-4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and/or `--exact` to only retrieve exact sequence types, and/or `--stop-on-fail` to stop typing if one of your sequences fail to retrieve any type.
+4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and `--stop-on-fail` to stop typing if one of your sequences fails to retrieve any type.
 5. Sit tight, and wait. The `output.csv` will contain your results once completed.

+## Versioning
+
+The autoBIGS project follows [semantic versioning](https://semver.org/), where the three numbers are interpreted as MAJOR.MINOR.PATCH.
+
+Regarding major version 0 ([spec item 4](https://semver.org/#spec-item-4)), the following adaptation of the semantic versioning definition applies:
+
+1. Given x.Y.z, Y is only incremented when a backwards incompatible change is made.
+2. Given x.y.Z, Z is only incremented when a backwards compatible change is made.
+
+Versions of autoBIGS with a major version of 0 will see numerous changes and patches; as such, behaviour between such versions should be considered highly variable.

environment.yml (new file, 14 lines)

@@ -0,0 +1,14 @@
name: base
channels:
  - bioconda
  - conda-forge
dependencies:
  - pytest
  - pytest-asyncio
  - pytest-cov
  - python-build
  - conda-build
  - twine==6.0.1
  - setuptools_scm
  - grayskull
  - curl
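
To reproduce this environment locally, the same commands used by the Dockerfile and the Jenkinsfile install stage should work; a minimal sketch (channel setup first, as in the install stage above):

    conda config --add channels bioconda
    conda env update -n base -f environment.yml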

pyproject.toml

@@ -6,16 +6,16 @@ build-backend = "setuptools.build_meta"
 name = "autoBIGS.cli"
 dynamic = ["version"]
 readme = "README.md"
-license = {file = "LICENSE"}
+license = {text = "GPL-3.0-or-later"}
 dependencies = [
-    "autoBIGS-engine"
+    "autoBIGS-engine==0.12.*"
 ]
 requires-python = ">=3.12"
 description = "A CLI tool to rapidly fetch fetch MLST profiles given sequences for various diseases."

 [project.urls]
-Repository = "https://github.com/RealYHD/autoBIGS.cli"
-Issues = "https://github.com/RealYHD/autoBIGS.cli/issues"
+Repository = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli"
+Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli/issues"

 [project.scripts]

scripts/patch_recipe.py (new file, 103 lines)

@@ -0,0 +1,103 @@
#!/usr/bin/env python3

import os
import re
import shutil
from os import path

INDENTATION = "  "  # grayskull emits two-space YAML indentation
GRAYSKULL_OUTPUT_PATH = "autoBIGS.cli"
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
LICENSE_SUFFIX = "-or-later"
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli"

def _calc_indentation(line: str):
    # Count indentation units in the line's leading whitespace.
    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0

def read_grayskull_output():
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta) as meta_file:
        return meta_file.readlines()

def update_naming_scheme(lines):
    # Rewrite {{ name|lower }} to {{ name|lower|replace(".", "-") }} so the
    # conda package name uses dashes rather than dots.
    modified_lines = []
    for line in lines:
        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
        modified_line = line
        for match in matches:
            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
        modified_lines.append(modified_line)
    return modified_lines

def inject_run_exports(lines: list[str]):
    # Append a run_exports key as the last entry of the top-level build: section.
    package_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "build:\n" and indentation_count == 0:
            package_indent = True
            modified_lines.append(line)
        elif package_indent and indentation_count == 0:
            modified_lines.append(INDENTATION * 1 + "run_exports:\n")
            modified_lines.append(INDENTATION * 2 + "- " + RUN_EXPORTED_VALUE + "\n")
            modified_lines.append(line)  # keep the line that ends the build: section
            package_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def suffix_license(lines: list[str]):
    # Turn "license: GPL-3.0" into "license: GPL-3.0-or-later" in the about: section.
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def inject_home_page(lines: list[str]):
    # Add a home: key as the last entry of the about: section.
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 0:
            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
            modified_lines.append(line)  # keep the line that ends the about: section
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def write_to_original(lines: list[str]):
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta, "w") as file:
        file.writelines(lines)

def rename_recipe_dir():
    # conda build expects the dash-cased recipe directory (autobigs-cli).
    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
    shutil.rmtree(new_recipe_name, ignore_errors=True)
    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)

if __name__ == "__main__":
    modified_recipe_meta = update_naming_scheme(read_grayskull_output())
    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
    modified_recipe_meta = suffix_license(modified_recipe_meta)
    modified_recipe_meta = inject_home_page(modified_recipe_meta)
    write_to_original(modified_recipe_meta)
    rename_recipe_dir()
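
For context, this script runs between recipe generation and the conda build; the build stage in the Jenkinsfile above invokes, verbatim:

    python -m build
    grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'
    python scripts/patch_recipe.py
    conda build autobigs-cli -c bioconda --output-folder conda-bld --verify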

info.py

@@ -1,6 +1,8 @@
 from argparse import ArgumentParser, Namespace
 import asyncio
-from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
+import csv
+from os import path
+from autobigs.engine.analysis.bigsdb import BIGSdbIndex

 def setup_parser(parser: ArgumentParser):
     parser.description = "Fetches the latest BIGSdb MLST database definitions."
@@ -24,22 +26,50 @@ def setup_parser(parser: ArgumentParser):
         help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
     )
+    parser.add_argument(
+        "--csv", "-o",
+        dest="csv_output",
+        required=False,
+        default=None,
+        help="Output list as CSV at a given path. A suffix is added depending on the action taken."
+    )
     parser.set_defaults(run=run_asynchronously)
     return parser

 async def run(args: Namespace):
     async with BIGSdbIndex() as bigsdb_index:
+        if args.list_dbs and len(args.list_bigsdb_schemas) > 0:
+            print("Cannot specify both database listing and schema listing, please choose one!")
+            exit(1)
         if args.list_dbs:
             known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
-            print("\n".join(known_seqdef_dbs.keys()))
+            sorted_seqdef_dbs = [(name, source) for name, source in sorted(known_seqdef_dbs.items())]
+            print("The following are all known BIGS database names, and their source (sorted alphabetically):")
+            print("\n".join(["{0}: {1}".format(name, source) for name, source in sorted_seqdef_dbs]))
+            if args.csv_output:
+                with open(args.csv_output, "w") as csv_out_handle:
+                    writer = csv.writer(csv_out_handle)
+                    writer.writerow(("BIGSdb Names", "Source"))
+                    writer.writerows(sorted_seqdef_dbs)
+                print("\nDatabase output written to {0}".format(args.csv_output))
         for bigsdb_schema_name in args.list_bigsdb_schemas:
             schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
-            for schema_desc, schema_id in schemas.items():
-                print(f"{schema_desc}: {schema_id}")
+            sorted_schemas = [(name, id) for name, id in sorted(schemas.items())]
+            print("The following are the known schemas for \"{0}\", and their associated IDs:".format(bigsdb_schema_name))
+            print("\n".join(["{0}: {1}".format(name, id) for name, id in sorted_schemas]))
+            if args.csv_output:
+                with open(args.csv_output, "w") as csv_out_handle:
+                    writer = csv.writer(csv_out_handle)
+                    writer.writerow(("Name", "ID"))
+                    writer.writerows(sorted_schemas)
+                print("\nSchema list output written to {0}".format(args.csv_output))
         if not (args.list_dbs or len(args.list_bigsdb_schemas) > 0):
-            print("Nothing to do. Try specifying \"-l\".")
+            print("Nothing to do. Try specifying \"-l\" for a list of known databases, or \"-h\" for more information.")
             exit(1)

 def run_asynchronously(args: Namespace):
     asyncio.run(run(args))
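
A minimal usage sketch for the new CSV option, assuming the console script is installed as `autobigs` (as the README's examples suggest) and using only flags visible in this diff (`-l` and `--csv`/`-o`):

    # List all known BIGSdb sequence definition databases and also write them to CSV
    autobigs info -l --csv databases.csv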

(CLI entry point, file name not captured)

@@ -10,7 +10,7 @@ import importlib
 root_parser = argparse.ArgumentParser(epilog='Use "%(prog)s info -h" to learn how to get available MLST databases, and their available schemas.'
     + ' Once that is done, use "%(prog)s st -h" to learn how to retrieve MLST profiles.'
 )
-subparsers = root_parser.add_subparsers(required=True)
+subparsers = root_parser.add_subparsers(required=False)

 info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
 st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
@@ -33,6 +33,8 @@ def run():
         metadata.version("autoBIGS-engine")}.')
     if hasattr(args, "run"):
         args.run(args)
+    elif not args.version:
+        root_parser.print_usage()

 if __name__ == "__main__":

st.py

@@ -2,9 +2,9 @@
 from argparse import ArgumentParser, Namespace
 import asyncio
 import datetime
-from autobigs.engine.data.local.csv import write_mlst_profiles_as_csv
-from autobigs.engine.data.local.fasta import read_multiple_fastas
-from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
+from autobigs.engine.writing import write_mlst_profiles_as_csv
+from autobigs.engine.reading import read_multiple_fastas
+from autobigs.engine.analysis.bigsdb import BIGSdbIndex

 def setup_parser(parser: ArgumentParser):
@@ -35,15 +35,6 @@ def setup_parser(parser: ArgumentParser):
         help="The output CSV name (.csv will be appended)."
     )
-    parser.add_argument(
-        "--exact", "-ex",
-        action="store_true",
-        dest="exact",
-        required=False,
-        default=False,
-        help="Should run exact matching rather than returning all similar ones"
-    )
     parser.add_argument(
         "--stop-on-fail", "-sof",
         action="store_true",
@@ -58,11 +49,11 @@ def setup_parser(parser: ArgumentParser):
 async def run(args: Namespace):
     async with BIGSdbIndex() as bigsdb_index:
         gen_strings = read_multiple_fastas(args.fastas)
-        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
-            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
+        async with await bigsdb_index.build_profiler_from_seqdefdb(False, args.seqdefdb, args.schema) as mlst_profiler:
+            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, args.stop_on_fail)
         failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
         if len(failed) > 0:
-            print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
+            print(f"A total of {len(failed)} IDs failed (no profile found):\n{"\n".join(failed)}")
         print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")

 def run_asynchronously(args):
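
A typing invocation matching the README's worked example, using `--stop-on-fail` now that `--exact` has been removed in this diff:

    # Type seq.fasta against schema ID 3 of pubmlst_bordetella_seqdef;
    # stop early if any sequence fails to type
    autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv --stop-on-fail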