19 Commits

SHA1 Message Date
19b23539b3 Added bioconda and personal conda repos to channels
Some checks reported errors
automlst.cli/pipeline/head Something is wrong with the build of this commit
2025-02-26 15:22:32 +00:00
dbd8238cef Added recipe patching script
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 14:14:02 +00:00
4b0fac0801 Added grayskull and curl to environment.yml
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:51:20 +00:00
d78ae19c4f Re-added pytest-cov to conda environment.yml
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:40:20 +00:00
6b8376c470 Added publishing to personal git repo
Some checks failed
automlst.cli/pipeline/head There was a failure building this commit
2025-02-21 06:33:07 +00:00
a4d8de7cc6 Changing CSV argument to --csv or -o
All checks were successful
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag This commit looks good
2025-02-19 19:57:15 +00:00
5ef5b6ac08 Updated pyproject.toml to use license text and updated repo
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-19 16:26:59 +00:00
3aa2916324 Updated pipeline to not publish to system repo if it's a tagged version
All checks were successful
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag This commit looks good
2025-02-19 16:02:31 +00:00
af9c8c70b8 Stop on fail argument now works
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-19 15:50:18 +00:00
319edf36af Added option to output database and schemas lists to CSV 2025-02-19 15:01:57 +00:00
43a17d698b Updated readme to reflect recent changes and discuss versioning
All checks were successful
automlst.cli/pipeline/head This commit looks good
2025-02-18 19:16:39 +00:00
e2f19acd5a Updated CLI to follow APIs specified by the 0.12.0 engine
Some checks failed
automlst.cli/pipeline/head This commit looks good
automlst.cli/pipeline/tag There was a failure building this commit
2025-02-18 16:33:08 +00:00
1f6023b06b Bumped engine version requirement to version 0.11.0 2025-02-18 14:53:21 +00:00
9100f83390 Removed unused file from repository 2025-02-13 21:56:38 +00:00
419aa36e9d Removed unused parameter for typing 2025-02-13 21:56:20 +00:00
ca28068477 Made database name list predictable (sorted) 2025-02-13 21:56:06 +00:00
32dcfd99f8 Updated to reflect changes in Engine 0.10.* 2025-02-12 21:54:34 +00:00
4eca35a556 Fixed --version not showing 2025-02-11 16:56:04 +00:00
81d63bc54d Updated private git credentials and now fails CI publishing steps 2025-01-24 21:01:14 +00:00
12 changed files with 228 additions and 42 deletions

.devcontainer/Dockerfile (new file, 16 lines)

@@ -0,0 +1,16 @@
FROM mcr.microsoft.com/devcontainers/miniconda:1-3
# Copy environment.yml (if found) to a temp location so we update the environment. Also
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
&& rm -rf /tmp/conda-tmp
# [Optional] Uncomment to install a different version of Python than the default
# RUN conda install -y python=3.6 \
# && pip install --no-cache-dir pipx \
# && pipx reinstall-all
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

.devcontainer/devcontainer.json

@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/miniconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Miniconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    },
     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,7 +14,9 @@
     // "forwardPorts": [],
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
+    "postCreateCommand": "pip install -e .",
+    // Configure tool-specific properties.
     "customizations": {
         "vscode": {
             "extensions": [
@@ -20,8 +24,6 @@
             ]
         }
     },
-    // Configure tool-specific properties.
-    // "customizations": {},
     // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
     // "remoteUser": "root"

.devcontainer/noop.txt (new file, 3 lines)

@@ -0,0 +1,3 @@
This file is copied into the container along with environment.yml* from the
parent folder. This is done to prevent the Dockerfile COPY instruction from
failing if no environment.yml is found.

.gitignore (1 line changed)

@@ -212,3 +212,4 @@ pyrightconfig.json
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
+out.csv

Jenkinsfile (27 lines changed)

@@ -2,14 +2,16 @@ pipeline {
     agent {
         kubernetes {
             cloud 'rsys-devel'
-            defaultContainer 'pip'
-            inheritFrom 'pip'
+            defaultContainer 'miniforge3'
+            inheritFrom 'miniforge'
         }
     }
     stages {
         stage("install") {
             steps {
-                sh 'python -m pip install -r requirements.txt'
+                sh 'conda config --add channels bioconda'
+                sh 'conda config --add channels https://git.reslate.systems/api/packages/ydeng/conda'
+                sh 'conda env update -n base -f environment.yml'
             }
         }
         stage("unit tests") {
@@ -22,32 +24,41 @@ pipeline {
         stage("build") {
             steps {
                 sh "python -m build"
+                sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+                sh "python scripts/patch_recipe.py"
+                sh 'conda build autobigs-cli -c bioconda --output-folder conda-bld --verify'
             }
         }
         stage("archive") {
             steps {
-                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
             }
         }
         stage("publish") {
             parallel {
                 stage ("git.reslate.systems") {
+                    when {
+                        not {
+                            tag '*.*.*'
+                        }
+                    }
                     environment {
-                        TOKEN = credentials('git.reslate.systems')
+                        CREDS = credentials('username-password-rs-git')
                     }
                     steps {
-                        sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
                    }
                }
                 stage ("pypi.org") {
                     when {
-                        tag '*.*'
+                        tag '*.*.*'
                     }
                     environment {
                         TOKEN = credentials('pypi.org')
                     }
                     steps {
-                        sh returnStatus: true, script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
+                        sh script: 'python -m twine upload -u __token__ -p ${TOKEN} --non-interactive --disable-progress-bar --verbose dist/*'
                     }
                 }
             }

README.md

@@ -12,6 +12,7 @@ This CLI is capable of exactly what [autoBIGS.engine](https://pypi.org/project/a
 - Fetch the available BIGSdb database schemas for a given MLST database
 - Retrieve exact/non-exact MLST allele variant IDs based off a sequence
 - Retrieve MLST sequence type IDs based off a sequence
+  - Inexact matches are annotated with an asterisk (\*)
 - Output all results to a single CSV

 ## Planned Features for CLI
@@ -40,6 +41,18 @@ Let's say you have a fasta called `seq.fasta` which contains several sequences.
 3. Then, run `autobigs st -h` and familiarize yourself with the parameters needed for sequence typing.
-4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and/or `--exact` to only retrieve exact sequence types, and/or `--stop-on-fail` to stop typing if one of your sequences fail to retrieve any type.
+4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and `--stop-on-fail` to stop typing if one of your sequences fails to retrieve any type.
 5. Sit tight, and wait. The `output.csv` will contain your results once completed.

+## Versioning
+
+The autoBIGS project follows [semantic versioning](https://semver.org/), where the three numbers are interpreted as MAJOR.MINOR.PATCH.
+
+Regarding major version 0 ([spec item 4](https://semver.org/#spec-item-4)), the following adaptation of the semantic versioning definition applies:
+
+1. Given x.Y.z, Y is only incremented when a backwards incompatible change is made.
+2. Given x.y.Z, Z is only incremented when a backwards compatible change is made.
+
+Versions of autoBIGS with a major version of 0 will see numerous changes and patches; as such, behaviour between such versions should be considered highly variable.

environment.yml (new file, 14 lines)

@@ -0,0 +1,14 @@
name: base
channels:
  - bioconda
  - conda-forge
dependencies:
  - pytest
  - pytest-asyncio
  - pytest-cov
  - python-build
  - conda-build
  - twine==6.0.1
  - setuptools_scm
  - grayskull
  - curl
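
To reproduce this environment locally, the same commands used by the Dockerfile and the Jenkinsfile install stage should work; a minimal sketch (channel setup first, as in the install stage above):

    conda config --add channels bioconda
    conda env update -n base -f environment.yml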

pyproject.toml

@@ -6,16 +6,16 @@ build-backend = "setuptools.build_meta"
 name = "autoBIGS.cli"
 dynamic = ["version"]
 readme = "README.md"
-license = {file = "LICENSE"}
+license = {text = "GPL-3.0-or-later"}
 dependencies = [
-    "autoBIGS-engine"
+    "autoBIGS-engine==0.12.*"
 ]
 requires-python = ">=3.12"
 description = "A CLI tool to rapidly fetch fetch MLST profiles given sequences for various diseases."

 [project.urls]
-Repository = "https://github.com/RealYHD/autoBIGS.cli"
-Issues = "https://github.com/RealYHD/autoBIGS.cli/issues"
+Repository = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli"
+Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli/issues"

 [project.scripts]

scripts/patch_recipe.py (new file, 103 lines)

@@ -0,0 +1,103 @@
#!/usr/bin/env python3

import os
import re
import shutil
from os import path

INDENTATION = "  "  # grayskull emits two-space YAML indentation
GRAYSKULL_OUTPUT_PATH = "autoBIGS.cli"
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
LICENSE_SUFFIX = "-or-later"
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.cli"

def _calc_indentation(line: str):
    # Count indentation units in the line's leading whitespace.
    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0

def read_grayskull_output():
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta) as meta_file:
        return meta_file.readlines()

def update_naming_scheme(lines):
    # Rewrite {{ name|lower }} to {{ name|lower|replace(".", "-") }} so the
    # conda package name uses dashes rather than dots.
    modified_lines = []
    for line in lines:
        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
        modified_line = line
        for match in matches:
            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
        modified_lines.append(modified_line)
    return modified_lines

def inject_run_exports(lines: list[str]):
    # Append a run_exports key as the last entry of the top-level build: section.
    package_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "build:\n" and indentation_count == 0:
            package_indent = True
            modified_lines.append(line)
        elif package_indent and indentation_count == 0:
            modified_lines.append(INDENTATION * 1 + "run_exports:\n")
            modified_lines.append(INDENTATION * 2 + "- " + RUN_EXPORTED_VALUE + "\n")
            modified_lines.append(line)  # keep the line that ends the build: section
            package_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def suffix_license(lines: list[str]):
    # Turn "license: GPL-3.0" into "license: GPL-3.0-or-later" in the about: section.
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def inject_home_page(lines: list[str]):
    # Add a home: key as the last entry of the about: section.
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 0:
            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
            modified_lines.append(line)  # keep the line that ends the about: section
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def write_to_original(lines: list[str]):
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta, "w") as file:
        file.writelines(lines)

def rename_recipe_dir():
    # conda build expects the dash-cased recipe directory (autobigs-cli).
    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
    shutil.rmtree(new_recipe_name, ignore_errors=True)
    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)

if __name__ == "__main__":
    modified_recipe_meta = update_naming_scheme(read_grayskull_output())
    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
    modified_recipe_meta = suffix_license(modified_recipe_meta)
    modified_recipe_meta = inject_home_page(modified_recipe_meta)
    write_to_original(modified_recipe_meta)
    rename_recipe_dir()
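
For context, this script runs between recipe generation and the conda build; the build stage in the Jenkinsfile above invokes, verbatim:

    python -m build
    grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'
    python scripts/patch_recipe.py
    conda build autobigs-cli -c bioconda --output-folder conda-bld --verify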

info.py

@@ -1,6 +1,8 @@
 from argparse import ArgumentParser, Namespace
 import asyncio
-from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
+import csv
+from os import path
+from autobigs.engine.analysis.bigsdb import BIGSdbIndex

 def setup_parser(parser: ArgumentParser):
     parser.description = "Fetches the latest BIGSdb MLST database definitions."
@@ -24,22 +26,50 @@ def setup_parser(parser: ArgumentParser):
         help="Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given."
     )
+    parser.add_argument(
+        "--csv", "-o",
+        dest="csv_output",
+        required=False,
+        default=None,
+        help="Output list as CSV at a given path. A suffix is added depending on the action taken."
+    )
     parser.set_defaults(run=run_asynchronously)
     return parser

 async def run(args: Namespace):
     async with BIGSdbIndex() as bigsdb_index:
+        if args.list_dbs and len(args.list_bigsdb_schemas) > 0:
+            print("Cannot specify both database listing and schema listing, please choose one!")
+            exit(1)
         if args.list_dbs:
             known_seqdef_dbs = await bigsdb_index.get_known_seqdef_dbs(force=False)
-            print("\n".join(known_seqdef_dbs.keys()))
+            sorted_seqdef_dbs = [(name, source) for name, source in sorted(known_seqdef_dbs.items())]
+            print("The following are all known BIGS database names, and their source (sorted alphabetically):")
+            print("\n".join(["{0}: {1}".format(name, source) for name, source in sorted_seqdef_dbs]))
+            if args.csv_output:
+                with open(args.csv_output, "w") as csv_out_handle:
+                    writer = csv.writer(csv_out_handle)
+                    writer.writerow(("BIGSdb Names", "Source"))
+                    writer.writerows(sorted_seqdef_dbs)
+                print("\nDatabase output written to {0}".format(args.csv_output))
         for bigsdb_schema_name in args.list_bigsdb_schemas:
             schemas = await bigsdb_index.get_schemas_for_seqdefdb(bigsdb_schema_name)
-            for schema_desc, schema_id in schemas.items():
-                print(f"{schema_desc}: {schema_id}")
+            sorted_schemas = [(name, id) for name, id in sorted(schemas.items())]
+            print("The following are the known schemas for \"{0}\", and their associated IDs:".format(bigsdb_schema_name))
+            print("\n".join(["{0}: {1}".format(name, id) for name, id in sorted_schemas]))
+            if args.csv_output:
+                with open(args.csv_output, "w") as csv_out_handle:
+                    writer = csv.writer(csv_out_handle)
+                    writer.writerow(("Name", "ID"))
+                    writer.writerows(sorted_schemas)
+                print("\nSchema list output written to {0}".format(args.csv_output))
         if not (args.list_dbs or len(args.list_bigsdb_schemas) > 0):
-            print("Nothing to do. Try specifying \"-l\".")
+            print("Nothing to do. Try specifying \"-l\" for a list of known databases, or \"-h\" for more information.")
             exit(1)

 def run_asynchronously(args: Namespace):
     asyncio.run(run(args))
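
A minimal usage sketch for the new CSV option, assuming the console script is installed as `autobigs` (as the README's examples suggest) and using only flags visible in this diff (`-l` and `--csv`/`-o`):

    # List all known BIGSdb sequence definition databases and also write them to CSV
    autobigs info -l --csv databases.csv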

(CLI entry point, file name not captured)

@@ -10,7 +10,7 @@ import importlib
 root_parser = argparse.ArgumentParser(epilog='Use "%(prog)s info -h" to learn how to get available MLST databases, and their available schemas.'
     + ' Once that is done, use "%(prog)s st -h" to learn how to retrieve MLST profiles.'
 )
-subparsers = root_parser.add_subparsers(required=True)
+subparsers = root_parser.add_subparsers(required=False)

 info.setup_parser(subparsers.add_parser(get_module_base_name(info.__name__)))
 st.setup_parser(subparsers.add_parser(get_module_base_name(st.__name__)))
@@ -33,6 +33,8 @@ def run():
         metadata.version("autoBIGS-engine")}.')
     if hasattr(args, "run"):
         args.run(args)
+    elif not args.version:
+        root_parser.print_usage()

 if __name__ == "__main__":

st.py

@@ -2,9 +2,9 @@
 from argparse import ArgumentParser, Namespace
 import asyncio
 import datetime
-from autobigs.engine.data.local.csv import write_mlst_profiles_as_csv
-from autobigs.engine.data.local.fasta import read_multiple_fastas
-from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex
+from autobigs.engine.writing import write_mlst_profiles_as_csv
+from autobigs.engine.reading import read_multiple_fastas
+from autobigs.engine.analysis.bigsdb import BIGSdbIndex

 def setup_parser(parser: ArgumentParser):
@@ -35,15 +35,6 @@ def setup_parser(parser: ArgumentParser):
         help="The output CSV name (.csv will be appended)."
     )
-    parser.add_argument(
-        "--exact", "-ex",
-        action="store_true",
-        dest="exact",
-        required=False,
-        default=False,
-        help="Should run exact matching rather than returning all similar ones"
-    )
     parser.add_argument(
         "--stop-on-fail", "-sof",
         action="store_true",
@@ -58,11 +49,11 @@ def setup_parser(parser: ArgumentParser):
 async def run(args: Namespace):
     async with BIGSdbIndex() as bigsdb_index:
         gen_strings = read_multiple_fastas(args.fastas)
-        async with await bigsdb_index.build_profiler_from_seqdefdb(args.seqdefdb, args.schema) as mlst_profiler:
-            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, exact=args.exact)
+        async with await bigsdb_index.build_profiler_from_seqdefdb(False, args.seqdefdb, args.schema) as mlst_profiler:
+            mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, args.stop_on_fail)
         failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
         if len(failed) > 0:
-            print(f"A total of {len(failed)} IDs failed:\n{"\n".join(failed)}")
+            print(f"A total of {len(failed)} IDs failed (no profile found):\n{"\n".join(failed)}")
         print(f"Completed fetching MLSTs for {len(args.fastas)} sequences.")

 def run_asynchronously(args):
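
A typing invocation matching the README's worked example, using `--stop-on-fail` now that `--exact` has been removed in this diff:

    # Type seq.fasta against schema ID 3 of pubmlst_bordetella_seqdef;
    # stop early if any sequence fails to type
    autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv --stop-on-fail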