5 Commits

Author SHA1 Message Date
2822a483e3 Initial attempt at switching to a conda based build environment
Some checks failed
autoBIGS.engine/pipeline/head There was a failure building this commit
2025-02-21 05:37:56 +00:00
b8cebb8ba4 Infrastructure for concurrent processing implemented
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-02-19 15:49:46 +00:00
7384895578 Writing now uses named MLST profile
All checks were successful
automlst.engine/pipeline/head This commit looks good
automlst.engine/pipeline/tag This commit looks good
2025-02-18 16:03:17 +00:00
5a03c7e8d8 Multiple string profiling now respects grouped queries (for non-WGS)
All checks were successful
automlst.engine/pipeline/head This commit looks good
2025-02-18 15:34:18 +00:00
ddf9cde175 Added a license text to pyproject.toml 2025-02-14 20:47:06 +00:00
16 changed files with 261 additions and 206 deletions

11
.devcontainer/Dockerfile Normal file

@@ -0,0 +1,11 @@
FROM mcr.microsoft.com/devcontainers/anaconda:1-3
# Copy environment.yml (if found) to a temp location so we update the environment. Also
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
&& rm -rf /tmp/conda-tmp
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

.devcontainer/devcontainer.json

@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Anaconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    }
 
     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,14 +14,7 @@
     // "forwardPorts": [],
 
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "mechatroner.rainbow-csv"
-            ]
-        }
-    },
+    // "postCreateCommand": "python --version",
 
     // Configure tool-specific properties.
     // "customizations": {},

3
.devcontainer/noop.txt Normal file

@@ -0,0 +1,3 @@
This file is copied into the container along with environment.yml* from the parent
folder. It is included to prevent the Dockerfile COPY instruction from failing
if no environment.yml is found.

159
.gitignore vendored

@@ -1,6 +1,6 @@
 # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
-# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
-# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
+# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
 
 ### Linux ###
 *~
@@ -17,146 +17,6 @@
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
-
-### Node ###
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-.pnpm-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# Snowpack dependency directory (https://snowpack.dev/)
-web_modules/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional stylelint cache
-.stylelintcache
-
-# Microbundle cache
-.rpt2_cache/
-.rts2_cache_cjs/
-.rts2_cache_es/
-.rts2_cache_umd/
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variable files
-.env
-.env.development.local
-.env.test.local
-.env.production.local
-.env.local
-
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-.parcel-cache
-
-# Next.js build output
-.next
-out
-
-# Nuxt.js build / generate output
-.nuxt
-dist
-
-# Gatsby files
-.cache/
-# Comment in the public line in if your project uses Gatsby and not Next.js
-# https://nextjs.org/blog/next-9-1#public-directory-support
-# public
-
-# vuepress build output
-.vuepress/dist
-
-# vuepress v2.x temp and cache directory
-.temp
-
-# Docusaurus cache and generated files
-.docusaurus
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# TernJS port file
-.tern-port
-
-# Stores VSCode versions used for testing VSCode extensions
-.vscode-test
-
-# yarn v2
-.yarn/cache
-.yarn/unplugged
-.yarn/build-state.yml
-.yarn/install-state.gz
-.pnp.*
-
-### Node Patch ###
-# Serverless Webpack directories
-.webpack/
-
-# Optional stylelint cache
-
-# SvelteKit build / generate output
-.svelte-kit
 
 ### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -202,6 +62,7 @@ htmlcov/
 .nox/
 .coverage
 .coverage.*
+.cache
 nosetests.xml
 coverage.xml
 *.cover
@@ -215,6 +76,7 @@ cover/
 *.pot
 
 # Django stuff:
+*.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
@@ -278,6 +140,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
+.env
 .venv
 env/
 venv/
@@ -326,13 +189,6 @@ poetry.toml
 # LSP config files
 pyrightconfig.json
-
-### Svelte ###
-# gitignore template for the SvelteKit, frontend web component framework
-# website: https://kit.svelte.dev/
-.svelte-kit/
-package
 
 ### VisualStudioCode ###
 .vscode/*
 !.vscode/settings.json
@@ -352,9 +208,8 @@ package
 .history
 .ionide
 
-# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
+# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
 
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
-output
-*.private.*
+conda-bld

5
.vscode/extensions.json vendored Normal file

@@ -0,0 +1,5 @@
{
    "recommendations": [
        "piotrpalarz.vscode-gitignore-generator"
    ]
}

10
Jenkinsfile vendored

@@ -9,7 +9,7 @@ pipeline {
 	stages {
 		stage("install") {
 			steps {
-				sh 'python -m pip install -r requirements.txt'
+				sh 'conda env update -n base -f environment.yml'
 			}
 		}
 		stage("unit tests") {
@@ -22,11 +22,14 @@ pipeline {
 		stage("build") {
 			steps {
 				sh "python -m build"
+				sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+				sh "python scripts/patch_recipe.py"
+				sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
 			}
 		}
 		stage("archive") {
 			steps {
-				archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+				archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
 			}
 		}
 		stage("publish") {
@@ -36,7 +39,8 @@ pipeline {
 				CREDS = credentials('username-password-rs-git')
 			}
 			steps {
-				sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+				sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+				sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
 			}
 		}
 		stage ("pypi.org") {

44
autobigs-engine/meta.yaml Normal file

@@ -0,0 +1,44 @@
{% set name = "autoBIGS.engine" %}
{% set version = "0.12.1.dev1+gb8cebb8.d20250221" %}

package:
  name: {{ name|lower|replace(".", "-") }}
  version: {{ version }}

source:
  url: file:///workspaces/autoBIGS.engine/dist/autobigs_engine-0.12.1.dev1%2Bgb8cebb8.d20250221.tar.gz
  sha256: c86441b94f935cfa414ff28ca4c026a070e0fb15988ea3bb7d1a942859a09b16

build:
  noarch: python
  script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
  number: 0
  run_exports:
    - {{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}

requirements:
  host:
    - python >=3.12
    - setuptools >=64
    - setuptools-scm >=8
    - pip
  run:
    - python >=3.12
    - biopython ==1.85
    - aiohttp ==3.11.*

test:
  imports:
    - autobigs
  commands:
    - pip check
  requires:
    - pip

about:
  summary: A library to rapidly fetch MLST profiles given sequences for various diseases.
  license: GPL-3.0-or-later
  license_file: LICENSE
  home: https://github.com/Syph-and-VPD-Lab/autoBIGS.engine

extra:
  recipe-maintainers:
    - Harrison Deng

15
environment.yml Normal file

@@ -0,0 +1,15 @@
name: ci
channels:
  - bioconda
  - conda-forge
dependencies:
  - aiohttp==3.11.*
  - biopython==1.85
  - pytest
  - pytest-asyncio
  - python-build
  - conda-build
  - twine
  - setuptools_scm
  - pytest-cov
  - grayskull

pyproject.toml

@@ -13,6 +13,7 @@ dependencies = [
 ]
 
 requires-python = ">=3.12"
 description = "A library to rapidly fetch MLST profiles given sequences for various diseases."
+license = {text = "GPL-3.0-or-later"}
 
 [project.urls]
 Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"

requirements.txt

@@ -1,8 +0,0 @@
-aiohttp[speedups]==3.11.*
-biopython==1.85
-pytest
-pytest-asyncio
-build
-twine
-setuptools_scm
-pytest-cov

103
scripts/patch_recipe.py Normal file

@@ -0,0 +1,103 @@
#!/usr/bin/env python3

import argparse
from os import fdopen, path
import os
import re
import shutil
from sys import argv
import tempfile

INDENTATION = "  "
GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
LICENSE_SUFFIX = "-or-later"
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"

def _calc_indentation(line: str):
    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0

def read_grayskull_output():
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    meta_file = open(original_meta)
    lines = meta_file.readlines()
    meta_file.close()
    return lines

def update_naming_scheme(lines):
    modified_lines = []
    for line in lines:
        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
        modified_line = line
        for match in matches:
            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
        modified_lines.append(modified_line)
    return modified_lines

def inject_run_exports(lines: list[str]):
    package_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "build:\n" and indentation_count == 0:
            package_indent = True
            modified_lines.append(line)
        elif package_indent and indentation_count == 0:
            modified_lines.append(INDENTATION*1 + "run_exports:\n")
            modified_lines.append(INDENTATION*2 + "- " + RUN_EXPORTED_VALUE + "\n")
            package_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def suffix_license(lines: list[str]):
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def inject_home_page(lines: list[str]):
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 0:
            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def write_to_original(lines: list[str]):
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta, "w") as file:
        file.writelines(lines)

def rename_recipe_dir():
    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
    shutil.rmtree(new_recipe_name, ignore_errors=True)
    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)

if __name__ == "__main__":
    original_grayskull_out = read_grayskull_output()
    modified_recipe_meta = update_naming_scheme(original_grayskull_out)
    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
    modified_recipe_meta = suffix_license(modified_recipe_meta)
    modified_recipe_meta = inject_home_page(modified_recipe_meta)
    write_to_original(modified_recipe_meta)
    rename_recipe_dir()
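
Editor's note: to see what update_naming_scheme does to a grayskull-generated recipe line, here is a minimal standalone sketch; the sample line is hypothetical, and the substitution mirrors the regex logic of the function above:

import re

# A line as grayskull would emit it for a dotted PyPI name such as "autoBIGS.engine".
line = 'name: {{ name|lower }}\n'

# Splice |replace(".", "-") in after |lower, yielding a conda-legal package name.
patched = re.sub(r'(\{\{\s*name\|lower)(\s+\}\})', r'\1|replace(".", "-")\2', line)
print(patched, end="")  # name: {{ name|lower|replace(".", "-") }}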

src/autobigs/engine/analysis/bigsdb.py

@@ -22,15 +22,15 @@ from Bio.Align import PairwiseAligner
 class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
 
     @abstractmethod
-    def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
+    def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         pass
 
     @abstractmethod
-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         pass
 
     @abstractmethod
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         pass
 
     @abstractmethod
@@ -52,14 +52,14 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
     async def __aenter__(self):
         return self
 
-    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[str], str]) -> AsyncGenerator[Allele, Any]:
+    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
         uri_path = "sequence"
-        if isinstance(query_sequence_strings, str):
+        if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
            query_sequence_strings = [query_sequence_strings]
         for sequence_string in query_sequence_strings:
             async with self._http_client.post(uri_path, json={
-                "sequence": sequence_string,
+                "sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
                 "partial_matches": True
             }) as response:
                 sequence_response: dict = await response.json()
@@ -70,7 +70,8 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                 for allele_loci, alleles in exact_matches.items():
                     for allele in alleles:
                         alelle_id = allele["allele_id"]
-                        yield Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                        result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
             elif "partial_matches" in sequence_response:
                 partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
                 for allele_loci, partial_match in partial_matches.items():
@@ -82,23 +83,33 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                             gaps=int(partial_match["gaps"]),
                             match_metric=int(partial_match["bitscore"])
                         )
-                        yield Allele(
+                        result_allele = Allele(
                             allele_locus=allele_loci,
                             allele_variant=str(partial_match["allele"]),
                             partial_match_profile=partial_match_profile
                         )
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
             else:
-                raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
+                raise NoBIGSdbMatchesException(self._database_name, self._schema_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
 
-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         uri_path = "designations"
         allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
+        names_list = []
+        def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
+            if isinstance(allele, Allele):
+                allele_val = allele
+            else:
+                allele_val = allele[1]
+                names_list.append(allele[0])
+            allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})
+
         if isinstance(alleles, AsyncIterable):
             async for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         else:
             for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         request_json = {
             "designations": allele_request_dict
         }
@@ -111,26 +122,33 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
             schema_fields_returned.setdefault("clonal_complex", "unknown")
             schema_exact_matches: dict = response_json["exact_matches"]
             for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
+                if len(exact_match_alleles) > 1:
+                    raise ValueError(f"Unexpected number of alleles returned for exact match (Expected 1, retrieved {len(exact_match_alleles)})")
                 allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
             if len(allele_set) == 0:
                 raise ValueError("Passed in no alleles.")
-            return MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            result_mlst_profile = MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            if len(names_list) > 0:
+                result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)), result_mlst_profile)
+            return result_mlst_profile
 
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         alleles = self.determine_mlst_allele_variants(query_sequence_strings)
         return await self.determine_mlst_st(alleles)
 
     async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
+        tasks = []
         async for named_strings in query_named_string_groups:
-            for named_string in named_strings:
+            tasks.append(self.profile_string(named_strings))
+        for task in asyncio.as_completed(tasks):
             try:
-                yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence])))
+                yield await task
             except NoBIGSdbMatchesException as e:
                 if stop_on_fail:
                     raise e
-                yield NamedMLSTProfile(named_string.name, None)
+                causal_name = e.get_causal_query_name()
+                if causal_name is None:
+                    raise ValueError("Missing query name despite requiring names.")
+                else:
+                    yield NamedMLSTProfile(causal_name, None)
 
     async def close(self):
         await self._http_client.close()
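
Editor's note: the rewritten profile_multiple_strings schedules one profiling coroutine per query group and yields results as they finish, rather than awaiting each group sequentially. A minimal self-contained sketch of that asyncio.as_completed pattern (profile_one is a stand-in for profile_string, not the real profiler):

import asyncio

async def profile_one(name: str, delay: float) -> str:
    await asyncio.sleep(delay)  # simulates the HTTP round trip to BIGSdb
    return name

async def profile_all():
    # Schedule everything up front, then yield in completion order.
    tasks = [profile_one("slow_query", 0.2), profile_one("fast_query", 0.1)]
    for task in asyncio.as_completed(tasks):
        yield await task

async def main():
    async for result in profile_all():
        print(result)  # "fast_query" arrives first, regardless of submission order

asyncio.run(main())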

src/autobigs/engine/exceptions/database.py

@@ -5,8 +5,12 @@ class BIGSDbDatabaseAPIException(Exception):
 
 class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
-    def __init__(self, database_name: str, database_schema_id: int, *args):
+    def __init__(self, database_name: str, database_schema_id: int, query_name: Union[None, str], *args):
+        self._query_name = query_name
         super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
 
+    def get_causal_query_name(self) -> Union[str, None]:
+        return self._query_name
+
 class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
     def __init__(self, database_name: str, database_schema_id: int, *args):
src/autobigs/engine/reading.py

@@ -13,5 +13,8 @@ async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]
     return results
 
 async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
+    tasks = []
     for handle in handles:
-        yield await read_fasta(handle)
+        tasks.append(read_fasta(handle))
+    for task in asyncio.as_completed(tasks):
+        yield await task

src/autobigs/engine/writing.py

@@ -3,7 +3,7 @@ import csv
 from os import PathLike
 from typing import AsyncIterable, Collection, Mapping, Sequence, Union
-from autobigs.engine.structures.mlst import Allele, MLSTProfile
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
 
 def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
@@ -17,12 +17,14 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
         result[locus] = tuple(result[locus])  # type: ignore
     return dict(result)
 
-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
     failed = list()
     with open(handle, "w", newline='') as filehandle:
         header = None
         writer: Union[csv.DictWriter, None] = None
-        async for name, mlst_profile in mlst_profiles_iterable:
+        async for named_mlst_profile in mlst_profiles_iterable:
+            name = named_mlst_profile.name
+            mlst_profile = named_mlst_profile.mlst_profile
             if mlst_profile is None:
                 failed.append(name)
                 continue
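
Editor's note: the writer now consumes NamedMLSTProfile records instead of (name, profile) tuples, so a failed query travels with its name attached. A minimal sketch of the new contract, using stand-in dataclasses in place of the engine's structures module (field names are illustrative):

import asyncio
from dataclasses import dataclass
from typing import Optional

@dataclass
class MLSTProfile:  # stand-in for autobigs.engine.structures.mlst.MLSTProfile
    alleles: tuple
    sequence_type: str
    clonal_complex: str

@dataclass
class NamedMLSTProfile:  # pairs a query name with an optional profile
    name: str
    mlst_profile: Optional[MLSTProfile]

async def profiles():
    yield NamedMLSTProfile("sample_1", MLSTProfile((), "1", "cc1"))
    yield NamedMLSTProfile("sample_2", None)  # a failed query still carries its name

async def main():
    failed = []
    async for named in profiles():  # the same unpacking the writer performs
        if named.mlst_profile is None:
            failed.append(named.name)
    print(failed)  # ['sample_2']

asyncio.run(main())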

tests/autobigs/engine/test_writing.py

@@ -3,7 +3,7 @@ from typing import AsyncIterable, Iterable
 import pytest
 from autobigs.engine.structures.alignment import AlignmentStats
 from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
-from autobigs.engine.structures.mlst import Allele, MLSTProfile
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
 import tempfile
 from csv import reader
 from os import path
@@ -11,20 +11,20 @@ from os import path
 @pytest.fixture
 def dummy_alphabet_mlst_profile():
-    return MLSTProfile((
+    return NamedMLSTProfile("name", MLSTProfile((
         Allele("A", "1", None),
         Allele("D", "1", None),
         Allele("B", "1", None),
         Allele("C", "1", None),
         Allele("C", "2", AlignmentStats(90, 10, 0, 90))
-    ), "mysterious", "very mysterious")
+    ), "mysterious", "very mysterious"))
 
 async def iterable_to_asynciterable(iterable: Iterable):
     for iterated in iterable:
         yield iterated
 
 async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile):
-    dummy_profiles = [("test_1", dummy_alphabet_mlst_profile)]
+    dummy_profiles = [dummy_alphabet_mlst_profile]
     with tempfile.TemporaryDirectory() as temp_dir:
         output_path = path.join(temp_dir, "out.csv")
         await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
@@ -34,8 +34,8 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
     target_columns = lines[4:]
     assert target_columns == sorted(target_columns)
 
-async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: MLSTProfile):
-    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.alleles)
+async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
+    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles)  # type: ignore
     expected_mapping = {
         "A": "1",
         "B": "1",
@@ -44,4 +44,4 @@ async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profil
     }
     for allele_name, allele_ids in mapping.items():
         assert allele_name in expected_mapping
         assert allele_ids == expected_mapping[allele_name]