Compare commits: 0.7.2...fd536862e2 (47 commits)
Commits in range (newest first):
fd536862e2, 576dc303f4, 2822a483e3, b8cebb8ba4, 7384895578, 5a03c7e8d8, ddf9cde175, 2e8cdd8da9, d0318536b2, 765cf9d418,
348c3d00b4, 1c3f7f9ed8, e4ddaf2e8c, 73aade2bde, af8590baa7, 36bca1b70d, 09a693b696, f76bf86ef6, a60daf3ee2, fbfd993269,
ba606c35a9, 4183840ba0, 7fb3eab5b6, 175a51f968, 897f7ee922, bfc286e6b0, a88225fcff, c18d817cd9, f462e6d5e0, e568e9fb2c,
4b9eb8674d, f75707e4fe, b4845fab34, fe999f1cab, 85946eb110, a27e09da31, ba2b688e89, 49f31b7943, 1c6e1cfb35, fb99526162,
ff8a1aff08, 341ca933a3, 3e3898334f, ba1f0aa318, 6d0157581f, 4bcbfa0c6a, ca0f9673b0

.devcontainer/Dockerfile (new file, +11)
@@ -0,0 +1,11 @@
+FROM mcr.microsoft.com/devcontainers/anaconda:1-3
+
+# Copy environment.yml (if found) to a temp location so we update the environment. Also
+# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
+COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
+RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
+    && rm -rf /tmp/conda-tmp
+
+# [Optional] Uncomment this section to install additional OS packages.
+# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+#     && apt-get -y install --no-install-recommends <your-package-list-here>

.devcontainer/devcontainer.json (modified)
@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Anaconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    }
 
     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,14 +14,7 @@
     // "forwardPorts": [],
 
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "mechatroner.rainbow-csv"
-            ]
-        }
-    }
+    // "postCreateCommand": "python --version",
 
     // Configure tool-specific properties.
     // "customizations": {},

.devcontainer/noop.txt (new file, +3)
@@ -0,0 +1,3 @@
+This file copied into the container along with environment.yml* from the parent
+folder. This file is included to prevents the Dockerfile COPY instruction from
+failing if no environment.yml is found.

.gitignore (vendored, 159 lines changed)
@@ -1,6 +1,6 @@
 # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
-# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
-# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
+# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
 
 ### Linux ###
 *~
@@ -17,146 +17,6 @@
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
 
-### Node ###
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-.pnpm-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# Snowpack dependency directory (https://snowpack.dev/)
-web_modules/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional stylelint cache
-.stylelintcache
-
-# Microbundle cache
-.rpt2_cache/
-.rts2_cache_cjs/
-.rts2_cache_es/
-.rts2_cache_umd/
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variable files
-.env
-.env.development.local
-.env.test.local
-.env.production.local
-.env.local
-
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-.parcel-cache
-
-# Next.js build output
-.next
-out
-
-# Nuxt.js build / generate output
-.nuxt
-dist
-
-# Gatsby files
-.cache/
-# Comment in the public line in if your project uses Gatsby and not Next.js
-# https://nextjs.org/blog/next-9-1#public-directory-support
-# public
-
-# vuepress build output
-.vuepress/dist
-
-# vuepress v2.x temp and cache directory
-.temp
-
-# Docusaurus cache and generated files
-.docusaurus
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# TernJS port file
-.tern-port
-
-# Stores VSCode versions used for testing VSCode extensions
-.vscode-test
-
-# yarn v2
-.yarn/cache
-.yarn/unplugged
-.yarn/build-state.yml
-.yarn/install-state.gz
-.pnp.*
-
-### Node Patch ###
-# Serverless Webpack directories
-.webpack/
-
-# Optional stylelint cache
-
-# SvelteKit build / generate output
-.svelte-kit
-
 ### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -202,6 +62,7 @@ htmlcov/
 .nox/
 .coverage
 .coverage.*
+.cache
 nosetests.xml
 coverage.xml
 *.cover
@@ -215,6 +76,7 @@ cover/
 *.pot
 
 # Django stuff:
+*.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
@@ -278,6 +140,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
+.env
 .venv
 env/
 venv/
@@ -326,13 +189,6 @@ poetry.toml
 # LSP config files
 pyrightconfig.json
 
-### Svelte ###
-# gitignore template for the SvelteKit, frontend web component framework
-# website: https://kit.svelte.dev/
-
-.svelte-kit/
-package
-
 ### VisualStudioCode ###
 .vscode/*
 !.vscode/settings.json
@@ -352,9 +208,8 @@ package
 .history
 .ionide
 
-# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
+# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
 
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
 
-output
-*.private.*
+conda-bld

.vscode/extensions.json (vendored, new file, +5)
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "piotrpalarz.vscode-gitignore-generator"
+    ]
+}

Jenkinsfile (vendored, 14 lines changed)
@@ -2,14 +2,14 @@ pipeline {
     agent {
         kubernetes {
             cloud 'rsys-devel'
-            defaultContainer 'pip'
-            inheritFrom 'pip'
+            defaultContainer 'miniforge3'
+            inheritFrom 'miniforge'
         }
     }
     stages {
         stage("install") {
             steps {
-                sh 'python -m pip install -r requirements.txt'
+                sh 'conda env update -n base -f environment.yml'
             }
         }
         stage("unit tests") {
@@ -22,11 +22,14 @@
         stage("build") {
             steps {
                 sh "python -m build"
+                sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+                sh "python scripts/patch_recipe.py"
+                sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
             }
         }
         stage("archive") {
             steps {
-                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
             }
         }
         stage("publish") {
@@ -36,7 +39,8 @@ pipeline {
                 CREDS = credentials('username-password-rs-git')
             }
             steps {
-                sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+                sh 'curl --user ${CREDS_USR}:${CRED_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CRED_USR}/conda/$(basename conda-bld/**/*.conda)'
             }
         }
         stage ("pypi.org") {

README.md (modified)
@@ -1,6 +1,6 @@
 # autoBIGS.Engine
 
-A python library implementing common BIGSdb MLST schemes and databases. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
+A python library implementing common BIGSdb MLST schemes and databases accesses for the purpose of typing sequences automatically. Implementation follows the RESTful API outlined by the official [BIGSdb documentation](https://bigsdb.readthedocs.io/en/latest/rest.html) up to `V1.50.0`.
 
 ## Features
 

autobigs-engine/meta.yaml (new file, +44)
@@ -0,0 +1,44 @@
+{% set name = "autoBIGS.engine" %}
+{% set version = "0.12.1.dev1+gb8cebb8.d20250221" %}
+
+package:
+  name: {{ name|lower|replace(".", "-") }}
+  version: {{ version }}
+
+source:
+  url: file:///workspaces/autoBIGS.engine/dist/autobigs_engine-0.12.1.dev1%2Bgb8cebb8.d20250221.tar.gz
+  sha256: c86441b94f935cfa414ff28ca4c026a070e0fb15988ea3bb7d1a942859a09b16
+
+build:
+  noarch: python
+  script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
+  number: 0
+  run_exports:
+    - {{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}
+requirements:
+  host:
+    - python >=3.12
+    - setuptools >=64
+    - setuptools-scm >=8
+    - pip
+  run:
+    - python >=3.12
+    - biopython ==1.85
+    - aiohttp ==3.11.*
+
+test:
+  imports:
+    - autobigs
+  commands:
+    - pip check
+  requires:
+    - pip
+
+about:
+  summary: A library to rapidly fetch fetch MLST profiles given sequences for various diseases.
+  license: GPL-3.0-or-later
+  license_file: LICENSE
+  home: https://github.com/Syph-and-VPD-Lab/autoBIGS.engine
+extra:
+  recipe-maintainers:
+    - Harrison Deng

environment.yml (new file, +15)
@@ -0,0 +1,15 @@
+name: ci
+channels:
+  - bioconda
+  - conda-forge
+dependencies:
+  - aiohttp==3.11.*
+  - biopython==1.85
+  - pytest
+  - pytest-asyncio
+  - python-build
+  - conda-build
+  - twine==6.0.1
+  - setuptools_scm
+  - pytest-cov
+  - grayskull

pyproject.toml (modified)
@@ -11,12 +11,14 @@ dependencies = [
     "biopython==1.85",
     "aiohttp[speedups]==3.11.*",
 ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 description = "A library to rapidly fetch fetch MLST profiles given sequences for various diseases."
+license = {text = "GPL-3.0-or-later"}
 
 [project.urls]
-Repository = "https://github.com/RealYHD/autoBIGS.engine"
-Issues = "https://github.com/RealYHD/autoBIGS.engine/issues"
+Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+Source = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+Issues = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine/issues"
 
 [tool.setuptools_scm]
 

requirements.txt (deleted)
@@ -1,8 +0,0 @@
-aiohttp[speedups]==3.11.*
-biopython==1.85
-pytest
-pytest-asyncio
-build
-twine
-setuptools_scm
-pytest-cov

scripts/patch_recipe.py (new file, +103)
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+import argparse
+from os import fdopen, path
+import os
+import re
+import shutil
+from sys import argv
+import tempfile
+
+INDENTATION = "  "
+GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
+RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
+LICENSE_SUFFIX = "-or-later"
+HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"
+
+def _calc_indentation(line: str):
+    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0
+
+def read_grayskull_output():
+    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
+    original_meta = path.join(original_recipe, "meta.yaml")
+    meta_file = open(original_meta)
+    lines = meta_file.readlines()
+    meta_file.close()
+    return lines
+
+def update_naming_scheme(lines):
+    modified_lines = []
+    for line in lines:
+        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
+        modified_line = line
+        for match in matches:
+            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
+        modified_lines.append(modified_line)
+    return modified_lines
+
+def inject_run_exports(lines: list[str]):
+    package_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "build:\n" and indentation_count == 0:
+            package_indent = True
+            modified_lines.append(line)
+        elif package_indent and indentation_count == 0:
+            modified_lines.append(INDENTATION*1 + "run_exports:\n")
+            modified_lines.append(INDENTATION*2 + "- " + RUN_EXPORTED_VALUE + "\n")
+            package_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def suffix_license(lines: list[str]):
+    about_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "about:\n" and indentation_count == 0:
+            about_indent = True
+            modified_lines.append(line)
+        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
+            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
+            about_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def inject_home_page(lines: list[str]):
+    about_indent = False
+    modified_lines = []
+    for line in lines:
+        indentation_count = _calc_indentation(line)
+        if line == "about:\n" and indentation_count == 0:
+            about_indent = True
+            modified_lines.append(line)
+        elif about_indent and indentation_count == 0:
+            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
+            about_indent = False
+        else:
+            modified_lines.append(line)
+    return modified_lines
+
+def write_to_original(lines: list[str]):
+    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
+    original_meta = path.join(original_recipe, "meta.yaml")
+    with open(original_meta, "w") as file:
+        file.writelines(lines)
+
+def rename_recipe_dir():
+    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
+    shutil.rmtree(new_recipe_name, ignore_errors=True)
+    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)
+
+if __name__ == "__main__":
+    original_grayskull_out = read_grayskull_output()
+    modified_recipe_meta = None
+    modified_recipe_meta = update_naming_scheme(original_grayskull_out)
+    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
+    modified_recipe_meta = suffix_license(modified_recipe_meta)
+    modified_recipe_meta = inject_home_page(modified_recipe_meta)
+    write_to_original(modified_recipe_meta)
+    rename_recipe_dir()
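
Note on the recipe patcher: update_naming_scheme is a regex splice that rewrites grayskull's {{ name|lower }} into the hyphenated form. A minimal sketch of that one transformation (the sample input line is hypothetical, mimicking grayskull output):

    import re

    line = 'name: {{ name|lower }}\n'
    # The empty group () captures the position just after "lower", where the
    # replace filter is spliced in.
    match = next(re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line))
    patched = line[:match.start(1)] + r'|replace(".", "-")' + line[match.end(1):]
    print(patched)  # name: {{ name|lower|replace(".", "-") }}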

src/autobigs/engine/analysis/bigsdb.py (new file, +220)
@@ -0,0 +1,220 @@
+from abc import abstractmethod
+import asyncio
+from collections import defaultdict
+from contextlib import AbstractAsyncContextManager
+import csv
+from os import path
+import os
+import shutil
+import tempfile
+from typing import Any, AsyncGenerator, AsyncIterable, Iterable, Mapping, Sequence, Set, Union
+
+from aiohttp import ClientSession, ClientTimeout
+
+from autobigs.engine.reading import read_fasta
+from autobigs.engine.structures.alignment import PairwiseAlignment
+from autobigs.engine.structures.genomics import NamedString
+from autobigs.engine.structures.mlst import Allele, NamedMLSTProfile, AlignmentStats, MLSTProfile
+from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
+
+from Bio.Align import PairwiseAligner
+
+class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
+
+    @abstractmethod
+    def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
+        pass
+
+    @abstractmethod
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
+        pass
+
+    @abstractmethod
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
+        pass
+
+    @abstractmethod
+    def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
+        pass
+
+    @abstractmethod
+    async def close(self):
+        pass
+
+class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
+
+    def __init__(self, database_api: str, database_name: str, schema_id: int):
+        self._database_name = database_name
+        self._schema_id = schema_id
+        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
+        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(60))
+
+    async def __aenter__(self):
+        return self
+
+    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
+        # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
+        uri_path = "sequence"
+        if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
+            query_sequence_strings = [query_sequence_strings]
+        for sequence_string in query_sequence_strings:
+            async with self._http_client.post(uri_path, json={
+                "sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
+                "partial_matches": True
+            }) as response:
+                sequence_response: dict = await response.json()
+
+                if "exact_matches" in sequence_response:
+                    # loci -> list of alleles with id and loci
+                    exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
+                    for allele_loci, alleles in exact_matches.items():
+                        for allele in alleles:
+                            alelle_id = allele["allele_id"]
+                            result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                            yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
+                elif "partial_matches" in sequence_response:
+                    partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
+                    for allele_loci, partial_match in partial_matches.items():
+                        if len(partial_match) <= 0:
+                            continue
+                        partial_match_profile = AlignmentStats(
+                            percent_identity=float(partial_match["identity"]),
+                            mismatches=int(partial_match["mismatches"]),
+                            gaps=int(partial_match["gaps"]),
+                            match_metric=int(partial_match["bitscore"])
+                        )
+                        result_allele = Allele(
+                            allele_locus=allele_loci,
+                            allele_variant=str(partial_match["allele"]),
+                            partial_match_profile=partial_match_profile
+                        )
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
+                else:
+                    raise NoBIGSdbMatchesException(self._database_name, self._schema_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
+
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
+        uri_path = "designations"
+        allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
+        names_list = []
+        def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
+            if isinstance(allele, Allele):
+                allele_val = allele
+            else:
+                allele_val = allele[1]
+                names_list.append(allele[0])
+            allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})
+
+        if isinstance(alleles, AsyncIterable):
+            async for allele in alleles:
+                insert_allele_to_request_dict(allele)
+        else:
+            for allele in alleles:
+                insert_allele_to_request_dict(allele)
+        request_json = {
+            "designations": allele_request_dict
+        }
+        async with self._http_client.post(uri_path, json=request_json) as response:
+            response_json: dict = await response.json()
+            allele_set: Set[Allele] = set()
+            response_json.setdefault("fields", dict())
+            schema_fields_returned: dict[str, str] = response_json["fields"]
+            schema_fields_returned.setdefault("ST", "unknown")
+            schema_fields_returned.setdefault("clonal_complex", "unknown")
+            schema_exact_matches: dict = response_json["exact_matches"]
+            for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
+                allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
+            if len(allele_set) == 0:
+                raise ValueError("Passed in no alleles.")
+            result_mlst_profile = MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            if len(names_list) > 0:
+                result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)), result_mlst_profile)
+            return result_mlst_profile
+
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
+        alleles = self.determine_mlst_allele_variants(query_sequence_strings)
+        return await self.determine_mlst_st(alleles)
+
+    async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
+        tasks = []
+        async for named_strings in query_named_string_groups:
+            tasks.append(self.profile_string(named_strings))
+        for task in asyncio.as_completed(tasks):
+            try:
+                yield await task
+            except NoBIGSdbMatchesException as e:
+                if stop_on_fail:
+                    raise e
+                causal_name = e.get_causal_query_name()
+                if causal_name is None:
+                    raise ValueError("Missing query name despite requiring names.")
+                else:
+                    yield NamedMLSTProfile(causal_name, None)
+
+    async def close(self):
+        await self._http_client.close()
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+class BIGSdbIndex(AbstractAsyncContextManager):
+    KNOWN_BIGSDB_APIS = {
+        "https://bigsdb.pasteur.fr/api",
+        "https://rest.pubmlst.org"
+    }
+
+    def __init__(self):
+        self._http_client = ClientSession()
+        self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
+        self._seqdefdb_schemas: dict[str, Union[Mapping[str, int], None]] = dict()
+        super().__init__()
+
+    async def __aenter__(self):
+        return self
+
+    async def get_known_seqdef_dbs(self, force: bool = False) -> Mapping[str, str]:
+        if self._known_seqdef_dbs_origin is not None and not force:
+            return self._known_seqdef_dbs_origin
+        known_seqdef_dbs = dict()
+        for known_bigsdb in BIGSdbIndex.KNOWN_BIGSDB_APIS:
+            async with self._http_client.get(f"{known_bigsdb}/db") as response:
+                response_json_databases = await response.json()
+                for database_group in response_json_databases:
+                    for database_info in database_group["databases"]:
+                        if str(database_info["name"]).endswith("seqdef"):
+                            known_seqdef_dbs[database_info["name"]] = known_bigsdb
+        self._known_seqdef_dbs_origin = dict(known_seqdef_dbs)
+        return self._known_seqdef_dbs_origin
+
+    async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str:
+        known_databases = await self.get_known_seqdef_dbs()
+        if seqdef_db_name not in known_databases:
+            raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
+        return known_databases[seqdef_db_name]
+
+    async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
+        if seqdef_db_name in self._seqdefdb_schemas and not force:
+            return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
+        uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
+        async with self._http_client.get(uri_path) as response:
+            response_json = await response.json()
+            schema_descriptions: Mapping[str, int] = dict()
+            for scheme_definition in response_json["schemes"]:
+                scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
+                scheme_desc: str = scheme_definition["description"]
+                schema_descriptions[scheme_desc] = scheme_id
+            self._seqdefdb_schemas[seqdef_db_name] = schema_descriptions
+            return self._seqdefdb_schemas[seqdef_db_name] # type: ignore
+
+    async def build_profiler_from_seqdefdb(self, local: bool, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
+        return get_BIGSdb_MLST_profiler(local, await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
+
+    async def close(self):
+        await self._http_client.close()
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+def get_BIGSdb_MLST_profiler(local: bool, database_api: str, database_name: str, schema_id: int):
+    if local:
+        raise NotImplementedError()
+    return RemoteBIGSdbMLSTProfiler(database_api=database_api, database_name=database_name, schema_id=schema_id)
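
A minimal usage sketch of the new API. The database name and schema ID 3 are taken from the test parameters later in this diff; the query sequence is a placeholder:

    import asyncio
    from autobigs.engine.analysis.bigsdb import BIGSdbIndex

    async def main():
        async with BIGSdbIndex() as index:
            # Resolve the hosting API for the seqdef database, then build a remote profiler.
            profiler = await index.build_profiler_from_seqdefdb(False, "pubmlst_bordetella_seqdef", 3)
            async with profiler:
                # Plain strings yield a bare MLSTProfile; NamedString inputs yield a NamedMLSTProfile.
                profile = await profiler.profile_string(["ATG..."])  # placeholder sequence
                print(profile.sequence_type, profile.clonal_complex)

    asyncio.run(main())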

(deleted file)
@@ -1,41 +0,0 @@
-import csv
-from os import PathLike
-from typing import AsyncIterable, Mapping, Sequence, Union
-
-from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
-
-
-def dict_loci_alleles_variants_from_loci(alleles_map: Mapping[str, Sequence[Allele]]):
-    result_dict: dict[str, Union[list[str], str]] = {}
-    for loci, alleles in alleles_map.items():
-        if len(alleles) == 1:
-            result_dict[loci] = alleles[0].allele_variant
-        else:
-            result_locis = list()
-            for allele in alleles:
-                result_locis.append(allele.allele_variant)
-            result_dict[loci] = result_locis
-    return result_dict
-
-
-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
-    failed = list()
-    with open(handle, "w", newline='') as filehandle:
-        header = None
-        writer: Union[csv.DictWriter, None] = None
-        async for name, mlst_profile in mlst_profiles_iterable:
-            if mlst_profile is None:
-                failed.append(name)
-                continue
-            if writer is None:
-                header = ["id", "st", "clonal-complex", *mlst_profile.alleles.keys()]
-                writer = csv.DictWriter(filehandle, fieldnames=header)
-                writer.writeheader()
-            row_dictionary = {
-                "st": mlst_profile.sequence_type,
-                "clonal-complex": mlst_profile.clonal_complex,
-                "id": name,
-                **dict_loci_alleles_variants_from_loci(mlst_profile.alleles)
-            }
-            writer.writerow(rowdict=row_dictionary)
-    return failed

(deleted file)
@@ -1,16 +0,0 @@
-import asyncio
-from io import TextIOWrapper
-from typing import Any, AsyncGenerator, Generator, Iterable, Sequence, Union
-from Bio import SeqIO
-
-from autobigs.engine.data.structures.genomics import NamedString
-
-async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
-    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
-    for fasta_sequence in await fasta_sequences:
-        yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
-
-async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[NamedString, Any]:
-    for handle in handles:
-        async for named_seq in read_fasta(handle):
-            yield named_seq

(deleted file)
@@ -1,166 +0,0 @@
-from collections import defaultdict
-from contextlib import AbstractAsyncContextManager
-from numbers import Number
-from typing import Any, AsyncGenerator, AsyncIterable, Collection, Generator, Iterable, Mapping, Sequence, Union
-
-from aiohttp import ClientSession, ClientTimeout
-
-from autobigs.engine.data.structures.genomics import NamedString
-from autobigs.engine.data.structures.mlst import Allele, PartialAllelicMatchProfile, MLSTProfile
-from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException, NoSuchBIGSdbDatabaseException
-
-class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
-
-    def __init__(self, database_api: str, database_name: str, schema_id: int):
-        self._database_name = database_name
-        self._schema_id = schema_id
-        self._base_url = f"{database_api}/db/{self._database_name}/schemes/{self._schema_id}/"
-        self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))
-
-    async def __aenter__(self):
-        return self
-
-    async def fetch_mlst_allele_variants(self, sequence_string: str, exact: bool) -> AsyncGenerator[Allele, Any]:
-        # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
-        uri_path = "sequence"
-        response = await self._http_client.post(uri_path, json={
-            "sequence": sequence_string,
-            "partial_matches": not exact
-        })
-        sequence_response: dict = await response.json()
-
-        if "exact_matches" in sequence_response:
-            # loci -> list of alleles with id and loci
-            exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
-            for allele_loci, alleles in exact_matches.items():
-                for allele in alleles:
-                    alelle_id = allele["allele_id"]
-                    yield Allele(allele_loci=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
-        elif "partial_matches" in sequence_response:
-            if exact:
-                raise NoBIGSdbExactMatchesException(self._database_name, self._schema_id)
-            partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
-            for allele_loci, partial_match in partial_matches.items():
-                if len(partial_match) <= 0:
-                    continue
-                partial_match_profile = PartialAllelicMatchProfile(
-                    percent_identity=float(partial_match["identity"]),
-                    mismatches=int(partial_match["mismatches"]),
-                    bitscore=float(partial_match["bitscore"]),
-                    gaps=int(partial_match["gaps"])
-                )
-                yield Allele(
-                    allele_loci=allele_loci,
-                    allele_variant=str(partial_match["allele"]),
-                    partial_match_profile=partial_match_profile
-                )
-        else:
-            raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
-
-
-
-    async def fetch_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
-        uri_path = "designations"
-        allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
-        if isinstance(alleles, AsyncIterable):
-            async for allele in alleles:
-                allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
-        else:
-            for allele in alleles:
-                allele_request_dict[allele.allele_loci].append({"allele": str(allele.allele_variant)})
-        request_json = {
-            "designations": allele_request_dict
-        }
-        async with self._http_client.post(uri_path, json=request_json) as response:
-            response_json: dict = await response.json()
-            allele_map: dict[str, list[Allele]] = defaultdict(list)
-            response_json.setdefault("fields", dict())
-            schema_fields_returned: dict[str, str] = response_json["fields"]
-            schema_fields_returned.setdefault("ST", "unknown")
-            schema_fields_returned.setdefault("clonal_complex", "unknown")
-            schema_exact_matches: dict = response_json["exact_matches"]
-            for exact_match_loci, exact_match_alleles in schema_exact_matches.items():
-                for exact_match_allele in exact_match_alleles:
-                    allele_map[exact_match_loci].append(Allele(exact_match_loci, exact_match_allele["allele_id"], None))
-            if len(allele_map) == 0:
-                raise ValueError("Passed in no alleles.")
-            return MLSTProfile(dict(allele_map), schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
-
-    async def profile_string(self, string: str, exact: bool = False) -> MLSTProfile:
-        alleles = self.fetch_mlst_allele_variants(string, exact)
-        return await self.fetch_mlst_st(alleles)
-
-
-    async def profile_multiple_strings(self, namedStrings: AsyncIterable[NamedString], exact: bool = False, stop_on_fail: bool = False) -> AsyncGenerator[tuple[str, Union[MLSTProfile, None]], Any]:
-        async for named_string in namedStrings:
-            try:
-                yield (named_string.name, await self.profile_string(named_string.sequence, exact))
-            except NoBIGSdbMatchesException as e:
-                if stop_on_fail:
-                    raise e
-                yield (named_string.name, None)
-
-    async def close(self):
-        await self._http_client.close()
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
-
-class BIGSdbIndex(AbstractAsyncContextManager):
-    KNOWN_BIGSDB_APIS = {
-        "https://bigsdb.pasteur.fr/api",
-        "https://rest.pubmlst.org"
-    }
-
-    def __init__(self):
-        self._http_client = ClientSession()
-        self._known_seqdef_dbs_origin: Union[Mapping[str, str], None] = None
-        self._seqdefdb_schemas: dict[str, Union[Mapping[str, int], None]] = dict()
-        super().__init__()
-
-    async def __aenter__(self):
-        return self
-
-    async def get_known_seqdef_dbs(self, force: bool = False) -> Mapping[str, str]:
-        if self._known_seqdef_dbs_origin is not None and not force:
-            return self._known_seqdef_dbs_origin
-        known_seqdef_dbs = dict()
-        for known_bigsdb in BIGSdbIndex.KNOWN_BIGSDB_APIS:
-            async with self._http_client.get(f"{known_bigsdb}/db") as response:
-                response_json_databases = await response.json()
-                for database_group in response_json_databases:
-                    for database_info in database_group["databases"]:
-                        if str(database_info["name"]).endswith("seqdef"):
-                            known_seqdef_dbs[database_info["name"]] = known_bigsdb
-        self._known_seqdef_dbs_origin = dict(known_seqdef_dbs)
-        return self._known_seqdef_dbs_origin
-
-    async def get_bigsdb_api_from_seqdefdb(self, seqdef_db_name: str) -> str:
-        known_databases = await self.get_known_seqdef_dbs()
-        if seqdef_db_name not in known_databases:
-            raise NoSuchBIGSdbDatabaseException(seqdef_db_name)
-        return known_databases[seqdef_db_name]
-
-    async def get_schemas_for_seqdefdb(self, seqdef_db_name: str, force: bool = False) -> Mapping[str, int]:
-        if seqdef_db_name in self._seqdefdb_schemas and not force:
-            return self._seqdefdb_schemas[seqdef_db_name] # type: ignore since it's guaranteed to not be none by conditional
-        uri_path = f"{await self.get_bigsdb_api_from_seqdefdb(seqdef_db_name)}/db/{seqdef_db_name}/schemes"
-        async with self._http_client.get(uri_path) as response:
-            response_json = await response.json()
-            schema_descriptions: Mapping[str, int] = dict()
-            for scheme_definition in response_json["schemes"]:
-                scheme_id: int = int(str(scheme_definition["scheme"]).split("/")[-1])
-                scheme_desc: str = scheme_definition["description"]
-                schema_descriptions[scheme_desc] = scheme_id
-            self._seqdefdb_schemas[seqdef_db_name] = schema_descriptions
-            return self._seqdefdb_schemas[seqdef_db_name] # type: ignore
-
-    async def build_profiler_from_seqdefdb(self, dbseqdef_name: str, schema_id: int) -> BIGSdbMLSTProfiler:
-        return BIGSdbMLSTProfiler(await self.get_bigsdb_api_from_seqdefdb(dbseqdef_name), dbseqdef_name, schema_id)
-
-    async def close(self):
-        await self._http_client.close()
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
-

(deleted file)
@@ -1,21 +0,0 @@
-from dataclasses import dataclass
-from typing import Mapping, Sequence, Union
-
-@dataclass(frozen=True)
-class PartialAllelicMatchProfile:
-    percent_identity: float
-    mismatches: int
-    bitscore: float
-    gaps: int
-
-@dataclass(frozen=True)
-class Allele:
-    allele_loci: str
-    allele_variant: str
-    partial_match_profile: Union[None, PartialAllelicMatchProfile]
-
-@dataclass(frozen=True)
-class MLSTProfile:
-    alleles: Mapping[str, Sequence[Allele]]
-    sequence_type: str
-    clonal_complex: str

src/autobigs/engine/exceptions/database.py (modified)
@@ -5,8 +5,12 @@ class BIGSDbDatabaseAPIException(Exception):
 
 
 class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
-    def __init__(self, database_name: str, database_schema_id: int, *args):
+    def __init__(self, database_name: str, database_schema_id: int, query_name: Union[None, str], *args):
+        self._query_name = query_name
         super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
 
+    def get_causal_query_name(self) -> Union[str, None]:
+        return self._query_name
+
 class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
     def __init__(self, database_name: str, database_schema_id: int, *args):
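
The added query_name plumbing lets callers recover which input failed. A small sketch exercising exactly the constructor and accessor changed above:

    from autobigs.engine.exceptions.database import NoBIGSdbMatchesException

    try:
        # Arguments mirror the signature in the hunk: database, schema ID, failing query name.
        raise NoBIGSdbMatchesException("pubmlst_bordetella_seqdef", 3, "seq1")
    except NoBIGSdbMatchesException as e:
        print(e.get_causal_query_name())  # seq1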

src/autobigs/engine/reading.py (new file, +20)
@@ -0,0 +1,20 @@
+import asyncio
+from io import TextIOWrapper
+from typing import Any, AsyncGenerator, Iterable, Union
+from Bio import SeqIO
+
+from autobigs.engine.structures.genomics import NamedString
+
+async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]:
+    fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
+    results = []
+    for fasta_sequence in await fasta_sequences:
+        results.append(NamedString(fasta_sequence.id, str(fasta_sequence.seq)))
+    return results
+
+async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
+    tasks = []
+    for handle in handles:
+        tasks.append(read_fasta(handle))
+    for task in asyncio.as_completed(tasks):
+        yield await task
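
A short sketch of consuming read_multiple_fastas; the FASTA path reuses a resource name from the tests in this diff, and each yielded item is the full group of sequences from one handle:

    import asyncio
    from autobigs.engine.reading import read_multiple_fastas

    async def main():
        async for group in read_multiple_fastas(["tests/resources/tohama_I_bpertussis.fasta"]):
            for named in group:
                print(named.name, len(named.sequence))

    asyncio.run(main())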

src/autobigs/engine/structures/alignment.py (new file, +18)
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+from numbers import Number
+from typing import Sequence
+
+@dataclass(frozen=True)
+class AlignmentStats:
+    percent_identity: float
+    mismatches: int
+    gaps: int
+    match_metric: int
+
+@dataclass(frozen=True)
+class PairwiseAlignment:
+    reference: str
+    query: str
+    reference_indices: Sequence[Number]
+    query_indices: Sequence[Number]
+    alignment_stats: AlignmentStats
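
In bigsdb.py these fields are filled from the BIGSdb partial-match response (identity, mismatches, gaps, bitscore); a sketch with illustrative values:

    from autobigs.engine.structures.alignment import AlignmentStats

    # Values here are made up; they would normally come from a partial match.
    stats = AlignmentStats(percent_identity=98.5, mismatches=3, gaps=1, match_metric=1200)
    print(stats.percent_identity)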

src/autobigs/engine/structures/mlst.py (new file, +33)
@@ -0,0 +1,33 @@
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Collection, Iterable, Mapping, Sequence, Union
+
+from autobigs.engine.structures.alignment import AlignmentStats
+
+@dataclass(frozen=True)
+class Allele:
+    allele_locus: str
+    allele_variant: str
+    partial_match_profile: Union[None, AlignmentStats]
+
+@dataclass(frozen=True)
+class MLSTProfile:
+    alleles: Collection[Allele]
+    sequence_type: str
+    clonal_complex: str
+
+@dataclass(frozen=True)
+class NamedMLSTProfile:
+    name: str
+    mlst_profile: Union[None, MLSTProfile]
+
+
+def alleles_to_mapping(alleles: Iterable[Allele]):
+    result = defaultdict(list)
+    for allele in alleles:
+        result[allele.allele_locus].append(allele.allele_variant)
+    result = dict(result)
+    for locus, variant in result.items():
+        if len(variant) == 1:
+            result[locus] = variant[0]
+    return result
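
alleles_to_mapping collapses single-variant loci to a scalar and keeps multi-variant loci as lists; for example:

    from autobigs.engine.structures.mlst import Allele, alleles_to_mapping

    alleles = [
        Allele("adk", "1", None),
        Allele("pgm", "1", None),
        Allele("pgm", "5", None),  # a second variant at the same locus
    ]
    print(alleles_to_mapping(alleles))
    # {'adk': '1', 'pgm': ['1', '5']}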

src/autobigs/engine/writing.py (new file, +43)
@@ -0,0 +1,43 @@
+from collections import defaultdict
+import csv
+from os import PathLike
+from typing import AsyncIterable, Collection, Mapping, Sequence, Union
+
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
+
+
+def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
+    result = defaultdict(list)
+    for allele in alleles:
+        result[allele.allele_locus].append(allele.allele_variant + ("*" if allele.partial_match_profile is not None else ""))
+    for locus in result.keys():
+        if len(result[locus]) == 1:
+            result[locus] = result[locus][0] # Take the only one
+        else:
+            result[locus] = tuple(result[locus]) # type: ignore
+    return dict(result)
+
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
+    failed = list()
+    with open(handle, "w", newline='') as filehandle:
+        header = None
+        writer: Union[csv.DictWriter, None] = None
+        async for named_mlst_profile in mlst_profiles_iterable:
+            name = named_mlst_profile.name
+            mlst_profile = named_mlst_profile.mlst_profile
+            if mlst_profile is None:
+                failed.append(name)
+                continue
+            allele_mapping = alleles_to_text_map(mlst_profile.alleles)
+            if writer is None:
+                header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
+                writer = csv.DictWriter(filehandle, fieldnames=header)
+                writer.writeheader()
+            row_dictionary = {
+                "st": mlst_profile.sequence_type,
+                "clonal-complex": mlst_profile.clonal_complex,
+                "id": name,
+                **allele_mapping
+            }
+            writer.writerow(rowdict=row_dictionary)
+    return failed
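
A sketch of driving write_mlst_profiles_as_csv with an in-memory async generator; the output filename is hypothetical and the allele values echo the B. pertussis test profile below:

    import asyncio
    from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
    from autobigs.engine.writing import write_mlst_profiles_as_csv

    async def profiles():
        profile = MLSTProfile((Allele("adk", "1", None),), "1", "ST-2 complex")
        yield NamedMLSTProfile("seq1", profile)
        yield NamedMLSTProfile("seq2", None)  # a failed query is reported, not written

    failed = asyncio.run(write_mlst_profiles_as_csv(profiles(), "profiles.csv"))
    print(failed)  # ['seq2']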
211
tests/autobigs/engine/analysis/test_bigsdb.py
Normal file
211
tests/autobigs/engine/analysis/test_bigsdb.py
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
from os import path
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
from typing import Callable, Collection, Sequence, Union
|
||||||
|
from Bio import SeqIO
|
||||||
|
import pytest
|
||||||
|
from autobigs.engine.analysis import bigsdb
|
||||||
|
from autobigs.engine.structures import mlst
|
||||||
|
from autobigs.engine.structures.genomics import NamedString
|
||||||
|
from autobigs.engine.structures.mlst import Allele, MLSTProfile
|
||||||
|
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||||
|
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
||||||
|
|
||||||
|
async def generate_async_iterable(normal_iterable):
|
||||||
|
for dummy_sequence in normal_iterable:
|
||||||
|
yield dummy_sequence
|
||||||
|
|
||||||
|
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
||||||
|
rand = random.Random(gene)
|
||||||
|
if isinstance(mutation_site_count, float):
|
||||||
|
mutation_site_count = int(mutation_site_count * len(gene))
|
||||||
|
random_locations = rand.choices(range(len(gene)), k=mutation_site_count)
|
||||||
|
scrambled = list(gene)
|
||||||
|
for random_location in random_locations:
|
||||||
|
scrambled[random_location] = rand.choice(alphabet)
|
||||||
|
return "".join(scrambled)
|
||||||
|
|
||||||
|
def get_first_sequence_from_fasta(resource: str):
|
||||||
|
return str(SeqIO.read(path.join("tests/resources/", resource), "fasta").seq)
|
||||||
|
|
||||||
|
def get_multiple_sequences_from_fasta(resource: str):
|
||||||
|
return tuple(SeqIO.parse(path.join("tests/resources/", resource), "fasta"))
|
||||||
|
|
||||||
|
bpertussis_tohamaI_profile = MLSTProfile((
|
||||||
|
Allele("adk", "1", None),
|
||||||
|
Allele("fumC", "1", None),
|
||||||
|
Allele("glyA", "1", None),
|
||||||
|
Allele("tyrB", "1", None),
|
||||||
|
Allele("icd", "1", None),
|
||||||
|
Allele("pepA", "1", None),
|
||||||
|
Allele("pgm", "1", None)), "1", "ST-2 complex")
|
||||||
|
|
||||||
|
bpertussis_tohamaI_bad_profile = MLSTProfile((
|
||||||
|
Allele("adk", "1", None),
|
||||||
|
Allele("fumC", "2", None),
|
||||||
|
Allele("glyA", "36", None),
|
||||||
|
Allele("tyrB", "4", None),
|
||||||
|
Allele("icd", "4", None),
|
||||||
|
Allele("pepA", "1", None),
|
||||||
|
Allele("pgm", "5", None),
|
||||||
|
), "unknown", "unknown")
|
||||||
|
|
||||||
|
hinfluenzae_2014_102_profile = MLSTProfile((
|
||||||
|
Allele("adk", "28", None),
|
||||||
|
Allele("atpG", "33", None),
|
||||||
|
Allele("frdB", "7", None),
|
||||||
|
Allele("fucK", "18", None),
|
||||||
|
Allele("mdh", "11", None),
|
||||||
|
Allele("pgi", "125", None),
|
||||||
|
Allele("recA", "89", None)
|
||||||
|
), "478", "unknown")
|
||||||
|
|
||||||
|
hinfluenzae_2014_102_bad_profile = MLSTProfile((
|
||||||
|
Allele("adk", "3", None),
|
||||||
|
Allele("atpG", "121", None),
|
||||||
|
Allele("frdB", "6", None),
|
||||||
|
Allele("fucK", "5", None),
|
||||||
|
Allele("mdh", "12", None),
|
||||||
|
Allele("pgi", "4", None),
|
||||||
|
Allele("recA", "5", None)
|
||||||
|
), "unknown", "unknown")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||||
|
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||||
|
(False, "https://rest.pubmlst.org", "pubmlst_hinfluenzae_seqdef", 1, "2014-102_hinfluenza.fasta", "2014-102_hinfluenza_features.fasta", hinfluenzae_2014_102_profile, hinfluenzae_2014_102_bad_profile),
|
||||||
|
])
|
||||||
|
class TestBIGSdbMLSTProfiler:
|
||||||
|
    async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        sequence = get_first_sequence_from_fasta(seq_path)
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
            targets_left = set(expected_alleles.keys())
            async for exact_match in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[sequence]):
                assert isinstance(exact_match, Allele)
                assert exact_match.allele_locus in expected_alleles
                assert exact_match.allele_variant == expected_alleles[exact_match.allele_locus]
                targets_left.remove(exact_match.allele_locus)

            assert len(targets_left) == 0

    async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        target_sequences = get_multiple_sequences_from_fasta(feature_seqs_path)
        mlst_targets = {x.lower() for x in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as profiler:
            for target_sequence in target_sequences:
                match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", target_sequence.description)
                if match is None:
                    continue
                gene = match.group(1).lower()
                if gene not in mlst_targets:
                    continue
                scrambled = gene_scrambler(str(target_sequence.seq), 0.125)
                async for partial_match in profiler.determine_mlst_allele_variants([scrambled]):
                    assert partial_match.partial_match_profile is not None
                mlst_targets.remove(gene)

            assert len(mlst_targets) == 0
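
    # Mutating 12.5% of sites (0.125) is presumably chosen to be disruptive
    # enough to defeat exact allele matching while keeping the sequence close
    # enough to its source for a partial, alignment-based match to succeed.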

    async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            mlst_st_data = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
            assert mlst_st_data is not None
            assert isinstance(mlst_st_data, MLSTProfile)
            assert mlst_st_data.clonal_complex == expected_profile.clonal_complex
            assert mlst_st_data.sequence_type == expected_profile.sequence_type
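
    # determine_mlst_st accepts the Allele collection directly; the superseded
    # tests further below fed an async generator to the old fetch_mlst_st.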

    async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        dummy_alleles = bad_profile.alleles
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            mlst_profile = await dummy_profiler.determine_mlst_st(dummy_alleles)
            assert mlst_profile.clonal_complex == "unknown"
            assert mlst_profile.sequence_type == "unknown"

    async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        sequence = get_first_sequence_from_fasta(seq_path)
        dummy_sequences = [[NamedString("seq1", sequence)], [NamedString("seq2", sequence)]]
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
                name, profile = named_profile.name, named_profile.mlst_profile
                assert profile is not None
                assert isinstance(profile, MLSTProfile)
                assert profile.clonal_complex == expected_profile.clonal_complex
                assert profile.sequence_type == expected_profile.sequence_type
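
    # profile_multiple_strings yields NamedMLSTProfile objects, so each result
    # can be paired back to its input NamedString even when the same sequence
    # is submitted more than once.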

    async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        valid_seq = get_first_sequence_from_fasta(seq_path)
        dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), True):
                name, profile = name_profile.name, name_profile.mlst_profile

                assert profile is not None
                if name == "should_fail":
                    assert profile.clonal_complex == "unknown"
                    assert profile.sequence_type == "unknown"
                    assert len(profile.alleles) > 0
                else:
                    assert isinstance(profile, MLSTProfile)
                    assert profile.clonal_complex == expected_profile.clonal_complex
                    assert profile.sequence_type == expected_profile.sequence_type
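
    # The bare boolean after the sequence iterable appears to correspond to the
    # exact= flag of the superseded tests below; in both modes the scrambled
    # "should_fail" entry still yields an "unknown" profile rather than
    # aborting the stream.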

    async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
        valid_seq = get_first_sequence_from_fasta(seq_path)
        dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]

        async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
            async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), False):
                name, profile = named_profile.name, named_profile.mlst_profile

                assert profile is not None
                if name == "should_fail":
                    assert profile.clonal_complex == "unknown"
                    assert profile.sequence_type == "unknown"
                    assert len(profile.alleles) > 0
                else:
                    assert isinstance(profile, MLSTProfile)
                    assert profile.clonal_complex == expected_profile.clonal_complex
                    assert profile.sequence_type == expected_profile.sequence_type

class TestBIGSdbIndex:

    async def test_bigsdb_index_all_databases_is_not_empty(self):
        async with BIGSdbIndex() as bigsdb_index:
            assert len(await bigsdb_index.get_known_seqdef_dbs()) > 0

    async def test_bigsdb_index_references_pubmlst_correctly(self):
        async with BIGSdbIndex() as bigsdb_index:
            assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")) == "https://rest.pubmlst.org"

    async def test_bigsdb_index_references_institutpasteur_correctly(self):
        async with BIGSdbIndex() as bigsdb_index:
            assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"

    async def test_bigsdb_index_get_schemas_for_bordetella(self):
        async with BIGSdbIndex() as index:
            schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
            assert len(schemas.keys()) > 0
            assert "MLST" in schemas
            assert isinstance(schemas["MLST"], int)

    async def test_bigsdb_index_get_databases_has_only_seqdef(self):
        async with BIGSdbIndex() as index:
            databases = await index.get_known_seqdef_dbs()
            assert len(databases.keys()) > 0
            for database_name in databases.keys():
                assert database_name.endswith("seqdef")
            assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"
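
    # Only the remote path (local=False) is exercised here; a local-database
    # variant would presumably add True to this parameter list.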
@pytest.mark.parametrize("local", [
|
||||||
|
(False)
|
||||||
|
])
|
||||||
|
async def test_bigsdb_index_instantiates_correct_profiler(self, local):
|
||||||
|
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||||
|
async with BIGSdbIndex() as bigsdb_index:
|
||||||
|
async with await bigsdb_index.build_profiler_from_seqdefdb(local, "pubmlst_bordetella_seqdef", 3) as profiler:
|
||||||
|
assert isinstance(profiler, BIGSdbMLSTProfiler)
|
||||||
|
profile = await profiler.profile_string(sequence)
|
||||||
|
assert profile.clonal_complex == "ST-2 complex"
|
||||||
|
assert profile.sequence_type == "1"
|
@@ -1,21 +0,0 @@
from autobigs.engine.data.local.csv import dict_loci_alleles_variants_from_loci
from autobigs.engine.data.structures.mlst import Allele


def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
    alleles_map = {
        "adk": [Allele("adk", "1", None)]
    }
    results = dict_loci_alleles_variants_from_loci(alleles_map)
    for loci, variant in results.items():
        assert isinstance(variant, str)
        assert variant == "1"


def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
    alleles_map = {
        "adk": [Allele("adk", "1", None), Allele("adk", "2", None)]
    }
    results = dict_loci_alleles_variants_from_loci(alleles_map)
    for loci, variant in results.items():
        assert isinstance(variant, list)
        assert len(variant) == 2
@@ -1,7 +0,0 @@
from autobigs.engine.data.local.fasta import read_fasta


async def test_fasta_reader_not_none():
    named_strings = read_fasta("tests/resources/tohama_I_bpertussis.fasta")
    async for named_string in named_strings:
        assert named_string.name == "BX470248.1"
@@ -1,244 +0,0 @@
import random
import re
from typing import Collection, Sequence, Union
from Bio import SeqIO
import pytest
from autobigs.engine.data.structures.genomics import NamedString
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler


def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
    rand = random.Random(gene)
    if isinstance(mutation_site_count, float):
        mutation_site_count = int(mutation_site_count * len(gene))
    random_locations = rand.choices(range(len(gene)), k=mutation_site_count)
    scrambled = list(gene)
    for random_location in random_locations:
        scrambled[random_location] = rand.choice(alphabet)
    return "".join(scrambled)


async def test_institutpasteur_profiling_results_in_exact_matches_when_exact():
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        targets_left = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
        async for exact_match in dummy_profiler.fetch_mlst_allele_variants(sequence_string=sequence, exact=True):
            assert isinstance(exact_match, Allele)
            assert exact_match.allele_variant == '1'  # All of Tohama I has allele id 1
            targets_left.remove(exact_match.allele_loci)

        assert len(targets_left) == 0


async def test_institutpasteur_sequence_profiling_non_exact_returns_non_exact():
    sequences = list(SeqIO.parse("tests/resources/tohama_I_bpertussis_coding.fasta", "fasta"))
    mlst_targets = {"adk", "fumc", "glya", "tyrb", "icd", "pepa", "pgm"}
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
        for sequence in sequences:
            match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", sequence.description)
            if match is None:
                continue
            gene = match.group(1)
            if gene.lower() not in mlst_targets:
                continue
            scrambled = gene_scrambler(str(sequence.seq), 0.125)
            async for partial_match in profiler.fetch_mlst_allele_variants(scrambled, False):
                assert partial_match.partial_match_profile is not None
            mlst_targets.remove(gene.lower())

        assert len(mlst_targets) == 0


async def test_institutpasteur_profiling_results_in_correct_mlst_st():
    async def dummy_allele_generator():
        dummy_alleles = [
            Allele("adk", "1", None),
            Allele("fumC", "1", None),
            Allele("glyA", "1", None),
            Allele("tyrB", "1", None),
            Allele("icd", "1", None),
            Allele("pepA", "1", None),
            Allele("pgm", "1", None),
        ]
        for dummy_allele in dummy_alleles:
            yield dummy_allele
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        mlst_st_data = await dummy_profiler.fetch_mlst_st(dummy_allele_generator())
        assert mlst_st_data is not None
        assert isinstance(mlst_st_data, MLSTProfile)
        assert mlst_st_data.clonal_complex == "ST-2 complex"
        assert mlst_st_data.sequence_type == "1"


async def test_institutpasteur_profiling_non_exact_results_in_list_of_mlsts():
    dummy_alleles = [
        Allele("adk", "1", None),
        Allele("fumC", "2", None),
        Allele("glyA", "36", None),
        Allele("tyrB", "4", None),
        Allele("icd", "4", None),
        Allele("pepA", "1", None),
        Allele("pgm", "5", None),
    ]
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        mlst_profile = await dummy_profiler.fetch_mlst_st(dummy_alleles)
        assert mlst_profile.clonal_complex == "unknown"
        assert mlst_profile.sequence_type == "unknown"


async def test_institutpasteur_sequence_profiling_is_correct():
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        profile = await dummy_profiler.profile_string(sequence)
        assert profile is not None
        assert isinstance(profile, MLSTProfile)
        assert profile.clonal_complex == "ST-2 complex"
        assert profile.sequence_type == "1"


async def test_pubmlst_profiling_results_in_exact_matches_when_exact():
    dummy_alleles = {
        Allele("adk", "1", None),
        Allele("atpG", "1", None),
        Allele("frdB", "1", None),
        Allele("fucK", "1", None),
        Allele("mdh", "1", None),
        Allele("pgi", "1", None),
        Allele("recA", "5", None),
    }
    sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
        exact_matches = dummy_profiler.fetch_mlst_allele_variants(sequence_string=sequence, exact=True)
        async for exact_match in exact_matches:
            assert isinstance(exact_match, Allele)
            dummy_alleles.remove(exact_match)

        assert len(dummy_alleles) == 0


async def test_pubmlst_profiling_results_in_correct_st():
    async def generate_dummy_targets():
        dummy_alleles = [
            Allele("adk", "1", None),
            Allele("atpG", "1", None),
            Allele("frdB", "1", None),
            Allele("fucK", "1", None),
            Allele("mdh", "1", None),
            Allele("pgi", "1", None),
            Allele("recA", "5", None),
        ]
        for dummy_allele in dummy_alleles:
            yield dummy_allele
    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
        mlst_st_data = await dummy_profiler.fetch_mlst_st(generate_dummy_targets())
        assert mlst_st_data is not None
        assert isinstance(mlst_st_data, MLSTProfile)
        assert mlst_st_data.clonal_complex == "ST-3 complex"
        assert mlst_st_data.sequence_type == "3"

async def test_pubmlst_sequence_profiling_is_correct():
    sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
    async with BIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
        profile = await dummy_profiler.profile_string(sequence)
        assert profile is not None
        assert isinstance(profile, MLSTProfile)
        assert profile.clonal_complex == "ST-3 complex"
        assert profile.sequence_type == "3"


async def test_bigsdb_index_all_databases_is_not_empty():
    async with BIGSdbIndex() as bigsdb_index:
        assert len(await bigsdb_index.get_known_seqdef_dbs()) > 0


async def test_bigsdb_index_references_pubmlst_correctly():
    async with BIGSdbIndex() as bigsdb_index:
        assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")) == "https://rest.pubmlst.org"


async def test_bigsdb_index_references_institutpasteur_correctly():
    async with BIGSdbIndex() as bigsdb_index:
        assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"


async def test_bigsdb_index_instantiates_correct_profiler():
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    async with BIGSdbIndex() as bigsdb_index:
        async with await bigsdb_index.build_profiler_from_seqdefdb("pubmlst_bordetella_seqdef", 3) as profiler:
            profile = await profiler.profile_string(sequence)
            assert profile.clonal_complex == "ST-2 complex"
            assert profile.sequence_type == "1"

async def test_bigsdb_profile_multiple_strings_same_string_twice():
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    dummy_sequences = [NamedString("seq1", sequence), NamedString("seq2", sequence)]
    async def generate_async_iterable_sequences():
        for dummy_sequence in dummy_sequences:
            yield dummy_sequence
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences()):
            assert profile is not None
            assert isinstance(profile, MLSTProfile)
            assert profile.clonal_complex == "ST-2 complex"
            assert profile.sequence_type == "1"


async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop():
    valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
    async def generate_async_iterable_sequences():
        for dummy_sequence in dummy_sequences:
            yield dummy_sequence
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), True):
            if name == "should_fail":
                assert profile is None
            else:
                assert profile is not None
                assert isinstance(profile, MLSTProfile)
                assert profile.clonal_complex == "ST-2 complex"
                assert profile.sequence_type == "1"


async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop():
    valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
    async def generate_async_iterable_sequences():
        for dummy_sequence in dummy_sequences:
            yield dummy_sequence
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), False):
            if name == "should_fail":
                assert profile is not None
                assert profile.clonal_complex == "unknown"
                assert profile.sequence_type == "unknown"
                assert len(profile.alleles) > 0
            else:
                assert profile is not None
                assert isinstance(profile, MLSTProfile)
                assert profile.clonal_complex == "ST-2 complex"
                assert profile.sequence_type == "1"


async def test_bigsdb_profile_multiple_strings_fail_second_stop():
    valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    invalid_seq = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
    dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", invalid_seq), NamedString("seq3", valid_seq)]
    async def generate_async_iterable_sequences():
        for dummy_sequence in dummy_sequences:
            yield dummy_sequence
    async with BIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
        with pytest.raises(NoBIGSdbMatchesException):
            async for name, profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), exact=True, stop_on_fail=True):
                if name == "should_fail":
                    pytest.fail("Exception should have been thrown, no exception was thrown.")
                else:
                    assert profile is not None
                    assert isinstance(profile, MLSTProfile)
                    assert profile.clonal_complex == "ST-2 complex"
                    assert profile.sequence_type == "1"


async def test_bigsdb_index_get_schemas_for_bordetella():
    async with BIGSdbIndex() as index:
        schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
        assert len(schemas.keys()) > 0
        assert "MLST" in schemas
        assert isinstance(schemas["MLST"], int)


async def test_bigsdb_index_get_databases_has_only_seqdef():
    async with BIGSdbIndex() as index:
        databases = await index.get_known_seqdef_dbs()
        assert len(databases.keys()) > 0
        for database_name in databases.keys():
            assert database_name.endswith("seqdef")
        assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"
7
tests/autobigs/engine/test_reading.py
Normal file
@@ -0,0 +1,7 @@
from autobigs.engine.reading import read_fasta


async def test_fasta_reader_not_none():
    named_strings = await read_fasta("tests/resources/tohama_I_bpertussis.fasta")
    for named_string in named_strings:
        assert named_string.name == "BX470248.1"
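
# Note the API shift relative to the deleted local-fasta test above: read_fasta
# is now awaited once and returns a regular iterable of NamedString objects,
# rather than acting as an async iterator itself.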
47
tests/autobigs/engine/test_writing.py
Normal file
@@ -0,0 +1,47 @@
from typing import AsyncIterable, Iterable

import pytest
from autobigs.engine.structures.alignment import AlignmentStats
from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
import tempfile
from csv import reader
from os import path


@pytest.fixture
def dummy_alphabet_mlst_profile():
    return NamedMLSTProfile("name", MLSTProfile((
        Allele("A", "1", None),
        Allele("D", "1", None),
        Allele("B", "1", None),
        Allele("C", "1", None),
        Allele("C", "2", AlignmentStats(90, 10, 0, 90))
    ), "mysterious", "very mysterious"))


async def iterable_to_asynciterable(iterable: Iterable):
    for iterated in iterable:
        yield iterated


async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile):
    dummy_profiles = [dummy_alphabet_mlst_profile]
    with tempfile.TemporaryDirectory() as temp_dir:
        output_path = path.join(temp_dir, "out.csv")
        await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
        with open(output_path) as csv_handle:
            csv_reader = reader(csv_handle)
            lines = list(csv_reader)
            target_columns = lines[4:]
            assert target_columns == sorted(target_columns)


async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles)  # type: ignore
    expected_mapping = {
        "A": "1",
        "B": "1",
        "C": ("1", "2*"),
        "D": "1"
    }
    for allele_name, allele_ids in mapping.items():
        assert allele_name in expected_mapping
        assert allele_ids == expected_mapping[allele_name]
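
# Editor's note: an illustrative check, not part of the original diff. The
# fixture above suggests the text mapping collapses repeated loci into tuples
# and suffixes "*" onto alleles carrying AlignmentStats (non-exact hits); this
# assumes alleles_to_text_map accepts any collection of Allele objects.
def _example_star_marks_partial_matches():
    mapping = alleles_to_text_map((
        Allele("adk", "1", None),
        Allele("adk", "2", AlignmentStats(90, 10, 0, 90)),
    ))
    assert mapping["adk"] == ("1", "2*")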
28244
tests/resources/2014-102_hinfluenza.fasta
Normal file
File diff suppressed because it is too large
Load Diff
27751
tests/resources/2014-102_hinfluenza_features.fasta
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
11
tests/resources/tohama_I_bpertussis_adk.fasta
Normal file
@@ -0,0 +1,11 @@
>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
CGCCTGTCGCAGGCTCTGCAGAGCTAA