5 Commits

Author SHA1 Message Date
2822a483e3 Initial attempt at switching to a conda based build environment
Some checks failed
autoBIGS.engine/pipeline/head There was a failure building this commit
2025-02-21 05:37:56 +00:00
b8cebb8ba4 Infrastructure for concurrent processing implemented
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good
2025-02-19 15:49:46 +00:00
7384895578 Writing now uses named MLST profile
All checks were successful
automlst.engine/pipeline/head This commit looks good
automlst.engine/pipeline/tag This commit looks good
2025-02-18 16:03:17 +00:00
5a03c7e8d8 Multiple string profiling now respects grouped queries (for non-WGS)
All checks were successful
automlst.engine/pipeline/head This commit looks good
2025-02-18 15:34:18 +00:00
ddf9cde175 Added a license text to pyproject.toml 2025-02-14 20:47:06 +00:00
16 changed files with 261 additions and 206 deletions

11
.devcontainer/Dockerfile Normal file

@@ -0,0 +1,11 @@
FROM mcr.microsoft.com/devcontainers/anaconda:1-3
# Copy environment.yml (if found) to a temp location so we update the environment. Also
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
&& rm -rf /tmp/conda-tmp
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

.devcontainer/devcontainer.json

@@ -1,9 +1,11 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/python
+// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
 {
-    "name": "Python 3",
-    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-    "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+    "name": "Anaconda (Python 3)",
+    "build": {
+        "context": "..",
+        "dockerfile": "Dockerfile"
+    }
 
     // Features to add to the dev container. More info: https://containers.dev/features.
     // "features": {},
@@ -12,14 +14,7 @@
     // "forwardPorts": [],
 
     // Use 'postCreateCommand' to run commands after the container is created.
-    "postCreateCommand": "pip3 install --user -r requirements.txt",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "mechatroner.rainbow-csv"
-            ]
-        }
-    },
+    // "postCreateCommand": "python --version",
 
     // Configure tool-specific properties.
     // "customizations": {},

3
.devcontainer/noop.txt Normal file

@@ -0,0 +1,3 @@
This file is copied into the container along with environment.yml* from the parent
folder. It is included to prevent the Dockerfile COPY instruction from failing
if no environment.yml is found.

159
.gitignore vendored

@@ -1,6 +1,6 @@
 # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
-# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
-# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,svelte,python,linux,node
+# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
 
 ### Linux ###
 *~
@@ -17,146 +17,6 @@
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
-
-### Node ###
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-.pnpm-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# Snowpack dependency directory (https://snowpack.dev/)
-web_modules/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional stylelint cache
-.stylelintcache
-
-# Microbundle cache
-.rpt2_cache/
-.rts2_cache_cjs/
-.rts2_cache_es/
-.rts2_cache_umd/
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variable files
-.env
-.env.development.local
-.env.test.local
-.env.production.local
-.env.local
-
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-.parcel-cache
-
-# Next.js build output
-.next
-out
-
-# Nuxt.js build / generate output
-.nuxt
-dist
-
-# Gatsby files
-.cache/
-# Comment in the public line in if your project uses Gatsby and not Next.js
-# https://nextjs.org/blog/next-9-1#public-directory-support
-# public
-
-# vuepress build output
-.vuepress/dist
-
-# vuepress v2.x temp and cache directory
-.temp
-
-# Docusaurus cache and generated files
-.docusaurus
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# TernJS port file
-.tern-port
-
-# Stores VSCode versions used for testing VSCode extensions
-.vscode-test
-
-# yarn v2
-.yarn/cache
-.yarn/unplugged
-.yarn/build-state.yml
-.yarn/install-state.gz
-.pnp.*
-
-### Node Patch ###
-# Serverless Webpack directories
-.webpack/
-
-# Optional stylelint cache
-
-# SvelteKit build / generate output
-.svelte-kit
 
 ### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -202,6 +62,7 @@ htmlcov/
 .nox/
 .coverage
 .coverage.*
+.cache
 nosetests.xml
 coverage.xml
 *.cover
@@ -215,6 +76,7 @@ cover/
 *.pot
 
 # Django stuff:
+*.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
@@ -278,6 +140,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
+.env
 .venv
 env/
 venv/
@@ -326,13 +189,6 @@ poetry.toml
 # LSP config files
 pyrightconfig.json
-
-### Svelte ###
-# gitignore template for the SvelteKit, frontend web component framework
-# website: https://kit.svelte.dev/
-.svelte-kit/
-package
 
 ### VisualStudioCode ###
 .vscode/*
 !.vscode/settings.json
@@ -352,9 +208,8 @@ package
 .history
 .ionide
 
-# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,svelte,python,linux,node
+# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
 
 # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
-output
-*.private.*
+conda-bld

5
.vscode/extensions.json vendored Normal file

@@ -0,0 +1,5 @@
{
    "recommendations": [
        "piotrpalarz.vscode-gitignore-generator"
    ]
}

10
Jenkinsfile vendored

@@ -9,7 +9,7 @@ pipeline {
 	stages {
 		stage("install") {
 			steps {
-				sh 'python -m pip install -r requirements.txt'
+				sh 'conda env update -n base -f environment.yml'
 			}
 		}
 		stage("unit tests") {
@@ -22,11 +22,14 @@ pipeline {
 		stage("build") {
 			steps {
 				sh "python -m build"
+				sh "grayskull pypi dist/*.tar.gz --maintainers 'Harrison Deng'"
+				sh "python scripts/patch_recipe.py"
+				sh 'conda build autobigs-engine -c bioconda --output-folder conda-bld --verify'
 			}
 		}
 		stage("archive") {
 			steps {
-				archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
+				archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl, conda-bld/**/*.conda', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
 			}
 		}
 		stage("publish") {
@@ -36,7 +39,8 @@ pipeline {
 				CREDS = credentials('username-password-rs-git')
 			}
 			steps {
-				sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+				sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/ydeng/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
+				sh 'curl --user ${CREDS_USR}:${CREDS_PSW} --upload-file conda-bld/**/*.conda https://git.reslate.systems/api/packages/${CREDS_USR}/conda/$(basename conda-bld/**/*.conda)'
 			}
 		}
 		stage ("pypi.org") {

44
autobigs-engine/meta.yaml Normal file

@@ -0,0 +1,44 @@
{% set name = "autoBIGS.engine" %}
{% set version = "0.12.1.dev1+gb8cebb8.d20250221" %}

package:
  name: {{ name|lower|replace(".", "-") }}
  version: {{ version }}

source:
  url: file:///workspaces/autoBIGS.engine/dist/autobigs_engine-0.12.1.dev1%2Bgb8cebb8.d20250221.tar.gz
  sha256: c86441b94f935cfa414ff28ca4c026a070e0fb15988ea3bb7d1a942859a09b16

build:
  noarch: python
  script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
  number: 0
  run_exports:
    - {{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}

requirements:
  host:
    - python >=3.12
    - setuptools >=64
    - setuptools-scm >=8
    - pip
  run:
    - python >=3.12
    - biopython ==1.85
    - aiohttp ==3.11.*

test:
  imports:
    - autobigs
  commands:
    - pip check
  requires:
    - pip

about:
  summary: A library to rapidly fetch MLST profiles given sequences for various diseases.
  license: GPL-3.0-or-later
  license_file: LICENSE
  home: https://github.com/Syph-and-VPD-Lab/autoBIGS.engine

extra:
  recipe-maintainers:
    - Harrison Deng

15
environment.yml Normal file

@@ -0,0 +1,15 @@
name: ci
channels:
  - bioconda
  - conda-forge
dependencies:
  - aiohttp==3.11.*
  - biopython==1.85
  - pytest
  - pytest-asyncio
  - python-build
  - conda-build
  - twine
  - setuptools_scm
  - pytest-cov
  - grayskull

pyproject.toml

@@ -13,6 +13,7 @@ dependencies = [
 ]
 
 requires-python = ">=3.12"
 description = "A library to rapidly fetch MLST profiles given sequences for various diseases."
+license = {text = "GPL-3.0-or-later"}
 
 [project.urls]
 Homepage = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"

requirements.txt

@@ -1,8 +0,0 @@
-aiohttp[speedups]==3.11.*
-biopython==1.85
-pytest
-pytest-asyncio
-build
-twine
-setuptools_scm
-pytest-cov

103
scripts/patch_recipe.py Normal file

@@ -0,0 +1,103 @@
#!/usr/bin/env python3

import argparse
from os import fdopen, path
import os
import re
import shutil
from sys import argv
import tempfile

INDENTATION = "  "
GRAYSKULL_OUTPUT_PATH = "autoBIGS.engine"
RUN_EXPORTED_VALUE = r'{{ pin_subpackage( name|lower|replace(".", "-"), max_pin="x.x") }}'
LICENSE_SUFFIX = "-or-later"
HOME_PAGE = "https://github.com/Syph-and-VPD-Lab/autoBIGS.engine"

def _calc_indentation(line: str):
    return len(re.findall(INDENTATION, line.split(line.strip())[0])) if line != "\n" else 0

def read_grayskull_output():
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    meta_file = open(original_meta)
    lines = meta_file.readlines()
    meta_file.close()
    return lines

def update_naming_scheme(lines):
    modified_lines = []
    for line in lines:
        matches = re.finditer(r"\{\{\s*name\|lower()\s+\}\}", line)
        modified_line = line
        for match in matches:
            modified_line = modified_line[:match.start(1)] + r'|replace(".", "-")' + modified_line[match.end(1):]
        modified_lines.append(modified_line)
    return modified_lines

def inject_run_exports(lines: list[str]):
    package_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "build:\n" and indentation_count == 0:
            package_indent = True
            modified_lines.append(line)
        elif package_indent and indentation_count == 0:
            modified_lines.append(INDENTATION*1 + "run_exports:\n")
            modified_lines.append(INDENTATION*2 + "- " + RUN_EXPORTED_VALUE + "\n")
            package_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def suffix_license(lines: list[str]):
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 1 and line.lstrip().startswith("license:"):
            modified_lines.append(line.rstrip() + LICENSE_SUFFIX + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def inject_home_page(lines: list[str]):
    about_indent = False
    modified_lines = []
    for line in lines:
        indentation_count = _calc_indentation(line)
        if line == "about:\n" and indentation_count == 0:
            about_indent = True
            modified_lines.append(line)
        elif about_indent and indentation_count == 0:
            modified_lines.append(INDENTATION + "home: " + HOME_PAGE + "\n")
            about_indent = False
        else:
            modified_lines.append(line)
    return modified_lines

def write_to_original(lines: list[str]):
    original_recipe = path.abspath(GRAYSKULL_OUTPUT_PATH)
    original_meta = path.join(original_recipe, "meta.yaml")
    with open(original_meta, "w") as file:
        file.writelines(lines)

def rename_recipe_dir():
    new_recipe_name = path.abspath(path.join(GRAYSKULL_OUTPUT_PATH.replace(".", "-").lower()))
    shutil.rmtree(new_recipe_name, ignore_errors=True)
    os.replace(path.abspath(GRAYSKULL_OUTPUT_PATH), new_recipe_name)

if __name__ == "__main__":
    original_grayskull_out = read_grayskull_output()
    modified_recipe_meta = update_naming_scheme(original_grayskull_out)
    modified_recipe_meta = inject_run_exports(modified_recipe_meta)
    modified_recipe_meta = suffix_license(modified_recipe_meta)
    modified_recipe_meta = inject_home_page(modified_recipe_meta)
    write_to_original(modified_recipe_meta)
    rename_recipe_dir()
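
Editor's note: to see what update_naming_scheme does to a grayskull-generated recipe line, here is a minimal standalone sketch; the sample line is hypothetical, and the substitution mirrors the regex logic of the function above:

import re

# A line as grayskull would emit it for a dotted PyPI name such as "autoBIGS.engine".
line = 'name: {{ name|lower }}\n'

# Splice |replace(".", "-") in after |lower, yielding a conda-legal package name.
patched = re.sub(r'(\{\{\s*name\|lower)(\s+\}\})', r'\1|replace(".", "-")\2', line)
print(patched, end="")  # name: {{ name|lower|replace(".", "-") }}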

src/autobigs/engine/analysis/bigsdb.py

@@ -22,15 +22,15 @@ from Bio.Align import PairwiseAligner
 class BIGSdbMLSTProfiler(AbstractAsyncContextManager):
 
     @abstractmethod
-    def determine_mlst_allele_variants(self, query_sequence_strings: Iterable[str]) -> AsyncGenerator[Allele, Any]:
+    def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         pass
 
     @abstractmethod
-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         pass
 
     @abstractmethod
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         pass
 
     @abstractmethod
@@ -52,14 +52,14 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
     async def __aenter__(self):
         return self
 
-    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[str], str]) -> AsyncGenerator[Allele, Any]:
+    async def determine_mlst_allele_variants(self, query_sequence_strings: Union[Iterable[Union[NamedString, str]], Union[NamedString, str]]) -> AsyncGenerator[Union[Allele, tuple[str, Allele]], Any]:
         # See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
         uri_path = "sequence"
-        if isinstance(query_sequence_strings, str):
+        if isinstance(query_sequence_strings, str) or isinstance(query_sequence_strings, NamedString):
            query_sequence_strings = [query_sequence_strings]
         for sequence_string in query_sequence_strings:
             async with self._http_client.post(uri_path, json={
-                "sequence": sequence_string,
+                "sequence": sequence_string if isinstance(sequence_string, str) else sequence_string.sequence,
                 "partial_matches": True
             }) as response:
                 sequence_response: dict = await response.json()
@@ -70,7 +70,8 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                 for allele_loci, alleles in exact_matches.items():
                     for allele in alleles:
                         alelle_id = allele["allele_id"]
-                        yield Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                        result_allele = Allele(allele_locus=allele_loci, allele_variant=alelle_id, partial_match_profile=None)
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
             elif "partial_matches" in sequence_response:
                 partial_matches: dict[str, dict[str, Union[str, float, int]]] = sequence_response["partial_matches"]
                 for allele_loci, partial_match in partial_matches.items():
@@ -82,23 +83,33 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
                             gaps=int(partial_match["gaps"]),
                             match_metric=int(partial_match["bitscore"])
                         )
-                        yield Allele(
+                        result_allele = Allele(
                             allele_locus=allele_loci,
                             allele_variant=str(partial_match["allele"]),
                             partial_match_profile=partial_match_profile
                         )
+                        yield result_allele if isinstance(sequence_string, str) else (sequence_string.name, result_allele)
             else:
-                raise NoBIGSdbMatchesException(self._database_name, self._schema_id)
+                raise NoBIGSdbMatchesException(self._database_name, self._schema_id, sequence_string.name if isinstance(sequence_string, NamedString) else None)
 
-    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Allele], Iterable[Allele]]) -> MLSTProfile:
+    async def determine_mlst_st(self, alleles: Union[AsyncIterable[Union[Allele, tuple[str, Allele]]], Iterable[Union[Allele, tuple[str, Allele]]]]) -> Union[MLSTProfile, NamedMLSTProfile]:
         uri_path = "designations"
         allele_request_dict: dict[str, list[dict[str, str]]] = defaultdict(list)
+        names_list = []
+        def insert_allele_to_request_dict(allele: Union[Allele, tuple[str, Allele]]):
+            if isinstance(allele, Allele):
+                allele_val = allele
+            else:
+                allele_val = allele[1]
+                names_list.append(allele[0])
+            allele_request_dict[allele_val.allele_locus].append({"allele": str(allele_val.allele_variant)})
+
         if isinstance(alleles, AsyncIterable):
             async for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         else:
             for allele in alleles:
-                allele_request_dict[allele.allele_locus].append({"allele": str(allele.allele_variant)})
+                insert_allele_to_request_dict(allele)
         request_json = {
             "designations": allele_request_dict
         }
@@ -111,26 +122,33 @@ class RemoteBIGSdbMLSTProfiler(BIGSdbMLSTProfiler):
             schema_fields_returned.setdefault("clonal_complex", "unknown")
             schema_exact_matches: dict = response_json["exact_matches"]
             for exact_match_locus, exact_match_alleles in schema_exact_matches.items():
+                if len(exact_match_alleles) > 1:
+                    raise ValueError(f"Unexpected number of alleles returned for exact match (Expected 1, retrieved {len(exact_match_alleles)})")
                 allele_set.add(Allele(exact_match_locus, exact_match_alleles[0]["allele_id"], None))
             if len(allele_set) == 0:
                 raise ValueError("Passed in no alleles.")
-            return MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            result_mlst_profile = MLSTProfile(allele_set, schema_fields_returned["ST"], schema_fields_returned["clonal_complex"])
+            if len(names_list) > 0:
+                result_mlst_profile = NamedMLSTProfile(str(tuple(names_list)), result_mlst_profile)
+            return result_mlst_profile
 
-    async def profile_string(self, query_sequence_strings: Iterable[str]) -> MLSTProfile:
+    async def profile_string(self, query_sequence_strings: Iterable[Union[NamedString, str]]) -> Union[NamedMLSTProfile, MLSTProfile]:
         alleles = self.determine_mlst_allele_variants(query_sequence_strings)
         return await self.determine_mlst_st(alleles)
 
     async def profile_multiple_strings(self, query_named_string_groups: AsyncIterable[Iterable[NamedString]], stop_on_fail: bool = False) -> AsyncGenerator[NamedMLSTProfile, Any]:
+        tasks = []
         async for named_strings in query_named_string_groups:
-            for named_string in named_strings:
+            tasks.append(self.profile_string(named_strings))
+        for task in asyncio.as_completed(tasks):
             try:
-                yield NamedMLSTProfile(named_string.name, (await self.profile_string([named_string.sequence])))
+                yield await task
             except NoBIGSdbMatchesException as e:
                 if stop_on_fail:
                     raise e
-                yield NamedMLSTProfile(named_string.name, None)
+                causal_name = e.get_causal_query_name()
+                if causal_name is None:
+                    raise ValueError("Missing query name despite requiring names.")
+                else:
+                    yield NamedMLSTProfile(causal_name, None)
 
     async def close(self):
         await self._http_client.close()
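
Editor's note: the rewritten profile_multiple_strings schedules one profiling coroutine per query group and yields results as they finish, rather than awaiting each group sequentially. A minimal self-contained sketch of that asyncio.as_completed pattern (profile_one is a stand-in for profile_string, not the real profiler):

import asyncio

async def profile_one(name: str, delay: float) -> str:
    await asyncio.sleep(delay)  # simulates the HTTP round trip to BIGSdb
    return name

async def profile_all():
    # Schedule everything up front, then yield in completion order.
    tasks = [profile_one("slow_query", 0.2), profile_one("fast_query", 0.1)]
    for task in asyncio.as_completed(tasks):
        yield await task

async def main():
    async for result in profile_all():
        print(result)  # "fast_query" arrives first, regardless of submission order

asyncio.run(main())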

src/autobigs/engine/exceptions/database.py

@@ -5,8 +5,12 @@ class BIGSDbDatabaseAPIException(Exception):
 
 class NoBIGSdbMatchesException(BIGSDbDatabaseAPIException):
-    def __init__(self, database_name: str, database_schema_id: int, *args):
+    def __init__(self, database_name: str, database_schema_id: int, query_name: Union[None, str], *args):
+        self._query_name = query_name
         super().__init__(f"No matches found with schema with ID {database_schema_id} in the database \"{database_name}\".", *args)
 
+    def get_causal_query_name(self) -> Union[str, None]:
+        return self._query_name
+
 class NoBIGSdbExactMatchesException(NoBIGSdbMatchesException):
     def __init__(self, database_name: str, database_schema_id: int, *args):
src/autobigs/engine/reading.py

@@ -13,5 +13,8 @@ async def read_fasta(handle: Union[str, TextIOWrapper]) -> Iterable[NamedString]
     return results
 
 async def read_multiple_fastas(handles: Iterable[Union[str, TextIOWrapper]]) -> AsyncGenerator[Iterable[NamedString], Any]:
+    tasks = []
     for handle in handles:
-        yield await read_fasta(handle)
+        tasks.append(read_fasta(handle))
+    for task in asyncio.as_completed(tasks):
+        yield await task

src/autobigs/engine/writing.py

@@ -3,7 +3,7 @@ import csv
 from os import PathLike
 from typing import AsyncIterable, Collection, Mapping, Sequence, Union
-from autobigs.engine.structures.mlst import Allele, MLSTProfile
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
 
 def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Sequence[str], str]]:
@@ -17,12 +17,14 @@ def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[Seque
         result[locus] = tuple(result[locus])  # type: ignore
     return dict(result)
 
-async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple[str, Union[MLSTProfile, None]]], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
+async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[NamedMLSTProfile], handle: Union[str, bytes, PathLike[str], PathLike[bytes]]) -> Sequence[str]:
     failed = list()
     with open(handle, "w", newline='') as filehandle:
         header = None
         writer: Union[csv.DictWriter, None] = None
-        async for name, mlst_profile in mlst_profiles_iterable:
+        async for named_mlst_profile in mlst_profiles_iterable:
+            name = named_mlst_profile.name
+            mlst_profile = named_mlst_profile.mlst_profile
             if mlst_profile is None:
                 failed.append(name)
                 continue
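
Editor's note: the writer now consumes NamedMLSTProfile records instead of (name, profile) tuples, so a failed query travels with its name attached. A minimal sketch of the new contract, using stand-in dataclasses in place of the engine's structures module (field names are illustrative):

import asyncio
from dataclasses import dataclass
from typing import Optional

@dataclass
class MLSTProfile:  # stand-in for autobigs.engine.structures.mlst.MLSTProfile
    alleles: tuple
    sequence_type: str
    clonal_complex: str

@dataclass
class NamedMLSTProfile:  # pairs a query name with an optional profile
    name: str
    mlst_profile: Optional[MLSTProfile]

async def profiles():
    yield NamedMLSTProfile("sample_1", MLSTProfile((), "1", "cc1"))
    yield NamedMLSTProfile("sample_2", None)  # a failed query still carries its name

async def main():
    failed = []
    async for named in profiles():  # the same unpacking the writer performs
        if named.mlst_profile is None:
            failed.append(named.name)
    print(failed)  # ['sample_2']

asyncio.run(main())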

tests/autobigs/engine/test_writing.py

@@ -3,7 +3,7 @@ from typing import AsyncIterable, Iterable
 import pytest
 from autobigs.engine.structures.alignment import AlignmentStats
 from autobigs.engine.writing import alleles_to_text_map, write_mlst_profiles_as_csv
-from autobigs.engine.structures.mlst import Allele, MLSTProfile
+from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
 import tempfile
 from csv import reader
 from os import path
@@ -11,20 +11,20 @@ from os import path
 @pytest.fixture
 def dummy_alphabet_mlst_profile():
-    return MLSTProfile((
+    return NamedMLSTProfile("name", MLSTProfile((
         Allele("A", "1", None),
         Allele("D", "1", None),
         Allele("B", "1", None),
         Allele("C", "1", None),
         Allele("C", "2", AlignmentStats(90, 10, 0, 90))
-    ), "mysterious", "very mysterious")
+    ), "mysterious", "very mysterious"))
 
 async def iterable_to_asynciterable(iterable: Iterable):
     for iterated in iterable:
         yield iterated
 
 async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile: MLSTProfile):
-    dummy_profiles = [("test_1", dummy_alphabet_mlst_profile)]
+    dummy_profiles = [dummy_alphabet_mlst_profile]
     with tempfile.TemporaryDirectory() as temp_dir:
         output_path = path.join(temp_dir, "out.csv")
         await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
@@ -34,8 +34,8 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
     target_columns = lines[4:]
     assert target_columns == sorted(target_columns)
 
-async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: MLSTProfile):
-    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.alleles)
+async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
+    mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles)  # type: ignore
     expected_mapping = {
         "A": "1",
         "B": "1",
@@ -44,4 +44,4 @@ async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profil
     }
     for allele_name, allele_ids in mapping.items():
         assert allele_name in expected_mapping
         assert allele_ids == expected_mapping[allele_name]