Moved to a general BIGSdb implementation

Updated tests

Removed ABIF UI for the time being

Began updating CLI
This commit is contained in:
2025-01-08 21:32:10 +00:00
parent 645357ac58
commit 42d0f56b18
20 changed files with 403 additions and 414 deletions

View File

@@ -1,8 +1,12 @@
import os
from automlst.engine.local.abif import read_abif
from automlst.engine.local.abif import read_abif, reference_consensus_assembly
async def test_load_sanger_sequence_has_data():
    """Check that reading the bundled ABIF trace fixture yields a result."""
    trace_path = "tests/resources/1I1_F_P1815443_047.ab1"
    # Fail with a clear assertion if the fixture itself is missing, rather
    # than with whatever error read_abif would raise.
    assert os.path.exists(trace_path)
    result_data = await read_abif(trace_path)
    assert result_data is not None
async def test_consensus_assembly_with_ncbi():
    """Call reference_consensus_assembly on two ABIF traces for "ON685494.1".

    No assertions are made yet; the test currently only checks that both
    traces load and that the call itself does not raise.
    """
    trace_paths = (
        "tests/resources/1I1_F_P1815443_047.ab1",
        "tests/resources/1I1_R_P1815443_094.ab1",
    )
    # Traces are read one after another, in the order listed above.
    traces = [await read_abif(trace_path) for trace_path in trace_paths]
    # NOTE(review): the result is not awaited or iterated here; if
    # reference_consensus_assembly is a coroutine or async generator, this
    # line does not actually run the assembly - confirm.
    consensus = reference_consensus_assembly("ON685494.1", traces)
    # TODO complete implementing this

View File

@@ -1,54 +0,0 @@
from Bio import SeqIO
from automlst.engine.data.mlst import Allele, MLSTProfile
from automlst.engine.remote.databases.institutpasteur.mlst import InstitutPasteurProfiler
async def test_profiling_results_in_exact_matches_when_exact():
    """Check that every expected locus is reported once with allele variant '1'.

    Uses the Tohama I FASTA fixture against the "pubmlst_bordetella_seqdef"
    database with schema 3.
    """
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as profiler:
        matches = profiler.fetch_mlst_allele_variants(schema_id=3, sequence_string=sequence)
        # Loci are struck off as they are seen; set.remove raises KeyError on
        # an unexpected or repeated locus, which fails the test.
        remaining_loci = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
        async for match in matches:
            assert isinstance(match, Allele)
            # Every locus of Tohama I is expected to carry allele variant 1.
            assert match.allele_variant == '1'
            remaining_loci.remove(match.allele_loci)
        assert not remaining_loci
async def test_profiling_results_in_correct_st():
    """Check that seven alleles, all variant "1", resolve to ST "1" / "ST-2 complex"."""

    async def dummy_allele_generator():
        # Asynchronously yield one Allele per locus, each with variant "1".
        for locus in ("adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"):
            yield Allele(locus, "1")

    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as profiler:
        st_profile = await profiler.fetch_mlst_st(3, dummy_allele_generator())
        assert st_profile is not None
        assert isinstance(st_profile, MLSTProfile)
        assert st_profile.clonal_complex == "ST-2 complex"
        assert st_profile.sequence_type == "1"
async def test_sequence_profiling_is_correct():
    """Check that profiling the Tohama I sequence gives ST "1" / "ST-2 complex".

    The sequence is read from the FASTA fixture and profiled against the
    "pubmlst_bordetella_seqdef" database using schema 3.
    """
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    # The previously built `dummy_alleles` list was never read by this test
    # and has been dropped; profile_string works from the raw sequence alone.
    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
        profile = await dummy_profiler.profile_string(3, sequence)
        assert profile is not None
        assert isinstance(profile, MLSTProfile)
        assert profile.clonal_complex == "ST-2 complex"
        assert profile.sequence_type == "1"

View File

@@ -1,53 +0,0 @@
import asyncio
from Bio import SeqIO
from automlst.engine.data.mlst import Allele, MLSTProfile
from automlst.engine.remote.databases.pubmlst.mlst import PubMLSTProfiler
async def test_profiling_results_in_exact_matches_when_exact():
    """Check that the FDAARGOS_1560 sequence yields exactly the expected alleles.

    Queries the "pubmlst_hinfluenzae_seqdef" database with schema 1.
    """
    expected_pairs = (
        ("adk", "1"),
        ("atpG", "1"),
        ("frdB", "1"),
        ("fucK", "1"),
        ("mdh", "1"),
        ("pgi", "1"),
        ("recA", "5"),
    )
    # A set is used so each reported allele can be struck off as it is seen;
    # set.remove raises KeyError for anything unexpected or repeated.
    unmatched = {Allele(locus, variant) for locus, variant in expected_pairs}
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as profiler:
        matches = profiler.fetch_mlst_allele_variants(schema_id=1, sequence_string=sequence)
        async for match in matches:
            assert isinstance(match, Allele)
            unmatched.remove(match)
        assert not unmatched
async def test_profiling_results_in_correct_st():
    """Check that the seven listed alleles resolve to ST "3" / "ST-3 complex"."""

    async def generate_dummy_targets():
        # Asynchronously yield one Allele per (locus, variant) pair.
        allele_pairs = (
            ("adk", "1"),
            ("atpG", "1"),
            ("frdB", "1"),
            ("fucK", "1"),
            ("mdh", "1"),
            ("pgi", "1"),
            ("recA", "5"),
        )
        for locus, variant in allele_pairs:
            yield Allele(locus, variant)

    async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as profiler:
        st_profile = await profiler.fetch_mlst_st(1, generate_dummy_targets())
        assert st_profile is not None
        assert isinstance(st_profile, MLSTProfile)
        assert st_profile.clonal_complex == "ST-3 complex"
        assert st_profile.sequence_type == "3"
async def test_sequence_profiling_is_correct():
    """Check that profiling the FDAARGOS_1560 sequence gives ST "3" / "ST-3 complex".

    Profiles against the "pubmlst_hinfluenzae_seqdef" database with schema 1.
    """
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with PubMLSTProfiler(database_name="pubmlst_hinfluenzae_seqdef") as profiler:
        result = await profiler.profile_string(1, sequence)
        assert result is not None
        assert isinstance(result, MLSTProfile)
        assert result.clonal_complex == "ST-3 complex"
        assert result.sequence_type == "3"

View File

@@ -0,0 +1,115 @@
from Bio import SeqIO
from automlst.engine.data.mlst import Allele, MLSTProfile
from automlst.engine.remote.databases.bigsdb import BIGSdbIndex, BigSDBMLSTProfiler
async def test_institutpasteur_profiling_results_in_exact_matches_when_exact():
    """Check that every expected locus is reported once with allele variant '1'.

    Uses the Tohama I FASTA fixture against "pubmlst_bordetella_seqdef"
    (schema 3) through the API at https://bigsdb.pasteur.fr/api.
    """
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with BigSDBMLSTProfiler(
        database_api="https://bigsdb.pasteur.fr/api",
        database_name="pubmlst_bordetella_seqdef",
        schema_id=3,
    ) as profiler:
        matches = profiler.fetch_mlst_allele_variants(sequence_string=sequence)
        # Loci are struck off as they are seen; set.remove raises KeyError on
        # an unexpected or repeated locus, which fails the test.
        remaining_loci = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
        async for match in matches:
            assert isinstance(match, Allele)
            # Every locus of Tohama I is expected to carry allele variant 1.
            assert match.allele_variant == '1'
            remaining_loci.remove(match.allele_loci)
        assert not remaining_loci
async def test_institutpasteur_profiling_results_in_correct_mlst_st():
    """Check that seven alleles, all variant "1", resolve to ST "1" / "ST-2 complex".

    Looks the alleles up in "pubmlst_bordetella_seqdef" (schema 3) through
    the API at https://bigsdb.pasteur.fr/api.
    """

    async def dummy_allele_generator():
        # Asynchronously yield one Allele per locus, each with variant "1".
        for locus in ("adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"):
            yield Allele(locus, "1")

    async with BigSDBMLSTProfiler(
        database_api="https://bigsdb.pasteur.fr/api",
        database_name="pubmlst_bordetella_seqdef",
        schema_id=3,
    ) as profiler:
        st_profile = await profiler.fetch_mlst_st(dummy_allele_generator())
        assert st_profile is not None
        assert isinstance(st_profile, MLSTProfile)
        assert st_profile.clonal_complex == "ST-2 complex"
        assert st_profile.sequence_type == "1"
async def test_institutpasteur_sequence_profiling_is_correct():
    """Check that profiling the Tohama I sequence gives ST "1" / "ST-2 complex".

    Profiles against "pubmlst_bordetella_seqdef" (schema 3) through the API
    at https://bigsdb.pasteur.fr/api.
    """
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with BigSDBMLSTProfiler(
        database_api="https://bigsdb.pasteur.fr/api",
        database_name="pubmlst_bordetella_seqdef",
        schema_id=3,
    ) as profiler:
        result = await profiler.profile_string(sequence)
        assert result is not None
        assert isinstance(result, MLSTProfile)
        assert result.clonal_complex == "ST-2 complex"
        assert result.sequence_type == "1"
async def test_pubmlst_profiling_results_in_exact_matches_when_exact():
    """Check that the FDAARGOS_1560 sequence yields exactly the expected alleles.

    Queries "pubmlst_hinfluenzae_seqdef" (schema 1) through the API at
    https://rest.pubmlst.org/.
    """
    expected_pairs = (
        ("adk", "1"),
        ("atpG", "1"),
        ("frdB", "1"),
        ("fucK", "1"),
        ("mdh", "1"),
        ("pgi", "1"),
        ("recA", "5"),
    )
    # A set is used so each reported allele can be struck off as it is seen;
    # set.remove raises KeyError for anything unexpected or repeated.
    unmatched = {Allele(locus, variant) for locus, variant in expected_pairs}
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with BigSDBMLSTProfiler(
        database_api="https://rest.pubmlst.org/",
        database_name="pubmlst_hinfluenzae_seqdef",
        schema_id=1,
    ) as profiler:
        matches = profiler.fetch_mlst_allele_variants(sequence_string=sequence)
        async for match in matches:
            assert isinstance(match, Allele)
            unmatched.remove(match)
        assert not unmatched
async def test_pubmlst_profiling_results_in_correct_st():
    """Check that the seven listed alleles resolve to ST "3" / "ST-3 complex".

    Looks the alleles up in "pubmlst_hinfluenzae_seqdef" (schema 1) through
    the API at https://rest.pubmlst.org/.
    """

    async def generate_dummy_targets():
        # Asynchronously yield one Allele per (locus, variant) pair.
        allele_pairs = (
            ("adk", "1"),
            ("atpG", "1"),
            ("frdB", "1"),
            ("fucK", "1"),
            ("mdh", "1"),
            ("pgi", "1"),
            ("recA", "5"),
        )
        for locus, variant in allele_pairs:
            yield Allele(locus, variant)

    async with BigSDBMLSTProfiler(
        database_api="https://rest.pubmlst.org/",
        database_name="pubmlst_hinfluenzae_seqdef",
        schema_id=1,
    ) as profiler:
        st_profile = await profiler.fetch_mlst_st(generate_dummy_targets())
        assert st_profile is not None
        assert isinstance(st_profile, MLSTProfile)
        assert st_profile.clonal_complex == "ST-3 complex"
        assert st_profile.sequence_type == "3"
async def test_pubmlst_sequence_profiling_is_correct():
    """Check that profiling the FDAARGOS_1560 sequence gives ST "3" / "ST-3 complex".

    Profiles against "pubmlst_hinfluenzae_seqdef" (schema 1) through the API
    at https://rest.pubmlst.org/.
    """
    fasta_record = SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with BigSDBMLSTProfiler(
        database_api="https://rest.pubmlst.org/",
        database_name="pubmlst_hinfluenzae_seqdef",
        schema_id=1,
    ) as profiler:
        result = await profiler.profile_string(sequence)
        assert result is not None
        assert isinstance(result, MLSTProfile)
        assert result.clonal_complex == "ST-3 complex"
        assert result.sequence_type == "3"
async def test_bigsdb_index_all_databases_is_not_empty():
    """Check that BIGSdbIndex reports at least one known seqdef database."""
    async with BIGSdbIndex() as index:
        known_seqdef_dbs = await index.get_known_seqdef_dbs()
        assert len(known_seqdef_dbs) > 0
async def test_bigsdb_index_references_pubmlst_correctly():
    """Check that "pubmlst_hinfluenzae_seqdef" maps to the PubMLST REST API URL."""
    async with BIGSdbIndex() as index:
        api_url = await index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")
        # The expected URL has no trailing slash.
        assert api_url == "https://rest.pubmlst.org"
async def test_bigsdb_index_references_institutpasteur_correctly():
    """Check that "pubmlst_bordetella_seqdef" maps to the Institut Pasteur API URL."""
    async with BIGSdbIndex() as index:
        api_url = await index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")
        assert api_url == "https://bigsdb.pasteur.fr/api"
async def test_bigsdb_index_instantiates_correct_profiler():
    """Check that a profiler built by BIGSdbIndex profiles Tohama I correctly.

    The profiler is built for "pubmlst_bordetella_seqdef" with schema 3 and
    must report ST "1" in "ST-2 complex" for the FASTA fixture.
    """
    fasta_record = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta")
    sequence = str(fasta_record.seq)
    async with BIGSdbIndex() as index:
        # build_profiler_from_seqdefdb is awaited first; its result is then
        # entered as an async context manager.
        built_profiler = await index.build_profiler_from_seqdefdb("pubmlst_bordetella_seqdef", 3)
        async with built_profiler as profiler:
            result = await profiler.profile_string(sequence)
            assert result.clonal_complex == "ST-2 complex"
            assert result.sequence_type == "1"