Implemented annotated local typing method without testing
This commit is contained in:
27
tests/autobigs/engine/analysis/test_aligners.py
Normal file
27
tests/autobigs/engine/analysis/test_aligners.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from Bio import SeqIO
|
||||
from Bio.Align import PairwiseAligner
|
||||
from pytest import mark
|
||||
from pytest import fixture
|
||||
from autobigs.engine.analysis.aligners import AsyncPairwiseAlignmentEngine
|
||||
from autobigs.engine.structures.alignment import PairwiseAlignment
|
||||
|
||||
@fixture
|
||||
def tohamaI_bpertussis_adk():
|
||||
return str(SeqIO.read("tests/resources/tohama_I_bpertussis_adk.fasta", format="fasta").seq)
|
||||
|
||||
@fixture
|
||||
def tohamaI_bpertussis_genome():
|
||||
return str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", format="fasta").seq)
|
||||
|
||||
@fixture(params=[1, 2])
|
||||
def dummy_engine(request):
|
||||
aligner = PairwiseAligner("blastn")
|
||||
aligner.mode = "local"
|
||||
with AsyncPairwiseAlignmentEngine(aligner, request.param) as engine:
|
||||
yield engine
|
||||
|
||||
class TestAsyncPairwiseAlignmentEngine:
|
||||
async def test_single_alignment_no_errors(self, tohamaI_bpertussis_genome, tohamaI_bpertussis_adk: str, dummy_engine: AsyncPairwiseAlignmentEngine):
|
||||
dummy_engine.align(tohamaI_bpertussis_genome, tohamaI_bpertussis_adk)
|
||||
async for alignment, additional_information in dummy_engine:
|
||||
assert isinstance(alignment, PairwiseAlignment)
|
210
tests/autobigs/engine/analysis/test_bigsdb.py
Normal file
210
tests/autobigs/engine/analysis/test_bigsdb.py
Normal file
@@ -0,0 +1,210 @@
|
||||
from os import path
|
||||
import random
|
||||
import re
|
||||
from typing import Callable, Collection, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
import pytest
|
||||
from autobigs.engine.analysis import bigsdb
|
||||
from autobigs.engine.structures import mlst
|
||||
from autobigs.engine.structures.genomics import NamedString
|
||||
from autobigs.engine.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, LocalBIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
|
||||
|
||||
async def generate_async_iterable(normal_iterable):
|
||||
for dummy_sequence in normal_iterable:
|
||||
yield dummy_sequence
|
||||
|
||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
||||
rand = random.Random(gene)
|
||||
if isinstance(mutation_site_count, float):
|
||||
mutation_site_count = int(mutation_site_count * len(gene))
|
||||
random_locations = rand.choices(range(len(gene)), k=mutation_site_count)
|
||||
scrambled = list(gene)
|
||||
for random_location in random_locations:
|
||||
scrambled[random_location] = rand.choice(alphabet)
|
||||
return "".join(scrambled)
|
||||
|
||||
def get_first_sequence_from_fasta(resource: str):
|
||||
return str(SeqIO.read(path.join("tests/resources/", resource), "fasta").seq)
|
||||
|
||||
def get_multiple_sequences_from_fasta(resource: str):
|
||||
return tuple(SeqIO.parse(path.join("tests/resources/", resource), "fasta"))
|
||||
|
||||
bpertussis_tohamaI_profile = MLSTProfile((
|
||||
Allele("adk", "1", None),
|
||||
Allele("fumC", "1", None),
|
||||
Allele("glyA", "1", None),
|
||||
Allele("tyrB", "1", None),
|
||||
Allele("icd", "1", None),
|
||||
Allele("pepA", "1", None),
|
||||
Allele("pgm", "1", None)), "1", "ST-2 complex")
|
||||
|
||||
bpertussis_tohamaI_bad_profile = MLSTProfile((
|
||||
Allele("adk", "1", None),
|
||||
Allele("fumC", "2", None),
|
||||
Allele("glyA", "36", None),
|
||||
Allele("tyrB", "4", None),
|
||||
Allele("icd", "4", None),
|
||||
Allele("pepA", "1", None),
|
||||
Allele("pgm", "5", None),
|
||||
), "unknown", "unknown")
|
||||
|
||||
hinfluenzae_fdaargos_profile = MLSTProfile((
|
||||
Allele("adk", "1", None),
|
||||
Allele("atpG", "1", None),
|
||||
Allele("frdB", "1", None),
|
||||
Allele("fucK", "1", None),
|
||||
Allele("mdh", "1", None),
|
||||
Allele("pgi", "1", None),
|
||||
Allele("recA", "5", None)
|
||||
), "3", "ST-3 complex")
|
||||
|
||||
hinfluenzae_fdaargos_bad_profile = MLSTProfile((
|
||||
Allele("adk", "1", None),
|
||||
Allele("atpG", "1", None),
|
||||
Allele("frdB", "1", None),
|
||||
Allele("fucK", "1", None),
|
||||
Allele("mdh", "1", None),
|
||||
Allele("pgi", "1", None),
|
||||
Allele("recA", "5", None)
|
||||
), "3", "ST-3 complex")
|
||||
|
||||
hinfluenzae_fdaargos_sequence = str(SeqIO.read("tests/resources/fdaargos_1560_hinfluenza.fasta", "fasta").seq)
|
||||
|
||||
hinfluenzae_fdaargos_fragmented_sequence = tuple(SeqIO.parse("tests/resources/tohama_I_bpertussis_features.fasta", "fasta"))
|
||||
|
||||
@pytest.mark.parametrize("local_db,database_api,database_name,schema_id,seq_path,feature_seqs_path,expected_profile,bad_profile", [
|
||||
(False, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||
(True, "https://bigsdb.pasteur.fr/api", "pubmlst_bordetella_seqdef", 3, "tohama_I_bpertussis.fasta", "tohama_I_bpertussis_features.fasta", bpertussis_tohamaI_profile, bpertussis_tohamaI_bad_profile),
|
||||
])
|
||||
class TestBIGSdbMLSTProfiler:
|
||||
async def test_profiling_results_in_exact_matches_when_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
sequence = get_first_sequence_from_fasta(seq_path)
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
expected_alleles = mlst.alleles_to_mapping(expected_profile.alleles)
|
||||
targets_left = set(mlst.alleles_to_mapping(expected_profile.alleles).keys())
|
||||
async for exact_match in dummy_profiler.determine_mlst_allele_variants(query_sequence_strings=[sequence]):
|
||||
assert isinstance(exact_match, Allele)
|
||||
assert exact_match.allele_locus in expected_alleles
|
||||
assert exact_match.allele_variant == expected_alleles[exact_match.allele_locus]
|
||||
targets_left.remove(exact_match.allele_locus)
|
||||
|
||||
assert len(targets_left) == 0
|
||||
|
||||
async def test_sequence_profiling_non_exact_returns_non_exact(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
target_sequences = get_multiple_sequences_from_fasta(feature_seqs_path)
|
||||
mlst_targets = {x.lower() for x in mlst.alleles_to_mapping(expected_profile.alleles).keys()}
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as profiler:
|
||||
for target_sequence in target_sequences:
|
||||
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", target_sequence.description)
|
||||
if match is None:
|
||||
continue
|
||||
gene = match.group(1).lower()
|
||||
if gene not in mlst_targets:
|
||||
continue
|
||||
scrambled = gene_scrambler(str(target_sequence.seq), 0.125)
|
||||
async for partial_match in profiler.determine_mlst_allele_variants([scrambled]):
|
||||
assert partial_match.partial_match_profile is not None
|
||||
mlst_targets.remove(gene)
|
||||
|
||||
assert len(mlst_targets) == 0
|
||||
|
||||
async def test_profiling_results_in_correct_mlst_st(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.determine_mlst_st(expected_profile.alleles)
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
assert mlst_st_data.clonal_complex == expected_profile.clonal_complex
|
||||
assert mlst_st_data.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_profiling_non_exact_results_in_list_of_mlsts(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
dummy_alleles = bad_profile.alleles
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
mlst_profile = await dummy_profiler.determine_mlst_st(dummy_alleles)
|
||||
assert mlst_profile.clonal_complex == "unknown"
|
||||
assert mlst_profile.sequence_type == "unknown"
|
||||
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_same_string_twice(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
sequence = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", sequence)], [NamedString("seq2", sequence)]]
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), True):
|
||||
name, profile = name_profile.name, name_profile.mlst_profile
|
||||
|
||||
assert profile is not None
|
||||
if name == "should_fail":
|
||||
assert profile.clonal_complex == "unknown"
|
||||
assert profile.sequence_type == "unknown"
|
||||
assert len(profile.alleles) > 0
|
||||
else:
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop(self, local_db, database_api, database_name, schema_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
|
||||
valid_seq = get_first_sequence_from_fasta(seq_path)
|
||||
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]
|
||||
|
||||
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, schema_id) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences), False):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
|
||||
assert profile is not None
|
||||
if name == "should_fail":
|
||||
assert profile.clonal_complex == "unknown"
|
||||
assert profile.sequence_type == "unknown"
|
||||
assert len(profile.alleles) > 0
|
||||
else:
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == expected_profile.clonal_complex
|
||||
assert profile.sequence_type == expected_profile.sequence_type
|
||||
|
||||
class TestBIGSdbIndex:
|
||||
|
||||
async def test_bigsdb_index_all_databases_is_not_empty(self):
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert len(await bigsdb_index.get_known_seqdef_dbs()) > 0
|
||||
|
||||
async def test_bigsdb_index_references_pubmlst_correctly(self):
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")) == "https://rest.pubmlst.org"
|
||||
|
||||
async def test_bigsdb_index_references_institutpasteur_correctly(self):
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"
|
||||
|
||||
async def test_bigsdb_index_get_schemas_for_bordetella(self):
|
||||
async with BIGSdbIndex() as index:
|
||||
schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
||||
assert len(schemas.keys()) > 0
|
||||
assert "MLST" in schemas
|
||||
assert isinstance(schemas["MLST"], int)
|
||||
|
||||
async def test_bigsdb_index_get_databases_has_only_seqdef(self):
|
||||
async with BIGSdbIndex() as index:
|
||||
databases = await index.get_known_seqdef_dbs()
|
||||
assert len(databases.keys()) > 0
|
||||
for database_name in databases.keys():
|
||||
assert database_name.endswith("seqdef")
|
||||
assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"
|
||||
|
||||
async def test_bigsdb_index_instantiates_correct_profiler(self):
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
async with await bigsdb_index.build_profiler_from_seqdefdb("pubmlst_bordetella_seqdef", 3) as profiler:
|
||||
profile = await profiler.profile_string(sequence)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
@@ -1,21 +0,0 @@
|
||||
from autobigs.engine.data.local.csv import dict_loci_alleles_variants_from_loci
|
||||
from autobigs.engine.data.structures.mlst import Allele
|
||||
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_single_loci_not_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, str)
|
||||
assert variant == "1"
|
||||
|
||||
def test_dict_loci_alleles_variants_from_loci_multi_loci_is_list():
|
||||
alleles_map = {
|
||||
"adk": [Allele("adk", "1", None), Allele("adk", "2", None)]
|
||||
}
|
||||
results = dict_loci_alleles_variants_from_loci(alleles_map)
|
||||
for loci, variant in results.items():
|
||||
assert isinstance(variant, list)
|
||||
assert len(variant) == 2
|
@@ -1,249 +0,0 @@
|
||||
import random
|
||||
import re
|
||||
from typing import Collection, Sequence, Union
|
||||
from Bio import SeqIO
|
||||
import pytest
|
||||
from autobigs.engine.data.structures.genomics import NamedString
|
||||
from autobigs.engine.data.structures.mlst import Allele, MLSTProfile
|
||||
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
|
||||
from autobigs.engine.data.remote.databases.bigsdb import BIGSdbIndex, OnlineBIGSdbMLSTProfiler
|
||||
|
||||
def gene_scrambler(gene: str, mutation_site_count: Union[int, float], alphabet: Sequence[str] = ["A", "T", "C", "G"]):
|
||||
rand = random.Random(gene)
|
||||
if isinstance(mutation_site_count, float):
|
||||
mutation_site_count = int(mutation_site_count * len(gene))
|
||||
random_locations = rand.choices(range(len(gene)), k=mutation_site_count)
|
||||
scrambled = list(gene)
|
||||
for random_location in random_locations:
|
||||
scrambled[random_location] = rand.choice(alphabet)
|
||||
return "".join(scrambled)
|
||||
|
||||
async def test_institutpasteur_profiling_results_in_exact_matches_when_exact():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
targets_left = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
|
||||
async for exact_match in dummy_profiler.fetch_mlst_allele_variants(sequence_strings=[sequence]):
|
||||
assert isinstance(exact_match, Allele)
|
||||
assert exact_match.allele_variant == '1' # All of Tohama I has allele id I
|
||||
targets_left.remove(exact_match.allele_locus)
|
||||
|
||||
assert len(targets_left) == 0
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_non_exact_returns_non_exact():
|
||||
sequences = list(SeqIO.parse("tests/resources/tohama_I_bpertussis_coding.fasta", "fasta"))
|
||||
mlst_targets = {"adk", "fumc", "glya", "tyrb", "icd", "pepa", "pgm"}
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as profiler:
|
||||
for sequence in sequences:
|
||||
match = re.fullmatch(r".*\[gene=([\w\d]+)\].*", sequence.description)
|
||||
if match is None:
|
||||
continue
|
||||
gene = match.group(1)
|
||||
if gene.lower() not in mlst_targets:
|
||||
continue
|
||||
scrambled = gene_scrambler(str(sequence.seq), 0.125)
|
||||
async for partial_match in profiler.fetch_mlst_allele_variants(scrambled):
|
||||
assert partial_match.partial_match_profile is not None
|
||||
mlst_targets.remove(gene.lower())
|
||||
|
||||
assert len(mlst_targets) == 0
|
||||
|
||||
async def test_institutpasteur_profiling_results_in_correct_mlst_st():
|
||||
async def dummy_allele_generator():
|
||||
dummy_alleles = [
|
||||
Allele("adk", "1", None),
|
||||
Allele("fumC", "1", None),
|
||||
Allele("glyA", "1", None),
|
||||
Allele("tyrB", "1", None),
|
||||
Allele("icd", "1", None),
|
||||
Allele("pepA", "1", None),
|
||||
Allele("pgm", "1", None),
|
||||
]
|
||||
for dummy_allele in dummy_alleles:
|
||||
yield dummy_allele
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(dummy_allele_generator())
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
assert mlst_st_data.clonal_complex == "ST-2 complex"
|
||||
assert mlst_st_data.sequence_type == "1"
|
||||
|
||||
async def test_institutpasteur_profiling_non_exact_results_in_list_of_mlsts():
|
||||
dummy_alleles = [
|
||||
Allele("adk", "1", None),
|
||||
Allele("fumC", "2", None),
|
||||
Allele("glyA", "36", None),
|
||||
Allele("tyrB", "4", None),
|
||||
Allele("icd", "4", None),
|
||||
Allele("pepA", "1", None),
|
||||
Allele("pgm", "5", None),
|
||||
]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
mlst_profile = await dummy_profiler.fetch_mlst_st(dummy_alleles)
|
||||
assert mlst_profile.clonal_complex == "unknown"
|
||||
assert mlst_profile.sequence_type == "unknown"
|
||||
|
||||
|
||||
async def test_institutpasteur_sequence_profiling_is_correct():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
profile = await dummy_profiler.profile_string(sequence)
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
|
||||
async def test_pubmlst_profiling_results_in_exact_matches_when_exact():
|
||||
dummy_alleles = {
|
||||
Allele("adk", "1", None),
|
||||
Allele("atpG", "1", None),
|
||||
Allele("frdB", "1", None),
|
||||
Allele("fucK", "1", None),
|
||||
Allele("mdh", "1", None),
|
||||
Allele("pgi", "1", None),
|
||||
Allele("recA", "5", None),
|
||||
}
|
||||
sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
exact_matches = dummy_profiler.fetch_mlst_allele_variants(sequence_strings=sequence)
|
||||
async for exact_match in exact_matches:
|
||||
assert isinstance(exact_match, Allele)
|
||||
dummy_alleles.remove(exact_match)
|
||||
|
||||
assert len(dummy_alleles) == 0
|
||||
|
||||
async def test_pubmlst_profiling_results_in_correct_st():
|
||||
async def generate_dummy_targets():
|
||||
dummy_alleles = [
|
||||
Allele("adk", "1", None),
|
||||
Allele("atpG", "1", None),
|
||||
Allele("frdB", "1", None),
|
||||
Allele("fucK", "1", None),
|
||||
Allele("mdh", "1", None),
|
||||
Allele("pgi", "1", None),
|
||||
Allele("recA", "5", None),
|
||||
]
|
||||
for dummy_allele in dummy_alleles:
|
||||
yield dummy_allele
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
mlst_st_data = await dummy_profiler.fetch_mlst_st(generate_dummy_targets())
|
||||
assert mlst_st_data is not None
|
||||
assert isinstance(mlst_st_data, MLSTProfile)
|
||||
assert mlst_st_data.clonal_complex == "ST-3 complex"
|
||||
assert mlst_st_data.sequence_type == "3"
|
||||
|
||||
async def test_pubmlst_sequence_profiling_is_correct():
|
||||
sequence = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://rest.pubmlst.org/", database_name="pubmlst_hinfluenzae_seqdef", schema_id=1) as dummy_profiler:
|
||||
profile = await dummy_profiler.profile_string(sequence)
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-3 complex"
|
||||
assert profile.sequence_type == "3"
|
||||
|
||||
async def test_bigsdb_index_all_databases_is_not_empty():
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert len(await bigsdb_index.get_known_seqdef_dbs()) > 0
|
||||
|
||||
async def test_bigsdb_index_references_pubmlst_correctly():
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_hinfluenzae_seqdef")) == "https://rest.pubmlst.org"
|
||||
|
||||
async def test_bigsdb_index_references_institutpasteur_correctly():
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
assert (await bigsdb_index.get_bigsdb_api_from_seqdefdb("pubmlst_bordetella_seqdef")) == "https://bigsdb.pasteur.fr/api"
|
||||
|
||||
|
||||
async def test_bigsdb_index_instantiates_correct_profiler():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
async with BIGSdbIndex() as bigsdb_index:
|
||||
async with await bigsdb_index.build_profiler_from_seqdefdb("pubmlst_bordetella_seqdef", 3) as profiler:
|
||||
profile = await profiler.profile_string(sequence)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_same_string_twice():
|
||||
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
dummy_sequences = [NamedString("seq1", sequence), NamedString("seq2", sequence)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences()):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop():
|
||||
valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for name_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), True):
|
||||
name, profile = name_profile.name, name_profile.mlst_profile
|
||||
|
||||
if name == "should_fail":
|
||||
assert profile is None
|
||||
else:
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_nonexact_second_no_stop():
|
||||
valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", gene_scrambler(valid_seq, 0.3)), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), False):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
if name == "should_fail":
|
||||
assert profile is not None
|
||||
assert profile.clonal_complex == "unknown"
|
||||
assert profile.sequence_type == "unknown"
|
||||
assert len(profile.alleles) > 0
|
||||
else:
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_profile_multiple_strings_fail_second_stop():
|
||||
valid_seq = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||
invalid_seq = str(SeqIO.read("tests/resources/FDAARGOS_1560.fasta", "fasta").seq)
|
||||
dummy_sequences = [NamedString("seq1", valid_seq), NamedString("should_fail", invalid_seq), NamedString("seq3", valid_seq)]
|
||||
async def generate_async_iterable_sequences():
|
||||
for dummy_sequence in dummy_sequences:
|
||||
yield [dummy_sequence]
|
||||
async with OnlineBIGSdbMLSTProfiler(database_api="https://bigsdb.pasteur.fr/api", database_name="pubmlst_bordetella_seqdef", schema_id=3) as dummy_profiler:
|
||||
with pytest.raises(NoBIGSdbMatchesException):
|
||||
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable_sequences(), stop_on_fail=True):
|
||||
name, profile = named_profile.name, named_profile.mlst_profile
|
||||
if name == "should_fail":
|
||||
pytest.fail("Exception should have been thrown, no exception was thrown.")
|
||||
else:
|
||||
assert profile is not None
|
||||
assert isinstance(profile, MLSTProfile)
|
||||
assert profile.clonal_complex == "ST-2 complex"
|
||||
assert profile.sequence_type == "1"
|
||||
|
||||
async def test_bigsdb_index_get_schemas_for_bordetella():
|
||||
async with BIGSdbIndex() as index:
|
||||
schemas = await index.get_schemas_for_seqdefdb(seqdef_db_name="pubmlst_bordetella_seqdef")
|
||||
assert len(schemas.keys()) > 0
|
||||
assert "MLST" in schemas
|
||||
assert isinstance(schemas["MLST"], int)
|
||||
|
||||
async def test_bigsdb_index_get_databases_has_only_seqdef():
|
||||
async with BIGSdbIndex() as index:
|
||||
databases = await index.get_known_seqdef_dbs()
|
||||
assert len(databases.keys()) > 0
|
||||
for database_name in databases.keys():
|
||||
assert database_name.endswith("seqdef")
|
||||
assert databases["pubmlst_bordetella_seqdef"] == "https://bigsdb.pasteur.fr/api"
|
@@ -1,4 +1,4 @@
|
||||
from autobigs.engine.data.local.fasta import read_fasta
|
||||
from autobigs.engine.reading import read_fasta
|
||||
|
||||
|
||||
async def test_fasta_reader_not_none():
|
0
tests/autobigs/engine/test_writing.py
Normal file
0
tests/autobigs/engine/test_writing.py
Normal file
27246
tests/resources/fdaargos_1560_hinfluenza_features.fasta
Normal file
27246
tests/resources/fdaargos_1560_hinfluenza_features.fasta
Normal file
File diff suppressed because it is too large
Load Diff
11
tests/resources/tohama_I_bpertussis_adk.fasta
Normal file
11
tests/resources/tohama_I_bpertussis_adk.fasta
Normal file
@@ -0,0 +1,11 @@
|
||||
>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
|
||||
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
Reference in New Issue
Block a user