Added test to check CSV name is not repeated
All checks were successful
autoBIGS.engine/pipeline/head This commit looks good

This commit is contained in:
Harrison Deng 2025-02-26 07:10:44 +00:00
parent 2ea2f63f29
commit bbd9e67c8c
2 changed files with 26 additions and 3 deletions

View File

@ -7,7 +7,7 @@ import pytest
from autobigs.engine.analysis import bigsdb from autobigs.engine.analysis import bigsdb
from autobigs.engine.structures import mlst from autobigs.engine.structures import mlst
from autobigs.engine.structures.genomics import NamedString from autobigs.engine.structures.genomics import NamedString
from autobigs.engine.structures.mlst import Allele, MLSTProfile from autobigs.engine.structures.mlst import Allele, MLSTProfile, NamedMLSTProfile
from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException from autobigs.engine.exceptions.database import NoBIGSdbExactMatchesException, NoBIGSdbMatchesException
from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler from autobigs.engine.analysis.bigsdb import BIGSdbIndex, BIGSdbMLSTProfiler, RemoteBIGSdbMLSTProfiler
@ -131,11 +131,21 @@ class TestBIGSdbMLSTProfiler:
async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler: async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as dummy_profiler:
async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)): async for named_profile in dummy_profiler.profile_multiple_strings(generate_async_iterable(dummy_sequences)):
name, profile = named_profile.name, named_profile.mlst_profile name, profile = named_profile.name, named_profile.mlst_profile
assert profile is not None
assert isinstance(profile, MLSTProfile) assert isinstance(profile, MLSTProfile)
assert profile.clonal_complex == expected_profile.clonal_complex assert profile.clonal_complex == expected_profile.clonal_complex
assert profile.sequence_type == expected_profile.sequence_type assert profile.sequence_type == expected_profile.sequence_type
async def test_bigsdb_profile_named_string_no_repeat_name(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
    """Profile one named sequence and verify the result keeps the exact name.

    The first sequence of the FASTA file at ``seq_path`` is wrapped in a
    single ``NamedString`` and handed to ``profile_string``. The returned
    ``NamedMLSTProfile`` must agree with ``expected_profile`` on clonal
    complex and sequence type, and must carry the submitted name unchanged.

    ``feature_seqs_path`` and ``bad_profile`` are not used in this test; they
    remain in the signature as-is (presumably supplied by parametrization
    shared with sibling tests -- verify before removing).
    """
    submitted_name = "BX470248.1"
    query = [NamedString(submitted_name, get_first_sequence_from_fasta(seq_path))]
    async with bigsdb.get_BIGSdb_MLST_profiler(local_db, database_api, database_name, scheme_id) as profiler:
        result = await profiler.profile_string(query)
        assert isinstance(result, NamedMLSTProfile)
        returned_name = result.name
        mlst_profile = result.mlst_profile
        assert isinstance(mlst_profile, MLSTProfile)
        assert mlst_profile.clonal_complex == expected_profile.clonal_complex
        assert mlst_profile.sequence_type == expected_profile.sequence_type
        # The name that comes back must be exactly the one that went in.
        assert returned_name == submitted_name
async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile): async def test_bigsdb_profile_multiple_strings_exactmatch_fail_second_no_stop(self, local_db, database_api, database_name, scheme_id, seq_path: str, feature_seqs_path: str, expected_profile: MLSTProfile, bad_profile: MLSTProfile):
valid_seq = get_first_sequence_from_fasta(seq_path) valid_seq = get_first_sequence_from_fasta(seq_path)
dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]] dummy_sequences = [[NamedString("seq1", valid_seq)], [NamedString("should_fail", gene_scrambler(valid_seq, 0.3))], [NamedString("seq3", valid_seq)]]

View File

@ -31,9 +31,21 @@ async def test_column_order_is_same_as_expected_file(dummy_alphabet_mlst_profile
with open(output_path) as csv_handle: with open(output_path) as csv_handle:
csv_reader = reader(csv_handle) csv_reader = reader(csv_handle)
lines = list(csv_reader) lines = list(csv_reader)
target_columns = lines[4:] target_columns = lines[0][3:]
assert target_columns == sorted(target_columns) assert target_columns == sorted(target_columns)
async def test_csv_writing_sample_name_not_repeated_when_single_sequence(dummy_alphabet_mlst_profile):
    """Check that a single profile's sample name is written to the CSV intact.

    One profile is written with ``write_mlst_profiles_as_csv`` into a
    temporary directory, the file is parsed back with ``csv.reader``, and the
    first cell of the first data row is compared against ``"name"``.

    Args:
        dummy_alphabet_mlst_profile: fixture providing the profile to write
            (presumably named ``"name"`` -- confirm against the fixture).
    """
    dummy_profiles = [dummy_alphabet_mlst_profile]
    with tempfile.TemporaryDirectory() as temp_dir:
        output_path = path.join(temp_dir, "out.csv")
        await write_mlst_profiles_as_csv(iterable_to_asynciterable(dummy_profiles), output_path)
        # csv.reader expects its file opened with newline="" so that newlines
        # embedded in quoted fields are parsed correctly on every platform.
        with open(output_path, newline="") as csv_handle:
            lines = list(reader(csv_handle))
        # Fail with a readable message instead of an IndexError when the
        # writer produced no data row.
        assert len(lines) >= 2, "expected a header row followed by one data row"
        # Row 0 is the header; the first column of row 1 is read as the
        # sample name.
        sample_name = lines[1][0]
        assert sample_name == "name"
async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile): async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profile: NamedMLSTProfile):
mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore mapping = alleles_to_text_map(dummy_alphabet_mlst_profile.mlst_profile.alleles) # type: ignore
expected_mapping = { expected_mapping = {
@ -45,3 +57,4 @@ async def test_alleles_to_text_map_mapping_is_correct(dummy_alphabet_mlst_profil
for allele_name, allele_ids in mapping.items(): for allele_name, allele_ids in mapping.items():
assert allele_name in expected_mapping assert allele_name in expected_mapping
assert allele_ids == expected_mapping[allele_name] assert allele_ids == expected_mapping[allele_name]