Completed NCBI lookup, FASTA reading, ABIF reading, and Institut Pasteur querying

This commit is contained in:
2025-01-03 16:41:51 +00:00
parent b834aa93b0
commit 362e0867e5
22 changed files with 339 additions and 56 deletions

View File

@@ -0,0 +1,8 @@
import os
from mlstmyfasta.engine.local.abif import load_sanger_sequence
async def test_load_sanger_sequence_has_data():
    """Check that the AB1 fixture exists and that loading it gives a non-None result."""
    trace_path = "tests/resources/1I1_F_P1815443_047.ab1"
    # Guard on the fixture first so a missing file is not mistaken for a loader failure.
    assert os.path.exists(trace_path)
    loaded_trace = await load_sanger_sequence(trace_path)
    assert loaded_trace is not None

View File

@@ -0,0 +1,7 @@
from mlstmyfasta.engine.local.fasta import read_fasta
async def test_fasta_reader_not_none():
    """Check that read_fasta yields at least one record, each named "BX470248.1".

    The per-record assertion alone would pass vacuously if the reader yielded
    nothing, so the number of records seen is counted and checked afterwards.
    """
    records_seen = 0
    async for named_string in read_fasta("tests/resources/tohama_I_bpertussis.fasta"):
        assert named_string.name == "BX470248.1"
        records_seen += 1
    # Without this, an empty stream would make the test succeed without testing anything.
    assert records_seen > 0, "read_fasta yielded no records"

View File

@@ -0,0 +1,16 @@
from Bio import SeqIO
from mlstmyfasta.engine.data.MLST import Allele
from mlstmyfasta.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
async def test_profiling_results_in_exact_matches_when_exact():
    """Profile the Tohama I FASTA sequence and expect allele '1' at every target locus.

    Each result yielded by the profiler must be an ``Allele`` with variant '1'
    whose locus is one of the seven expected targets and has not been reported
    before. Once the stream is exhausted, every target must have been seen.
    """
    sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
    targets_left = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
    async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
        exact_matches = dummy_profiler.fetch_mlst_profile(sequence_string=sequence)
        async for exact_match in exact_matches:
            assert isinstance(exact_match, Allele)
            assert exact_match.allele_variant == '1'  # Every Tohama I locus is expected to be allele 1
            # Explicit check so an unexpected or repeated locus fails with a
            # readable message instead of a bare KeyError from set.remove.
            assert exact_match.allele_loci in targets_left, (
                f"unexpected or repeated locus: {exact_match.allele_loci!r}"
            )
            targets_left.remove(exact_match.allele_loci)
    assert not targets_left, f"loci without an exact match: {sorted(targets_left)}"

View File

@@ -0,0 +1,5 @@
from mlstmyfasta.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
async def test_fetch_ncbi_genbank_with_id_works():
    """Fetch GenBank record "CP011448.1" and check that its sequence is non-empty."""
    genbank_record = await fetch_ncbi_genbank("CP011448.1")
    assert len(genbank_record.sequence) > 0

View File

@@ -1,14 +1,11 @@
from mlstmyfasta.engine.annotations import annotate_from_genbank, fetch_ncbi_genbank
from mlstmyfasta.engine.annotate import annotate_from_genbank, fetch_ncbi_genbank
from Bio import SeqIO
from mlstmyfasta.engine.data.genomics import AnnotatedString
async def test_fetch_ncbi_genbank_with_id_works():
# Fetches GenBank record "CP011448.1" and asserts that its sequence is non-empty.
# NOTE(review): this sits in a hunk of a modified file (old 14 lines -> new 11) and
# looks like the pre-change side, i.e. a test removed from this module — confirm
# against the repository before treating it as live code.
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0
async def test_annotate_from_genbank_for_adk_annotation():
# Reads the Tohama I FASTA fixture, annotates it from GenBank record "CP011448.1"
# restricted to the "adk" gene target, and checks that the result is an
# AnnotatedString with at least one annotation whose first entry has type "gene".
# NOTE(review): the next two lines and the two after them are variants of the same
# statements (str() applied at the call site vs. at the read). They look like the
# old and new sides of a diff hunk shown together — confirm which pair is live.
sequence = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq
annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", str(sequence), max_annotation_length=750, gene_targets=set(["adk"]))
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", sequence, max_annotation_length=750, gene_targets=set(["adk"]))
assert isinstance(annotated_sequence, AnnotatedString)
# At least one annotation is required before the first one is indexed below.
assert len(annotated_sequence.annotations) >= 1
assert annotated_sequence.annotations[0].type == "gene"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.