Many changes, see details:
Some checks failed
ydeng/bmlsa/pipeline/head There was a failure building this commit

Alignment mode (local or global) and various scoring parameters are now configurable

Refactored directory structure

Replaced the redundant aligned-dict pattern with a simple iterable

Added unit tests
This commit is contained in:
2023-04-28 10:49:07 -05:00
parent d42ed83b22
commit ae3732eba2
15 changed files with 781 additions and 125 deletions

View File

@@ -0,0 +1,51 @@
import pytest
from Bio import SeqIO
from bmlsa.aligner import align_many_to_one_ssw
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
from bmlsa.datatypes import AlignedSequence
from collections.abc import Iterable
@pytest.fixture
def reference_sequence():
    """Provide the first record of the reference FASTA file as a plain string."""
    fasta_records = list(
        SeqIO.parse("tests/resources/NC_045512_coding.fasta", "fasta")
    )
    first_record = fasta_records[0]
    return str(first_record.seq)
@pytest.fixture
def queries():
    """Provide a one-element list holding the ORF10 query sequence."""
    orf10_bases = (
        "ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
        "GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG"
    )
    orf10 = AlignedSequence("ORF10", orf10_bases, start=29558, end=29674)
    return [orf10]
def test_align_many_to_one_returns_data(reference_sequence, queries):
    """The value returned by the aligner must be iterable."""
    blastp_params = DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
    alignments = align_many_to_one_ssw(reference_sequence, queries, **blastp_params)
    assert isinstance(alignments, Iterable)
def test_align_many_to_one_returns_correct_data_structure(reference_sequence, queries):
    """Every alignment result must be a pair of ``AlignedSequence`` objects.

    The results are materialised into a list first so the test can fail
    when the aligner yields nothing, instead of silently skipping the loop.
    """
    results = list(
        align_many_to_one_ssw(
            reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
        )
    )
    # Guard against a vacuous pass: with no results the loop body never runs.
    assert results, "aligner produced no results for a non-empty query list"
    for original, aligned_seq in results:
        assert isinstance(original, AlignedSequence)
        assert isinstance(aligned_seq, AlignedSequence)
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):
    """The aligned sequence must keep the start/end of its original query.

    The results are materialised into a list first so the test can fail
    when the aligner yields nothing, instead of silently skipping the loop.
    """
    results = list(
        align_many_to_one_ssw(
            reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
        )
    )
    # Guard against a vacuous pass: with no results the loop body never runs.
    assert results, "aligner produced no results for a non-empty query list"
    for original, aligned_seq in results:
        assert original.start == aligned_seq.start
        assert original.end == aligned_seq.end

35
tests/bmlsa/test_io.py Normal file
View File

@@ -0,0 +1,35 @@
from csv import reader
from os import path
from bmlsa.datatypes import AlignedSequence
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
from collections.abc import Iterable
def test_read_annotations_from_csv_has_data():
    """Reading the sample gene CSV must return an iterable."""
    csv_path = "tests/resources/SARS_CoV-2_genes.csv"
    annotations = read_annotations_from_csv(csv_path, "id", "sequence")
    assert isinstance(annotations, Iterable)
def test_read_annotations_from_csv_data_valid():
    """Every annotation read from the sample CSV must expose string id/sequence.

    The annotations are materialised into a list first so the test can fail
    when nothing was read, instead of silently skipping the loop.
    """
    results = list(
        read_annotations_from_csv(
            "tests/resources/SARS_CoV-2_genes.csv", "id", "sequence"
        )
    )
    # Guard against a vacuous pass: with no rows the loop body never runs.
    assert results, "no annotations were read from the sample CSV"
    for aligned_seq in results:
        assert isinstance(aligned_seq.id, str)
        assert isinstance(aligned_seq.sequence, str)
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
    """The saved CSV's first row must name every attribute for both sequences.

    A dummy sequence is saved as an (original, aligned) pair; for each
    instance attribute of that sequence, the first row of the written file
    must contain both ``"original" + attribute`` and ``"aligned" + attribute``.
    """
    output_path = path.join(tmpdir, "alignment_results.csv")
    dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
    alignments = [(dummy_sequence, dummy_sequence)]
    save_alignments_to_csv(alignments, output_path)
    # The csv module documentation requires files handed to csv.reader to be
    # opened with newline="" so line endings inside fields are parsed correctly.
    with open(output_path, "r", newline="") as csv_fd:
        rows = list(reader(csv_fd))
    assert rows, "the saved CSV file is empty"
    header = rows[0]
    for attribute in vars(dummy_sequence):
        assert "original" + attribute in header, f"missing original{attribute}"
        assert "aligned" + attribute in header, f"missing aligned{attribute}"