From d42ed83b22f5da0433277992c1d0a3fe778b3a52 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 08:38:55 -0500 Subject: [PATCH 01/24] Added 'features' section to README.md --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f49be76..f4a0e4c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ # BMLSA -A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm \ No newline at end of file +A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm. + +## Features + + - Aligns a CSV of short sequences locally to a given reference sequence + - Outputs in a CSV with old and new positions + - Selectable parameters for alignment + - Compatible with broad-range of input CSV formats + - Automatically align with all sequences in reference FASTA + - All produced output is human readable! + From ae3732eba2b04f825769c36fb6226140d5686ca8 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 10:49:07 -0500 Subject: [PATCH 02/24] Many changes, see details: Now allow for changing whether alignment is local or global and various scoring parameters Refactored directory structure Removed redundant aligned dict pattern for simple iterable Added unit tests --- .vscode/settings.json | 10 +- Jenkinsfile | 10 +- bmlsa/cli.py | 94 ------ environment.yml | 5 +- setup.cfg | 11 +- {bmlsa => src/bmlsa}/__init__.py | 0 {bmlsa => src/bmlsa}/aligner.py | 38 ++- src/bmlsa/cli.py | 191 +++++++++++ {bmlsa => src/bmlsa}/datatypes.py | 0 {bmlsa => src/bmlsa}/exceptions.py | 0 bmlsa/persistence.py => src/bmlsa/io.py | 23 +- tests/bmlsa/test_aligner.py | 51 +++ tests/bmlsa/test_io.py | 35 ++ tests/resources/NC_045512_coding.fasta | 430 ++++++++++++++++++++++++ tests/resources/SARS_CoV-2_genes.csv | 8 + 15 files changed, 781 insertions(+), 125 deletions(-) delete mode 100644 bmlsa/cli.py rename {bmlsa => src/bmlsa}/__init__.py (100%) rename {bmlsa => src/bmlsa}/aligner.py (51%) create mode 100644 src/bmlsa/cli.py rename {bmlsa => src/bmlsa}/datatypes.py (100%) rename {bmlsa => src/bmlsa}/exceptions.py (100%) rename bmlsa/persistence.py => src/bmlsa/io.py (86%) create mode 100644 tests/bmlsa/test_aligner.py create mode 100644 tests/bmlsa/test_io.py create mode 100644 tests/resources/NC_045512_coding.fasta create mode 100644 tests/resources/SARS_CoV-2_genes.csv diff --git a/.vscode/settings.json b/.vscode/settings.json index de288e1..5be7edb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,11 @@ { - "python.formatting.provider": "black" + "python.formatting.provider": "black", + "cSpell.words": [ + "Biopython", + "BLOSUM", + "bmlsa", + "FASTA" + ], + "python.testing.pytestEnabled": true, + "python.analysis.inlayHints.pytestParameters": true } \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index a6244f5..3b9c413 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -12,14 +12,20 @@ pipeline { sh 'echo "mamba activate bmlsa" >> ~/.bashrc' } } + stage("unit tests") { + steps { + sh "python -m pytest --junitxml=test_results.xml" + xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)] + } + } stage("build") { steps { sh "python -m build" } } - stage("test") { + stage("test installation") { steps { - sh "pip install dist/*.whl" + sh "pip install dist/*.whl --force-reinstall" sh "bmlsa -h" } } diff --git a/bmlsa/cli.py b/bmlsa/cli.py deleted file mode 100644 index a082cae..0000000 --- a/bmlsa/cli.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -import argparse -from Bio import SeqIO -from bmlsa.aligner import protein_align_many_to_one_ssw - -from bmlsa.persistence import read_annotations_from_csv, save_alignments_to_csv - - -def main(): - argparser = argparse.ArgumentParser("blmsa") - argparser.add_argument( - "annotations", - type=str, - help=( - "Path to CSV containing the sequences to align as well as the " - "annotations for the respective sequences" - ), - metavar="a", - ) - argparser.add_argument( - "sequence", - type=str, - help=( - "Path to the sequence to annotate in FASTA format. " - "If multiple sequences are present, annotations will be run on each" - ), - metavar="s", - ) - argparser.add_argument( - "output", type=str, help="Path to output location", metavar="o" - ) - argparser.add_argument( - "-I", "--id-header", type=str, help="The header for the ID of the annotation" - ) - argparser.add_argument( - "-N", - "--name-header", - type=str, - help="The header for the name of the annotation", - required=False, - ) - argparser.add_argument( - "-D", - "--desc-header", - type=str, - help="The header for the description of the annotation", - required=False, - ) - argparser.add_argument( - "-T", - "--start-header", - type=str, - help="The header for the start of the annotation", - required=False, - ) - argparser.add_argument( - "-E", - "--end-header", - type=str, - help="The header for the end of the annotation", - required=False, - ) - argparser.add_argument( - "-S", - "--seq-header", - type=str, - help="The header for the sequence of the annotation", - ) - args = argparser.parse_args() - given_annotations = read_annotations_from_csv( - args.annotations, - args.id_header, - args.name_header, - args.desc_header, - args.start_header, - args.end_header, - args.seq_header, - ) - with open(args.sequence, "r") as sequence_fd: - for sequence in SeqIO.parse(sequence_fd, "fasta"): - aligned_annotations = protein_align_many_to_one_ssw( - str(sequence.seq), given_annotations - ) - save_alignments_to_csv( - aligned_annotations, - os.path.join( - args.output, - sequence.id.replace("|", "+").replace(".", "_") + ".csv", - ), - ) - - -if __name__ == "__main__": - main() diff --git a/environment.yml b/environment.yml index 9fcaff8..1186aac 100644 --- a/environment.yml +++ b/environment.yml @@ -4,5 +4,8 @@ channels: dependencies: - biopython=1.81 - build=0.7 - - pytest=7 + - pytest=7.3 + - hypothesis=6.56 - twine=4 + - python-build=0.10.0 + - setuptools \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index bdfa8b2..d09d243 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,12 +1,17 @@ [metadata] name = bmlsa -version = 0.0.3 +version = 0.0.4 [options] -packages = bmlsa +package_dir = + = src install_requires = biopython ==1.81 [options.entry_points] console_scripts = - bmlsa = bmlsa.cli:main \ No newline at end of file + bmlsa = bmlsa.cli:main + +[tool:pytest] +pythonpath = src +testpaths = tests \ No newline at end of file diff --git a/bmlsa/__init__.py b/src/bmlsa/__init__.py similarity index 100% rename from bmlsa/__init__.py rename to src/bmlsa/__init__.py diff --git a/bmlsa/aligner.py b/src/bmlsa/aligner.py similarity index 51% rename from bmlsa/aligner.py rename to src/bmlsa/aligner.py index eb9acb4..7b04cf7 100644 --- a/bmlsa/aligner.py +++ b/src/bmlsa/aligner.py @@ -1,20 +1,35 @@ +import logging +from typing import Iterable from Bio.Align import PairwiseAligner, substitution_matrices from bmlsa.datatypes import AlignedSequence from bmlsa.exceptions import UnexpectedAlignmentResult +logger = logging.getLogger(__name__) -def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]): - annotation_pairs = {} + +def align_many_to_one_ssw( + reference_sequence: str, + queries: Iterable[AlignedSequence], + extend_gap_score: int, + open_gap_score: int, + alignment_mode: str, + substitution_matrix: str = "BLOSUM62", +): + # TODO Consider using the built in "scoring" parameter aligner = PairwiseAligner() - aligner.mode = "local" - aligner.substitution_matrix = substitution_matrices.load("BLOSUM62") - aligner.extend_gap_score = -1 - aligner.open_gap_score = -11 - for id, query in queries.items(): + aligner.substitution_matrix = substitution_matrices.load(substitution_matrix) + aligner.extend_gap_score = extend_gap_score + aligner.open_gap_score = open_gap_score + aligner.mode = alignment_mode + for query in queries: try: - alignments = aligner.align(sequence, query.sequence) + alignments = aligner.align(reference_sequence, query.sequence) except ValueError: + logger.warning( + 'Skipping sequence with id "%s" due to invalid characters', query.id + ) continue + # TODO Implement comparison with input positions to choose best if len(alignments) > 1: raise UnexpectedAlignmentResult( "More than one alignment resulted from a single query." @@ -22,8 +37,8 @@ def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSeque for alignment in alignments: score, query_aligned = (alignment.score, alignment.aligned[0][0]) aligned_start, aligned_end = query_aligned - annotation_pairs[id] = AlignedSequence( - id, + yield AlignedSequence( + query.id, query.sequence, query.name, query.description, @@ -31,7 +46,7 @@ def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSeque query.end, query.score, ), AlignedSequence( - id, + query.id, alignment.query, query.name, query.description, @@ -39,4 +54,3 @@ def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSeque aligned_end, score, ) - return annotation_pairs diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py new file mode 100644 index 0000000..2a97c93 --- /dev/null +++ b/src/bmlsa/cli.py @@ -0,0 +1,191 @@ +import os +import argparse +from Bio import SeqIO +import logging +from bmlsa.aligner import align_many_to_one_ssw + +from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv + +logger = logging.getLogger(__name__) + +DEFAULT_ALIGNMENT_PARAMETERS = { + "BLASTp": { + "extend_gap_score": -1, + "open_gap_score": -11, + "substitution_matrix": "BLOSUM62", + "alignment_mode": "local", + } +} + + +def run(args): + if ( + not (args.extend_gap_score and args.open_gap_score and args.alignment_mode) + and not args.behave_as + ): + logger.error( + 'Must either specify all of "--extend-gap-score", "--open-gap-score", ' + '"--alignment-mode", or, at least "--behave-as". See help (-h) for ' + "more information." + ) + exit(1) + queries = read_annotations_from_csv( + args.annotations, + args.id_header, + args.seq_header, + args.name_header, + args.desc_header, + args.start_header, + args.end_header, + ) + + scoring_parameter = ( + { + "extend_gap_score": args.extend_gap_score, + "open_gap_score": args.open_gap_score, + "alignment_mode": args.alignment_mode, + "substitution_matrix": args.substitution_matrix + if args.substitution_matrix is not None + else "BLOSUM62", + } + if args.behave_as not in DEFAULT_ALIGNMENT_PARAMETERS + else DEFAULT_ALIGNMENT_PARAMETERS[args.behave_as] + ) + + with open(args.sequence, "r") as sequence_fd: + for sequence in SeqIO.parse(sequence_fd, "fasta"): + aligned_annotations = align_many_to_one_ssw( + str(sequence.seq), queries, **scoring_parameter + ) + save_alignments_to_csv( + aligned_annotations, + os.path.join( + args.output, + sequence.id.replace("|", "+").replace(".", "_") + ".csv", + ), + ) + + +def main(): + arg_parser = argparse.ArgumentParser("bmlsa") + arg_parser.add_argument( + "annotations", + type=str, + help=( + "Path to CSV containing the sequences to align as well as the " + "annotations for the respective sequences." + ), + metavar="a", + ) + arg_parser.add_argument( + "sequence", + type=str, + help=( + "Path to the sequence to use as reference in FASTA format. " + "If multiple sequences are present in the same FASTA file, " + "each will be used as a separate reference sequence for separate " + "runs automatically." + ), + metavar="s", + ) + arg_parser.add_argument( + "output", type=str, help="Path to output location", metavar="o" + ) + arg_parser.add_argument( + "-I", + "--id-header", + type=str, + help="The header of the column for the ID of the sequence to align to " + "the reference sequence.", + required=True, + ) + arg_parser.add_argument( + "-N", + "--name-header", + type=str, + help="The header of the column for the name of the sequence to align to " + "the reference sequence.", + required=False, + ) + arg_parser.add_argument( + "-D", + "--desc-header", + type=str, + help="The header of the column for the description of the sequence to " + "align to the reference sequence.", + required=False, + ) + arg_parser.add_argument( + "-T", + "--start-header", + type=str, + help="The header of the column for the start position of the sequence to " + "align to the reference sequence.", + required=False, + ) + arg_parser.add_argument( + "-E", + "--end-header", + type=str, + help="The header of the column for end position of the sequence to " + "align to the reference sequence.", + required=False, + ) + arg_parser.add_argument( + "-S", + "--seq-header", + type=str, + help="The header of the column for the actual sequence to align to the " + "reference sequence.", + required=True, + ) + arg_parser.add_argument( + "-e", + "--extend-gap-score", + type=int, + help="The scoring for extending a gap.", + required=False, + default=None, + ) + arg_parser.add_argument( + "-o", + "--open-gap-score", + type=int, + help="The scoring for opening a gap.", + required=False, + default=None, + ) + arg_parser.add_argument( + "-M", + "--alignment-mode", + type=str, + help="The alignment mode.", + choices=["local", "global"], + required=False, + ) + arg_parser.add_argument( + "-m", + "--substitution-matrix", + type="str", + help="The name of the substitution matrix.", + required=False, + default=None, + ) + arg_parser.add_argument( + "-B", + "--behave-as", + type=str, + help="Use built-in parameters for alignment scoring. If this is specified " + "along either of the scoring arguments, the alignment parameter arguments " + '("--open-gap-score" and/or "--extend-gab-score" and "--alignment-mode") ' + "will override any defaults set by this argument.", + required=False, + default=None, + ) + arg_parser.add + args = arg_parser.parse_args() + run(args) + + +if __name__ == "__main__": + main() diff --git a/bmlsa/datatypes.py b/src/bmlsa/datatypes.py similarity index 100% rename from bmlsa/datatypes.py rename to src/bmlsa/datatypes.py diff --git a/bmlsa/exceptions.py b/src/bmlsa/exceptions.py similarity index 100% rename from bmlsa/exceptions.py rename to src/bmlsa/exceptions.py diff --git a/bmlsa/persistence.py b/src/bmlsa/io.py similarity index 86% rename from bmlsa/persistence.py rename to src/bmlsa/io.py index a63a070..7f2ba47 100644 --- a/bmlsa/persistence.py +++ b/src/bmlsa/io.py @@ -1,4 +1,5 @@ import csv +from typing import Iterable from bmlsa.datatypes import AlignedSequence @@ -6,39 +7,39 @@ from bmlsa.datatypes import AlignedSequence def read_annotations_from_csv( csv_path: str, id_header: str, - name_header: str, - desc_header: str, - start_header: str, - end_header: str, sequence_header: str, + name_header: str = None, + desc_header: str = None, + start_header: str = None, + end_header: str = None, ): annotations = {} with open(csv_path, "r") as csv_fd: reader = csv.reader(csv_fd) id_ind = None + sequence_ind = None name_ind = None desc_ind = None start_ind = None end_ind = None - sequence_ind = None headers_parsed = False for row in reader: if not headers_parsed: id_ind = row.index(id_header) + sequence_ind = row.index(sequence_header) name_ind = row.index(name_header) if name_header else None desc_ind = row.index(desc_header) if desc_header else None start_ind = row.index(start_header) if start_header else None end_ind = row.index(end_header) if end_header else None - sequence_ind = row.index(sequence_header) headers_parsed = True continue id = row[id_ind] + sequence = row[sequence_ind] name = row[name_ind] if name_header else None desc = row[desc_ind] if desc_header else None start = row[start_ind] if start_header else None end = row[end_ind] if end_header else None - sequence = row[sequence_ind] - annotations[id] = AlignedSequence( + yield AlignedSequence( id, sequence, name, @@ -46,18 +47,16 @@ def read_annotations_from_csv( int(start) if start else None, int(end) if end else None, ) - return annotations def save_alignments_to_csv( - aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str + aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]], output_path: str ): with open(output_path, "w") as output_fd: writer = csv.writer(output_fd) header_wrote = False header_order = None - for id, annotations in aligned_pairs.items(): - original, aligned = annotations + for original, aligned in aligned_pairs: original_vars = vars(original) aligned_vars = vars(aligned) if not header_wrote: diff --git a/tests/bmlsa/test_aligner.py b/tests/bmlsa/test_aligner.py new file mode 100644 index 0000000..4bc2544 --- /dev/null +++ b/tests/bmlsa/test_aligner.py @@ -0,0 +1,51 @@ +import pytest +from Bio import SeqIO +from bmlsa.aligner import align_many_to_one_ssw +from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS +from bmlsa.datatypes import AlignedSequence +from collections.abc import Iterable + + +@pytest.fixture +def reference_sequence(): + return str( + list(SeqIO.parse("tests/resources/NC_045512_coding.fasta", "fasta"))[0].seq + ) + + +@pytest.fixture +def queries(): + return [ + AlignedSequence( + "ORF10", + "ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT" + "GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG", + start=29558, + end=29674, + ) + ] + + +def test_align_many_to_one_returns_data(reference_sequence, queries): + results = align_many_to_one_ssw( + reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"] + ) + assert isinstance(results, Iterable) + + +def test_align_many_to_one_returns_correct_data_structure(reference_sequence, queries): + results = align_many_to_one_ssw( + reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"] + ) + for original, aligned_seq in results: + assert isinstance(original, AlignedSequence) + assert isinstance(aligned_seq, AlignedSequence) + + +def test_align_many_to_one_returns_correct_data(reference_sequence, queries): + results = align_many_to_one_ssw( + reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"] + ) + for original, aligned_seq in results: + assert original.start == aligned_seq.start + assert original.end == aligned_seq.end diff --git a/tests/bmlsa/test_io.py b/tests/bmlsa/test_io.py new file mode 100644 index 0000000..ac60ae8 --- /dev/null +++ b/tests/bmlsa/test_io.py @@ -0,0 +1,35 @@ +from csv import reader +from os import path +from bmlsa.datatypes import AlignedSequence +from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv +from collections.abc import Iterable + + +def test_read_annotations_from_csv_has_data(): + results = read_annotations_from_csv( + "tests/resources/SARS_CoV-2_genes.csv", "id", "sequence" + ) + assert isinstance(results, Iterable) + + +def test_read_annotations_from_csv_data_valid(): + results = read_annotations_from_csv( + "tests/resources/SARS_CoV-2_genes.csv", "id", "sequence" + ) + for aligned_seq in results: + assert isinstance(aligned_seq.id, str) + assert isinstance(aligned_seq.sequence, str) + + +def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir): + output_path = path.join(tmpdir, "alignment_results.csv") + dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence") + alignments = [(dummy_sequence, dummy_sequence)] + save_alignments_to_csv(alignments, output_path) + + with open(output_path, "r") as csv_fd: + results = list(reader(csv_fd)) + vars_to_check = list(vars(dummy_sequence).keys()) + for var_to_check in vars_to_check: + assert "original" + var_to_check in results[0] + assert "aligned" + var_to_check in results[0] diff --git a/tests/resources/NC_045512_coding.fasta b/tests/resources/NC_045512_coding.fasta new file mode 100644 index 0000000..b364687 --- /dev/null +++ b/tests/resources/NC_045512_coding.fasta @@ -0,0 +1,430 @@ +>NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA +CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC +TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG +TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC +CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG +CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT +GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC +GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT +TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG +TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG +CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG +TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG +CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA +CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC +CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA +GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT +ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG +CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA +CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA +ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA +GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAG +GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG +TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC +GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG +ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT +AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA +TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT +AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA +GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT +AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA +GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT +ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA +GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA +ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC +ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA +TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG +AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA +CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC +AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT +AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA +GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG +TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT +GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT +TATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA +TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTA +AACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAAC +TCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCA +GATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTG +ATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAAT +GGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTA +TTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGC +AGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAA +TATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA +TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTT +TCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAG +AACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACA +ACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTA +AGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA +ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGT +AAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTG +ATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAA +ATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTA +ACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT +GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGT +GGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTC +ACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGT +GAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAG +ACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG +TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAAC +CATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAA +CCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGT +GATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAAC +CTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGA +ATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGA +AAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA +TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT +ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC +AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA +TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG +CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA +TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG +GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT +TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA +TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT +ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC +TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT +GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG +GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT +GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA +TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC +AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT +GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT +AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG +AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGT +TGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTT +ACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTG +GTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAG +AATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAG +CACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTT +TGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAA +ATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC +ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGC +ACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACAC +CATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTT +TAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACC +TTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC +AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCA +GGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTG +GTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTC +CTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTT +ACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT +CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGG +TTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTAC +GCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGC +TACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTC +TTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC +ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAG +ATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGG +ACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAG +TTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTT +ACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCAT +GCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTA +CGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTT +TCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTA +ACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA +TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACA +ATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTC +AATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTC +TGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATA +TGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT +AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTG +ACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCT +CTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG +TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTC +TTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTG +GTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA +GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTA +GCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC +TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAA +AGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTA +GACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTA +GTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA +TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA +GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTG +AGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAA +TGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACA +ACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACAT +TTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCT +GCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTA +CACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACT +TGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATC +TATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT +ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT +ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGAT +GCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGT +GTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGG +TGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA +AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAG +TCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA +GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCA +CAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAA +ATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT +GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTC +CAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCA +ACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC +ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATG +ATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT +AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAA +GATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTG +TAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGT +TGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA +AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG +ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGG +ACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAG +CTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGT +ATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTC +AGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT +GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTC +AGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAG +ACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCT +AACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGAC +TTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC +TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC +TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAG +GAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAG +TGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTT +AGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA +GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACC +AGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC +ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTAC +AACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGC +ATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT +GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTA +TGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATAC +AATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC +GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTA +TAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA +CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGAT +AACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTG +TTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTT +ATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT +GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT +GTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAA +TACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGT +GATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTG +AGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCT +TTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACC +GAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATT +AAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATC +TCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGG +GACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT +GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT +AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACAT +TAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGA +AATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTAC +ATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT +TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCC +TGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA +GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCAC +AAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTA +TAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC +TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTA +ATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTT +GCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC +TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACA +CTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT +CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAA +GAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTG +TTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTA +TGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA +CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA +GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATC +TATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTT +TCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTA +TGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCA +TGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAA +AGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAA +CCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGT +GACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTG +TATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAG +AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC +ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTC +CATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTAT +AACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCT +TATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA +ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGG +ACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA +GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTA +AACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGA +CTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA +CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTAT +TTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCC +CAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG +AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTA +AACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT +AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTA +CTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTA +CAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTT +ATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG +ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA +AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCT +ATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTC +GCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTA +TACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTAC +GGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAA +ATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCT +AGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATG +GGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTG +GATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG +GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA +AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAG +GTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAA +CAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCA +ATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA +GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATG +TCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC +TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCC +CTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCAT +TTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGC +GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTC +AAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTA +TTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT +TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCA +GGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA +ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTT +GAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATT +GTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTG +TTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC +ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTAT +GCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTG +ATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTC +TAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGA +GATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACT +TTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT +TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAAC +AAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTC +TGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGA +GATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAAC +CAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTA +CTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC +TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACT +CAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTG +GTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTAC +CACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCA +ACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAA +TAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC +AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCA +TTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATT +GCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACC +TTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGG +ACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTG +GAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA +AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCA +CAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATA +TCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAG +TTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCT +ACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTA +TGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA +GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTT +TCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACA +CATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACC +TGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTA +GGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTG +CCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC +ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGT +ATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACG +ACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGA +ATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTC +GCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCT +TGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT +GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTG +GCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAAT +AATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTT +CTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTA +CTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATG +GGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA +ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC +CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT +TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC +TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT +TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT +GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT +CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA +TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC +CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA +AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT +AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC +ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC +TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT +GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA +GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG +ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG +CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC +TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA +AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC +CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA +GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA +TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT +TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT +GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT +ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG +CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA +GATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG +TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC +GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA +TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT +GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA +AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG +ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG +TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT +GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC +CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG +TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT +GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA +AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC +ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT +AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA +ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG +CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC +AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA +ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG +TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC +TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC +TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT +TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG +CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT +GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT +TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC +GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT +TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAA + diff --git a/tests/resources/SARS_CoV-2_genes.csv b/tests/resources/SARS_CoV-2_genes.csv new file mode 100644 index 0000000..3e8442d --- /dev/null +++ b/tests/resources/SARS_CoV-2_genes.csv @@ -0,0 +1,8 @@ +id,sequence,start,end +ORF8,"ATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGT +CATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATAT +TAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCC +ATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTA +AATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGT +ATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAA",27894,28259 +ORF10,ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG,29558,29674 From 927a8a170ab5bd79ecc52a425d4ac9f8d66f12d6 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 10:50:55 -0500 Subject: [PATCH 03/24] Removed unused dictionary declaration --- src/bmlsa/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bmlsa/io.py b/src/bmlsa/io.py index 7f2ba47..a47da27 100644 --- a/src/bmlsa/io.py +++ b/src/bmlsa/io.py @@ -13,7 +13,6 @@ def read_annotations_from_csv( start_header: str = None, end_header: str = None, ): - annotations = {} with open(csv_path, "r") as csv_fd: reader = csv.reader(csv_fd) id_ind = None From 515c1301466572fce96a3b33e364c72b37de4ee5 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 10:53:14 -0500 Subject: [PATCH 04/24] Replaced 'build' with 'python-build' in 'environment.yml' --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 1186aac..29ce3ee 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,6 @@ channels: - conda-forge dependencies: - biopython=1.81 - - build=0.7 - pytest=7.3 - hypothesis=6.56 - twine=4 From 51c2bd1f7be284af073084f991838a5150f83aeb Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 10:58:31 -0500 Subject: [PATCH 05/24] Conda environment creation now has '--force' for pipeline --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3b9c413..47d3b91 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,7 +8,7 @@ pipeline { } stage("install") { steps { - sh 'mamba env update --file environment.yml' + sh 'mamba env create --force --file environment.yml' sh 'echo "mamba activate bmlsa" >> ~/.bashrc' } } From 2f86ec050f395f712428b9166a5f567c1ee5ddef Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:02:15 -0500 Subject: [PATCH 06/24] Pipeline now updates environment if possible, otherwise, re-creates it --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 47d3b91..04d28b3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,7 +8,7 @@ pipeline { } stage("install") { steps { - sh 'mamba env create --force --file environment.yml' + sh 'mamba env update --file environment.yml || mamba env create --force --file environment.yml' sh 'echo "mamba activate bmlsa" >> ~/.bashrc' } } From beef3ee6a549f85dab51b95d113febc0bfbad3e5 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:04:12 -0500 Subject: [PATCH 07/24] Fixed bug in 'cli.py' regarding argument type --- src/bmlsa/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py index 2a97c93..9b28430 100644 --- a/src/bmlsa/cli.py +++ b/src/bmlsa/cli.py @@ -166,7 +166,7 @@ def main(): arg_parser.add_argument( "-m", "--substitution-matrix", - type="str", + type=str, help="The name of the substitution matrix.", required=False, default=None, From 78b4a74bc8a62276a069f55188300d2fe5e0b758 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:08:51 -0500 Subject: [PATCH 08/24] Removed erroneous line from 'cli.py' --- src/bmlsa/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py index 9b28430..0db7e0a 100644 --- a/src/bmlsa/cli.py +++ b/src/bmlsa/cli.py @@ -182,7 +182,6 @@ def main(): required=False, default=None, ) - arg_parser.add args = arg_parser.parse_args() run(args) From 9eebaa2f91e5cab8fa0779dc71799637396bcb5b Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:13:37 -0500 Subject: [PATCH 09/24] Added check to 'cli.py' to verify '--behave-as' parameter --- src/bmlsa/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py index 0db7e0a..73a6a7d 100644 --- a/src/bmlsa/cli.py +++ b/src/bmlsa/cli.py @@ -39,6 +39,10 @@ def run(args): args.end_header, ) + if args.behave_as and args.behave_as not in DEFAULT_ALIGNMENT_PARAMETERS: + logger.error('"--behave-as" received bad option.') + exit(1) + scoring_parameter = ( { "extend_gap_score": args.extend_gap_score, From 3c5e934c7cdf704203cc83a2340785344e83d571 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:14:42 -0500 Subject: [PATCH 10/24] Updated 'Jenkinsfile' to fingerprint generated artifacts --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 04d28b3..6e18ecc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -31,7 +31,7 @@ pipeline { } stage("archive") { steps { - archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl' + archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true } } stage("publish") { From 6017eadb2c6f8e87a3883fccf7cb0a08892b2d8b Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:30:23 -0500 Subject: [PATCH 11/24] Clean sequences of line breaks and carriage return --- src/bmlsa/datatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bmlsa/datatypes.py b/src/bmlsa/datatypes.py index f007ca6..09a0504 100644 --- a/src/bmlsa/datatypes.py +++ b/src/bmlsa/datatypes.py @@ -14,7 +14,7 @@ class AlignedSequence: self._end = end self._id = id self._name = name - self._sequence = sequence + self._sequence = sequence.replace("\n", "").replace("\r", "") self._score = score @property From c34b21930699e555a029abff6b3c826a504def37 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:31:41 -0500 Subject: [PATCH 12/24] CLI will now recursively make output directory if needed --- src/bmlsa/cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py index 73a6a7d..f577737 100644 --- a/src/bmlsa/cli.py +++ b/src/bmlsa/cli.py @@ -28,7 +28,7 @@ def run(args): '"--alignment-mode", or, at least "--behave-as". See help (-h) for ' "more information." ) - exit(1) + exit(3) queries = read_annotations_from_csv( args.annotations, args.id_header, @@ -41,7 +41,7 @@ def run(args): if args.behave_as and args.behave_as not in DEFAULT_ALIGNMENT_PARAMETERS: logger.error('"--behave-as" received bad option.') - exit(1) + exit(3) scoring_parameter = ( { @@ -56,6 +56,7 @@ def run(args): else DEFAULT_ALIGNMENT_PARAMETERS[args.behave_as] ) + os.makedirs(args.output, exist_ok=True) with open(args.sequence, "r") as sequence_fd: for sequence in SeqIO.parse(sequence_fd, "fasta"): aligned_annotations = align_many_to_one_ssw( From 11d55903559d3c7c978b9510210f371e29b7a726 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:32:17 -0500 Subject: [PATCH 13/24] Added another post-installation test step --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 6e18ecc..8e97804 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -27,6 +27,7 @@ pipeline { steps { sh "pip install dist/*.whl --force-reinstall" sh "bmlsa -h" + sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta /output/" } } stage("archive") { From 00cedbb1812c6e25bf84614613acc7af431e1682 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:32:35 -0500 Subject: [PATCH 14/24] Created convenience VSCode 'launch.json' --- .vscode/launch.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..2b7008b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,26 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Module", + "type": "python", + "request": "launch", + "module": "bmlsa.cli", + "args": [ + "-I", + "id", + "-S", + "sequence", + "-B", + "BLASTp", + "${workspaceFolder}/tests/resources/SARS_CoV-2_genes.csv", + "${workspaceFolder}/tests/resources/NC_045512_coding.fasta", + "${workspaceFolder}/output" + ], + "justMyCode": true + } + ] +} \ No newline at end of file From 587c2e753ac7275b58b7c1b0fb994cc753896f36 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 11:33:22 -0500 Subject: [PATCH 15/24] '.gitignore' now ignores 'output' directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a31886c..90d6956 100644 --- a/.gitignore +++ b/.gitignore @@ -212,3 +212,4 @@ pyrightconfig.json # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) +output \ No newline at end of file From 7e3f43434e553346bc5190f66ccb78ca0fbdc047 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 12:44:59 -0500 Subject: [PATCH 16/24] Added documentation --- .vscode/settings.json | 3 ++- src/bmlsa/aligner.py | 36 +++++++++++++++++++++---------- src/bmlsa/cli.py | 14 ++++++------ src/bmlsa/datatypes.py | 4 +++- src/bmlsa/io.py | 43 +++++++++++++++++++++++++++++++------ tests/bmlsa/test_aligner.py | 8 +++---- tests/bmlsa/test_io.py | 18 ++++++---------- 7 files changed, 84 insertions(+), 42 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 5be7edb..b035e8f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,5 +7,6 @@ "FASTA" ], "python.testing.pytestEnabled": true, - "python.analysis.inlayHints.pytestParameters": true + "python.analysis.inlayHints.pytestParameters": true, + "autoDocstring.docstringFormat": "sphinx" } \ No newline at end of file diff --git a/src/bmlsa/aligner.py b/src/bmlsa/aligner.py index 7b04cf7..70aadc0 100644 --- a/src/bmlsa/aligner.py +++ b/src/bmlsa/aligner.py @@ -1,20 +1,38 @@ import logging -from typing import Iterable +from typing import Generator, Iterable from Bio.Align import PairwiseAligner, substitution_matrices -from bmlsa.datatypes import AlignedSequence -from bmlsa.exceptions import UnexpectedAlignmentResult +from bmlsa.datatypes import QuerySequence logger = logging.getLogger(__name__) def align_many_to_one_ssw( reference_sequence: str, - queries: Iterable[AlignedSequence], + queries: Iterable[QuerySequence], extend_gap_score: int, open_gap_score: int, alignment_mode: str, substitution_matrix: str = "BLOSUM62", -): +) -> Generator[tuple[QuerySequence, QuerySequence], None, None]: + """Aligns :obj:`bmlsa.datatypes.QuerySequence` objects to a given reference sequence + + :param reference_sequence: The reference sequence to align to + :type reference_sequence: str + :param queries: A iterable sequence of :obj:`bmlsa.datatypes.QuerySequence` + :type queries: Iterable[QuerySequence] + :param extend_gap_score: The gap score to use for alignment. Typically negative. + :type extend_gap_score: int + :param open_gap_score: The open gap score to use for alignment. Typically negative. + :type open_gap_score: int + :param alignment_mode: The alignment mode to use. Either "local" or "global". + :type alignment_mode: str + :param substitution_matrix: The name of the substitution matrix available + in :module:`Bio.Align`, defaults to "BLOSUM62" + :type substitution_matrix: str, optional + :yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is + the original, and the second is the aligned version + :rtype: a generator of :obj:`bmlsa.datatypes.QuerySequence` objects + """ # TODO Consider using the built in "scoring" parameter aligner = PairwiseAligner() aligner.substitution_matrix = substitution_matrices.load(substitution_matrix) @@ -30,14 +48,10 @@ def align_many_to_one_ssw( ) continue # TODO Implement comparison with input positions to choose best - if len(alignments) > 1: - raise UnexpectedAlignmentResult( - "More than one alignment resulted from a single query." - ) for alignment in alignments: score, query_aligned = (alignment.score, alignment.aligned[0][0]) aligned_start, aligned_end = query_aligned - yield AlignedSequence( + yield QuerySequence( query.id, query.sequence, query.name, @@ -45,7 +59,7 @@ def align_many_to_one_ssw( query.start, query.end, query.score, - ), AlignedSequence( + ), QuerySequence( query.id, alignment.query, query.name, diff --git a/src/bmlsa/cli.py b/src/bmlsa/cli.py index f577737..b1ba931 100644 --- a/src/bmlsa/cli.py +++ b/src/bmlsa/cli.py @@ -4,7 +4,7 @@ from Bio import SeqIO import logging from bmlsa.aligner import align_many_to_one_ssw -from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv +from bmlsa.io import queries_from_csv, save_alignments_to_csv logger = logging.getLogger(__name__) @@ -29,8 +29,8 @@ def run(args): "more information." ) exit(3) - queries = read_annotations_from_csv( - args.annotations, + queries = queries_from_csv( + args.queries, args.id_header, args.seq_header, args.name_header, @@ -59,11 +59,11 @@ def run(args): os.makedirs(args.output, exist_ok=True) with open(args.sequence, "r") as sequence_fd: for sequence in SeqIO.parse(sequence_fd, "fasta"): - aligned_annotations = align_many_to_one_ssw( + aligned_queries = align_many_to_one_ssw( str(sequence.seq), queries, **scoring_parameter ) save_alignments_to_csv( - aligned_annotations, + aligned_queries, os.path.join( args.output, sequence.id.replace("|", "+").replace(".", "_") + ".csv", @@ -74,11 +74,11 @@ def run(args): def main(): arg_parser = argparse.ArgumentParser("bmlsa") arg_parser.add_argument( - "annotations", + "queries", type=str, help=( "Path to CSV containing the sequences to align as well as the " - "annotations for the respective sequences." + "queries for the respective sequences." ), metavar="a", ) diff --git a/src/bmlsa/datatypes.py b/src/bmlsa/datatypes.py index 09a0504..ad178af 100644 --- a/src/bmlsa/datatypes.py +++ b/src/bmlsa/datatypes.py @@ -1,4 +1,6 @@ -class AlignedSequence: +class QuerySequence: + """Represents a sequence that may be aligned.""" + def __init__( self, id: str, diff --git a/src/bmlsa/io.py b/src/bmlsa/io.py index a47da27..53b9c0d 100644 --- a/src/bmlsa/io.py +++ b/src/bmlsa/io.py @@ -1,10 +1,10 @@ import csv -from typing import Iterable +from typing import Generator, Iterable -from bmlsa.datatypes import AlignedSequence +from bmlsa.datatypes import QuerySequence -def read_annotations_from_csv( +def queries_from_csv( csv_path: str, id_header: str, sequence_header: str, @@ -12,7 +12,29 @@ def read_annotations_from_csv( desc_header: str = None, start_header: str = None, end_header: str = None, -): +) -> Generator[QuerySequence, None, None]: + """Generates and :obj:`bmlsa.datatypes.QuerySequence` instances from a CSV file. + + :param csv_path: Path to CSV to use + :type csv_path: str + :param id_header: The column title for the unique identifier for each query sequence + :type id_header: str + :param sequence_header: The column title for the sequences themselves + :type sequence_header: str + :param name_header: The column title for the name of the sequence, defaults to None + :type name_header: str, optional + :param desc_header: The column title for the description of the sequence, + defaults to None + :type desc_header: str, optional + :param start_header: The column title for the start position of the sequence, + defaults to None + :type start_header: str, optional + :param end_header: The column title for the end position of the sequence, + defaults to None + :type end_header: str, optional + :yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row + :rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence` + """ with open(csv_path, "r") as csv_fd: reader = csv.reader(csv_fd) id_ind = None @@ -38,7 +60,7 @@ def read_annotations_from_csv( desc = row[desc_ind] if desc_header else None start = row[start_ind] if start_header else None end = row[end_ind] if end_header else None - yield AlignedSequence( + yield QuerySequence( id, sequence, name, @@ -49,8 +71,15 @@ def read_annotations_from_csv( def save_alignments_to_csv( - aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]], output_path: str -): + aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]], output_path: str +) -> None: + """Saves alignments to a CSV. + + :param aligned_pairs: An iterable of the original sequence and aligned sequences + :type aligned_pairs: Iterable[tuple[AlignedSequence, AlignedSequence]] + :param output_path: A path to the output directory + :type output_path: str + """ with open(output_path, "w") as output_fd: writer = csv.writer(output_fd) header_wrote = False diff --git a/tests/bmlsa/test_aligner.py b/tests/bmlsa/test_aligner.py index 4bc2544..a13e677 100644 --- a/tests/bmlsa/test_aligner.py +++ b/tests/bmlsa/test_aligner.py @@ -2,7 +2,7 @@ import pytest from Bio import SeqIO from bmlsa.aligner import align_many_to_one_ssw from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS -from bmlsa.datatypes import AlignedSequence +from bmlsa.datatypes import QuerySequence from collections.abc import Iterable @@ -16,7 +16,7 @@ def reference_sequence(): @pytest.fixture def queries(): return [ - AlignedSequence( + QuerySequence( "ORF10", "ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT" "GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG", @@ -38,8 +38,8 @@ def test_align_many_to_one_returns_correct_data_structure(reference_sequence, qu reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"] ) for original, aligned_seq in results: - assert isinstance(original, AlignedSequence) - assert isinstance(aligned_seq, AlignedSequence) + assert isinstance(original, QuerySequence) + assert isinstance(aligned_seq, QuerySequence) def test_align_many_to_one_returns_correct_data(reference_sequence, queries): diff --git a/tests/bmlsa/test_io.py b/tests/bmlsa/test_io.py index ac60ae8..835e08c 100644 --- a/tests/bmlsa/test_io.py +++ b/tests/bmlsa/test_io.py @@ -1,21 +1,17 @@ from csv import reader from os import path -from bmlsa.datatypes import AlignedSequence -from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv +from bmlsa.datatypes import QuerySequence +from bmlsa.io import queries_from_csv, save_alignments_to_csv from collections.abc import Iterable -def test_read_annotations_from_csv_has_data(): - results = read_annotations_from_csv( - "tests/resources/SARS_CoV-2_genes.csv", "id", "sequence" - ) +def test_queries_from_csv_has_data(): + results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence") assert isinstance(results, Iterable) -def test_read_annotations_from_csv_data_valid(): - results = read_annotations_from_csv( - "tests/resources/SARS_CoV-2_genes.csv", "id", "sequence" - ) +def test_queries_from_csv_data_valid(): + results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence") for aligned_seq in results: assert isinstance(aligned_seq.id, str) assert isinstance(aligned_seq.sequence, str) @@ -23,7 +19,7 @@ def test_read_annotations_from_csv_data_valid(): def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir): output_path = path.join(tmpdir, "alignment_results.csv") - dummy_sequence = AlignedSequence("DUMMY", "ATACTGGAAAA", name="test_sequence") + dummy_sequence = QuerySequence("DUMMY", "ATACTGGAAAA", name="test_sequence") alignments = [(dummy_sequence, dummy_sequence)] save_alignments_to_csv(alignments, output_path) From 47cdd8bc28cb18d876ea323cdc7619e2fa59afe3 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 13:28:12 -0500 Subject: [PATCH 17/24] Added basic automatic documentation generation --- .gitignore | 4 +++- Jenkinsfile | 5 +++- docs/Makefile | 20 ++++++++++++++++ docs/make.bat | 35 +++++++++++++++++++++++++++ docs/source/bmlsa.rst | 53 +++++++++++++++++++++++++++++++++++++++++ docs/source/conf.py | 33 +++++++++++++++++++++++++ docs/source/index.rst | 21 ++++++++++++++++ docs/source/modules.rst | 7 ++++++ src/bmlsa/aligner.py | 4 ++-- src/bmlsa/io.py | 6 ++--- 10 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/bmlsa.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/modules.rst diff --git a/.gitignore b/.gitignore index 90d6956..77c690b 100644 --- a/.gitignore +++ b/.gitignore @@ -212,4 +212,6 @@ pyrightconfig.json # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) -output \ No newline at end of file +output +!docs +docs/build \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index 8e97804..fa5569a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -21,13 +21,16 @@ pipeline { stage("build") { steps { sh "python -m build" + sh 'sphinx-apidoc -o docs/source/ src/bmlsa' + sh 'make -C docs html' + publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'BMLSA Docs', reportTitles: '']) } } stage("test installation") { steps { sh "pip install dist/*.whl --force-reinstall" sh "bmlsa -h" - sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta /output/" + sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/" } } stage("archive") { diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/bmlsa.rst b/docs/source/bmlsa.rst new file mode 100644 index 0000000..415748c --- /dev/null +++ b/docs/source/bmlsa.rst @@ -0,0 +1,53 @@ +bmlsa package +============= + +Submodules +---------- + +bmlsa.aligner module +-------------------- + +.. automodule:: bmlsa.aligner + :members: + :undoc-members: + :show-inheritance: + +bmlsa.cli module +---------------- + +.. automodule:: bmlsa.cli + :members: + :undoc-members: + :show-inheritance: + +bmlsa.datatypes module +---------------------- + +.. automodule:: bmlsa.datatypes + :members: + :undoc-members: + :show-inheritance: + +bmlsa.exceptions module +----------------------- + +.. automodule:: bmlsa.exceptions + :members: + :undoc-members: + :show-inheritance: + +bmlsa.io module +--------------- + +.. automodule:: bmlsa.io + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: bmlsa + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..bba3af0 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,33 @@ +import os +import sys + +# Configure system path + +sys.path.insert(0, os.path.abspath("../src/")) + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "BMLSA" +copyright = "2023, Harrison" +author = "Harrison" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.autodoc"] + +templates_path = ["_templates"] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_static_path = ["_static"] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..122e6df --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,21 @@ +.. BMLSA documentation master file, created by + sphinx-quickstart on Fri Apr 28 13:04:16 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to BMLSA's documentation! +================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..5b5c2ed --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +bmlsa +===== + +.. toctree:: + :maxdepth: 4 + + bmlsa diff --git a/src/bmlsa/aligner.py b/src/bmlsa/aligner.py index 70aadc0..060a085 100644 --- a/src/bmlsa/aligner.py +++ b/src/bmlsa/aligner.py @@ -27,10 +27,10 @@ def align_many_to_one_ssw( :param alignment_mode: The alignment mode to use. Either "local" or "global". :type alignment_mode: str :param substitution_matrix: The name of the substitution matrix available - in :module:`Bio.Align`, defaults to "BLOSUM62" + in :mod:`Bio.Align`, defaults to "BLOSUM62" :type substitution_matrix: str, optional :yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is - the original, and the second is the aligned version + the original, and the second is the aligned version :rtype: a generator of :obj:`bmlsa.datatypes.QuerySequence` objects """ # TODO Consider using the built in "scoring" parameter diff --git a/src/bmlsa/io.py b/src/bmlsa/io.py index 53b9c0d..4561b59 100644 --- a/src/bmlsa/io.py +++ b/src/bmlsa/io.py @@ -24,13 +24,13 @@ def queries_from_csv( :param name_header: The column title for the name of the sequence, defaults to None :type name_header: str, optional :param desc_header: The column title for the description of the sequence, - defaults to None + defaults to None :type desc_header: str, optional :param start_header: The column title for the start position of the sequence, - defaults to None + defaults to None :type start_header: str, optional :param end_header: The column title for the end position of the sequence, - defaults to None + defaults to None :type end_header: str, optional :yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row :rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence` From 61149b88b7471ae4f80371465ec8d1691e953469 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 13:32:50 -0500 Subject: [PATCH 18/24] Added sphinx to build environment --- environment.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 29ce3ee..b14e79d 100644 --- a/environment.yml +++ b/environment.yml @@ -7,4 +7,5 @@ dependencies: - hypothesis=6.56 - twine=4 - python-build=0.10.0 - - setuptools \ No newline at end of file + - setuptools=67.6 + - sphinx=6.2 \ No newline at end of file From e942b65ebd9f8cffa649521e6f7214a3491fbe60 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 13:42:38 -0500 Subject: [PATCH 19/24] Added '--force' to 'sphinx-apidoc' step --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fa5569a..0a5cd82 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -21,7 +21,7 @@ pipeline { stage("build") { steps { sh "python -m build" - sh 'sphinx-apidoc -o docs/source/ src/bmlsa' + sh 'sphinx-apidoc -o docs/source/ src/bmlsa --force' sh 'make -C docs html' publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'BMLSA Docs', reportTitles: '']) } From adf747358782dc143cfbef0ffae8974cc2d91301 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 13:46:58 -0500 Subject: [PATCH 20/24] Fixed some doc typos --- src/bmlsa/aligner.py | 6 ++---- src/bmlsa/datatypes.py | 17 +++++++++++++++++ src/bmlsa/io.py | 9 +++------ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/bmlsa/aligner.py b/src/bmlsa/aligner.py index 060a085..294077a 100644 --- a/src/bmlsa/aligner.py +++ b/src/bmlsa/aligner.py @@ -26,11 +26,9 @@ def align_many_to_one_ssw( :type open_gap_score: int :param alignment_mode: The alignment mode to use. Either "local" or "global". :type alignment_mode: str - :param substitution_matrix: The name of the substitution matrix available - in :mod:`Bio.Align`, defaults to "BLOSUM62" + :param substitution_matrix: The name of the substitution matrix available in :mod:`Bio.Align`, defaults to "BLOSUM62" :type substitution_matrix: str, optional - :yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is - the original, and the second is the aligned version + :yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is the original, and the second is the aligned version :rtype: a generator of :obj:`bmlsa.datatypes.QuerySequence` objects """ # TODO Consider using the built in "scoring" parameter diff --git a/src/bmlsa/datatypes.py b/src/bmlsa/datatypes.py index ad178af..caf7cdf 100644 --- a/src/bmlsa/datatypes.py +++ b/src/bmlsa/datatypes.py @@ -11,6 +11,23 @@ class QuerySequence: end: int = None, score: int = None, ) -> None: + """Instantiates a :obj:bmlsa.datatypes.QuerySequence object + + :param id: The id of the query sequence + :type id: str + :param sequence: The sequence itself + :type sequence: str + :param name: The name of the sequence, defaults to None + :type name: str, optional + :param description: The description of the sequence, defaults to None + :type description: str, optional + :param start: The start of the sequence, defaults to None + :type start: int, optional + :param end: The end of the sequence, defaults to None + :type end: int, optional + :param score: The alignment score of the sequence, defaults to None + :type score: int, optional + """ self._description = description self._start = start self._end = end diff --git a/src/bmlsa/io.py b/src/bmlsa/io.py index 4561b59..8e47dd7 100644 --- a/src/bmlsa/io.py +++ b/src/bmlsa/io.py @@ -23,14 +23,11 @@ def queries_from_csv( :type sequence_header: str :param name_header: The column title for the name of the sequence, defaults to None :type name_header: str, optional - :param desc_header: The column title for the description of the sequence, - defaults to None + :param desc_header: The column title for the description of the sequence, defaults to None :type desc_header: str, optional - :param start_header: The column title for the start position of the sequence, - defaults to None + :param start_header: The column title for the start position of the sequence, defaults to None :type start_header: str, optional - :param end_header: The column title for the end position of the sequence, - defaults to None + :param end_header: The column title for the end position of the sequence, defaults to None :type end_header: str, optional :yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row :rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence` From cbf32d253b631f0e63e7accae74be7b6e1ed4dd5 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 13:56:10 -0500 Subject: [PATCH 21/24] Added docs cleaning step to pipeline --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 0a5cd82..96586fa 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,6 +4,7 @@ pipeline { stage("clean") { steps { sh 'rm -rf ./dist/*' + sh 'rm -rf ./docs/build/*' } } stage("install") { From 4383e36c1ffcb054a9aa11e8365c6faf5bd667bf Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 14:01:10 -0500 Subject: [PATCH 22/24] Removed space in doc title --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 96586fa..b2a9737 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,7 +24,7 @@ pipeline { sh "python -m build" sh 'sphinx-apidoc -o docs/source/ src/bmlsa --force' sh 'make -C docs html' - publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'BMLSA Docs', reportTitles: '']) + publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'BMLSA', reportTitles: '']) } } stage("test installation") { From f0da14237789974ee5c5543231c2d40797e5f9b7 Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 14:10:15 -0500 Subject: [PATCH 23/24] Updated documentation publishing title and 'README.md' to link to CI --- Jenkinsfile | 2 +- README.md | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index b2a9737..e6db3d1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,7 +24,7 @@ pipeline { sh "python -m build" sh 'sphinx-apidoc -o docs/source/ src/bmlsa --force' sh 'make -C docs html' - publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'BMLSA', reportTitles: '']) + publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'Documentation', reportTitles: '']) } } stage("test installation") { diff --git a/README.md b/README.md index f4a0e4c..4c86ff0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/badge/icon)](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) + # BMLSA A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm. @@ -11,3 +13,6 @@ A Basic multi local sequence alignment tool using the Biopython implementation o - Automatically align with all sequences in reference FASTA - All produced output is human readable! +## More Information + +For all live downloadable artifacts, build statuses, unit test results, and documentation, check out the continuous integration page for the [master branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) ([development branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/develop/)). \ No newline at end of file From 7bfe49445a5a7ff08565fda1317fa116e2d27b5f Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 28 Apr 2023 14:11:24 -0500 Subject: [PATCH 24/24] Moved steps and removed 'clean' stage --- Jenkinsfile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e6db3d1..f4660b4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,12 +1,6 @@ pipeline { agent any stages { - stage("clean") { - steps { - sh 'rm -rf ./dist/*' - sh 'rm -rf ./docs/build/*' - } - } stage("install") { steps { sh 'mamba env update --file environment.yml || mamba env create --force --file environment.yml' @@ -21,6 +15,8 @@ pipeline { } stage("build") { steps { + sh 'rm -rf ./dist/*' + sh 'rm -rf ./docs/build/*' sh "python -m build" sh 'sphinx-apidoc -o docs/source/ src/bmlsa --force' sh 'make -C docs html'