Compare commits
25 Commits
d75a83585c
...
ebb27556b5
Author | SHA1 | Date | |
---|---|---|---|
ebb27556b5 | |||
7bfe49445a | |||
f0da142377 | |||
4383e36c1f | |||
cbf32d253b | |||
adf7473587 | |||
e942b65ebd | |||
61149b88b7 | |||
47cdd8bc28 | |||
7e3f43434e | |||
587c2e753a | |||
00cedbb181 | |||
11d5590355 | |||
c34b219306 | |||
6017eadb2c | |||
3c5e934c7c | |||
9eebaa2f91 | |||
78b4a74bc8 | |||
beef3ee6a5 | |||
2f86ec050f | |||
51c2bd1f7b | |||
515c130146 | |||
927a8a170a | |||
ae3732eba2 | |||
d42ed83b22 |
3
.gitignore
vendored
3
.gitignore
vendored
@ -212,3 +212,6 @@ pyrightconfig.json
|
|||||||
|
|
||||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
|
output
|
||||||
|
!docs
|
||||||
|
docs/build
|
26
.vscode/launch.json
vendored
Normal file
26
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Module",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"module": "bmlsa.cli",
|
||||||
|
"args": [
|
||||||
|
"-I",
|
||||||
|
"id",
|
||||||
|
"-S",
|
||||||
|
"sequence",
|
||||||
|
"-B",
|
||||||
|
"BLASTp",
|
||||||
|
"${workspaceFolder}/tests/resources/SARS_CoV-2_genes.csv",
|
||||||
|
"${workspaceFolder}/tests/resources/NC_045512_coding.fasta",
|
||||||
|
"${workspaceFolder}/output"
|
||||||
|
],
|
||||||
|
"justMyCode": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
11
.vscode/settings.json
vendored
11
.vscode/settings.json
vendored
@ -1,3 +1,12 @@
|
|||||||
{
|
{
|
||||||
"python.formatting.provider": "black"
|
"python.formatting.provider": "black",
|
||||||
|
"cSpell.words": [
|
||||||
|
"Biopython",
|
||||||
|
"BLOSUM",
|
||||||
|
"bmlsa",
|
||||||
|
"FASTA"
|
||||||
|
],
|
||||||
|
"python.testing.pytestEnabled": true,
|
||||||
|
"python.analysis.inlayHints.pytestParameters": true,
|
||||||
|
"autoDocstring.docstringFormat": "sphinx"
|
||||||
}
|
}
|
23
Jenkinsfile
vendored
23
Jenkinsfile
vendored
@ -1,26 +1,33 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent any
|
||||||
stages {
|
stages {
|
||||||
stage("clean") {
|
|
||||||
steps {
|
|
||||||
sh 'rm -rf ./dist/*'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage("install") {
|
stage("install") {
|
||||||
steps {
|
steps {
|
||||||
sh 'mamba env update --file environment.yml'
|
sh 'mamba env update --file environment.yml || mamba env create --force --file environment.yml'
|
||||||
sh 'echo "mamba activate bmlsa" >> ~/.bashrc'
|
sh 'echo "mamba activate bmlsa" >> ~/.bashrc'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stage("unit tests") {
|
||||||
|
steps {
|
||||||
|
sh "python -m pytest --junitxml=test_results.xml"
|
||||||
|
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
||||||
|
}
|
||||||
|
}
|
||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
|
sh 'rm -rf ./dist/*'
|
||||||
|
sh 'rm -rf ./docs/build/*'
|
||||||
sh "python -m build"
|
sh "python -m build"
|
||||||
|
sh 'sphinx-apidoc -o docs/source/ src/bmlsa --force'
|
||||||
|
sh 'make -C docs html'
|
||||||
|
publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/build/html', reportFiles: 'index.html', reportName: 'Documentation', reportTitles: ''])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("test") {
|
stage("test installation") {
|
||||||
steps {
|
steps {
|
||||||
sh "pip install dist/*.whl"
|
sh "pip install dist/*.whl --force-reinstall"
|
||||||
sh "bmlsa -h"
|
sh "bmlsa -h"
|
||||||
|
sh "bmlsa -I id -S sequence -B BLASTp tests/resources/SARS_CoV-2_genes.csv tests/resources/NC_045512_coding.fasta ./output/"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("archive") {
|
stage("archive") {
|
||||||
|
17
README.md
17
README.md
@ -1,3 +1,18 @@
|
|||||||
|
[![Build Status](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/badge/icon)](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/)
|
||||||
|
|
||||||
# BMLSA
|
# BMLSA
|
||||||
|
|
||||||
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm
|
A Basic multi local sequence alignment tool using the Biopython implementation of the Smith-Waterman alignment algorithm.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Aligns a CSV of short sequences locally to a given reference sequence
|
||||||
|
- Outputs in a CSV with old and new positions
|
||||||
|
- Selectable parameters for alignment
|
||||||
|
- Compatible with a broad range of input CSV formats
|
||||||
|
- Automatically aligns against every sequence in the reference FASTA
|
||||||
|
- All produced output is human readable!
|
||||||
|
|
||||||
|
## More Information
|
||||||
|
|
||||||
|
For all live downloadable artifacts, build statuses, unit test results, and documentation, check out the continuous integration page for the [master branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/master/) ([development branch](https://ci.reslate.systems/job/ydeng/job/bmlsa/job/develop/)).
|
@ -1,42 +0,0 @@
|
|||||||
from Bio.Align import PairwiseAligner, substitution_matrices
|
|
||||||
from bmlsa.datatypes import AlignedSequence
|
|
||||||
from bmlsa.exceptions import UnexpectedAlignmentResult
|
|
||||||
|
|
||||||
|
|
||||||
def protein_align_many_to_one_ssw(sequence: str, queries: dict[str, AlignedSequence]):
    """Locally align each query protein sequence against one reference sequence.

    The aligner runs in local mode with the BLOSUM62 substitution matrix, an
    open gap score of -11 and an extend gap score of -1.

    :param sequence: The reference sequence to align the queries to
    :type sequence: str
    :param queries: The query sequences, keyed by their identifier
    :type queries: dict[str, AlignedSequence]
    :raises UnexpectedAlignmentResult: If a query yields more than one alignment
    :return: A mapping from query identifier to a pair of
        (original query, aligned query)
    :rtype: dict
    """
    annotation_pairs = {}
    aligner = PairwiseAligner()
    aligner.mode = "local"
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
    aligner.extend_gap_score = -1
    aligner.open_gap_score = -11
    for query_id, query in queries.items():
        try:
            alignments = aligner.align(sequence, query.sequence)
        except ValueError:
            # Deliberate best-effort: a query the aligner rejects is skipped
            # rather than aborting the whole run.
            continue
        if len(alignments) > 1:
            raise UnexpectedAlignmentResult(
                "More than one alignment resulted from a single query."
            )
        for alignment in alignments:
            # aligned[0] holds the (start, end) blocks on the reference. A
            # gapped alignment has several blocks, so the span runs from the
            # start of the first block to the end of the last one.
            reference_blocks = alignment.aligned[0]
            if len(reference_blocks) == 0:
                # Nothing was actually aligned; there is no position to report.
                continue
            aligned_start = reference_blocks[0][0]
            aligned_end = reference_blocks[-1][1]
            original = AlignedSequence(
                query_id,
                query.sequence,
                query.name,
                query.description,
                query.start,
                query.end,
                query.score,
            )
            aligned = AlignedSequence(
                query_id,
                alignment.query,
                query.name,
                query.description,
                # Blocks are 0-based with an exclusive end; shifting only the
                # start gives a 1-based inclusive range.
                int(aligned_start) + 1,
                int(aligned_end),
                alignment.score,
            )
            annotation_pairs[query_id] = original, aligned
    return annotation_pairs
|
|
94
bmlsa/cli.py
94
bmlsa/cli.py
@ -1,94 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
from Bio import SeqIO
|
|
||||||
from bmlsa.aligner import protein_align_many_to_one_ssw
|
|
||||||
|
|
||||||
from bmlsa.persistence import read_annotations_from_csv, save_alignments_to_csv
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
    """Parse the command line, align the annotations and write the results.

    Each sequence found in the reference FASTA file is aligned against all
    annotations read from the CSV, and one CSV of results is written per
    reference sequence into the output location.

    :param argv: Argument list to parse; defaults to None, in which case
        ``sys.argv[1:]`` is used by :mod:`argparse`
    :type argv: list[str], optional
    """
    # The program name shown in usage/help must match the installed command.
    argparser = argparse.ArgumentParser("bmlsa")
    argparser.add_argument(
        "annotations",
        type=str,
        help=(
            "Path to CSV containing the sequences to align as well as the "
            "annotations for the respective sequences"
        ),
        metavar="a",
    )
    argparser.add_argument(
        "sequence",
        type=str,
        help=(
            "Path to the sequence to annotate in FASTA format. "
            "If multiple sequences are present, annotations will be run on each"
        ),
        metavar="s",
    )
    argparser.add_argument(
        "output", type=str, help="Path to output location", metavar="o"
    )
    # The ID and sequence columns are the only ones every row must provide,
    # so fail at parse time instead of deep inside the CSV reader.
    argparser.add_argument(
        "-I",
        "--id-header",
        type=str,
        help="The header for the ID of the annotation",
        required=True,
    )
    argparser.add_argument(
        "-N",
        "--name-header",
        type=str,
        help="The header for the name of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-D",
        "--desc-header",
        type=str,
        help="The header for the description of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-T",
        "--start-header",
        type=str,
        help="The header for the start of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-E",
        "--end-header",
        type=str,
        help="The header for the end of the annotation",
        required=False,
    )
    argparser.add_argument(
        "-S",
        "--seq-header",
        type=str,
        help="The header for the sequence of the annotation",
        required=True,
    )
    args = argparser.parse_args(argv)
    given_annotations = read_annotations_from_csv(
        args.annotations,
        args.id_header,
        args.name_header,
        args.desc_header,
        args.start_header,
        args.end_header,
        args.seq_header,
    )
    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
            aligned_annotations = protein_align_many_to_one_ssw(
                str(sequence.seq), given_annotations
            )
            # "|" and "." in FASTA identifiers are replaced to keep the
            # generated file name simple.
            save_alignments_to_csv(
                aligned_annotations,
                os.path.join(
                    args.output,
                    sequence.id.replace("|", "+").replace(".", "_") + ".csv",
                ),
            )
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,46 +0,0 @@
|
|||||||
class AlignedSequence:
    """Read-only record describing a sequence and, optionally, its alignment."""

    def __init__(
        self,
        id: str,
        sequence: str,
        name: str = None,
        description: str = None,
        start: int = None,
        end: int = None,
        score: int = None,
    ) -> None:
        """Store the given values; everything but id and sequence is optional.

        :param id: The identifier of the sequence
        :param sequence: The sequence itself
        :param name: The name of the sequence, defaults to None
        :param description: The description of the sequence, defaults to None
        :param start: The start of the sequence, defaults to None
        :param end: The end of the sequence, defaults to None
        :param score: The alignment score of the sequence, defaults to None
        """
        # Identity first, then annotations, then positional data.
        self._id = id
        self._sequence = sequence
        self._name = name
        self._description = description
        self._start = start
        self._end = end
        self._score = score

    @property
    def id(self):
        """The identifier given at construction."""
        return self._id

    @property
    def sequence(self):
        """The sequence given at construction."""
        return self._sequence

    @property
    def name(self):
        """The name given at construction, or None."""
        return self._name

    @property
    def description(self):
        """The description given at construction, or None."""
        return self._description

    @property
    def start(self):
        """The start given at construction, or None."""
        return self._start

    @property
    def end(self):
        """The end given at construction, or None."""
        return self._end

    @property
    def score(self):
        """The score given at construction, or None."""
        return self._score
|
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Minimal makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line, and also
|
||||||
|
# from the environment for the first two.
|
||||||
|
SPHINXOPTS ?=
|
||||||
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
SOURCEDIR = source
|
||||||
|
BUILDDIR = build
|
||||||
|
|
||||||
|
# Put it first so that "make" without argument is like "make help".
|
||||||
|
help:
|
||||||
|
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||||
|
|
||||||
|
.PHONY: help Makefile
|
||||||
|
|
||||||
|
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||||
|
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||||
|
%: Makefile
|
||||||
|
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
@ECHO OFF
|
||||||
|
|
||||||
|
pushd %~dp0
|
||||||
|
|
||||||
|
REM Command file for Sphinx documentation
|
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" (
|
||||||
|
set SPHINXBUILD=sphinx-build
|
||||||
|
)
|
||||||
|
set SOURCEDIR=source
|
||||||
|
set BUILDDIR=build
|
||||||
|
|
||||||
|
%SPHINXBUILD% >NUL 2>NUL
|
||||||
|
if errorlevel 9009 (
|
||||||
|
echo.
|
||||||
|
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||||
|
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||||
|
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||||
|
echo.may add the Sphinx directory to PATH.
|
||||||
|
echo.
|
||||||
|
echo.If you don't have Sphinx installed, grab it from
|
||||||
|
echo.https://www.sphinx-doc.org/
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "" goto help
|
||||||
|
|
||||||
|
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
goto end
|
||||||
|
|
||||||
|
:help
|
||||||
|
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
|
||||||
|
:end
|
||||||
|
popd
|
53
docs/source/bmlsa.rst
Normal file
53
docs/source/bmlsa.rst
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
bmlsa package
|
||||||
|
=============
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
bmlsa.aligner module
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa.aligner
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
bmlsa.cli module
|
||||||
|
----------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa.cli
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
bmlsa.datatypes module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa.datatypes
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
bmlsa.exceptions module
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa.exceptions
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
bmlsa.io module
|
||||||
|
---------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa.io
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Module contents
|
||||||
|
---------------
|
||||||
|
|
||||||
|
.. automodule:: bmlsa
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
33
docs/source/conf.py
Normal file
33
docs/source/conf.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import os
import sys

# Configure system path
#
# autodoc imports the ``bmlsa`` package, which lives in ``src/`` at the
# repository root (two levels above this file). Resolve the path from this
# file's own location instead of the current working directory, so the build
# works no matter where sphinx-build is started from.

sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "src")),
)

# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "BMLSA"
copyright = "2023, Harrison"
author = "Harrison"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = ["sphinx.ext.autodoc"]

templates_path = ["_templates"]
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "alabaster"
html_static_path = ["_static"]
|
21
docs/source/index.rst
Normal file
21
docs/source/index.rst
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
.. BMLSA documentation master file, created by
|
||||||
|
sphinx-quickstart on Fri Apr 28 13:04:16 2023.
|
||||||
|
You can adapt this file completely to your liking, but it should at least
|
||||||
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
|
Welcome to BMLSA's documentation!
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
:caption: Contents:
|
||||||
|
|
||||||
|
modules
|
||||||
|
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
7
docs/source/modules.rst
Normal file
7
docs/source/modules.rst
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
bmlsa
|
||||||
|
=====
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
bmlsa
|
@ -3,6 +3,9 @@ channels:
|
|||||||
- conda-forge
|
- conda-forge
|
||||||
dependencies:
|
dependencies:
|
||||||
- biopython=1.81
|
- biopython=1.81
|
||||||
- build=0.7
|
- pytest=7.3
|
||||||
- pytest=7
|
- hypothesis=6.56
|
||||||
- twine=4
|
- twine=4
|
||||||
|
- python-build=0.10.0
|
||||||
|
- setuptools=67.6
|
||||||
|
- sphinx=6.2
|
11
setup.cfg
11
setup.cfg
@ -1,12 +1,17 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = bmlsa
|
name = bmlsa
|
||||||
version = 0.0.3
|
version = 0.0.4
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
packages = bmlsa
|
package_dir =
|
||||||
|
= src
|
||||||
install_requires =
|
install_requires =
|
||||||
biopython ==1.81
|
biopython ==1.81
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
bmlsa = bmlsa.cli:main
|
bmlsa = bmlsa.cli:main
|
||||||
|
|
||||||
|
[tool:pytest]
|
||||||
|
pythonpath = src
|
||||||
|
testpaths = tests
|
68
src/bmlsa/aligner.py
Normal file
68
src/bmlsa/aligner.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Generator, Iterable
|
||||||
|
from Bio.Align import PairwiseAligner, substitution_matrices
|
||||||
|
from bmlsa.datatypes import QuerySequence
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def align_many_to_one_ssw(
    reference_sequence: str,
    queries: Iterable[QuerySequence],
    extend_gap_score: int,
    open_gap_score: int,
    alignment_mode: str,
    substitution_matrix: str = "BLOSUM62",
) -> Generator[tuple[QuerySequence, QuerySequence], None, None]:
    """Aligns :obj:`bmlsa.datatypes.QuerySequence` objects to a given reference sequence

    :param reference_sequence: The reference sequence to align to
    :type reference_sequence: str
    :param queries: An iterable of :obj:`bmlsa.datatypes.QuerySequence`
    :type queries: Iterable[QuerySequence]
    :param extend_gap_score: The extend gap score to use for alignment. Typically negative.
    :type extend_gap_score: int
    :param open_gap_score: The open gap score to use for alignment. Typically negative.
    :type open_gap_score: int
    :param alignment_mode: The alignment mode to use. Either "local" or "global".
    :type alignment_mode: str
    :param substitution_matrix: The name of the substitution matrix available in :mod:`Bio.Align`, defaults to "BLOSUM62"
    :type substitution_matrix: str, optional
    :yield: Pairs of :obj:`bmlsa.datatypes.QuerySequence` objects where the first is the original, and the second is the aligned version
    :rtype: a generator of pairs of :obj:`bmlsa.datatypes.QuerySequence` objects
    """
    # TODO Consider using the built in "scoring" parameter
    aligner = PairwiseAligner()
    aligner.substitution_matrix = substitution_matrices.load(substitution_matrix)
    aligner.extend_gap_score = extend_gap_score
    aligner.open_gap_score = open_gap_score
    aligner.mode = alignment_mode
    for query in queries:
        try:
            alignments = aligner.align(reference_sequence, query.sequence)
        except ValueError:
            logger.warning(
                'Skipping sequence with id "%s" due to invalid characters', query.id
            )
            continue
        # TODO Implement comparison with input positions to choose best
        for alignment in alignments:
            # aligned[0] holds the (start, end) blocks on the reference. A
            # gapped alignment has several blocks, so the span runs from the
            # start of the first block to the end of the last one.
            reference_blocks = alignment.aligned[0]
            if len(reference_blocks) == 0:
                # Nothing was actually aligned; there is no position to report.
                continue
            aligned_start = reference_blocks[0][0]
            aligned_end = reference_blocks[-1][1]
            original = QuerySequence(
                query.id,
                query.sequence,
                query.name,
                query.description,
                query.start,
                query.end,
                query.score,
            )
            aligned = QuerySequence(
                query.id,
                alignment.query,
                query.name,
                query.description,
                # Blocks are 0-based with an exclusive end; shifting only the
                # start gives a 1-based inclusive range.
                int(aligned_start) + 1,
                int(aligned_end),
                alignment.score,
            )
            yield original, aligned
|
195
src/bmlsa/cli.py
Normal file
195
src/bmlsa/cli.py
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
from Bio import SeqIO
|
||||||
|
import logging
|
||||||
|
from bmlsa.aligner import align_many_to_one_ssw
|
||||||
|
|
||||||
|
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
DEFAULT_ALIGNMENT_PARAMETERS = {
|
||||||
|
"BLASTp": {
|
||||||
|
"extend_gap_score": -1,
|
||||||
|
"open_gap_score": -11,
|
||||||
|
"substitution_matrix": "BLOSUM62",
|
||||||
|
"alignment_mode": "local",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run(args):
    """Align all queries against every reference sequence and save the results.

    Scoring parameters start from the ``--behave-as`` preset (if one is
    given) and are then overridden by any explicitly supplied scoring
    argument, as described in the command line help. One CSV is written per
    reference sequence found in the FASTA file.

    :param args: The parsed command line arguments as produced by :func:`main`
    :type args: argparse.Namespace
    :raises SystemExit: With code 3 if ``--behave-as`` names an unknown preset
        or if the scoring parameters are incomplete
    """
    preset = args.behave_as
    if preset and preset not in DEFAULT_ALIGNMENT_PARAMETERS:
        logger.error('"--behave-as" received bad option.')
        raise SystemExit(3)

    # Copy the preset so the module-level defaults are never mutated.
    scoring_parameter = (
        dict(DEFAULT_ALIGNMENT_PARAMETERS[preset])
        if preset
        else {"substitution_matrix": "BLOSUM62"}
    )
    explicit_parameters = {
        "extend_gap_score": args.extend_gap_score,
        "open_gap_score": args.open_gap_score,
        "alignment_mode": args.alignment_mode,
        "substitution_matrix": args.substitution_matrix,
    }
    # Compare against None rather than truthiness: a gap score of 0 is a
    # legitimate value and must not be mistaken for "not provided".
    scoring_parameter.update(
        {
            name: value
            for name, value in explicit_parameters.items()
            if value is not None
        }
    )

    required = ("extend_gap_score", "open_gap_score", "alignment_mode")
    if any(name not in scoring_parameter for name in required):
        logger.error(
            'Must either specify all of "--extend-gap-score", "--open-gap-score", '
            '"--alignment-mode", or, at least "--behave-as". See help (-h) for '
            "more information."
        )
        raise SystemExit(3)

    # Materialise the queries once: they are reused for every reference
    # sequence, and a one-shot iterator would be empty after the first one.
    queries = list(
        queries_from_csv(
            args.queries,
            args.id_header,
            args.seq_header,
            args.name_header,
            args.desc_header,
            args.start_header,
            args.end_header,
        )
    )

    os.makedirs(args.output, exist_ok=True)
    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
            aligned_queries = align_many_to_one_ssw(
                str(sequence.seq), queries, **scoring_parameter
            )
            # "|" and "." in FASTA identifiers are replaced to keep the
            # generated file name simple.
            save_alignments_to_csv(
                aligned_queries,
                os.path.join(
                    args.output,
                    sequence.id.replace("|", "+").replace(".", "_") + ".csv",
                ),
            )
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
    """Parse the command line arguments and hand them over to :func:`run`.

    :param argv: Argument list to parse; defaults to None, in which case
        ``sys.argv[1:]`` is used by :mod:`argparse`
    :type argv: list[str], optional
    """
    arg_parser = argparse.ArgumentParser("bmlsa")
    arg_parser.add_argument(
        "queries",
        type=str,
        help=(
            "Path to CSV containing the sequences to align as well as the "
            "queries for the respective sequences."
        ),
        metavar="a",
    )
    arg_parser.add_argument(
        "sequence",
        type=str,
        help=(
            "Path to the sequence to use as reference in FASTA format. "
            "If multiple sequences are present in the same FASTA file, "
            "each will be used as a separate reference sequence for separate "
            "runs automatically."
        ),
        metavar="s",
    )
    arg_parser.add_argument(
        "output", type=str, help="Path to output location", metavar="o"
    )
    arg_parser.add_argument(
        "-I",
        "--id-header",
        type=str,
        help="The header of the column for the ID of the sequence to align to "
        "the reference sequence.",
        required=True,
    )
    arg_parser.add_argument(
        "-N",
        "--name-header",
        type=str,
        help="The header of the column for the name of the sequence to align to "
        "the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-D",
        "--desc-header",
        type=str,
        help="The header of the column for the description of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-T",
        "--start-header",
        type=str,
        help="The header of the column for the start position of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-E",
        "--end-header",
        type=str,
        help="The header of the column for end position of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-S",
        "--seq-header",
        type=str,
        help="The header of the column for the actual sequence to align to the "
        "reference sequence.",
        required=True,
    )
    # The scoring options default to None so that "not given" can be told
    # apart from any real value.
    arg_parser.add_argument(
        "-e",
        "--extend-gap-score",
        type=int,
        help="The scoring for extending a gap.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-o",
        "--open-gap-score",
        type=int,
        help="The scoring for opening a gap.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-M",
        "--alignment-mode",
        type=str,
        help="The alignment mode.",
        choices=["local", "global"],
        required=False,
    )
    arg_parser.add_argument(
        "-m",
        "--substitution-matrix",
        type=str,
        help="The name of the substitution matrix.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-B",
        "--behave-as",
        type=str,
        help="Use built-in parameters for alignment scoring. If this is specified "
        "along with either of the scoring arguments, the alignment parameter "
        'arguments ("--open-gap-score" and/or "--extend-gap-score" and '
        '"--alignment-mode") will override any defaults set by this argument.',
        required=False,
        default=None,
    )
    args = arg_parser.parse_args(argv)
    run(args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
65
src/bmlsa/datatypes.py
Normal file
65
src/bmlsa/datatypes.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
class QuerySequence:
    """Represents a sequence that may be aligned."""

    def __init__(
        self,
        id: str,
        sequence: str,
        name: str = None,
        description: str = None,
        start: int = None,
        end: int = None,
        score: int = None,
    ) -> None:
        """Instantiates a :obj:`bmlsa.datatypes.QuerySequence` object

        Line feeds and carriage returns are removed from the sequence before
        it is stored.

        :param id: The id of the query sequence
        :type id: str
        :param sequence: The sequence itself
        :type sequence: str
        :param name: The name of the sequence, defaults to None
        :type name: str, optional
        :param description: The description of the sequence, defaults to None
        :type description: str, optional
        :param start: The start of the sequence, defaults to None
        :type start: int, optional
        :param end: The end of the sequence, defaults to None
        :type end: int, optional
        :param score: The alignment score of the sequence, defaults to None
        :type score: int, optional
        """
        self._id = id
        # Drop "\n" and "\r" in a single pass over the text.
        self._sequence = sequence.translate({ord("\n"): None, ord("\r"): None})
        self._name = name
        self._description = description
        self._start = start
        self._end = end
        self._score = score

    @property
    def id(self):
        """The id given at construction."""
        return self._id

    @property
    def sequence(self):
        """The sequence with line breaks removed."""
        return self._sequence

    @property
    def name(self):
        """The name given at construction, or None."""
        return self._name

    @property
    def description(self):
        """The description given at construction, or None."""
        return self._description

    @property
    def start(self):
        """The start given at construction, or None."""
        return self._start

    @property
    def end(self):
        """The end given at construction, or None."""
        return self._end

    @property
    def score(self):
        """The score given at construction, or None."""
        return self._score
|
@ -1,44 +1,63 @@
|
|||||||
import csv
|
import csv
|
||||||
|
from typing import Generator, Iterable
|
||||||
|
|
||||||
from bmlsa.datatypes import AlignedSequence
|
from bmlsa.datatypes import QuerySequence
|
||||||
|
|
||||||
|
|
||||||
def read_annotations_from_csv(
|
def queries_from_csv(
|
||||||
csv_path: str,
|
csv_path: str,
|
||||||
id_header: str,
|
id_header: str,
|
||||||
name_header: str,
|
|
||||||
desc_header: str,
|
|
||||||
start_header: str,
|
|
||||||
end_header: str,
|
|
||||||
sequence_header: str,
|
sequence_header: str,
|
||||||
):
|
name_header: str = None,
|
||||||
annotations = {}
|
desc_header: str = None,
|
||||||
|
start_header: str = None,
|
||||||
|
end_header: str = None,
|
||||||
|
) -> Generator[QuerySequence, None, None]:
|
||||||
|
"""Generates :obj:`bmlsa.datatypes.QuerySequence` instances from a CSV file.
|
||||||
|
|
||||||
|
:param csv_path: Path to CSV to use
|
||||||
|
:type csv_path: str
|
||||||
|
:param id_header: The column title for the unique identifier for each query sequence
|
||||||
|
:type id_header: str
|
||||||
|
:param sequence_header: The column title for the sequences themselves
|
||||||
|
:type sequence_header: str
|
||||||
|
:param name_header: The column title for the name of the sequence, defaults to None
|
||||||
|
:type name_header: str, optional
|
||||||
|
:param desc_header: The column title for the description of the sequence, defaults to None
|
||||||
|
:type desc_header: str, optional
|
||||||
|
:param start_header: The column title for the start position of the sequence, defaults to None
|
||||||
|
:type start_header: str, optional
|
||||||
|
:param end_header: The column title for the end position of the sequence, defaults to None
|
||||||
|
:type end_header: str, optional
|
||||||
|
:yield: One :obj:`bmlsa.datatypes.QuerySequence` for each row
|
||||||
|
:rtype: A generator that yields objects of :class:`bmlsa.datatypes.QuerySequence`
|
||||||
|
"""
|
||||||
with open(csv_path, "r") as csv_fd:
|
with open(csv_path, "r") as csv_fd:
|
||||||
reader = csv.reader(csv_fd)
|
reader = csv.reader(csv_fd)
|
||||||
id_ind = None
|
id_ind = None
|
||||||
|
sequence_ind = None
|
||||||
name_ind = None
|
name_ind = None
|
||||||
desc_ind = None
|
desc_ind = None
|
||||||
start_ind = None
|
start_ind = None
|
||||||
end_ind = None
|
end_ind = None
|
||||||
sequence_ind = None
|
|
||||||
headers_parsed = False
|
headers_parsed = False
|
||||||
for row in reader:
|
for row in reader:
|
||||||
if not headers_parsed:
|
if not headers_parsed:
|
||||||
id_ind = row.index(id_header)
|
id_ind = row.index(id_header)
|
||||||
|
sequence_ind = row.index(sequence_header)
|
||||||
name_ind = row.index(name_header) if name_header else None
|
name_ind = row.index(name_header) if name_header else None
|
||||||
desc_ind = row.index(desc_header) if desc_header else None
|
desc_ind = row.index(desc_header) if desc_header else None
|
||||||
start_ind = row.index(start_header) if start_header else None
|
start_ind = row.index(start_header) if start_header else None
|
||||||
end_ind = row.index(end_header) if end_header else None
|
end_ind = row.index(end_header) if end_header else None
|
||||||
sequence_ind = row.index(sequence_header)
|
|
||||||
headers_parsed = True
|
headers_parsed = True
|
||||||
continue
|
continue
|
||||||
id = row[id_ind]
|
id = row[id_ind]
|
||||||
|
sequence = row[sequence_ind]
|
||||||
name = row[name_ind] if name_header else None
|
name = row[name_ind] if name_header else None
|
||||||
desc = row[desc_ind] if desc_header else None
|
desc = row[desc_ind] if desc_header else None
|
||||||
start = row[start_ind] if start_header else None
|
start = row[start_ind] if start_header else None
|
||||||
end = row[end_ind] if end_header else None
|
end = row[end_ind] if end_header else None
|
||||||
sequence = row[sequence_ind]
|
yield QuerySequence(
|
||||||
annotations[id] = AlignedSequence(
|
|
||||||
id,
|
id,
|
||||||
sequence,
|
sequence,
|
||||||
name,
|
name,
|
||||||
@ -46,18 +65,23 @@ def read_annotations_from_csv(
|
|||||||
int(start) if start else None,
|
int(start) if start else None,
|
||||||
int(end) if end else None,
|
int(end) if end else None,
|
||||||
)
|
)
|
||||||
return annotations
|
|
||||||
|
|
||||||
|
|
||||||
def save_alignments_to_csv(
|
def save_alignments_to_csv(
|
||||||
aligned_pairs: dict[str, tuple[AlignedSequence, AlignedSequence]], output_path: str
|
aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]], output_path: str
|
||||||
):
|
) -> None:
|
||||||
|
"""Saves alignments to a CSV.
|
||||||
|
|
||||||
|
:param aligned_pairs: An iterable of (original sequence, aligned sequence) pairs
|
||||||
|
:type aligned_pairs: Iterable[tuple[QuerySequence, QuerySequence]]
|
||||||
|
:param output_path: The path of the CSV file to write
|
||||||
|
:type output_path: str
|
||||||
|
"""
|
||||||
with open(output_path, "w") as output_fd:
|
with open(output_path, "w") as output_fd:
|
||||||
writer = csv.writer(output_fd)
|
writer = csv.writer(output_fd)
|
||||||
header_wrote = False
|
header_wrote = False
|
||||||
header_order = None
|
header_order = None
|
||||||
for id, annotations in aligned_pairs.items():
|
for original, aligned in aligned_pairs:
|
||||||
original, aligned = annotations
|
|
||||||
original_vars = vars(original)
|
original_vars = vars(original)
|
||||||
aligned_vars = vars(aligned)
|
aligned_vars = vars(aligned)
|
||||||
if not header_wrote:
|
if not header_wrote:
|
51
tests/bmlsa/test_aligner.py
Normal file
51
tests/bmlsa/test_aligner.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import pytest
|
||||||
|
from Bio import SeqIO
|
||||||
|
from bmlsa.aligner import align_many_to_one_ssw
|
||||||
|
from bmlsa.cli import DEFAULT_ALIGNMENT_PARAMETERS
|
||||||
|
from bmlsa.datatypes import QuerySequence
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def reference_sequence():
|
||||||
|
return str(
|
||||||
|
list(SeqIO.parse("tests/resources/NC_045512_coding.fasta", "fasta"))[0].seq
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def queries():
|
||||||
|
return [
|
||||||
|
QuerySequence(
|
||||||
|
"ORF10",
|
||||||
|
"ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT"
|
||||||
|
"GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG",
|
||||||
|
start=29558,
|
||||||
|
end=29674,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_align_many_to_one_returns_data(reference_sequence, queries):
|
||||||
|
results = align_many_to_one_ssw(
|
||||||
|
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
||||||
|
)
|
||||||
|
assert isinstance(results, Iterable)
|
||||||
|
|
||||||
|
|
||||||
|
def test_align_many_to_one_returns_correct_data_structure(reference_sequence, queries):
|
||||||
|
results = align_many_to_one_ssw(
|
||||||
|
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
||||||
|
)
|
||||||
|
for original, aligned_seq in results:
|
||||||
|
assert isinstance(original, QuerySequence)
|
||||||
|
assert isinstance(aligned_seq, QuerySequence)
|
||||||
|
|
||||||
|
|
||||||
|
def test_align_many_to_one_returns_correct_data(reference_sequence, queries):
|
||||||
|
results = align_many_to_one_ssw(
|
||||||
|
reference_sequence, queries, **DEFAULT_ALIGNMENT_PARAMETERS["BLASTp"]
|
||||||
|
)
|
||||||
|
for original, aligned_seq in results:
|
||||||
|
assert original.start == aligned_seq.start
|
||||||
|
assert original.end == aligned_seq.end
|
31
tests/bmlsa/test_io.py
Normal file
31
tests/bmlsa/test_io.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from csv import reader
|
||||||
|
from os import path
|
||||||
|
from bmlsa.datatypes import QuerySequence
|
||||||
|
from bmlsa.io import queries_from_csv, save_alignments_to_csv
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
|
||||||
|
def test_queries_from_csv_has_data():
|
||||||
|
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||||
|
assert isinstance(results, Iterable)
|
||||||
|
|
||||||
|
|
||||||
|
def test_queries_from_csv_data_valid():
|
||||||
|
results = queries_from_csv("tests/resources/SARS_CoV-2_genes.csv", "id", "sequence")
|
||||||
|
for aligned_seq in results:
|
||||||
|
assert isinstance(aligned_seq.id, str)
|
||||||
|
assert isinstance(aligned_seq.sequence, str)
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_alignments_to_csv_produces_correct_headers_in_csv(tmpdir):
|
||||||
|
output_path = path.join(tmpdir, "alignment_results.csv")
|
||||||
|
dummy_sequence = QuerySequence("DUMMY", "ATACTGGAAAA", name="test_sequence")
|
||||||
|
alignments = [(dummy_sequence, dummy_sequence)]
|
||||||
|
save_alignments_to_csv(alignments, output_path)
|
||||||
|
|
||||||
|
with open(output_path, "r") as csv_fd:
|
||||||
|
results = list(reader(csv_fd))
|
||||||
|
vars_to_check = list(vars(dummy_sequence).keys())
|
||||||
|
for var_to_check in vars_to_check:
|
||||||
|
assert "original" + var_to_check in results[0]
|
||||||
|
assert "aligned" + var_to_check in results[0]
|
430
tests/resources/NC_045512_coding.fasta
Normal file
430
tests/resources/NC_045512_coding.fasta
Normal file
@ -0,0 +1,430 @@
|
|||||||
|
>NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome
|
||||||
|
ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA
|
||||||
|
CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC
|
||||||
|
TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG
|
||||||
|
TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC
|
||||||
|
CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC
|
||||||
|
GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG
|
||||||
|
CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT
|
||||||
|
GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC
|
||||||
|
GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT
|
||||||
|
TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA
|
||||||
|
GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG
|
||||||
|
TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG
|
||||||
|
CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG
|
||||||
|
TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG
|
||||||
|
CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA
|
||||||
|
ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA
|
||||||
|
CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC
|
||||||
|
CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA
|
||||||
|
GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT
|
||||||
|
ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG
|
||||||
|
GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG
|
||||||
|
CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA
|
||||||
|
CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA
|
||||||
|
ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA
|
||||||
|
GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT
|
||||||
|
TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAG
|
||||||
|
GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG
|
||||||
|
TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC
|
||||||
|
GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG
|
||||||
|
ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG
|
||||||
|
GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT
|
||||||
|
AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA
|
||||||
|
TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT
|
||||||
|
AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA
|
||||||
|
GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC
|
||||||
|
TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT
|
||||||
|
AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA
|
||||||
|
GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT
|
||||||
|
ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA
|
||||||
|
GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT
|
||||||
|
GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA
|
||||||
|
ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
|
||||||
|
ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA
|
||||||
|
TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG
|
||||||
|
AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT
|
||||||
|
TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA
|
||||||
|
CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC
|
||||||
|
AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT
|
||||||
|
AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA
|
||||||
|
GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA
|
||||||
|
CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG
|
||||||
|
TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT
|
||||||
|
GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT
|
||||||
|
TATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA
|
||||||
|
TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT
|
||||||
|
GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTA
|
||||||
|
AACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAAC
|
||||||
|
TCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCA
|
||||||
|
GATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTG
|
||||||
|
ATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT
|
||||||
|
GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAAT
|
||||||
|
GGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTA
|
||||||
|
TTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGC
|
||||||
|
AGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAA
|
||||||
|
TATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA
|
||||||
|
CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA
|
||||||
|
TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTT
|
||||||
|
TCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAG
|
||||||
|
AACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACA
|
||||||
|
ACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC
|
||||||
|
CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTA
|
||||||
|
AGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA
|
||||||
|
ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGT
|
||||||
|
AAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTG
|
||||||
|
ATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA
|
||||||
|
TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAA
|
||||||
|
ATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTA
|
||||||
|
ACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT
|
||||||
|
GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGT
|
||||||
|
GGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT
|
||||||
|
TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTC
|
||||||
|
ACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGT
|
||||||
|
GAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAG
|
||||||
|
ACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG
|
||||||
|
TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG
|
||||||
|
TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAAC
|
||||||
|
CATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAA
|
||||||
|
CCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGT
|
||||||
|
GATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAAC
|
||||||
|
CTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG
|
||||||
|
TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGA
|
||||||
|
ATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGA
|
||||||
|
AAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
|
||||||
|
TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
|
||||||
|
ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
|
||||||
|
CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
|
||||||
|
AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
|
||||||
|
TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
|
||||||
|
CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
|
||||||
|
TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
|
||||||
|
TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
|
||||||
|
GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
|
||||||
|
TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
|
||||||
|
TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
|
||||||
|
ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
|
||||||
|
TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
|
||||||
|
TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
|
||||||
|
GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
|
||||||
|
GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
|
||||||
|
GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
|
||||||
|
GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
|
||||||
|
TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
|
||||||
|
AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
|
||||||
|
GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
|
||||||
|
AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
|
||||||
|
AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
|
||||||
|
AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGT
|
||||||
|
TGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTT
|
||||||
|
ACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTG
|
||||||
|
GTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT
|
||||||
|
ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAG
|
||||||
|
AATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAG
|
||||||
|
CACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTT
|
||||||
|
TGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAA
|
||||||
|
ATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA
|
||||||
|
ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC
|
||||||
|
ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGC
|
||||||
|
ACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACAC
|
||||||
|
CATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTT
|
||||||
|
TAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT
|
||||||
|
GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACC
|
||||||
|
TTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC
|
||||||
|
AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCA
|
||||||
|
GGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTG
|
||||||
|
GTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA
|
||||||
|
CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTC
|
||||||
|
CTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTT
|
||||||
|
ACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT
|
||||||
|
CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGG
|
||||||
|
TTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG
|
||||||
|
CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTAC
|
||||||
|
GCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGC
|
||||||
|
TACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTC
|
||||||
|
TTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC
|
||||||
|
ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT
|
||||||
|
GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAG
|
||||||
|
ATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGG
|
||||||
|
ACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAG
|
||||||
|
TTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTT
|
||||||
|
ACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG
|
||||||
|
TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCAT
|
||||||
|
GCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTA
|
||||||
|
CGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTT
|
||||||
|
TCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTA
|
||||||
|
ACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG
|
||||||
|
CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA
|
||||||
|
TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACA
|
||||||
|
ATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTC
|
||||||
|
AATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTC
|
||||||
|
TGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC
|
||||||
|
ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATA
|
||||||
|
TGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT
|
||||||
|
AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTG
|
||||||
|
ACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCT
|
||||||
|
CTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG
|
||||||
|
TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTC
|
||||||
|
TTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTG
|
||||||
|
GTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA
|
||||||
|
GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTA
|
||||||
|
GCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC
|
||||||
|
TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAA
|
||||||
|
AGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTA
|
||||||
|
GACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTA
|
||||||
|
GTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA
|
||||||
|
TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA
|
||||||
|
GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTG
|
||||||
|
AGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAA
|
||||||
|
TGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACA
|
||||||
|
ACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACAT
|
||||||
|
TTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG
|
||||||
|
TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCT
|
||||||
|
GCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTA
|
||||||
|
CACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACT
|
||||||
|
TGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATC
|
||||||
|
TATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT
|
||||||
|
ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT
|
||||||
|
ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGAT
|
||||||
|
GCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGT
|
||||||
|
GTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGG
|
||||||
|
TGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA
|
||||||
|
AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAG
|
||||||
|
TCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA
|
||||||
|
GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCA
|
||||||
|
CAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAA
|
||||||
|
ATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT
|
||||||
|
GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTC
|
||||||
|
CAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCA
|
||||||
|
ACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC
|
||||||
|
ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATG
|
||||||
|
ATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT
|
||||||
|
AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAA
|
||||||
|
GATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTG
|
||||||
|
TAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGT
|
||||||
|
TGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA
|
||||||
|
AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG
|
||||||
|
ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGG
|
||||||
|
ACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAG
|
||||||
|
CTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGT
|
||||||
|
ATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTC
|
||||||
|
AGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT
|
||||||
|
GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTC
|
||||||
|
AGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAG
|
||||||
|
ACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCT
|
||||||
|
AACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGAC
|
||||||
|
TTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC
|
||||||
|
TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC
|
||||||
|
TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAG
|
||||||
|
GAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAG
|
||||||
|
TGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTT
|
||||||
|
AGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA
|
||||||
|
GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACC
|
||||||
|
AGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC
|
||||||
|
ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTAC
|
||||||
|
AACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGC
|
||||||
|
ATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT
|
||||||
|
GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTA
|
||||||
|
TGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATAC
|
||||||
|
AATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC
|
||||||
|
GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTA
|
||||||
|
TAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA
|
||||||
|
CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGAT
|
||||||
|
AACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTG
|
||||||
|
TTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTT
|
||||||
|
ATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT
|
||||||
|
GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT
|
||||||
|
GTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAA
|
||||||
|
TACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGT
|
||||||
|
GATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTG
|
||||||
|
AGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCT
|
||||||
|
TTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT
|
||||||
|
AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACC
|
||||||
|
GAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATT
|
||||||
|
AAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATC
|
||||||
|
TCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGG
|
||||||
|
GACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT
|
||||||
|
GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT
|
||||||
|
AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACAT
|
||||||
|
TAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGA
|
||||||
|
AATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTAC
|
||||||
|
ATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT
|
||||||
|
TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCC
|
||||||
|
TGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA
|
||||||
|
GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCAC
|
||||||
|
AAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTA
|
||||||
|
TAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC
|
||||||
|
TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTA
|
||||||
|
ATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTT
|
||||||
|
GCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC
|
||||||
|
TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACA
|
||||||
|
CTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT
|
||||||
|
CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAA
|
||||||
|
GAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTG
|
||||||
|
TTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTA
|
||||||
|
TGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA
|
||||||
|
CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA
|
||||||
|
GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATC
|
||||||
|
TATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTT
|
||||||
|
TCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTA
|
||||||
|
TGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCA
|
||||||
|
TGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT
|
||||||
|
AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAA
|
||||||
|
AGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAA
|
||||||
|
CCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGT
|
||||||
|
GACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTG
|
||||||
|
TATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAG
|
||||||
|
AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC
|
||||||
|
ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTC
|
||||||
|
CATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTAT
|
||||||
|
AACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCT
|
||||||
|
TATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA
|
||||||
|
ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGG
|
||||||
|
ACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA
|
||||||
|
GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTA
|
||||||
|
AACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGA
|
||||||
|
CTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA
|
||||||
|
CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTAT
|
||||||
|
TTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCC
|
||||||
|
CAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG
|
||||||
|
AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTA
|
||||||
|
AACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT
|
||||||
|
AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTA
|
||||||
|
CTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTA
|
||||||
|
CAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTT
|
||||||
|
ATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG
|
||||||
|
ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA
|
||||||
|
AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCT
|
||||||
|
ATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTC
|
||||||
|
GCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTA
|
||||||
|
TACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTAC
|
||||||
|
GGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT
|
||||||
|
TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAA
|
||||||
|
ATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCT
|
||||||
|
AGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATG
|
||||||
|
GGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTG
|
||||||
|
GATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG
|
||||||
|
GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA
|
||||||
|
AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAG
|
||||||
|
GTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAA
|
||||||
|
CAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCA
|
||||||
|
ATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA
|
||||||
|
GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATG
|
||||||
|
TCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC
|
||||||
|
TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCC
|
||||||
|
CTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCAT
|
||||||
|
TTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGC
|
||||||
|
GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTC
|
||||||
|
AAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTA
|
||||||
|
TTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT
|
||||||
|
TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCA
|
||||||
|
GGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA
|
||||||
|
ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTT
|
||||||
|
GAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATT
|
||||||
|
GTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTG
|
||||||
|
TTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC
|
||||||
|
ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTAT
|
||||||
|
GCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTG
|
||||||
|
ATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTC
|
||||||
|
TAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGA
|
||||||
|
GATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACT
|
||||||
|
TTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT
|
||||||
|
TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAAC
|
||||||
|
AAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTC
|
||||||
|
TGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGA
|
||||||
|
GATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAAC
|
||||||
|
CAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTA
|
||||||
|
CTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC
|
||||||
|
TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACT
|
||||||
|
CAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTG
|
||||||
|
GTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTAC
|
||||||
|
CACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCA
|
||||||
|
ACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAA
|
||||||
|
TAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC
|
||||||
|
AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCA
|
||||||
|
TTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATT
|
||||||
|
GCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACC
|
||||||
|
TTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGG
|
||||||
|
ACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTG
|
||||||
|
GAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA
|
||||||
|
AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCA
|
||||||
|
CAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATA
|
||||||
|
TCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAG
|
||||||
|
TTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCT
|
||||||
|
ACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTA
|
||||||
|
TGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA
|
||||||
|
GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTT
|
||||||
|
TCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACA
|
||||||
|
CATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACC
|
||||||
|
TGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTA
|
||||||
|
GGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTG
|
||||||
|
CCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC
|
||||||
|
ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGT
|
||||||
|
ATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACG
|
||||||
|
ACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGA
|
||||||
|
ATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTC
|
||||||
|
GCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCT
|
||||||
|
TGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT
|
||||||
|
GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTG
|
||||||
|
GCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAAT
|
||||||
|
AATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTT
|
||||||
|
CTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTA
|
||||||
|
CTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATG
|
||||||
|
GGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA
|
||||||
|
ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC
|
||||||
|
CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT
|
||||||
|
TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC
|
||||||
|
TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT
|
||||||
|
TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT
|
||||||
|
GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT
|
||||||
|
CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA
|
||||||
|
TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC
|
||||||
|
CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA
|
||||||
|
AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT
|
||||||
|
AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC
|
||||||
|
ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC
|
||||||
|
TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT
|
||||||
|
GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA
|
||||||
|
GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG
|
||||||
|
ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG
|
||||||
|
CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC
|
||||||
|
TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA
|
||||||
|
AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC
|
||||||
|
CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA
|
||||||
|
GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA
|
||||||
|
TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT
|
||||||
|
TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT
|
||||||
|
GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT
|
||||||
|
ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG
|
||||||
|
CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA
|
||||||
|
GATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG
|
||||||
|
TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC
|
||||||
|
GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA
|
||||||
|
TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT
|
||||||
|
GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA
|
||||||
|
AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG
|
||||||
|
ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG
|
||||||
|
TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT
|
||||||
|
GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC
|
||||||
|
CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG
|
||||||
|
TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT
|
||||||
|
GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA
|
||||||
|
AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC
|
||||||
|
ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT
|
||||||
|
AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA
|
||||||
|
ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG
|
||||||
|
TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG
|
||||||
|
CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC
|
||||||
|
AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA
|
||||||
|
ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG
|
||||||
|
TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC
|
||||||
|
TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC
|
||||||
|
TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT
|
||||||
|
TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG
|
||||||
|
CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT
|
||||||
|
GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT
|
||||||
|
TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC
|
||||||
|
GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT
|
||||||
|
TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA
|
||||||
|
AAAAAAAAAAAAA
|
||||||
|
|
8
tests/resources/SARS_CoV-2_genes.csv
Normal file
8
tests/resources/SARS_CoV-2_genes.csv
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
id,sequence,start,end
|
||||||
|
ORF8,"ATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGT
|
||||||
|
CATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATAT
|
||||||
|
TAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCC
|
||||||
|
ATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTA
|
||||||
|
AATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGT
|
||||||
|
ATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAA",27894,28259
|
||||||
|
ORF10,ATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAG,29558,29674
|
|
Loading…
Reference in New Issue
Block a user