Compare commits
21 Commits
cac100d0ac
...
develop
Author | SHA1 | Date | |
---|---|---|---|
953dee357f | |||
411a169079 | |||
6a3e692d82 | |||
dcf28c8051 | |||
ddb98514fb | |||
3320b6fc43 | |||
26f95c66a2 | |||
cd033e99e5 | |||
0e5c323070 | |||
bb1a7dfe38 | |||
40f49a48ac | |||
dac4d5a620 | |||
c02b726745 | |||
d2dfbb9464 | |||
72f7af1879 | |||
aaeb02853e | |||
5042d0d5fa | |||
52bd3ac07d | |||
7c0d209470 | |||
bca38d5838 | |||
8cb0e17ed3 |
16
.devcontainer/Dockerfile
Normal file
16
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
FROM mcr.microsoft.com/devcontainers/miniconda:1-3

# Stage environment.yml (when the build context has one) in a scratch directory so the
# base conda environment can be updated from it. noop.txt is listed next to it so the
# COPY always has at least one matching source and cannot fail when environment.yml
# is absent.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/

# Update the base environment only when environment.yml was actually copied, then
# drop the scratch directory within the same layer.
RUN ENV_FILE=/tmp/conda-tmp/environment.yml; \
    if [ -f "${ENV_FILE}" ]; then \
        umask 0002 && /opt/conda/bin/conda env update -n base -f "${ENV_FILE}"; \
    fi \
    && rm -rf /tmp/conda-tmp

# [Optional] Enable the lines below to replace the default Python with another version.
# RUN conda install -y python=3.6 \
#     && pip install --no-cache-dir pipx \
#     && pipx reinstall-all

# [Optional] Enable the lines below to install extra OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
#     && apt-get -y install --no-install-recommends <your-package-list-here>
|
3
.devcontainer/noop.txt
Normal file
3
.devcontainer/noop.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
This file is copied into the container along with environment.yml* from the
|
||||||
|
parent folder. This is done to prevent the Dockerfile COPY instruction from
|
||||||
|
failing if no environment.yml is found.
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
|
galaxy-server/**
|
||||||
|
test_results
|
||||||
autobigs-cli_test_report.html
|
autobigs-cli_test_report.html
|
||||||
galaxy-server/**
|
|
0
.planemo.yml
Normal file
0
.planemo.yml
Normal file
12
.shed.yml
Normal file
12
.shed.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Tool Shed repository metadata for the autoBIGS Galaxy wrapper.

# Repository identity.
name: autobigs
owner: iuc
categories:
  - Sequence Analysis

# Upstream project links.
homepage_url: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli
remote_repository_url: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli

# Short and long descriptions of the repository.
description: Automated MLST typing from PubMLST and InstitutPasteur.
long_description: |
  A program that allows quickly batched requests for obtaining MLST
  profiles on multiple FASTA sequences and exporting it as a convenient
  CSV. Capable of querying a variety of MLST databases from both
  Institut Pasteur and PubMLST.
|
1
.vscode/extensions.json
vendored
1
.vscode/extensions.json
vendored
@@ -1,6 +1,5 @@
|
|||||||
{
|
{
|
||||||
"recommendations": [
|
"recommendations": [
|
||||||
"davelopez.galaxy-tools",
|
|
||||||
"mechatroner.rainbow-csv",
|
"mechatroner.rainbow-csv",
|
||||||
"redhat.vscode-xml",
|
"redhat.vscode-xml",
|
||||||
"ms-vscode.live-server"
|
"ms-vscode.live-server"
|
||||||
|
12
.vscode/tasks.json
vendored
12
.vscode/tasks.json
vendored
@@ -8,6 +8,18 @@
|
|||||||
"type": "shell",
|
"type": "shell",
|
||||||
"command": "planemo lint",
|
"command": "planemo lint",
|
||||||
"problemMatcher": []
|
"problemMatcher": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"label": "serve",
|
||||||
|
"type": "shell",
|
||||||
|
"command": "planemo serve",
|
||||||
|
"problemMatcher": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"label": "run tests",
|
||||||
|
"type": "shell",
|
||||||
|
"command": "planemo test autobigs-cli.xml --test_output_junit test_results/junit_results.xml --test_output_json test_results/results.json --test_output test_results/human.html",
|
||||||
|
"problemMatcher": []
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
31
Jenkinsfile
vendored
Normal file
31
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
// CI pipeline for the autoBIGS Galaxy tool wrapper: installs the Python
// requirements, lints the tool XML with planemo, runs the planemo tests and
// publishes the resulting JUnit report.
pipeline {
    agent {
        // Build inside the 'pip' container of a pod inherited from the 'pip'
        // template on the 'rsys-devel' Kubernetes cloud.
        kubernetes {
            cloud 'rsys-devel'
            defaultContainer 'pip'
            inheritFrom 'pip'
        }
    }

    stages {
        stage ("install") {
            steps {
                sh 'pip install -r requirements.txt'
                // NOTE(review): presumably a stand-in for the stdlib imghdr module that
                // newer Python versions no longer ship - confirm it is still required.
                sh 'pip install standard-imghdr'
                sh 'planemo ci_setup'
            }
        }
        stage ("lint") {
            steps {
                sh "planemo lint autobigs-cli.xml"
            }
        }
        stage ("test") {
            steps {
                // -p keeps the step from failing when the directory already exists
                // (e.g. a reused workspace).
                sh 'mkdir -p test_results'
                sh "planemo test autobigs-cli.xml --test_output_junit test_results/junit_results.xml --test_output_json test_results/results.json --test_output test_results/human.html"
                // The pattern must name the file written by --test_output_junit above
                // (junit_results.xml); it previously pointed at junit_report.xml, which
                // is never produced.
                xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results/junit_results.xml', stopProcessingIfError: true)]
            }
        }
    }
}
|
17
README.md
Normal file
17
README.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
# autoBIGS.Galaxy
|
||||||
|
|
||||||
|
A program for quickly making batched requests to obtain MLST profiles for multiple FASTA sequences and exporting them as a convenient CSV. Capable of querying a variety of MLST databases from both Institut Pasteur and PubMLST. autoBIGS.galaxy is the Galaxy frontend implementation of autoBIGS.cli and autoBIGS.engine.
|
||||||
|
|
||||||
|
This Galaxy tool implements [autoBIGS.engine](https://pypi.org/project/autoBIGS.engine) by wrapping the official [autoBIGS.cli](https://github.com/Syph-and-VPD-Lab/autoBIGS.cli) command-line tool.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
This Galaxy tool is capable of exactly what [autoBIGS.engine](https://pypi.org/project/autoBIGS.engine) is capable of:
|
||||||
|
- Import multiple whole genome FASTA files
|
||||||
|
- Fetch the BIGSdb databases that are currently live and available
|
||||||
|
- Fetch the available BIGSdb database schemas for a given MLST database
|
||||||
|
- Retrieve exact/non-exact MLST allele variant IDs based off a sequence
|
||||||
|
- Retrieve MLST sequence type IDs based off a sequence
|
||||||
|
- Inexact matches are annotated with an asterisk (\*)
|
||||||
|
- Output all results to a single CSV
|
201
autobigs-cli.xml
201
autobigs-cli.xml
@@ -1,105 +1,142 @@
|
|||||||
<tool id="autobigs-cli" name="AutoBIGS.CLI" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
|
<tool id="autobigs-cli" name="autoBIGS.cli" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
|
||||||
<description>Automated MLST typing from PubMLST and InstitutPasteur.</description>
|
<description>Automated MLST typing from PubMLST and InstitutPasteur MLST databases.</description>
|
||||||
<macros>
|
<macros>
|
||||||
<token name="@TOOL_VERSION@">0.4.3</token>
|
<token name="@TOOL_VERSION@">0.5.0</token>
|
||||||
<token name="@VERSION_SUFFIX@">0</token>
|
<token name="@VERSION_SUFFIX@">0</token>
|
||||||
<import>autobigs-cli_macros.xml</import>
|
<import>autobigs-cli_macros.xml</import>
|
||||||
<expand macro="bio_tools"/>
|
<expand macro="bio_tools"/>
|
||||||
</macros>
|
</macros>
|
||||||
|
|
||||||
<requirements>
|
<requirements>
|
||||||
<requirement type="package" version="@TOOL_VERSION@">autobigs-cli</requirement>
|
<requirement type="package" version="@TOOL_VERSION@">autobigs-cli</requirement>
|
||||||
</requirements>
|
</requirements>
|
||||||
|
|
||||||
<command detect_errors="exit_code"><![CDATA[
|
<command detect_errors="exit_code"><![CDATA[
|
||||||
#if $operation == "info":
|
#if $mode.operation == "info":
|
||||||
#if $retrieved == "schema":
|
#if $mode.select_info.retrieved == "schemes":
|
||||||
autoBIGS info $selected_schema
|
autoBIGS info --retrieve-bigsdb-schemas#for $scheme in $mode.select_info.schemes_list# '${scheme.selected_db}' #end for# --csv $info_schemes_out
|
||||||
#else if $retrieved == "databases":
|
#else if $mode.select_info.retrieved == "databases":
|
||||||
autoBIGS info --retrieve-bigsdbs
|
autoBIGS info --retrieve-bigsdbs --csv $info_db_out
|
||||||
#end if
|
#end if
|
||||||
#else if $operation == "st":
|
#else if $mode.operation == "st":
|
||||||
autoBIGS st $fastas $seqdefdb $schema $mlst_profiles_output
|
autoBIGS st "$fasta" "$seqdefdb" $schema "$mlst_profiles_output" $stop_on_fail
|
||||||
#end if
|
#end if
|
||||||
]]></command>
|
]]></command>
|
||||||
<inputs>
|
<inputs>
|
||||||
<conditional name="mode">
|
<conditional name="mode">
|
||||||
<param name="operation" label="Operation" type="select">
|
<param name="operation" label="Operation" type="select" help="The operational mode of the tool.">
|
||||||
<option value="info"/>
|
<option value="info">Retrieve Database Information</option>
|
||||||
<option value="st"/>
|
<option value="st">Perform Sequence Typing</option>
|
||||||
</param>
|
</param>
|
||||||
<when value="info">
|
<when value="info">
|
||||||
<conditional name="select-info">
|
<conditional name="select_info">
|
||||||
<param name="retrieved" label="Schema or Database List" type="select">
|
<param name="retrieved" label="Retrieve" type="select" help="The type of information should the tool retrieve.">
|
||||||
<option value="schema"/>
|
<option value="schemes">Available Schemes for Database</option>
|
||||||
<option value="databases"/>
|
<option value="databases">Available Databases</option>
|
||||||
</param>
|
</param>
|
||||||
<when value="schema">
|
<when value="schemes">
|
||||||
<repeat name="schemas" title="Schemas">
|
<repeat name="schemes_list" title="Schemes" min="1">
|
||||||
<param name="selected_schema" argument="--retrieve-bigsdb-schemas" label="Retrieve schemas for" type="integer" min="0"/>
|
<param name="selected_db" argument="--retrieve-bigsdb-schemas" label="Database Name" type="text" optional="false" help="The name of the database to retrieve schemes for."/>
|
||||||
</repeat>
|
</repeat>
|
||||||
</when>
|
|
||||||
<when value="databases">
|
|
||||||
<!-- No need to do anything -->
|
|
||||||
</when>
|
</when>
|
||||||
</conditional>
|
<when value="databases">
|
||||||
</when>
|
|
||||||
<when value="st">
|
</when>
|
||||||
<param name="fastas" label="FASTA files" type="data" format="fasta,fas,fa,fna,ffn,faa,mpfa,frn"/>
|
</conditional>
|
||||||
<param name="seqdefdb" label="BIGSdb SeqDef Name" type="text"/>
|
</when>
|
||||||
<param name="schema" label="BIGSdb SeqDef Schema ID" type="integer" min="0"/>
|
<when value="st">
|
||||||
</when>
|
<param name="fasta" label="FASTA file" type="data" format="fasta,fas,fa,fna,ffn,faa,mpfa,frn" multiple="true" help="The FASTA(s) file to perform sequence typing on."/>
|
||||||
</conditional>
|
<param name="seqdefdb" label="BIGSdb SeqDef Name" type="text" optional="false" help="The database to use for typing."/>
|
||||||
</inputs>
|
<param name="schema" label="BIGSdb SeqDef Schema ID" type="integer" min="0" help="The (integer) ID of the scheme to be used."/>
|
||||||
<outputs>
|
<param name="stop_on_fail" label="Stop on failure to match" truevalue="--sof" falsevalue="" value="false" type="boolean" help="Stops running on failure to obtain sequence type."/>
|
||||||
<data name="mlst_profiles_output" label="${tool.name} on ${on_string}" format="csv">
|
</when>
|
||||||
<filter>mode == "st"</filter>
|
</conditional>
|
||||||
</data>
|
</inputs>
|
||||||
</outputs>
|
<outputs>
|
||||||
|
<data name="mlst_profiles_output" label="${tool.name} on ${on_string}" format="csv">
|
||||||
|
<filter>mode['operation'] == 'st'</filter>
|
||||||
|
</data>
|
||||||
|
<data name="info_db_out" label="${tool.name} supported Databases" format="csv">
|
||||||
|
<filter>mode['operation'] == 'info' and mode['select_info']['retrieved'] == 'databases'</filter>
|
||||||
|
</data>
|
||||||
|
<data name="info_schemes_out" label="${tool.name} supported Schemes" format="csv">
|
||||||
|
<filter>mode['operation'] == 'info' and mode['select_info']['retrieved'] == 'schemes'</filter>
|
||||||
|
</data>
|
||||||
|
</outputs>
|
||||||
<tests>
|
<tests>
|
||||||
<test>
|
<test expect_num_outputs="1">
|
||||||
<param name="operation" value="st" />
|
<param name="operation" value="st" />
|
||||||
<param name="fastas" value="tohama_I_bpertussis.fasta" />
|
<param name="fasta" value="tohama_I_minimized_features.fasta" />
|
||||||
<param name="seqdefdb" value="pubmlst_bordetella_seqdef" />
|
<param name="seqdefdb" value="pubmlst_bordetella_seqdef" />
|
||||||
<param name="schema" value="3" />
|
<param name="schema" value="3" />
|
||||||
<output name="mlst_profiles_output" file="results.csv" ftype="csv" />
|
<output name="mlst_profiles_output" file="tohama_I_minimized_features_typed.csv" ftype="csv" />
|
||||||
|
</test>
|
||||||
|
<test expect_num_outputs="1">
|
||||||
|
<param name="operation" value="info" />
|
||||||
|
<param name="retrieved" value="databases" />
|
||||||
|
<output name="info_db_out">
|
||||||
|
<assert_contents>
|
||||||
|
<has_text text="BIGSdb Names,Source"/>
|
||||||
|
<has_text text="pubmlst_bordetella_seqdef,https://bigsdb.pasteur.fr/api"/>
|
||||||
|
</assert_contents>
|
||||||
|
</output>
|
||||||
|
</test>
|
||||||
|
<test expect_num_outputs="1">
|
||||||
|
<param name="operation" value="info" />
|
||||||
|
<repeat name="schemes_list">
|
||||||
|
<param name="selected_db" value="pubmlst_bordetella_seqdef" />
|
||||||
|
</repeat>
|
||||||
|
<output name="info_schemes_out">
|
||||||
|
<assert_contents>
|
||||||
|
<has_text text="Name,ID"/>
|
||||||
|
<has_text text="MLST,3"/>
|
||||||
|
</assert_contents>
|
||||||
|
</output>
|
||||||
</test>
|
</test>
|
||||||
</tests>
|
</tests>
|
||||||
<help><![CDATA[
|
<help><![CDATA[
|
||||||
|
What is autoBIGS
|
||||||
|
================
|
||||||
|
autoBIGS is a tool that automatically performs multi-locus sequence typing (MLST) on given data by making calls to the necessary BIGSdb web APIs.
|
||||||
|
|
||||||
usage: autoBIGS info [-h] [--retrieve-bigsdbs] [--retrieve-bigsdb-schemas LIST_BIGSDB_SCHEMAS [LIST_BIGSDB_SCHEMAS ...]]
|
Tool Modes
|
||||||
|
==========
|
||||||
|
autoBIGS has two modes. Since the tool works with the sequence definition and MLST databases live, it is also able to tell the user the currently available databases, and their associated schemas. See the following sections for more information.
|
||||||
|
|
||||||
Fetches the latest BIGSdb MLST database definitions.
|
Obtaining Database Name and Schema ID
|
||||||
|
=====================================
|
||||||
|
You will need a database name and scheme ID for sequence typing. Follow these steps to check what databases, schemes, and their associated IDs are available.
|
||||||
|
|
||||||
options:
|
1. Select "Retrieve Database Information" for "Operation"
|
||||||
-h, --help show this help message and exit
|
2. Then, for "Retrieve", select "Available Databases" and run. This will produce a table as the job output listing all available database names and their database API URLs.
|
||||||
--retrieve-bigsdbs Lists all known BIGSdb MLST databases (fetched from known APIs and cached).
|
3. Note down the names of the databases you are interested in.
|
||||||
--retrieve-bigsdb-schemas SCHEMA_IDS
|
4. Select "Retrieve Database Information" for "Operation"
|
||||||
Lists the known schema IDs for a given BIGSdb sequence definition database name. The name, and then the ID of the schema is given.
|
5. Then, for "Retrieve", select "Available Schemes for Database".
|
||||||
|
6. Enter the name of the database you noted down and run. This will return a CSV with the scheme IDs. Note down the IDs you are interested in.
|
||||||
|
|
||||||
usage: autoBIGS st [-h] [--exact] [--stop-on-fail] fastas [fastas ...] seqdefdb schema out
|
Performing Sequence Typing
|
||||||
|
==========================
|
||||||
|
Simply select "Perform Sequence Typing" for the "Operation" and select your FASTA files. Then, enter your SeqDef Database name (see "Obtaining Database Name and Schema ID" above) and schema ID.
|
||||||
|
|
||||||
Returns MLST exact profile matches.
|
Special behaviour for FASTAs with multiple sequences
|
||||||
|
====================================================
|
||||||
|
AutoBIGS will treat multiple sequences in the same FASTA file as part of the same sample. This will result in a FASTA with multiple sequences being reported within the same row, with the final sequence type being retrieved from the resulting alleles of the individual sequences within the FASTA. This is helpful if the input FASTA was obtained from a targeted form of sequencing for the specific needed regions (e.g., Sanger sequencing of 7 housekeeping genes), rather than a whole genome sequence. If your data is a whole genome sequence (WGS) of the entire genome or similar, each WGS should have its own FASTA.
|
||||||
|
|
||||||
positional arguments:
|
More Information
|
||||||
fastas The FASTA files to process. Multiple can be listed.
|
================
|
||||||
seqdefdb The BIGSdb seqdef database to use for typing.
|
For more information on the tool being wrapped itself, please see the `autoBIGS.cli GitHub repository`_. Issues, bugs, and feature requests for the tool itself should be submitted to the `autoBIGS.cli issues`_. If the issue/bug/feature request is solely pertinent to the Galaxy wrapper, please check out the `autoBIGS.galaxy issues`_ tracker on GitHub.
|
||||||
schema The BIGSdb seqdef database schema ID (integer) to use for typing.
|
|
||||||
out The output CSV name (.csv will be appended).
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--exact, -ex Should run exact matching rather than returning all similar ones
|
|
||||||
--stop-on-fail, -sof Should the algorithm stop in the case there are no matches (or partial matches when
|
|
||||||
expecting exact matches).
|
|
||||||
|
|
||||||
|
|
||||||
]]></help>
|
.. _autoBIGS.cli GitHub repository: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli
|
||||||
|
.. _autoBIGS.cli issues: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli/issues
|
||||||
|
.. _autoBIGS.galaxy: https://github.com/Syph-and-VPD-Lab/autoBIGS.galaxy
|
||||||
|
.. _autoBIGS.galaxy issues: https://github.com/Syph-and-VPD-Lab/autoBIGS.galaxy/issues
|
||||||
|
]]></help>
|
||||||
<citations>
|
<citations>
|
||||||
<citation type="bibtex">
|
<citation type="bibtex">
|
||||||
@book{Deng2025RealYHD,
|
@book{Deng2025RealYHD,
|
||||||
title = {RealYHD/autoBIGS.cli},
|
title = {Syph-and-VPD-Lab/autoBIGS.cli},
|
||||||
url = {https://github.com/RealYHD/autoBIGS.cli},
|
url = {https://github.com/Syph-and-VPD-Lab/autoBIGS.cli},
|
||||||
author = {Deng, Harrison},
|
author = {Deng, Harrison},
|
||||||
date = {2025-01-24},
|
date = {2025-01-24},
|
||||||
year = {2025},
|
year = {2025},
|
||||||
|
7
environment.yml
Normal file
7
environment.yml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Conda environment specification for development; it targets the "base"
# environment and provides Python plus planemo.
name: base
channels:
  - bioconda
  - conda-forge
dependencies:
  # A single "=" is conda's fuzzy match (any 3.12.x release). The previous
  # "==3.12" is an exact match and restricts the solver to 3.12.0 only.
  - python=3.12
  - planemo
|
146
test-data/databases-2025-02-21.csv
Normal file
146
test-data/databases-2025-02-21.csv
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
BIGSdb Names,Source
|
||||||
|
pubmlst_aactinomycetemcomitans_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_abaumannii_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_achromobacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_aeromonas_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_afumigatus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_aparagallinarum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_aphagocytophilum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_arcobacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bbacilliformis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bcc_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bcereus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bfragilis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bhenselae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_blastocystis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_blicheniformis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bmallei_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bordetella_seqdef,https://bigsdb.pasteur.fr/api
|
||||||
|
pubmlst_borrelia_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bpseudomallei_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_brachyspira_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_brucella_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bsubtilis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_bwashoensis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_calbicans_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_campylobacter_nonjejuni_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_campylobacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cbotulinum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cchauvoei_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cdifficile_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cfreundii_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cglabrata_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_chlamydiales_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ckrusei_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cmaltaromaticum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cperfringens_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_cronobacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_csepticum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_csinensis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ctropicalis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_diphtheria_seqdef,https://bigsdb.pasteur.fr/api
|
||||||
|
pubmlst_dnodosus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ecloacae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_edwardsiella_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_efaecalis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_efaecium_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_escherichia_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_fpsychrophilum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_gallibacterium_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_geotrichum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_hcinaedi_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_helicobacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_hinfluenzae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_hparasuis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_hsuis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_kaerogenes_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_klebsiella_seqdef,https://bigsdb.pasteur.fr/api
|
||||||
|
pubmlst_koxytoca_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_kseptempunctata_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_leptospira_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_lgarvieae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_liberibacter_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_listeria_seqdef,https://bigsdb.pasteur.fr/api
|
||||||
|
pubmlst_llactis_phage_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_lsalivarius_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mabscessus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_magalactiae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_manserisalpingitidis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mbovis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mcanis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mcaseolyticus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mflocculare_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mgallisepticum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mgenitalium_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mhaemolytica_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mhominis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mhyopneumoniae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mhyorhinis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mhyosynoviae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_miowae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mplutonius_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mpneumoniae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_msciuri_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_msynoviae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_mycobacteria_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_neisseria_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_oralstrep_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_orhinotracheale_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_otsutsugamushi_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pacnes_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_paeruginosa_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pdamselae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pfluorescens_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pgingivalis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_plarvae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_plasmid_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pmultocida_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ppentosaceus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_pputida_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_proteus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_psalmonis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ranatipestifer_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_rhodococcus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_rmlst_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sagalactiae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_salmonella_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_saureus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sbsec_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_scanis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_schromogenes_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sdysgalactiae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sepidermidis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_serratia_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sgallolyticus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_shaemolyticus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_shewanella_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_shominis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_siniae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sinorhizobium_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_smaltophilia_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_smitis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sparasitica_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_spneumoniae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_spseudintermedius_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_spyogenes_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ssuis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_sthermophilus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_streptomyces_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_suberis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_szooepidemicus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_taylorella_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_tenacibaculum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_test_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_tpallidum_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_tvaginalis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_ureaplasma_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_vcholerae_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_vibrio_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_vparahaemolyticus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_vtapetis_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_vvulnificus_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_wolbachia_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_xcitri_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_xfastidiosa_seqdef,https://rest.pubmlst.org
|
||||||
|
pubmlst_yersinia_seqdef,https://bigsdb.pasteur.fr/api
|
||||||
|
pubmlst_yruckeri_seqdef,https://rest.pubmlst.org
|
|
@@ -1,2 +0,0 @@
|
|||||||
id,st,clonal-complex,tyrB,icd,pepA,adk,pgm,fumC,glyA
|
|
||||||
BX470248.1,1,ST-2 complex,1,1,1,1,1,1,1
|
|
|
11
test-data/schemas-2025-02-21.csv
Normal file
11
test-data/schemas-2025-02-21.csv
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
Name,ID
|
||||||
|
Autotransporters,11
|
||||||
|
Bp_vaccine antigens,7
|
||||||
|
MLST,3
|
||||||
|
Other toxins,9
|
||||||
|
PRN-test-Bp,5
|
||||||
|
Phase,8
|
||||||
|
T3SS,10
|
||||||
|
cgMLST_genus,1
|
||||||
|
cgMLST_pertussis,4
|
||||||
|
macrolide resistance,6
|
|
2
test-data/tohama_I_bpertussis_wgs_typed.csv
Normal file
2
test-data/tohama_I_bpertussis_wgs_typed.csv
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
|
||||||
|
BX470248.1,1,ST-2 complex,1,1,1,1,1,1,1
|
|
133
test-data/tohama_I_minimized_features.fasta
Normal file
133
test-data/tohama_I_minimized_features.fasta
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
|
||||||
|
ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
|
||||||
|
ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
|
||||||
|
GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
|
||||||
|
CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
|
||||||
|
ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
|
||||||
|
CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
|
||||||
|
AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
|
||||||
|
TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
|
||||||
|
GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
|
||||||
|
CGCCTGTCGCAGGCTCTGCAGAGCTAA
|
||||||
|
>lcl|BX640411.1_cds_CAE40628.1_248 [gene=fumC] [locus_tag=BP0248] [db_xref=GOA:Q7W0A2,InterPro:IPR000362,InterPro:IPR005677,InterPro:IPR008948,InterPro:IPR018951,InterPro:IPR020557,InterPro:IPR022761,InterPro:IPR024083] [protein=fumarate hydratase class II] [protein_id=CAE40628.1] [location=256543..257934] [gbkey=CDS]
|
||||||
|
ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
|
||||||
|
AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
|
||||||
|
GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
|
||||||
|
GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
|
||||||
|
AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
|
||||||
|
GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
|
||||||
|
GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
|
||||||
|
AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
|
||||||
|
TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
|
||||||
|
GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
|
||||||
|
GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
|
||||||
|
CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
|
||||||
|
AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
|
||||||
|
GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
|
||||||
|
CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
|
||||||
|
GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
|
||||||
|
TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
|
||||||
|
CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
|
||||||
|
GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
|
||||||
|
TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
|
||||||
|
>lcl|BX640420.1_cds_CAE43224.1_2904 [gene=glyA] [locus_tag=BP2952] [db_xref=GOA:Q7VUW7,InterPro:IPR001085,InterPro:IPR015421,InterPro:IPR015422,InterPro:IPR015424,InterPro:IPR019798] [protein=serine hydroxymethyltransferase] [protein_id=CAE43224.1] [location=complement(8611..9858)] [gbkey=CDS]
|
||||||
|
ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
|
||||||
|
TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
|
||||||
|
GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
|
||||||
|
GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
|
||||||
|
AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
|
||||||
|
GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
|
||||||
|
AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
|
||||||
|
AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
|
||||||
|
GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
|
||||||
|
GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
|
||||||
|
CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
|
||||||
|
CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
|
||||||
|
TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
|
||||||
|
TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
|
||||||
|
CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
|
||||||
|
GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
|
||||||
|
CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
|
||||||
|
CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
|
||||||
|
>lcl|BX640418.1_cds_CAE42760.1_2440 [gene=icd] [locus_tag=BP2488] [db_xref=GOA:Q7VVZ2,InterPro:IPR001804,InterPro:IPR004439,InterPro:IPR019818,InterPro:IPR024084,UniProtKB/TrEMBL:Q7VVZ2] [protein=isocitrate dehydrogenase [NADP]] [protein_id=CAE42760.1] [location=complement(204636..205892)] [gbkey=CDS]
|
||||||
|
ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
|
||||||
|
ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
|
||||||
|
GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
|
||||||
|
TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
|
||||||
|
TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
|
||||||
|
CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
|
||||||
|
TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
|
||||||
|
GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
|
||||||
|
GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
|
||||||
|
CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
|
||||||
|
ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
|
||||||
|
GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
|
||||||
|
GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
|
||||||
|
TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
|
||||||
|
CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
|
||||||
|
GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
|
||||||
|
CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
|
||||||
|
TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
|
||||||
|
>lcl|BX640418.1_cds_CAE42692.1_2372 [gene=pepA] [locus_tag=BP2421] [db_xref=GOA:Q7VW48,InterPro:IPR000819,InterPro:IPR008283,InterPro:IPR011356,InterPro:IPR023042] [protein=cytosol aminopeptidase] [protein_id=CAE42692.1] [location=131847..133346] [gbkey=CDS]
|
||||||
|
ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
|
||||||
|
TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
|
||||||
|
GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
|
||||||
|
CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
|
||||||
|
AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
|
||||||
|
CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
|
||||||
|
GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
|
||||||
|
GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
|
||||||
|
CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
|
||||||
|
GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
|
||||||
|
CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
|
||||||
|
CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
|
||||||
|
TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
|
||||||
|
CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
|
||||||
|
GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
|
||||||
|
AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
|
||||||
|
CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
|
||||||
|
CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
|
||||||
|
CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
|
||||||
|
CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
|
||||||
|
GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
|
||||||
|
ATGCAGTACCTGCTGGACCAGGCAGGCTGA
|
||||||
|
>lcl|BX640420.1_cds_CAE43408.1_3088 [gene=pgm] [locus_tag=BP3141] [db_xref=GOA:Q7VUF5,InterPro:IPR005841,InterPro:IPR005843,InterPro:IPR005844,InterPro:IPR005845,InterPro:IPR005846,InterPro:IPR016055,InterPro:IPR016066,UniProtKB/TrEMBL:Q7VUF5] [protein=phosphoglucomutase] [protein_id=CAE43408.1] [location=217601..218983] [gbkey=CDS]
|
||||||
|
GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
|
||||||
|
ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
|
||||||
|
GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
|
||||||
|
GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
|
||||||
|
GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
|
||||||
|
GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
|
||||||
|
CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
|
||||||
|
TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
|
||||||
|
ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
|
||||||
|
CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
|
||||||
|
TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
|
||||||
|
CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
|
||||||
|
GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
|
||||||
|
GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
|
||||||
|
CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
|
||||||
|
TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
|
||||||
|
TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
|
||||||
|
CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
|
||||||
|
AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
|
||||||
|
AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
|
||||||
|
>lcl|BX640416.1_cds_CAE42081.1_1761 [gene=tyrB] [locus_tag=BP1795] [db_xref=GOA:Q7VXH5,InterPro:IPR000796,InterPro:IPR004838,InterPro:IPR004839,InterPro:IPR015421,InterPro:IPR015424,UniProtKB/TrEMBL:Q7VXH5] [protein=aromatic-amino-acid aminotransferase] [protein_id=CAE42081.1] [location=complement(151299..152501)] [gbkey=CDS]
|
||||||
|
ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
|
||||||
|
ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
|
||||||
|
GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
|
||||||
|
ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
|
||||||
|
CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
|
||||||
|
GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
|
||||||
|
GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
|
||||||
|
CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
|
||||||
|
GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
|
||||||
|
CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
|
||||||
|
TCGCCGCGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
|
||||||
|
GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
|
||||||
|
GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
|
||||||
|
CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
|
||||||
|
GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
|
||||||
|
TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
|
||||||
|
CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
|
||||||
|
GGTGCTGAAGTAG
|
2
test-data/tohama_I_minimized_features_typed.csv
Normal file
2
test-data/tohama_I_minimized_features_typed.csv
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
|
||||||
|
lcl|BX640419.1_cds_CAE43044.1_2724-lcl|BX640411.1_cds_CAE40628.1_248-lcl|BX640420.1_cds_CAE43224.1_2904-lcl|BX640418.1_cds_CAE42760.1_2440-lcl|BX640418.1_cds_CAE42692.1_2372-lcl|BX640420.1_cds_CAE43408.1_3088-lcl|BX640416.1_cds_CAE42081.1_1761,1,ST-2 complex,1,1,1,1,1,1,1
|
|
Reference in New Issue
Block a user