Completed NCBI lookup, FASTA reading, ABIF reading, and Institut Pasteur querying
This commit is contained in:
parent
b834aa93b0
commit
362e0867e5
@ -1,4 +1,5 @@
|
|||||||
aiohttp[speedups]
|
aiohttp[speedups]
|
||||||
biopython
|
biopython
|
||||||
pytest
|
pytest
|
||||||
pytest-asyncio
|
pytest-asyncio
|
||||||
|
dataclasses-json
|
@ -6,30 +6,8 @@ from Bio import Entrez
|
|||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from mlstmyfasta.engine.data.genomics import AnnotatedString, StringAnnotation
|
||||||
# TODO Change this out for a more professional approach
|
from mlstmyfasta.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
||||||
Entrez.email = "yunyangdeng@outlook.com"
|
|
||||||
|
|
||||||
from mlstmyfasta.engine.data.genomics import AnnotatedString, StringAnnotation, get_feature_coding
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
|
||||||
with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
|
|
||||||
record = SeqIO.read(fetch_stream, "genbank")
|
|
||||||
sequence_features = list()
|
|
||||||
for feature in record.features:
|
|
||||||
start = int(feature.location.start)
|
|
||||||
end = int(feature.location.end)
|
|
||||||
qualifiers = feature.qualifiers
|
|
||||||
for qualifier_key in qualifiers:
|
|
||||||
qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
|
|
||||||
sequence_features.append(StringAnnotation(
|
|
||||||
type=feature.type,
|
|
||||||
start=start,
|
|
||||||
end=end+1, # Position is exclusive
|
|
||||||
feature_properties=qualifiers
|
|
||||||
))
|
|
||||||
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
|
||||||
|
|
||||||
|
|
||||||
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length:int = 512, gene_targets:Set = set()):
|
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length:int = 512, gene_targets:Set = set()):
|
||||||
@ -60,4 +38,7 @@ async def annotate_from_genbank(genbank_id: str, query_name: str, query_string:
|
|||||||
end=np.max(top_alignment.aligned[0]), # and the end of the last chunk
|
end=np.max(top_alignment.aligned[0]), # and the end of the last chunk
|
||||||
feature_properties=dict(annotation.feature_properties) # same as original
|
feature_properties=dict(annotation.feature_properties) # same as original
|
||||||
))
|
))
|
||||||
return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
||||||
|
|
||||||
|
def get_feature_coding(annotated_string: AnnotatedString, string_annotation: StringAnnotation) -> str:
|
||||||
|
return annotated_string.sequence[string_annotation.start:string_annotation.end]
|
@ -2,5 +2,5 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Allele:
|
class Allele:
|
||||||
allele_name: str
|
allele_loci: str
|
||||||
allele_variant: str
|
allele_variant: str
|
@ -1,5 +1,6 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Mapping, Sequence, Set
|
from numbers import Number
|
||||||
|
from typing import Mapping, Sequence, Set, Union
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -8,12 +9,97 @@ class StringAnnotation:
|
|||||||
start: int
|
start: int
|
||||||
end: int
|
end: int
|
||||||
feature_properties: Mapping[str, Set[str]]
|
feature_properties: Mapping[str, Set[str]]
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AnnotatedString:
|
class NamedString:
|
||||||
name: str
|
name: str
|
||||||
sequence: str
|
sequence: str
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AnnotatedString(NamedString):
|
||||||
annotations: Sequence[StringAnnotation]
|
annotations: Sequence[StringAnnotation]
|
||||||
|
|
||||||
def get_feature_coding(annotated_string: AnnotatedString, string_annotation: StringAnnotation) -> str:
|
@dataclass
|
||||||
return annotated_string.sequence[string_annotation.start:string_annotation.end]
|
class SangerTraceData:
|
||||||
|
sequence: Sequence[str]
|
||||||
|
seq_param_file_name: str
|
||||||
|
analysis_proto_settings_name: str
|
||||||
|
analysis_rpto_settings_ver: str
|
||||||
|
analysis_proto_xml_data: str
|
||||||
|
analysis_proto_xml_schema_ver: str
|
||||||
|
sample_comment: Union[None, str]
|
||||||
|
capillary_machine: bool
|
||||||
|
container_identifier: str
|
||||||
|
container_name: str
|
||||||
|
comment_title: str
|
||||||
|
channel_1: Sequence[Number]
|
||||||
|
channel_2: Sequence[Number]
|
||||||
|
channel_3: Sequence[Number]
|
||||||
|
channel_4: Sequence[Number]
|
||||||
|
measured_voltage_dv: Sequence[Number]
|
||||||
|
measured_current_ma: Sequence[Number]
|
||||||
|
measured_power_mw: Sequence[Number]
|
||||||
|
measured_temperature_celsius: Sequence[Number]
|
||||||
|
down_sample_factor: Number
|
||||||
|
dye_1: str
|
||||||
|
dye_2: str
|
||||||
|
dye_3: str
|
||||||
|
dye_4: str
|
||||||
|
dye_wavelength_1: str
|
||||||
|
dye_wavelength_2: str
|
||||||
|
dye_wavelength_3: str
|
||||||
|
dye_wavelength_4: str
|
||||||
|
dye_set_name: str
|
||||||
|
electrophoresis_voltage_setting_v: Number
|
||||||
|
start_run_event: str
|
||||||
|
stop_run_event: str
|
||||||
|
start_collection_event: str
|
||||||
|
stop_collection_event: str
|
||||||
|
base_order: Sequence[str]
|
||||||
|
gel_type_desc: str
|
||||||
|
injection_time_sec: Number
|
||||||
|
inection_voltage_v: Number
|
||||||
|
lane_or_capillary: Number
|
||||||
|
sample_tracking_id: str
|
||||||
|
length_to_detector_cm: Number
|
||||||
|
laser_power_mw: Number
|
||||||
|
instrument_name_and_serial: str
|
||||||
|
data_collection_module_file: str
|
||||||
|
model_number: str
|
||||||
|
pixels_avg_per_lane: Number
|
||||||
|
number_of_capillaries: Number
|
||||||
|
marked_off_scale_scans: Union[None, Sequence[Number]]
|
||||||
|
# Skipped Ovrl, OvrV
|
||||||
|
mobility_file: str
|
||||||
|
# Skipped PRJT, PROJ
|
||||||
|
pixel_bin_size: Number
|
||||||
|
# Skipped scan rate
|
||||||
|
results_group_comment: Union[None, str]
|
||||||
|
results_group_name: str
|
||||||
|
run_module_ver: str
|
||||||
|
run_module_xml: str
|
||||||
|
run_module_xml_ver: str
|
||||||
|
run_proto_name: str
|
||||||
|
run_proto_ver: str
|
||||||
|
run_start_date: str # Date time object
|
||||||
|
run_stop_date: str # Date time object
|
||||||
|
data_collection_start_date: str
|
||||||
|
data_collection_stop_date: str
|
||||||
|
run_name: str
|
||||||
|
run_start_time: str # time object
|
||||||
|
run_stop_time: str # time object
|
||||||
|
collection_start_time: str # time object
|
||||||
|
collection_stop_time: str # time object
|
||||||
|
saturated_data_points: Union[None, Sequence[Number]]
|
||||||
|
color_rescaling_divisor: Number
|
||||||
|
scan_count: Number
|
||||||
|
polymer_lot_expiration: str # date time object
|
||||||
|
polymer_lot_number: Number
|
||||||
|
sample_name: str
|
||||||
|
# Skipped genescan data
|
||||||
|
# Skipped size standard file name
|
||||||
|
data_collection_software_ver: str
|
||||||
|
data_collection_firmware_ver: str
|
||||||
|
run_temperature_setting_celcius: Number
|
||||||
|
well_id: str
|
||||||
|
plate_user_name: str
|
||||||
|
104
src/mlstmyfasta/engine/local/abif.py
Normal file
104
src/mlstmyfasta/engine/local/abif.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
import asyncio
|
||||||
|
from numbers import Number
|
||||||
|
from os import path
|
||||||
|
from typing import Sequence, Union
|
||||||
|
from mlstmyfasta.engine.data.genomics import SangerTraceData
|
||||||
|
from Bio.SeqRecord import SeqRecord
|
||||||
|
from Bio import SeqIO
|
||||||
|
|
||||||
|
|
||||||
|
def _biopython_read_abif_sequence(seq_path: str) -> SeqRecord:
|
||||||
|
with open(seq_path, "rb") as seq_handle:
|
||||||
|
return SeqIO.read(seq_handle, "abi")
|
||||||
|
|
||||||
|
|
||||||
|
async def load_sanger_sequence(seq_path: str) -> SangerTraceData:
|
||||||
|
ext = path.splitext(seq_path)[1]
|
||||||
|
if ext.lower() != ".ab1" and ext.lower() != "abi":
|
||||||
|
raise ValueError(
|
||||||
|
'seq_path must have file extension of "ab1", or "abi".')
|
||||||
|
biopython_seq = await asyncio.to_thread(_biopython_read_abif_sequence, seq_path)
|
||||||
|
biopython_annotations = biopython_seq.annotations
|
||||||
|
|
||||||
|
# Lot of type ignoring since Biopython did not define their typing.
|
||||||
|
biopython_abif_raw = biopython_annotations["abif_raw"] # type: ignore
|
||||||
|
trace_data = SangerTraceData(
|
||||||
|
biopython_seq.seq,
|
||||||
|
biopython_abif_raw.get("APFN2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("APrN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("APrV1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("APrX1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("APXV1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("CMNT1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("CpEP1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("CTID1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("CTNM1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("CTTL1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA5"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA6"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA7"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DATA8"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DSam1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeN2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeN3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeN4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeW1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeW2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeW3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DyeW4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("DySN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("EPVt1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("EVNT1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("EVNT2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("EVNT3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("EVNT4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("FWO_1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("GTyp1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("InSc1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("InVt1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("LANE1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("LIMS1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("LNTD1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("LsrP1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("MCHN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("MODF1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("MODL1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("NAVG1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("NLNE1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("OfSc1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("PDMF1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("PXLB1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RGCm1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RGNm1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RMdV1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RMdX1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RMXV1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RPrN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RPrV1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUND1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUND2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUND3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUND4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RunN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUNT1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUNT2"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUNT3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("RUNT4"), # type: ignore
|
||||||
|
biopython_abif_raw.get("Satd"), # type: ignore
|
||||||
|
biopython_abif_raw.get("Scal1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SCAN1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SMED1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SMLt"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SMPL1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SVER1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("SVER3"), # type: ignore
|
||||||
|
biopython_abif_raw.get("Tmpr1"), # type: ignore
|
||||||
|
biopython_abif_raw.get("TUBE"), # type: ignore
|
||||||
|
biopython_abif_raw.get("User") # type: ignore
|
||||||
|
)
|
||||||
|
return trace_data
|
11
src/mlstmyfasta/engine/local/fasta.py
Normal file
11
src/mlstmyfasta/engine/local/fasta.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
import asyncio
|
||||||
|
from io import TextIOWrapper
|
||||||
|
from typing import Any, AsyncGenerator, Generator, Sequence, Union
|
||||||
|
from Bio import SeqIO
|
||||||
|
|
||||||
|
from mlstmyfasta.engine.data.genomics import NamedString
|
||||||
|
|
||||||
|
async def read_fasta(handle: Union[str, TextIOWrapper]) -> AsyncGenerator[NamedString, Any]:
|
||||||
|
fasta_sequences = asyncio.to_thread(SeqIO.parse, handle=handle, format="fasta")
|
||||||
|
for fasta_sequence in await fasta_sequences:
|
||||||
|
yield NamedString(fasta_sequence.id, str(fasta_sequence.seq))
|
@ -0,0 +1,36 @@
|
|||||||
|
from contextlib import AbstractAsyncContextManager
|
||||||
|
import re
|
||||||
|
from typing import Sequence
|
||||||
|
from aiohttp import ClientSession, ClientTimeout
|
||||||
|
from mlstmyfasta.engine.data.MLST import Allele
|
||||||
|
from mlstmyfasta.engine.data.genomics import NamedString
|
||||||
|
from mlstmyfasta.engine.remote.databases.institutpasteur.structures import InstitutPasteurSequenceRequest, InstitutPasteurSequenceResponse
|
||||||
|
|
||||||
|
class InstitutPasteurProfiler(AbstractAsyncContextManager):
|
||||||
|
|
||||||
|
async def __aenter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, database_name: str):
|
||||||
|
self._base_url = f"https://bigsdb.pasteur.fr/api/db/{database_name}/"
|
||||||
|
self._http_client = ClientSession(self._base_url, timeout=ClientTimeout(10000))
|
||||||
|
|
||||||
|
async def fetch_mlst_profile(self, sequence_string: str):
|
||||||
|
# See https://bigsdb.pasteur.fr/api/db/pubmlst_bordetella_seqdef/schemes
|
||||||
|
uri_path = f"schemes/3/sequence"
|
||||||
|
response = await self._http_client.post(uri_path, json={
|
||||||
|
"sequence": sequence_string
|
||||||
|
})
|
||||||
|
sequence_response: dict = await response.json()
|
||||||
|
exact_matches: dict[str, Sequence[dict[str, str]]] = sequence_response["exact_matches"]
|
||||||
|
for allele_loci, alleles in exact_matches.items():
|
||||||
|
for allele in alleles:
|
||||||
|
alelle_id = allele["allele_id"]
|
||||||
|
yield Allele(allele_loci=allele_loci, allele_variant=alelle_id)
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
await self._http_client.close()
|
||||||
|
|
||||||
|
async def __aexit__(self, exc_type, exc_value, traceback):
|
||||||
|
await self.close()
|
@ -0,0 +1,22 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from dataclasses_json import dataclass_json
|
||||||
|
|
||||||
|
@dataclass_json
|
||||||
|
@dataclass
|
||||||
|
class InstitutPasteurSequenceRequest:
|
||||||
|
sequence: str
|
||||||
|
details: Union[bool, None]
|
||||||
|
partial_matches: Union[bool, None]
|
||||||
|
base64: Union[bool, None]
|
||||||
|
|
||||||
|
@dataclass_json
|
||||||
|
@dataclass
|
||||||
|
class InstitutPasteurSequenceResponse:
|
||||||
|
exact_matches: Sequence[tuple[int, str]]
|
||||||
|
start: Union[int, None]
|
||||||
|
end: Union[int, None]
|
||||||
|
orientation: Union[str, None]
|
||||||
|
length: Union[int, None]
|
||||||
|
contig: Union[str, None]
|
27
src/mlstmyfasta/engine/remote/databases/ncbi/genbank.py
Normal file
27
src/mlstmyfasta/engine/remote/databases/ncbi/genbank.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import asyncio
|
||||||
|
from Bio import Entrez
|
||||||
|
from Bio import SeqIO
|
||||||
|
|
||||||
|
# TODO Change this out for a more professional approach
|
||||||
|
Entrez.email = "yunyangdeng@outlook.com"
|
||||||
|
|
||||||
|
from mlstmyfasta.engine.data.genomics import AnnotatedString, StringAnnotation
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
||||||
|
with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
|
||||||
|
record = SeqIO.read(fetch_stream, "genbank")
|
||||||
|
sequence_features = list()
|
||||||
|
for feature in record.features:
|
||||||
|
start = int(feature.location.start)
|
||||||
|
end = int(feature.location.end)
|
||||||
|
qualifiers = feature.qualifiers
|
||||||
|
for qualifier_key in qualifiers:
|
||||||
|
qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
|
||||||
|
sequence_features.append(StringAnnotation(
|
||||||
|
type=feature.type,
|
||||||
|
start=start,
|
||||||
|
end=end+1, # Position is exclusive
|
||||||
|
feature_properties=qualifiers
|
||||||
|
))
|
||||||
|
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
@ -1,18 +0,0 @@
|
|||||||
from dataclasses import dataclass
|
|
||||||
from typing import Sequence, Union
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SequenceRequestBody:
|
|
||||||
sequence: str
|
|
||||||
details: Union[None, str]
|
|
||||||
partial_matches: Union[None, bool]
|
|
||||||
base64: Union[None, bool]
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SequenceResponse:
|
|
||||||
exact_matches: Sequence[tuple[str, str]]
|
|
||||||
start: Union[None, int]
|
|
||||||
end: Union[None, int]
|
|
||||||
orientation: Union[None, str]
|
|
||||||
length: Union[None, int]
|
|
||||||
contig: Union[None, str]
|
|
8
tests/mlstmyfasta/engine/local/test_abif.py
Normal file
8
tests/mlstmyfasta/engine/local/test_abif.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from mlstmyfasta.engine.local.abif import load_sanger_sequence
|
||||||
|
|
||||||
|
async def test_load_sanger_sequence_has_data():
|
||||||
|
assert os.path.exists("tests/resources/1I1_F_P1815443_047.ab1")
|
||||||
|
result_data = await load_sanger_sequence("tests/resources/1I1_F_P1815443_047.ab1")
|
||||||
|
assert result_data is not None
|
7
tests/mlstmyfasta/engine/local/test_fasta.py
Normal file
7
tests/mlstmyfasta/engine/local/test_fasta.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
from mlstmyfasta.engine.local.fasta import read_fasta
|
||||||
|
|
||||||
|
|
||||||
|
async def test_fasta_reader_not_none():
|
||||||
|
named_strings = read_fasta("tests/resources/tohama_I_bpertussis.fasta")
|
||||||
|
async for named_string in named_strings:
|
||||||
|
assert named_string.name == "BX470248.1"
|
@ -0,0 +1,16 @@
|
|||||||
|
from Bio import SeqIO
|
||||||
|
from mlstmyfasta.engine.data.MLST import Allele
|
||||||
|
from mlstmyfasta.engine.remote.databases.institutpasteur.profiling import InstitutPasteurProfiler
|
||||||
|
|
||||||
|
|
||||||
|
async def test_profiling_results_in_exact_matches_when_exact():
|
||||||
|
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||||
|
async with InstitutPasteurProfiler(database_name="pubmlst_bordetella_seqdef") as dummy_profiler:
|
||||||
|
exact_matches = dummy_profiler.fetch_mlst_profile(sequence_string=sequence)
|
||||||
|
targets_left = {"adk", "fumC", "glyA", "tyrB", "icd", "pepA", "pgm"}
|
||||||
|
async for exact_match in exact_matches:
|
||||||
|
assert isinstance(exact_match, Allele)
|
||||||
|
assert exact_match.allele_variant == '1' # All of Tohama I has allele id I
|
||||||
|
targets_left.remove(exact_match.allele_loci)
|
||||||
|
|
||||||
|
assert len(targets_left) == 0
|
@ -0,0 +1,5 @@
|
|||||||
|
from mlstmyfasta.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
||||||
|
|
||||||
|
|
||||||
|
async def test_fetch_ncbi_genbank_with_id_works():
|
||||||
|
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0
|
@ -1,14 +1,11 @@
|
|||||||
from mlstmyfasta.engine.annotations import annotate_from_genbank, fetch_ncbi_genbank
|
from mlstmyfasta.engine.annotate import annotate_from_genbank, fetch_ncbi_genbank
|
||||||
from Bio import SeqIO
|
from Bio import SeqIO
|
||||||
|
|
||||||
from mlstmyfasta.engine.data.genomics import AnnotatedString
|
from mlstmyfasta.engine.data.genomics import AnnotatedString
|
||||||
|
|
||||||
async def test_fetch_ncbi_genbank_with_id_works():
|
|
||||||
assert len((await fetch_ncbi_genbank("CP011448.1")).sequence) > 0
|
|
||||||
|
|
||||||
async def test_annotate_from_genbank_for_adk_annotation():
|
async def test_annotate_from_genbank_for_adk_annotation():
|
||||||
sequence = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq
|
sequence = str(SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq)
|
||||||
annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", str(sequence), max_annotation_length=750, gene_targets=set(["adk"]))
|
annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", sequence, max_annotation_length=750, gene_targets=set(["adk"]))
|
||||||
assert isinstance(annotated_sequence, AnnotatedString)
|
assert isinstance(annotated_sequence, AnnotatedString)
|
||||||
assert len(annotated_sequence.annotations) >= 1
|
assert len(annotated_sequence.annotations) >= 1
|
||||||
assert annotated_sequence.annotations[0].type == "gene"
|
assert annotated_sequence.annotations[0].type == "gene"
|
BIN
tests/resources/1I1_F_P1815443_047.ab1
Normal file
BIN
tests/resources/1I1_F_P1815443_047.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I1_R_P1815443_094.ab1
Normal file
BIN
tests/resources/1I1_R_P1815443_094.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I2_F_P1815443_048.ab1
Normal file
BIN
tests/resources/1I2_F_P1815443_048.ab1
Normal file
Binary file not shown.
BIN
tests/resources/1I2_R_P1815443_011.ab1
Normal file
BIN
tests/resources/1I2_R_P1815443_011.ab1
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user