Finished writing genbank fetching function
This commit is contained in:
parent
5c7a67b35e
commit
7b079650e0
24
src/mlstmyfasta/engine/annotations/wgs.py
Normal file
24
src/mlstmyfasta/engine/annotations/wgs.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from typing import Any, Generator, List, Sequence
|
||||||
|
from Bio.Align import PairwiseAligner
|
||||||
|
from Bio import Entrez
|
||||||
|
from Bio import SeqIO
|
||||||
|
|
||||||
|
from mlstmyfasta.engine.data.genomics import Strand, StrandFeature
|
||||||
|
|
||||||
|
async def fetch_ncbi_genbank(genbank_id: str) -> Strand:
    """Download a GenBank record from NCBI and convert it into a ``Strand``.

    Args:
        genbank_id: Identifier handed to ``Entrez.efetch`` as ``id`` for the
            ``nucleotide`` database.

    Returns:
        A ``Strand`` named after ``genbank_id`` whose ``coding`` is the
        record's sequence as a plain string and whose ``features`` contain
        one ``StrandFeature`` per annotated feature with a parsed location.

    Note:
        ``start``/``end`` are taken from Biopython's location object, which
        uses a 0-based start and an exclusive end, so
        ``strand.coding[feature.start:feature.end]`` spans the feature.
    """
    # NOTE(review): Entrez.efetch and SeqIO.read are blocking calls, so this
    # coroutine occupies the event loop for the whole download.
    with Entrez.efetch(db="nucleotide", id=genbank_id, rettype="gb", retmode="text") as fetch_stream:
        record = SeqIO.read(fetch_stream, "genbank")

    sequence_features = []
    for feature in record.features:
        location = feature.location
        if location is None:
            # Presumably the parser could not interpret this feature's
            # location; without coordinates there is nothing to record.
            continue

        # Qualifier values arrive as lists of strings; StrandFeature expects
        # a str -> str mapping, so multi-valued qualifiers are joined.
        feature_properties = {
            key: value if isinstance(value, str) else "; ".join(map(str, value))
            for key, value in feature.qualifiers.items()
        }
        sequence_features.append(
            StrandFeature(
                name=feature.type,
                start=int(location.start),
                end=int(location.end),
                feature_properties=feature_properties,
            )
        )

    return Strand(name=genbank_id, coding=str(record.seq), features=sequence_features)
|
@ -1,12 +1,18 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Mapping, Sequence
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class StrandFeature:
    """A named, annotated interval on a strand.

    Fields are positional in the order: name, start, end,
    feature_properties.
    """

    # Label identifying the feature.
    name: str
    # Integer position where the feature begins.
    start: int
    # Integer position where the feature ends.
    end: int
    # Additional key/value annotations attached to the feature.
    feature_properties: Mapping[str, str]
|
||||||
|
|
||||||
@dataclass
class Strand:
    """A named sequence together with the features annotated on it.

    Fields are positional in the order: name, coding, features.
    """

    # Identifier of the strand.
    name: str
    # The strand's sequence text.
    coding: str
    # Features annotated on this strand.
    features: Sequence[StrandFeature]
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user