Finished writing genbank fetching function
This commit is contained in:
parent
5c7a67b35e
commit
7b079650e0
24
src/mlstmyfasta/engine/annotations/wgs.py
Normal file
24
src/mlstmyfasta/engine/annotations/wgs.py
Normal file
@ -0,0 +1,24 @@
|
||||
from typing import Any, Generator, List, Sequence
|
||||
from Bio.Align import PairwiseAligner
|
||||
from Bio import Entrez
|
||||
from Bio import SeqIO
|
||||
|
||||
from mlstmyfasta.engine.data.genomics import Strand, StrandFeature
|
||||
|
||||
async def fetch_ncbi_genbank(genbank_id: str) -> Strand:
    """Fetch a GenBank record from NCBI and convert it into a ``Strand``.

    Args:
        genbank_id: Identifier sent to ``Entrez.efetch`` as ``id`` against
            the ``nucleotide`` database.

    Returns:
        A ``Strand`` named after ``genbank_id`` whose ``coding`` is the
        record's sequence as text and whose ``features`` contain one
        ``StrandFeature`` per located feature of the record.

    NOTE(review): ``Entrez.efetch`` and ``SeqIO.read`` are blocking calls, so
    this coroutine does not yield to the event loop while the download and
    parse are running. Confirm whether callers rely on it being non-blocking.
    """
    with Entrez.efetch(
        db="nucleotide", id=genbank_id, rettype="gb", retmode="text"
    ) as fetch_stream:
        # Only the parse needs the open stream; the record is fully
        # materialised once SeqIO.read returns.
        record = SeqIO.read(fetch_stream, "genbank")

    sequence_features = []
    for feature in record.features:
        location = feature.location
        if location is None:
            # Biopython leaves the location unset when it cannot parse it;
            # there are no coordinates to report for such a feature.
            continue

        # Qualifiers are a mapping of name -> value(s). The GenBank parser
        # stores values as lists of strings, so they are flattened to a
        # single string to match StrandFeature's Mapping[str, str].
        feature_properties = {
            key: value if isinstance(value, str) else "; ".join(map(str, value))
            for key, value in feature.qualifiers.items()
        }

        sequence_features.append(
            StrandFeature(
                name=feature.type,
                # Location objects expose numeric bounds directly; these are
                # Biopython coordinates (0-based start, end-exclusive).
                # NOTE(review): confirm this is the convention StrandFeature
                # consumers expect.
                start=int(location.start),
                end=int(location.end),
                feature_properties=feature_properties,
            )
        )

    return Strand(
        name=genbank_id,
        coding=str(record.seq),
        features=sequence_features,
    )
|
@ -1,12 +1,18 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping, Sequence
|
||||
|
||||
|
||||
@dataclass
class StrandFeature:
    """A named, located annotation on a strand."""

    # Label of the feature.
    name: str
    # Integer bounds of the feature on the strand.
    # NOTE(review): the indexing convention (0- vs 1-based, inclusive vs
    # exclusive end) is not fixed by this class — confirm with the producers.
    start: int
    end: int
    # Free-form string properties attached to the feature.
    feature_properties: Mapping[str, str]
|
||||
|
||||
@dataclass
class Strand:
    """A named sequence together with the features annotated on it."""

    # Identifier of the strand.
    name: str
    # The sequence itself, as text.
    coding: str
    # Annotations located on this strand.
    features: Sequence["StrandFeature"]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user