Finished writing GenBank fetching function

This commit is contained in:
Harrison Deng 2025-01-02 18:11:42 +00:00
parent 5c7a67b35e
commit 7b079650e0
2 changed files with 35 additions and 5 deletions

View File

@ -0,0 +1,24 @@
from typing import Any, Generator, List, Sequence
from Bio.Align import PairwiseAligner
from Bio import Entrez
from Bio import SeqIO
from mlstmyfasta.engine.data.genomics import Strand, StrandFeature
async def fetch_ncbi_genbank(genbank_id: str) -> Strand:
    """Fetch a GenBank record from NCBI and convert it into a ``Strand``.

    The record is downloaded from the ``nucleotide`` database through
    ``Bio.Entrez.efetch`` in GenBank text format and parsed with
    ``Bio.SeqIO.read``. Every annotated feature that has a parsed location
    becomes one ``StrandFeature``.

    Args:
        genbank_id: Identifier passed to Entrez as the ``id`` of the record.

    Returns:
        A ``Strand`` named after ``genbank_id`` whose ``coding`` is the record
        sequence as a plain string and whose ``features`` mirror the record's
        annotated features.

    Raises:
        ValueError: Raised by ``SeqIO.read`` when the response does not
            contain exactly one record.
    """
    # NOTE(review): Entrez.efetch performs blocking network I/O, so this
    # coroutine blocks the event loop while the download runs. Consider
    # off-loading to a worker thread if callers rely on concurrency.
    with Entrez.efetch(
        db="nucleotide", id=genbank_id, rettype="gb", retmode="text"
    ) as fetch_stream:
        record = SeqIO.read(fetch_stream, "genbank")

    sequence_features = [
        StrandFeature(
            # The feature kind ("gene", "CDS", ...) lives in ``type``;
            # SeqFeature has no ``key`` attribute.
            name=feature.type,
            # The location is an object, not a "start..end" string. Biopython
            # exposes zero-based, end-exclusive coordinates, so
            # ``coding[start:end]`` slices out the feature span.
            start=int(feature.location.start),
            end=int(feature.location.end),
            # ``qualifiers`` is already a mapping; copy it so the Strand does
            # not share state with the parsed record.
            # NOTE(review): Biopython usually stores qualifier values as lists
            # of strings, while StrandFeature declares Mapping[str, str] -
            # confirm which shape downstream code expects.
            feature_properties=dict(feature.qualifiers),
        )
        for feature in record.features
        # Presumably only unparseable locations end up as None; such features
        # are skipped because they have no usable coordinates.
        if feature.location is not None
    ]
    # ``record.seq`` is a Seq object; convert it to match ``Strand.coding: str``.
    return Strand(name=genbank_id, coding=str(record.seq), features=sequence_features)

View File

@ -1,12 +1,18 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import Mapping, Sequence
@dataclass @dataclass
class Sequence: class StrandFeature:
name: str name: str
sequence: str start: int
end: int
feature_properties: Mapping[str, str]
@dataclass @dataclass
class SequenceFeature: class Strand:
type: str
name: str name: str
coding: str
features: Sequence[StrandFeature]