27 lines
1.1 KiB
Python
27 lines
1.1 KiB
Python
import asyncio
|
|
from Bio import Entrez
|
|
from Bio import SeqIO
|
|
|
|
# TODO Change this out for a more professional approach
|
|
Entrez.email = "yunyangdeng@outlook.com"
|
|
|
|
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
|
|
|
|
|
|
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
|
with (await asyncio.to_thread(Entrez.efetch, db="nucleotide", id=genbank_id, rettype="gb", retmode="text")) as fetch_stream:
|
|
record = SeqIO.read(fetch_stream, "genbank")
|
|
sequence_features = list()
|
|
for feature in record.features:
|
|
start = int(feature.location.start)
|
|
end = int(feature.location.end)
|
|
qualifiers = feature.qualifiers
|
|
for qualifier_key in qualifiers:
|
|
qualifiers[qualifier_key] = set(qualifiers[qualifier_key])
|
|
sequence_features.append(StringAnnotation(
|
|
type=feature.type,
|
|
start=start,
|
|
end=end+1, # Position is exclusive
|
|
feature_properties=qualifiers
|
|
))
|
|
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features) |