Added code to retrieve sequences and annotations from NCBI GenBank
This commit is contained in:
parent
ba2b688e89
commit
a27e09da31
26
src/autobigs/engine/analysis/genbank.py
Normal file
26
src/autobigs/engine/analysis/genbank.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import asyncio
|
||||||
|
from contextlib import AbstractAsyncContextManager
|
||||||
|
import tempfile
|
||||||
|
from typing import Iterable, Union
|
||||||
|
from Bio import Entrez
|
||||||
|
from Bio import SeqIO
|
||||||
|
|
||||||
|
from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation
|
||||||
|
|
||||||
|
async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Fetch a GenBank record from NCBI and convert it to an AnnotatedString.

    The record is requested from the Entrez "nucleotide" database in GenBank
    text format and parsed with Biopython. Each feature of the record becomes
    one StringAnnotation.

    Args:
        genbank_id: Identifier passed to Entrez as the ``id`` of the record;
            it is also used as the name of the returned AnnotatedString.

    Returns:
        An AnnotatedString holding the record's sequence as a plain string
        and one StringAnnotation per feature, with ``start`` inclusive and
        ``end`` exclusive.

    Raises:
        Whatever ``Entrez.efetch`` or ``SeqIO.read`` raise is propagated
        unchanged (e.g. network errors, or a response that does not contain
        exactly one GenBank record).
    """

    def _download_and_parse():
        # The handle returned by efetch is a live network stream, so parsing
        # it is blocking I/O as well. Keep the request and the parse together
        # so that neither of them runs on the event loop thread.
        with Entrez.efetch(
            db="nucleotide", id=genbank_id, rettype="gb", retmode="text"
        ) as fetch_stream:
            return SeqIO.read(fetch_stream, "genbank")

    record = await asyncio.to_thread(_download_and_parse)

    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # Biopython locations are Python-slice style (start inclusive,
            # end exclusive), so the end coordinate is used as-is; adding one
            # would make every annotation one position too long.
            end=int(feature.location.end),
            # Qualifier values arrive as lists; store them as sets in a new
            # dict instead of rewriting the parsed record's own qualifiers.
            feature_properties={
                key: set(values) for key, values in feature.qualifiers.items()
            },
        )
        for feature in record.features
    ]
    return AnnotatedString(
        name=genbank_id,
        sequence=str(record.seq),
        annotations=sequence_features,
    )
|
Loading…
x
Reference in New Issue
Block a user