Added code to retrieve sequences and annotations from NCBI GenBank
This commit is contained in:
parent
ba2b688e89
commit
a27e09da31
26
src/autobigs/engine/analysis/genbank.py
Normal file
26
src/autobigs/engine/analysis/genbank.py
Normal file
@ -0,0 +1,26 @@
|
||||
import asyncio
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
import tempfile
|
||||
from typing import Iterable, Union
|
||||
from Bio import Entrez
|
||||
from Bio import SeqIO
|
||||
|
||||
from autobigs.engine.structures.genomics import AnnotatedString, StringAnnotation
|
||||
|
||||
def _read_genbank_record(genbank_id: str):
    """Download and parse a single GenBank record from NCBI's nucleotide database.

    This helper is synchronous: the HTTP response body is consumed while
    ``SeqIO.read`` parses it, so the network read and the parse both happen
    here. Callers running on an event loop should invoke it in a worker thread.
    """
    with Entrez.efetch(db="nucleotide", id=genbank_id, rettype="gb", retmode="text") as fetch_stream:
        return SeqIO.read(fetch_stream, "genbank")


async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
    """Fetch a GenBank record from NCBI and convert it to an ``AnnotatedString``.

    Args:
        genbank_id: Identifier passed to Entrez ``efetch`` as ``id`` against
            the ``nucleotide`` database. It is also used as the name of the
            returned ``AnnotatedString``.

    Returns:
        An ``AnnotatedString`` holding the record's sequence as a plain string
        and one ``StringAnnotation`` per feature of the record. Each
        annotation carries the feature type, its start/end coordinates and the
        feature qualifiers with their values collected into sets.

    Raises:
        ValueError: Propagated from ``SeqIO.read`` when the response does not
            contain exactly one record.
        Any network/HTTP error raised by ``Entrez.efetch`` is propagated
        unchanged.
    """
    # Both the download and the parse run in a worker thread: efetch only
    # opens the connection, the body is read lazily during parsing, and doing
    # that on the event loop would block it for the whole transfer.
    record = await asyncio.to_thread(_read_genbank_record, genbank_id)

    sequence_features = [
        StringAnnotation(
            type=feature.type,
            start=int(feature.location.start),
            # Biopython locations are zero-based and end-exclusive already
            # (GenBank 123..150 is stored as [122:150]), so the end is passed
            # through as-is; adding 1 would overshoot the feature by one base.
            # NOTE(review): assumes StringAnnotation.end is exclusive, as the
            # original inline comment stated - confirm against its definition.
            end=int(feature.location.end),
            # Build a fresh mapping instead of rewriting the record's own
            # qualifier dict in place; values become sets as before.
            feature_properties={
                qualifier_key: set(qualifier_values)
                for qualifier_key, qualifier_values in feature.qualifiers.items()
            },
        )
        for feature in record.features
    ]
    return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
Loading…
x
Reference in New Issue
Block a user