Removed unused code
This commit is contained in:
parent
02985d5e37
commit
e9ffab5526
@ -1,44 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from collections.abc import Set
|
|
||||||
from typing import Any, Generator, List, Sequence
|
|
||||||
from Bio.Align import PairwiseAligner
|
|
||||||
from Bio import Entrez
|
|
||||||
from Bio import SeqIO
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from automlst.engine.data.genomics import AnnotatedString, StringAnnotation
|
|
||||||
from automlst.engine.remote.databases.ncbi.genbank import fetch_ncbi_genbank
|
|
||||||
|
|
||||||
|
|
||||||
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length:int = 512, gene_targets:Set = set()):
|
|
||||||
# TODO implement asynchronous alignment algorithm
|
|
||||||
reference_annotations = await fetch_ncbi_genbank(genbank_id=genbank_id)
|
|
||||||
query_annotations = list()
|
|
||||||
aligner = PairwiseAligner("blastn")
|
|
||||||
aligner.mode = "local"
|
|
||||||
for annotation in reference_annotations.annotations:
|
|
||||||
if annotation.type != "gene" or "gene" not in annotation.feature_properties:
|
|
||||||
continue
|
|
||||||
if len(gene_targets) > 0 and "gene" in annotation.feature_properties:
|
|
||||||
if not annotation.feature_properties["gene"].intersection(gene_targets):
|
|
||||||
continue
|
|
||||||
if max_annotation_length > 0 and annotation.end - annotation.start > max_annotation_length:
|
|
||||||
# TODO implement a failsafe
|
|
||||||
continue
|
|
||||||
feature_string_sequence = get_feature_coding(annotated_string=reference_annotations, string_annotation=annotation)
|
|
||||||
alignments = aligner.align(query_string, feature_string_sequence)
|
|
||||||
if len(alignments) < 1:
|
|
||||||
# TODO implement a failsafe
|
|
||||||
continue
|
|
||||||
top_alignment = sorted(alignments)[0]
|
|
||||||
# TODO Check if alternatives are better
|
|
||||||
query_annotations.append(StringAnnotation(
|
|
||||||
type=annotation.type, # same as original
|
|
||||||
start=np.min(top_alignment.aligned[0]), # We only care about the start of first chunk
|
|
||||||
end=np.max(top_alignment.aligned[0]), # and the end of the last chunk
|
|
||||||
feature_properties=dict(annotation.feature_properties) # same as original
|
|
||||||
))
|
|
||||||
return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
|
||||||
|
|
||||||
def get_feature_coding(annotated_string: AnnotatedString, string_annotation: StringAnnotation) -> str:
|
|
||||||
return annotated_string.sequence[string_annotation.start:string_annotation.end]
|
|
Loading…
x
Reference in New Issue
Block a user