Completed untested annotation function
This commit is contained in:
parent
aa2d494bee
commit
28733337d2
@ -24,8 +24,9 @@ async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
|||||||
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
||||||
|
|
||||||
|
|
||||||
async def annotate_from_genbank(genbank_id: str, query_coding: str, max_annotation_length=1000):
|
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length=750):
|
||||||
reference_annotations = await fetch_ncbi_genbank(genbank_id=genbank_id)
|
reference_annotations = await fetch_ncbi_genbank(genbank_id=genbank_id)
|
||||||
|
query_annotations = list()
|
||||||
aligner = PairwiseAligner("blastn")
|
aligner = PairwiseAligner("blastn")
|
||||||
aligner.mode = "local"
|
aligner.mode = "local"
|
||||||
for annotation in reference_annotations.annotations:
|
for annotation in reference_annotations.annotations:
|
||||||
@ -33,10 +34,16 @@ async def annotate_from_genbank(genbank_id: str, query_coding: str, max_annotati
|
|||||||
# TODO implement a failsafe
|
# TODO implement a failsafe
|
||||||
continue
|
continue
|
||||||
feature_string_sequence = get_feature_coding(annotated_string=reference_annotations, string_annotation=annotation)
|
feature_string_sequence = get_feature_coding(annotated_string=reference_annotations, string_annotation=annotation)
|
||||||
alignments = aligner.align(query_coding, feature_string_sequence)
|
alignments = aligner.align(query_string, feature_string_sequence)
|
||||||
if len(alignments) < 1:
|
if len(alignments) < 1:
|
||||||
# TODO implement a failsafe
|
# TODO implement a failsafe
|
||||||
continue
|
continue
|
||||||
top_alignment = sorted(aligner.align(query_coding, annotation))[0]
|
top_alignment = sorted(aligner.align(query_string, annotation))[0]
|
||||||
# TODO Check if alternatives are better
|
# TODO Check if alternatives are better
|
||||||
|
query_annotations.append(StringAnnotation(
|
||||||
|
type=annotation.type, # same as original
|
||||||
|
start=np.min(top_alignment.aligned[0]), # We only care about the start of first chunk
|
||||||
|
end=np.max(top_alignment.aligned[0]), # and the end of the last chunk
|
||||||
|
feature_properties=dict(annotation.feature_properties) # same as original
|
||||||
|
))
|
||||||
|
return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
@ -8,4 +8,5 @@ async def test_fetch_ncbi_genbank_with_id_works():
|
|||||||
|
|
||||||
async def test_annotate_from_genbank_results_in_annotations():
    """Check that annotating a query sequence from a GenBank record yields an AnnotatedString."""
    # The query sequence is read from the FASTA fixture shipped with the tests.
    sequence = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq
    annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", str(sequence))
    # `is AnnotatedString` compared the returned instance against the class
    # object itself and could never be true; check the instance type instead.
    assert isinstance(annotated_sequence, AnnotatedString)
|
Loading…
x
Reference in New Issue
Block a user