Completed untested annotation function
This commit is contained in:
parent
aa2d494bee
commit
28733337d2
@ -24,8 +24,9 @@ async def fetch_ncbi_genbank(genbank_id: str) -> AnnotatedString:
|
||||
return AnnotatedString(name=genbank_id, sequence=str(record.seq), annotations=sequence_features)
|
||||
|
||||
|
||||
async def annotate_from_genbank(genbank_id: str, query_coding: str, max_annotation_length=1000):
|
||||
async def annotate_from_genbank(genbank_id: str, query_name: str, query_string: str, max_annotation_length=750):
|
||||
reference_annotations = await fetch_ncbi_genbank(genbank_id=genbank_id)
|
||||
query_annotations = list()
|
||||
aligner = PairwiseAligner("blastn")
|
||||
aligner.mode = "local"
|
||||
for annotation in reference_annotations.annotations:
|
||||
@ -33,10 +34,16 @@ async def annotate_from_genbank(genbank_id: str, query_coding: str, max_annotati
|
||||
# TODO implement a failsafe
|
||||
continue
|
||||
feature_string_sequence = get_feature_coding(annotated_string=reference_annotations, string_annotation=annotation)
|
||||
alignments = aligner.align(query_coding, feature_string_sequence)
|
||||
alignments = aligner.align(query_string, feature_string_sequence)
|
||||
if len(alignments) < 1:
|
||||
# TODO implement a failsafe
|
||||
continue
|
||||
top_alignment = sorted(aligner.align(query_coding, annotation))[0]
|
||||
top_alignment = sorted(aligner.align(query_string, annotation))[0]
|
||||
# TODO Check if alternatives are better
|
||||
|
||||
query_annotations.append(StringAnnotation(
|
||||
type=annotation.type, # same as original
|
||||
start=np.min(top_alignment.aligned[0]), # We only care about the start of first chunk
|
||||
end=np.max(top_alignment.aligned[0]), # and the end of the last chunk
|
||||
feature_properties=dict(annotation.feature_properties) # same as original
|
||||
))
|
||||
return AnnotatedString(name=query_name, sequence=query_string, annotations=query_annotations)
|
@ -8,4 +8,5 @@ async def test_fetch_ncbi_genbank_with_id_works():
|
||||
|
||||
async def test_annotate_from_genbank_results_in_annotations():
|
||||
sequence = SeqIO.read("tests/resources/tohama_I_bpertussis.fasta", "fasta").seq
|
||||
assert (await annotate_from_genbank("CP011448.1", str(sequence))) is AnnotatedString
|
||||
annotated_sequence = await annotate_from_genbank("CP011448.1", "bpertussis_tohamaI", str(sequence))
|
||||
assert annotated_sequence is AnnotatedString
|
Loading…
x
Reference in New Issue
Block a user