Added notation to indicate inexact matching in CSV

This commit is contained in:
Harrison Deng 2025-02-12 15:59:19 +00:00
parent 7fb3eab5b6
commit 4183840ba0

View File

@@ -6,10 +6,10 @@ from typing import AsyncIterable, Collection, Mapping, Sequence, Union
from autobigs.engine.structures.mlst import Allele, MLSTProfile from autobigs.engine.structures.mlst import Allele, MLSTProfile
def alleles_to_map(alleles: Collection[Allele]) -> Mapping[str, Union[list[str], str]]: def alleles_to_text_map(alleles: Collection[Allele]) -> Mapping[str, Union[list[str], str]]:
result = defaultdict(list) result = defaultdict(list)
for allele in alleles: for allele in alleles:
result[allele.allele_locus].append(allele.allele_variant) result[allele.allele_locus].append(allele.allele_variant + ("*" if allele.partial_match_profile is not None else ""))
for locus in result.keys(): for locus in result.keys():
if len(result[locus]) == 1: if len(result[locus]) == 1:
result[locus] = result[locus][0] # Take the only one result[locus] = result[locus][0] # Take the only one
@@ -24,7 +24,7 @@ async def write_mlst_profiles_as_csv(mlst_profiles_iterable: AsyncIterable[tuple
if mlst_profile is None: if mlst_profile is None:
failed.append(name) failed.append(name)
continue continue
allele_mapping = alleles_to_map(mlst_profile.alleles) allele_mapping = alleles_to_text_map(mlst_profile.alleles)
if writer is None: if writer is None:
header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())] header = ["id", "st", "clonal-complex", *sorted(allele_mapping.keys())]
writer = csv.DictWriter(filehandle, fieldnames=header) writer = csv.DictWriter(filehandle, fieldnames=header)