@OPERATORS.register_module(OP_NAME)
class FixUnicodeMapper(Mapper):
    """Mapper to fix unicode errors in text samples."""

    # NOTE(review): presumably read by the Mapper base class to select
    # batched processing -- confirm against the base implementation.
    _batched_op = True

    def __init__(self, normalization: str = None, *args, **kwargs):
        """
        Initialization method.

        :param normalization: the Unicode normalization form to use, one
            of 'NFC', 'NFKC', 'NFD' or 'NFKD' (matched
            case-insensitively). ``None`` or an empty string selects the
            default, 'NFC'.
        :param args: extra positional args, forwarded to the parent
            initializer
        :param kwargs: extra keyword args, forwarded to the parent
            initializer
        :raises ValueError: if ``normalization`` is not one of the
            supported forms
        """
        super().__init__(*args, **kwargs)

        # Truthiness already covers both None and "", so no separate
        # length check is needed.
        self.normalization = normalization.upper() if normalization else "NFC"

        # The stored value is already upper-cased (or the literal default),
        # so it can be compared directly without another .upper() call.
        if self.normalization not in ("NFC", "NFKC", "NFD", "NFKD"):
            # Report the caller's original spelling, not the upper-cased one.
            raise ValueError(
                f"Normalization mode [{normalization}] is not "
                "supported. Can only be one of "
                '["NFC", "NFKC", "NFD", "NFKD"]'
            )