@OPERATORS.register_module(OP_NAME)
class FixUnicodeMapper(Mapper):
    """Mapper that repairs Unicode errors found in text samples."""

    # Class-level flag consumed outside this block; presumably tells the
    # framework that this op processes samples in batches -- confirm
    # against the Mapper base class.
    _batched_op = True

    def __init__(self, normalization: str = None, *args, **kwargs):
        """
        Set up the mapper and resolve the Unicode normalization form.

        :param normalization: name of the Unicode normalization form to
            use. Matching is case-insensitive and the accepted values
            are 'NFC', 'NFKC', 'NFD' and 'NFKD'. ``None`` or an empty
            value selects 'NFC'.
        :param args: extra positional args forwarded to the base class
        :param kwargs: extra keyword args forwarded to the base class
        :raises ValueError: if the requested form is not one of the
            four supported values.
        """
        super().__init__(*args, **kwargs)

        supported_modes = ('NFC', 'NFKC', 'NFD', 'NFKD')

        # A missing or empty value falls back to the default form;
        # anything else is stored upper-cased.
        if not normalization or len(normalization) == 0:
            self.normalization = 'NFC'
        else:
            self.normalization = normalization.upper()

        if self.normalization.upper() in supported_modes:
            return

        # The message reports the value exactly as the caller passed it,
        # not the upper-cased form stored on the instance.
        raise ValueError(f'Normalization mode [{normalization}] is not '
                         'supported. Can only be one of '
                         '["NFC", "NFKC", "NFD", "NFKD"]')