[docs]@OPERATORS.register_module("remove_non_chinese_character_mapper")classRemoveNonChineseCharacterlMapper(Mapper):"""Mapper to remove non chinese Character in text samples."""_batched_op=True
def __init__(
    self,
    keep_alphabet: bool = True,
    keep_number: bool = True,
    keep_punc: bool = True,
    *args,
    **kwargs,
):
    """
    Initialization method.

    Assembles ``self.pattern``, a negated regex character class. The
    class always excludes the range ``\\u4e00-\\u9fa5`` from matching,
    and each enabled flag adds a further group of characters to that
    exclusion list.

    :param keep_alphabet: whether to keep alphabet (adds ``A-Za-z``
        to the excluded characters)
    :param keep_number: whether to keep number (adds ``0-9`` to the
        excluded characters)
    :param keep_punc: whether to keep punctuation (adds a fixed list
        of punctuation marks to the excluded characters)
    :param args: extra args, forwarded to the parent initializer
    :param kwargs: extra args, forwarded to the parent initializer
    """
    super().__init__(*args, **kwargs)

    # Opening of the negated class; everything appended below extends
    # the set of characters that the pattern will NOT match.
    fragments = ["[^\u4e00-\u9fa5"]
    if keep_alphabet:
        fragments.append("A-Za-z")
    if keep_number:
        fragments.append("0-9")

    # The closing fragment differs by branch: with punctuation kept the
    # class is closed together with a `+` quantifier, otherwise it is
    # closed with a bare `]` (no quantifier).
    if keep_punc:
        closing = "., ,\\-。%《*》/•、&&(—)(+):?!!“”·]+"
    else:
        closing = "]"
    fragments.append(closing)

    self.pattern = "".join(fragments)