@OPERATORS.register_module('remove_table_text_mapper')
class RemoveTableTextMapper(Mapper):
    """
    Mapper that strips table-like text out of text samples.

    Tables are located with a regular expression, for tables whose number
    of columns lies within a configurable range.
    """

    # NOTE(review): presumably read by the base ``Mapper`` to select batched
    # processing -- confirm against the base class.
    _batched_op = True

    def __init__(
        self,
        min_col: Annotated[int, Field(ge=2, le=20)] = 2,
        max_col: Annotated[int, Field(ge=2, le=20)] = 20,
        *args,
        **kwargs,
    ):
        """
        Store the column range and the table-matching pattern template.

        :param min_col: The smallest number of table columns to remove.
            Annotated with the bounds ``ge=2``, ``le=20``; defaults to 2.
        :param max_col: The largest number of table columns to remove.
            Annotated with the bounds ``ge=2``, ``le=20``; defaults to 20.
        :param args: extra positional args, forwarded to the base class
            initializer
        :param kwargs: extra keyword args, forwarded to the base class
            initializer
        """
        super().__init__(*args, **kwargs)

        self.min_col, self.max_col = min_col, max_col

        # Regex template with a single ``%d`` placeholder that must be
        # filled in before compiling. Read literally, it matches, right
        # after a newline, two or more consecutive lines where each line is
        # one non-whitespace token followed by exactly ``%d`` repetitions of
        # "one separator character + one non-whitespace token", and ends
        # with one or more newlines. A line with N repetitions therefore
        # has N + 1 tokens.
        # The separator class ``[ |\t]`` accepts a space, a literal ``|``
        # or a tab (``|`` is not alternation inside a character class).
        self.pattern = r'(?<=\n)((\S+?)([ |\t](\S+?)){%d}\n+){2,}'