# Some code here has been modified from:
# https://github.com/togethercomputer/RedPajama-Data/tree/rp_v1/
# --------------------------------------------------------

import regex as re

from ..base_op import OPERATORS, Mapper
@OPERATORS.register_module('remove_bibliography_mapper')
class RemoveBibliographyMapper(Mapper):
    """Mapper to remove the bibliography at the end of LaTeX documents.

    Construction only prepares ``self.pattern``, a regular expression
    source string. The pattern starts matching at the first of these
    markers and extends through ``.*$``:

    - ``\\appendix``
    - ``\\begin{references}`` / ``\\begin{REFERENCES}``
    - ``\\begin{thebibliography}``
    - ``\\bibliography{...}``

    NOTE(review): how the pattern is applied (including the regex flags
    that decide whether ``.`` crosses newlines) is not visible in this
    part of the file -- confirm against the processing method.
    """

    # Class-level flag consumed outside this view; presumably marks the
    # operator as working on batches of samples -- verify in the base class.
    _batched_op = True

    def __init__(self, *args, **kwargs):
        """
        Initialization method.

        :param args: extra positional args, forwarded to the base class
        :param kwargs: extra keyword args, forwarded to the base class
        """
        super().__init__(*args, **kwargs)

        # Adjacent raw literals are concatenated at compile time into one
        # alternation group followed by a "rest of the text" tail.
        self.pattern = (
            r'(\\appendix|'
            r'\\begin\{references\}|'
            r'\\begin\{REFERENCES\}|'
            r'\\begin\{thebibliography\}|'
            r'\\bibliography\{.*\}'
            r').*$'
        )