# Some code here has been modified from:# https://github.com/togethercomputer/RedPajama-Data/tree/rp_v1/# --------------------------------------------------------importregexasrefrom..base_opimportOPERATORS,Mapper
[文档]@OPERATORS.register_module("remove_bibliography_mapper")classRemoveBibliographyMapper(Mapper):"""Mapper to remove bibliography at the end of documents in Latex samples."""_batched_op=True
def __init__(self, *args, **kwargs):
    """
    Initialization method.

    Forwards every argument to the parent initializer, then stores the
    regular-expression source used by this operator in ``self.pattern``.

    :param args: extra args
    :param kwargs: extra args
    """
    super().__init__(*args, **kwargs)

    # LaTeX commands that mark where the section to match begins.
    start_markers = (
        r"\\appendix",
        r"\\begin\{references\}",
        r"\\begin\{REFERENCES\}",
        r"\\begin\{thebibliography\}",
        r"\\bibliography\{.*\}",
    )

    # A single capturing alternation of the markers followed by `.*$`.
    # How far `.` reaches (one line or across newlines) depends on the
    # flags supplied wherever this pattern is applied, which is not
    # decided here.
    self.pattern = "(" + "|".join(start_markers) + ").*$"