import re
from typing import Dict, Optional

from loguru import logger
from pydantic import PositiveInt

from data_juicer.ops.base_op import OPERATORS, Mapper
from data_juicer.utils.model_utils import get_model, prepare_model

# Name under which this operator is registered in the OPERATORS registry.
OP_NAME = 'pair_preference_mapper'


# TODO: Extend LLM-based OPs into API-based implementation.
@OPERATORS.register_module(OP_NAME)
class PairPreferenceMapper(Mapper):
    """Mapper to construct paired preference samples.

    The default prompts ask the model to rewrite the response of a
    query/response pair so that it is opposite to the original in some
    aspect (language style, factuality, persona, stance, ...), and to
    return both the new response and the reason for it in a tagged format.
    """

    # The prompts are assembled from adjacent string literals instead of a
    # triple-quoted block to avoid leading whitespace.
    DEFAULT_SYSTEM_PROMPT = (
        '你的任务是根据参考信息修改问答对中的回答,在语言风格、事实性、人物身份、立场等任一方面与原回答相反。'
        '必须按照以下标记格式输出,不要输出其他多余内容。\n'
        '【回答】\n'
        '生成的新回答\n'
        '【原因】\n'
        '生成该回答的原因')
    DEFAULT_INPUT_TEMPLATE = ('【参考信息】\n'
                              '{reference}\n'
                              '\n'
                              '以下是原始问答对:\n'
                              '【问题】\n'
                              '{query}\n'
                              '【回答】\n'
                              '{response}')
    # Group 1 captures the generated response, group 2 the stated reason.
    DEFAULT_OUTPUT_PATTERN = r'.*?【回答】\s*(.*?)\s*【原因】\s*(.*)'

    def __init__(self,
                 api_model: str = 'gpt-4o',
                 *,
                 api_endpoint: Optional[str] = None,
                 response_path: Optional[str] = None,
                 system_prompt: Optional[str] = None,
                 input_template: Optional[str] = None,
                 output_pattern: Optional[str] = None,
                 rejected_key: str = 'rejected_response',
                 reason_key: str = 'reason',
                 try_num: PositiveInt = 3,
                 model_params: Optional[Dict] = None,
                 sampling_params: Optional[Dict] = None,
                 **kwargs):
        """
        Initialization method.

        :param api_model: API model name.
        :param api_endpoint: URL endpoint for the API.
        :param response_path: Path to extract content from the API response.
            Defaults to 'choices.0.message.content'.
        :param system_prompt: System prompt for guiding the generation task.
            Falls back to ``DEFAULT_SYSTEM_PROMPT`` when empty or None.
        :param input_template: Template for building the model input. It must
            contain placeholders '{query}' and '{response}', and can
            optionally include '{reference}'. Falls back to
            ``DEFAULT_INPUT_TEMPLATE`` when empty or None.
        :param output_pattern: Regular expression for parsing model output.
            Falls back to ``DEFAULT_OUTPUT_PATTERN`` when empty or None.
        :param rejected_key: The field name in the sample to store the
            generated rejected response. Defaults to 'rejected_response'.
        :param reason_key: The field name in the sample to store the reason
            for generating the response. Defaults to 'reason'.
        :param try_num: The number of retries for the API call in case of
            response parsing failure. Defaults to 3.
        :param model_params: Parameters for initializing the API model.
            None (the default) is treated as an empty dict.
        :param sampling_params: Extra parameters passed to the API call.
            e.g {'temperature': 0.9, 'top_p': 0.95}. None (the default) is
            treated as an empty dict.
        :param kwargs: Extra keyword arguments.
        """
        super().__init__(**kwargs)

        self.system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
        self.input_template = input_template or self.DEFAULT_INPUT_TEMPLATE
        self.output_pattern = output_pattern or self.DEFAULT_OUTPUT_PATTERN

        self.rejected_key = rejected_key
        self.reason_key = reason_key

        # Use None defaults and per-instance copies rather than `{}` default
        # arguments: a mutable default is created once and shared by every
        # call, so storing it on `self` would let one instance's changes
        # leak into all others.
        model_params = dict(model_params) if model_params else {}
        self.model_key = prepare_model(model_type='api',
                                       model=api_model,
                                       endpoint=api_endpoint,
                                       response_path=response_path,
                                       **model_params)

        self.try_num = try_num
        self.sampling_params = dict(sampling_params) if sampling_params else {}