@OPERATORS.register_module("key_value_grouper")
class KeyValueGrouper(Grouper):
    """Group samples into batches based on the values of specified keys.

    Samples are grouped by the values found under the given keys, which may
    be nested. When no keys are provided, the text key is used. Samples with
    identical key values are batched together using a naive grouping
    strategy, so the resulting dataset is a list of batched samples in which
    every batch shares the same key values. This is useful for organizing
    data by specific attributes or features.
    """

    def __init__(self, group_by_keys: Optional[List[str]] = None, *args, **kwargs):
        """
        Initialization method.

        :param group_by_keys: keys whose values are used to group samples.
            Nested keys such as "__dj__stats__.text_len" are supported.
            Defaults to [self.text_key].
        :param args: extra args
        :param kwargs: extra args
        """
        super().__init__(*args, **kwargs)
        # The fallback reads self.text_key, so it is resolved only after the
        # base initializer has run (presumably where text_key is set --
        # confirm against the base class). Because `or` is used, an empty
        # list falls back to the default exactly like None does.
        self.group_by_keys = group_by_keys or [self.text_key]
        # Helper grouper instance kept on the operator; its use is outside
        # the lines visible here.
        self.naive_grouper = NaiveGrouper()