@OPERATORS.register_module(OP_NAME)
@LOADED_IMAGES.register_module(OP_NAME)
class RayImageDeduplicator(RayBasicDeduplicator):
    """
    Deduplicator to deduplicate samples at document-level using exact matching
    of images between documents.
    """

    def __init__(self,
                 backend: str = 'ray_actor',
                 redis_address: str = 'redis://localhost:6379',
                 method: str = 'phash',
                 *args,
                 **kwargs):
        """
        Initialization.

        :param backend: the backend for dedup, either 'ray_actor' or 'redis'
        :param redis_address: the address of redis server
        :param method: name of the image hash method used to build
            ``self.hasher``; must be one of ``HASH_METHOD``
        :param args: extra args
        :param kwargs: extra args
        :raises ValueError: if ``method`` is not in ``HASH_METHOD``
        """
        # NOTE(review): ``*args`` is forwarded after keyword arguments, so any
        # extra positional argument binds to the parent's leading positional
        # parameters -- confirm this cannot collide with ``backend``.
        super().__init__(backend=backend,
                         redis_address=redis_address,
                         *args,
                         **kwargs)
        if method not in HASH_METHOD:
            # The rejected value is the hash method, so the message names it
            # as such.
            raise ValueError(f'Hash method [{method}] is not supported. '
                             f'Can only be one of {HASH_METHOD}.')
        # get_hash_method(method) returns a callable; calling it with no
        # arguments produces the hasher object stored on the instance.
        self.hasher = get_hash_method(method)()