# Source code for data_juicer.utils.lazy_loader

"""A LazyLoader class."""

import importlib
import inspect
import os
import types

from loguru import logger

from data_juicer.utils.auto_install_utils import (AutoInstaller,
                                                  _is_module_installed)
from data_juicer.utils.availability_utils import _torch_check_and_set

# Directory that contains this file, with symlinks resolved.
current_path = os.path.dirname(os.path.realpath(__file__))

# Requirement lists located relative to this file; both are passed to the
# AutoInstaller below.
science_file_path, dist_file_path = (
    os.path.join(current_path, relative_path)
    for relative_path in ('../../environments/science_requires.txt',
                          '../../environments/dist_requires.txt'))

# Shared installer instance used by LazyLoader when a module is missing.
AUTOINSTALL = AutoInstaller([science_file_path, dist_file_path])


class LazyLoader(types.ModuleType):
    """
    Lazily import a module, mainly to avoid pulling in large dependencies.

    `contrib`, and `ffmpeg` are examples of modules that are large and not
    always needed, and this allows them to only be loaded when they are used.

    The real module is imported the first time an attribute that the loader
    does not have is requested, or when ``dir()`` is called on the loader.
    Once imported, the module is cached on the loader so later lookups do
    not repeat the install check or the import.
    """

    # Names that hold the loader's own state. ``__getattr__`` must never try
    # to resolve these by importing the target module: ``_load`` reads them,
    # so doing that on an instance where they are missing would recurse
    # until RecursionError.
    _LOADER_STATE = frozenset({
        '__name__',
        '_local_name',
        '_parent_module_globals',
        'auto_install',
        '_lazy_module',
    })

    # The lint error here is incorrect.
    def __init__(self, local_name, name, auto_install=True):
        """
        Create a placeholder for module ``name``.

        :param local_name: name under which the real module is stored in the
            globals of the code that created this loader once it is loaded.
        :param name: importable (possibly dotted) name of the target module.
        :param auto_install: whether to try installing the module through
            ``AUTOINSTALL`` when it is not found at load time.
        """
        super().__init__(name)
        self._local_name = local_name
        self.auto_install = auto_install

        # Globals of the code that instantiated the loader (one frame up).
        # inspect.currentframe() may return None on interpreters without
        # stack frame support; fall back to a private dict in that case so
        # that loading still works, just without rebinding the caller's name.
        frame = inspect.currentframe()
        caller = frame.f_back if frame is not None else None
        self._parent_module_globals = (caller.f_globals
                                       if caller is not None else {})

    def _load(self):
        """
        Import the target module (installing it first if needed).

        :return: the real module object.
        """
        # Fast path: already loaded. Read through __dict__ so that a missing
        # entry never goes through __getattr__.
        cached = self.__dict__.get('_lazy_module')
        if cached is not None:
            return cached

        # Auto install if necessary; only the top-level package name is
        # checked and installed.
        module_name = self.__name__.split('.')[0]
        if self.auto_install and not _is_module_installed(module_name):
            logger.warning(
                f"Module '{module_name}' not installed or fully installed.")
            logger.warning(f"Auto installing '{module_name}' ...")
            AUTOINSTALL.install(module_name)

        # check for torch
        if self.__name__ == 'torch':
            _torch_check_and_set()

        # Import the target module and insert it into the parent's namespace
        module = importlib.import_module(self.__name__)
        self._parent_module_globals[self._local_name] = module

        # Update this object's dict so that if someone keeps a reference to
        # the LazyLoader, lookups are efficient (__getattr__ is only called
        # on lookups that fail).
        self.__dict__.update(module.__dict__)
        # Stored after the update so the target module's own names cannot
        # overwrite the cache entry.
        self.__dict__['_lazy_module'] = module
        return module

    def __getattr__(self, item):
        """
        Resolve ``item`` on the real module, loading it on first use.

        :raises AttributeError: if ``item`` is one of the loader's own state
            attributes and is missing, or if the real module lacks ``item``.
        """
        if item in LazyLoader._LOADER_STATE:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{item}'")
        return getattr(self._load(), item)

    def __dir__(self):
        """Return ``dir()`` of the real module, loading it if necessary."""
        return dir(self._load())