Source code for data_juicer.utils.resource_utils

import os
import subprocess
from typing import List

import psutil
from loguru import logger

from data_juicer.utils.availability_utils import _is_package_available
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.ray_utils import (
    check_and_initialize_ray,
    ray_available_gpu_memories,
    ray_available_memories,
    ray_cpu_count,
    ray_gpu_count,
)

torch = LazyLoader("torch")

ray = LazyLoader("ray")

NVSMI_REPORT = True


def query_cuda_info(query_key):
    global NVSMI_REPORT
    # get cuda info using "nvidia-smi" command in MB
    try:
        nvidia_smi_output = subprocess.check_output(
            ["nvidia-smi", f"--query-gpu={query_key}", "--format=csv,noheader,nounits"]
        ).decode("utf-8")
    except Exception as e:
        if "non-zero exit status 2" in str(e):
            err_msg = (
                f"The specified query_key [{query_key}] might not be "
                f"supported by command nvidia-smi. Please check and "
                f"retry!"
            )
        elif "No such file or directory" in str(e):
            err_msg = "Command nvidia-smi is not found. There might be no GPUs on this machine."
        else:
            err_msg = str(e)
        # only report nvidia-smi failures once per process
        if NVSMI_REPORT:
            logger.warning(err_msg)
            NVSMI_REPORT = False
        return None
    cuda_info_list = []
    for line in nvidia_smi_output.strip().split("\n"):
        cuda_info_list.append(int(line))
    return cuda_info_list
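

# Illustrative usage sketch (not part of the original module): query_cuda_info
# accepts any nvidia-smi --query-gpu key, e.g. "utilization.gpu" or
# "memory.used", and returns one integer per visible GPU, or None when
# nvidia-smi is unusable. The helper name below is hypothetical.
def _example_per_gpu_used_memory():
    used = query_cuda_info("memory.used")  # e.g. [1024, 2048] on a 2-GPU machine
    if used is None:
        return {}
    return {f"gpu_{idx}": mb for idx, mb in enumerate(used)}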


def get_cpu_utilization():
    return psutil.cpu_percent()


def query_mem_info(query_key):
    mem = psutil.virtual_memory()
    if query_key not in mem._fields:
        logger.warning(f"No such query key [{query_key}] for memory info. Should be one of {mem._fields}")
        return None
    val = round(getattr(mem, query_key) / (2**20), 2)  # in MB
    return val
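

# Illustrative usage sketch (not part of the original module): query_mem_info
# accepts any field of psutil.virtual_memory(), such as "total", "available",
# "used" or "free", and reports the value in MB (or None for an unknown key).
def _example_memory_headroom():
    available_mb = query_mem_info("available")  # e.g. 15872.0
    total_mb = query_mem_info("total")
    if available_mb is None or total_mb is None:
        return None
    return available_mb / total_mb  # fraction of memory still free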


def _cuda_device_count():
    _torch_available = _is_package_available("torch")

    if check_and_initialize_ray():
        return int(ray_gpu_count())

    if _torch_available:
        return torch.cuda.device_count()

    try:
        nvidia_smi_output = subprocess.check_output(["nvidia-smi", "-L"], text=True)
        all_devices = nvidia_smi_output.strip().split("\n")

        cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
        if cuda_visible_devices is not None:
            logger.warning(
                "CUDA_VISIBLE_DEVICES is ignored when torch is unavailable. "
                "All detected GPUs will be used."
            )

        return len(all_devices)
    except Exception:
        # nvidia-smi not found or other error
        return 0


def cuda_device_count():
    return _cuda_device_count()


def is_cuda_available():
    return cuda_device_count() > 0


def cpu_count():
    if check_and_initialize_ray():
        return int(ray_cpu_count())
    return psutil.cpu_count()
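

# Illustrative usage sketch (not part of the original module): the count
# helpers transparently switch between a live Ray cluster and the local
# machine, so callers can size worker pools without caring about the backend.
def _example_default_num_proc():
    # leave one CPU per GPU free for data loading; a hypothetical heuristic,
    # not a policy taken from data-juicer itself
    return max(1, cpu_count() - cuda_device_count())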


def available_memories() -> List[int]:
    """Available memory for each node in MB."""
    if check_and_initialize_ray():
        return ray_available_memories()
    return [int(psutil.virtual_memory().available / (1024**2))]


def available_gpu_memories() -> List[int]:
    """Available gpu memory of each gpu card for each alive node in MB."""
    if check_and_initialize_ray():
        return ray_available_gpu_memories()
    try:
        # query_cuda_info returns None when nvidia-smi is unavailable;
        # normalize that to an empty list to match the annotated return type
        free_memories = query_cuda_info("memory.free")
        return free_memories if free_memories is not None else []
    except Exception:
        return []
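

# A minimal end-to-end sketch, assuming the module is run directly; it prints
# a small resource report. The report layout is our own illustration, not
# something data-juicer defines.
if __name__ == "__main__":
    print(f"CPUs: {cpu_count()} ({get_cpu_utilization()}% busy)")
    print(f"Available memory per node (MB): {available_memories()}")
    if is_cuda_available():
        print(f"GPUs: {cuda_device_count()}")
        print(f"Free GPU memory (MB): {available_gpu_memories()}")
    else:
        print("No CUDA devices detected.")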