def get_min_cuda_memory():
    """Return the minimum free CUDA memory (in MiB) across all visible GPUs,
    as reported by the "nvidia-smi" command.

    The total memory of device 0 is used as the initial upper bound, so the
    result never exceeds it.
    """
    import torch

    # Start from the total memory of device 0, converted from bytes to MiB.
    candidates = [torch.cuda.get_device_properties(0).total_memory / 1024**2]
    # One integer (free MiB) per line, one line per GPU.
    smi_output = subprocess.check_output(
        ["nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits"]
    ).decode("utf-8")
    candidates.extend(int(row) for row in smi_output.strip().split("\n"))
    return min(candidates)
def calculate_np(name, mem_required, cpu_required, use_cuda=False, gpu_required=0):
    """Calculate the optimum number of processes for the given OP automatically.

    :param name: name of the OP, used in log and error messages.
    :param mem_required: memory (in GB) required by one process of the OP.
    :param cpu_required: number of CPUs required by one process of the OP.
    :param use_cuda: whether the OP runs on CUDA devices.
    :param gpu_required: number of GPUs required by one process of the OP.
    :return: the automatically determined number of processes.
    :raises ValueError: if GPU resources are requested (``gpu_required > 0``)
        while ``use_cuda`` is False.
    """
    if not use_cuda and gpu_required > 0:
        raise ValueError(
            f"Op[{name}] attempted to request GPU resources (gpu_required={gpu_required}), "
            "but appears to lack GPU support. If you have verified this operator support "
            "GPU acceleration, "
            'please explicitly set its property: `_accelerator = "cuda"`.'
        )

    eps = 1e-9  # about 1 byte; keeps divisions safe when a requirement is 0
    cpu_num = cpu_count()

    if use_cuda:
        cuda_mems_available = [m / 1024 for m in available_gpu_memories()]  # GB
        gpu_count = cuda_device_count()
        if not mem_required and not gpu_required:
            # No GPU requirement specified: default to one process per GPU.
            auto_num_proc = gpu_count
            # FIX: added the missing space before "Set the auto ..." so the
            # logged sentence does not run into the previous one.
            logger.warning(
                f"The required cuda memory and gpu of Op[{name}] "
                f"has not been specified. "
                f"Please specify the mem_required field or gpu_required field in the "
                f"config file. You can reference the config_all.yaml file. "
                f"Set the auto `num_proc` to number of GPUs {auto_num_proc}."
            )
        else:
            # Capacity from each constraint; the tightest one wins.
            auto_proc_from_mem = sum(
                math.floor(mem_available / (mem_required + eps))
                for mem_available in cuda_mems_available
            )
            auto_proc_from_gpu = math.floor(gpu_count / (gpu_required + eps))
            auto_proc_from_cpu = math.floor(cpu_num / (cpu_required + eps))
            auto_num_proc = min(auto_proc_from_mem, auto_proc_from_gpu, auto_proc_from_cpu)
            if auto_num_proc < 1:
                # Requirements exceed a single node's resources; fall back to
                # one process per available node.
                auto_num_proc = len(available_memories())
            logger.info(
                f"Set the auto `num_proc` to {auto_num_proc} of Op[{name}] based on the "
                f"required cuda memory: {mem_required}GB "
                f"required gpu: {gpu_required} and required cpu: {cpu_required}."
            )
        return auto_num_proc
    else:
        mems_available = [m / 1024 for m in available_memories()]  # GB
        auto_proc_from_mem = sum(
            math.floor(mem_available / (mem_required + eps))
            for mem_available in mems_available
        )
        auto_proc_from_cpu = math.floor(cpu_num / (cpu_required + eps))
        auto_num_proc = min(cpu_num, auto_proc_from_mem, auto_proc_from_cpu)
        # FIX: compare against the int literal 1 (was 1.0) for consistency
        # with the CUDA branch; the comparison result is unchanged.
        if auto_num_proc < 1:
            # Not enough resources on one node; one process per available node.
            auto_num_proc = len(available_memories())
            # FIX: added the missing space before "This Op ..." so the logged
            # sentence does not run into the previous one.
            logger.warning(
                f"The required CPU number: {cpu_required} "
                f"and memory: {mem_required}GB might "
                f"be more than the available CPU: {cpu_num} "
                f"and memory: {mems_available}GB. "
                f"This Op [{name}] might "
                f"require more resource to run. "
                f"Set the auto `num_proc` to available nodes number {auto_num_proc}."
            )
        else:
            logger.info(
                f"Set the auto `num_proc` to {auto_num_proc} of Op[{name}] based on the "
                f"required memory: {mem_required}GB "
                f"and required cpu: {cpu_required}."
            )
        return auto_num_proc