def get_min_cuda_memory():
    """Return the smallest free CUDA memory across all visible GPUs, in MB.

    Queries every GPU with the ``nvidia-smi`` command and returns the
    minimum reported free memory, so multi-GPU scheduling is bounded by
    the most-loaded device.
    """
    # get cuda memory info using "nvidia-smi" command
    import torch

    # Start from an upper bound: device 0's total memory, bytes -> MB.
    smallest_free = torch.cuda.get_device_properties(0).total_memory / 1024**2
    smi_report = subprocess.check_output(
        ["nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits"]
    ).decode("utf-8")
    # One line per GPU; with "nounits" each line is a bare integer (MB).
    for row in smi_report.strip().split("\n"):
        smallest_free = min(smallest_free, int(row))
    return smallest_free
def calculate_np(name, mem_required, cpu_required, num_proc=None, use_cuda=False):
    """Calculate the optimum number of processes for the given OP.

    :param name: name of the OP, used only in warning messages.
    :param mem_required: memory required per process in GB; 0 means the
        requirement was not specified.
    :param cpu_required: CPU cores required per process; 0 means the
        requirement was not specified.
    :param num_proc: user-configured number of processes, or ``None`` to
        auto-detect from available resources.
    :param use_cuda: if True, size by free GPU memory and GPU count;
        otherwise size by available CPUs and RAM.
    :return: the number of processes to use, always at least 1.
    """
    eps = 1e-9  # about 1 byte
    if use_cuda:
        auto_num_proc = None
        # get_min_cuda_memory() reports MB; convert to GB to match mem_required.
        cuda_mem_available = get_min_cuda_memory() / 1024
        if mem_required == 0:
            logger.warning(
                f"The required cuda memory of Op[{name}] "
                f"has not been specified. "
                f"Please specify the mem_required field in the "
                f"config file, or you might encounter CUDA "
                f"out of memory error. You can reference "
                f"the mem_required field in the "
                f"config_all.yaml file."
            )
        else:
            # Processes that fit on one GPU, times the number of GPUs.
            auto_num_proc = (
                math.floor(cuda_mem_available / mem_required) * cuda_device_count()
            )
            if cuda_mem_available / mem_required < 1.0:
                logger.warning(
                    f"The required cuda memory:{mem_required}GB might "
                    f"be more than the available cuda memory:"
                    f"{cuda_mem_available}GB."
                    f"This Op[{name}] might "
                    f"require more resource to run."
                )
        if auto_num_proc and num_proc:
            # Both known: respect the tighter (memory-derived) limit.
            op_proc = min(auto_num_proc, num_proc)
            if num_proc > auto_num_proc:
                logger.warning(
                    f"The given num_proc: {num_proc} is greater than "
                    f"the value {auto_num_proc} auto calculated based "
                    f"on the mem_required of Op[{name}]. "
                    f"Set the `num_proc` to {auto_num_proc}."
                )
        elif not auto_num_proc and not num_proc:
            # Neither known: fall back to one process per GPU.
            op_proc = cuda_device_count()
            logger.warning(
                f"Both mem_required and num_proc of Op[{name}] are not set."
                f"Set the `num_proc` to number of GPUs {op_proc}."
            )
        else:
            op_proc = auto_num_proc if auto_num_proc else num_proc
        op_proc = max(op_proc, 1)
        return op_proc
    else:
        if num_proc is None:
            num_proc = psutil.cpu_count()
        op_proc = num_proc
        cpu_available = psutil.cpu_count()
        mem_available = psutil.virtual_memory().available
        mem_available = mem_available / 1024**3  # bytes -> GB
        # Fix: guard against ZeroDivisionError when cpu_required is 0
        # (i.e. unspecified) — the memory bound below already protects
        # its denominator with +eps, so make the CPU bound consistent by
        # simply skipping it when no CPU requirement is given.
        if cpu_required:
            # +eps counters float rounding just below an integer quotient.
            op_proc = min(op_proc, math.floor(cpu_available / cpu_required + eps))
        op_proc = min(op_proc, math.floor(mem_available / (mem_required + eps)))
        if op_proc < 1.0:
            logger.warning(
                f"The required CPU number:{cpu_required} "
                f"and memory:{mem_required}GB might "
                f"be more than the available CPU:{cpu_available} "
                f"and memory :{mem_available}GB."
                f"This Op [{name}] might "
                f"require more resource to run."
            )
        op_proc = max(op_proc, 1)
        return op_proc