import math
import subprocess

import psutil
from loguru import logger  # assumed here: the excerpt's original module-level imports are not shown

# `cuda_device_count` is defined elsewhere in this package; its import is
# omitted from this excerpt.


def get_min_cuda_memory():
    # get cuda memory info using "nvidia-smi" command
    import torch

    # start from the total memory of GPU 0 as an upper bound, in MB
    min_cuda_memory = torch.cuda.get_device_properties(
        0).total_memory / 1024**2
    # query the free memory of every visible GPU (one value per line, in MB)
    nvidia_smi_output = subprocess.check_output([
        'nvidia-smi', '--query-gpu=memory.free',
        '--format=csv,noheader,nounits'
    ]).decode('utf-8')
    for line in nvidia_smi_output.strip().split('\n'):
        free_memory = int(line)
        # keep the smallest free memory across all GPUs
        min_cuda_memory = min(min_cuda_memory, free_memory)
    return min_cuda_memory
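# Usage sketch (illustrative only, not part of the module): the helper needs
# an NVIDIA GPU, a CUDA-enabled torch build, and `nvidia-smi` on the PATH, and
# it reports megabytes, e.g.
#
#   free_mb = get_min_cuda_memory()   # smallest free memory across GPUs, in MB
#   print(f'{free_mb} MB free (~{free_mb / 1024:.1f} GB)')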
def calculate_np(name,
                 mem_required,
                 cpu_required,
                 num_proc=None,
                 use_cuda=False):
    """Calculate the optimum number of processes for the given OP"""
    eps = 1e-9  # about 1 byte

    if use_cuda:
        auto_num_proc = None
        # convert the minimum free GPU memory from MB to GB
        cuda_mem_available = get_min_cuda_memory() / 1024
        if mem_required == 0:
            logger.warning(f'The required cuda memory of Op[{name}] '
                           f'has not been specified. '
                           f'Please specify the mem_required field in the '
                           f'config file, or you might encounter CUDA '
                           f'out of memory error. You can reference '
                           f'the mem_required field in the '
                           f'config_all.yaml file.')
        else:
            # number of processes that fit into one GPU, times the GPU count
            auto_num_proc = math.floor(
                cuda_mem_available / mem_required) * cuda_device_count()
            if cuda_mem_available / mem_required < 1.0:
                logger.warning(f'The required cuda memory: {mem_required}GB '
                               f'might be more than the available cuda '
                               f'memory: {cuda_mem_available}GB. '
                               f'This Op[{name}] might '
                               f'require more resources to run.')

        if auto_num_proc and num_proc:
            # both are given: take the smaller one
            op_proc = min(auto_num_proc, num_proc)
            if num_proc > auto_num_proc:
                logger.warning(f'The given num_proc: {num_proc} is greater '
                               f'than the value {auto_num_proc} auto '
                               f'calculated based on the mem_required of '
                               f'Op[{name}]. '
                               f'Set the `num_proc` to {auto_num_proc}.')
        elif not auto_num_proc and not num_proc:
            # neither is given: fall back to one process per GPU
            op_proc = cuda_device_count()
            logger.warning(f'Both mem_required and num_proc of Op[{name}] '
                           f'are not set. '
                           f'Set the `num_proc` to the number of GPUs '
                           f'{op_proc}.')
        else:
            # exactly one is given: use it
            op_proc = auto_num_proc if auto_num_proc else num_proc

        op_proc = max(op_proc, 1)
        return op_proc
    else:
        if num_proc is None:
            num_proc = psutil.cpu_count()

        op_proc = num_proc
        cpu_available = psutil.cpu_count()
        # available system memory in GB
        mem_available = psutil.virtual_memory().available
        mem_available = mem_available / 1024**3
        # cap the process count by available CPUs and by available memory
        op_proc = min(op_proc, math.floor(cpu_available / cpu_required + eps))
        op_proc = min(op_proc,
                      math.floor(mem_available / (mem_required + eps)))
        if op_proc < 1.0:
            logger.warning(f'The required CPU number: {cpu_required} '
                           f'and memory: {mem_required}GB might '
                           f'be more than the available CPU: {cpu_available} '
                           f'and memory: {mem_available}GB. '
                           f'This Op[{name}] might '
                           f'require more resources to run.')
        op_proc = max(op_proc, 1)
        return op_proc
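# Usage sketch (hypothetical OP name and resource figures, chosen only for
# illustration): size the worker pool for a CPU-bound OP that needs roughly
# one core and 2 GB of RAM per process.
if __name__ == '__main__':
    n = calculate_np('demo_op',
                     mem_required=2,
                     cpu_required=1,
                     num_proc=8,
                     use_cuda=False)
    print(f'Op[demo_op] will run with {n} processes')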