@OPERATORS.register_module(OP_NAME)
class AudioFFmpegWrappedMapper(Mapper):
    """Wraps FFmpeg audio filters for processing audio files in a dataset.

    This operator applies specified FFmpeg audio filters to the audio files
    in the dataset. It supports passing custom filter parameters and global
    arguments to the FFmpeg command line. The processed audio files are saved
    to a specified directory or the same directory as the input files if no
    save directory is provided. The `DJ_PRODUCED_DATA_DIR` environment
    variable can also be used to set the save directory. If no filter name is
    provided, the audio files remain unmodified. The operator updates the
    source file paths in the dataset after processing.
    """

    def __init__(
        self,
        filter_name: Optional[str] = None,
        filter_kwargs: Optional[Dict] = None,
        global_args: Optional[List[str]] = None,
        capture_stderr: bool = True,
        overwrite_output: bool = True,
        save_dir: Optional[str] = None,
        *args,
        **kwargs,
    ):
        """
        Initialization method.

        :param filter_name: ffmpeg audio filter name.
        :param filter_kwargs: keyword-arguments passed to ffmpeg filter.
            ``None`` is treated as "no keyword arguments".
        :param global_args: list-arguments passed to ffmpeg command-line.
        :param capture_stderr: whether to capture stderr.
        :param overwrite_output: whether to overwrite output file.
        :param save_dir: The directory where generated audio files will be
            stored. If not specified, outputs will be saved in the same
            directory as their corresponding input files. This path can
            alternatively be defined by setting the `DJ_PRODUCED_DATA_DIR`
            environment variable.
        :param args: extra args
        :param kwargs: extra args
        """
        super().__init__(*args, **kwargs)
        # Snapshot the constructor arguments before any other local is
        # created; they are forwarded to `transfer_filename` when output
        # paths are built. `save_dir` is passed to it separately, so it is
        # dropped from the snapshot.
        self._init_parameters = self.remove_extra_parameters(locals())
        self._init_parameters.pop("save_dir", None)

        self.filter_name = filter_name
        self.filter_kwargs = filter_kwargs
        self.global_args = global_args
        self.capture_stderr = capture_stderr
        self.overwrite_output = overwrite_output
        self.save_dir = save_dir

    def process_single(self, sample):
        """Run the configured ffmpeg filter on every audio path of a sample.

        :param sample: mapping that holds a list of audio paths under
            ``self.audio_key`` and, optionally, a list of source paths under
            ``Fields.source_file``.
        :return: the same ``sample`` object. When a filter is configured,
            the audio paths are replaced by the paths of the filtered
            outputs. When the sample has no audio, ``Fields.source_file`` is
            set to an empty list. When no filter is configured, the audio
            paths are left untouched.
        """
        # there is no audio in this sample
        if self.audio_key not in sample or not sample[self.audio_key]:
            sample[Fields.source_file] = []
            return sample

        # Fall back to the audio paths themselves when no source files have
        # been recorded for this sample yet.
        if Fields.source_file not in sample or not sample[Fields.source_file]:
            sample[Fields.source_file] = sample[self.audio_key]

        if self.filter_name is None:
            return sample

        # `filter_kwargs` defaults to None and `**None` raises TypeError, so
        # substitute an empty mapping when no filter options were supplied.
        filter_kwargs = {} if self.filter_kwargs is None else self.filter_kwargs

        loaded_audio_keys = sample[self.audio_key]
        processed = {}
        for audio_key in loaded_audio_keys:
            # The same path may be listed several times; filter it only once.
            if audio_key in processed:
                continue

            output_key = transfer_filename(audio_key, OP_NAME, self.save_dir, **self._init_parameters)
            stream = ffmpeg.input(audio_key).filter(self.filter_name, **filter_kwargs).output(output_key)
            if self.global_args is not None:
                stream = stream.global_args(*self.global_args)
            stream.run(capture_stderr=self.capture_stderr, overwrite_output=self.overwrite_output)
            processed[audio_key] = output_key

        # when the file is modified, its source file needs to be updated.
        source_files = sample[Fields.source_file]
        for i, value in enumerate(loaded_audio_keys):
            if source_files[i] != value and processed[value] != value:
                source_files[i] = value

        sample[self.audio_key] = [processed[key] for key in loaded_audio_keys]
        return sample