索引 _ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z _ __init__() (data_juicer.analysis.collector.TextTokenDistCollector 方法) __init__() (data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis 方法) __init__() (data_juicer.analysis.ColumnWiseAnalysis 方法) __init__() (data_juicer.analysis.diversity_analysis.DiversityAnalysis 方法) __init__() (data_juicer.analysis.DiversityAnalysis 方法) __init__() (data_juicer.analysis.overall_analysis.OverallAnalysis 方法) __init__() (data_juicer.analysis.OverallAnalysis 方法) __init__() (data_juicer.core.Adapter 方法) __init__() (data_juicer.core.adapter.Adapter 方法) __init__() (data_juicer.core.Analyzer 方法) __init__() (data_juicer.core.analyzer.Analyzer 方法) __init__() (data_juicer.core.data.data_validator.BaseConversationValidator 方法) __init__() (data_juicer.core.data.data_validator.CodeDataValidator 方法) __init__() (data_juicer.core.data.data_validator.DataValidator 方法) __init__() (data_juicer.core.data.data_validator.RequiredFieldsValidator 方法) __init__() (data_juicer.core.data.dataset_builder.DatasetBuilder 方法) __init__() (data_juicer.core.data.dj_dataset.NestedDataset 方法) __init__() (data_juicer.core.data.dj_dataset.NestedDatasetDict 方法) __init__() (data_juicer.core.data.dj_dataset.NestedQueryDict 方法) __init__() (data_juicer.core.data.load_strategy.DataLoadStrategy 方法) __init__() (data_juicer.core.data.load_strategy.StrategyKey 方法) __init__() (data_juicer.core.data.NestedDataset 方法) __init__() (data_juicer.core.data.ray_dataset.RayDataset 方法) __init__() (data_juicer.core.data.schema.Schema 方法) __init__() (data_juicer.core.DefaultExecutor 方法) __init__() (data_juicer.core.executor.base.ExecutorBase 方法) __init__() (data_juicer.core.executor.default_executor.DefaultExecutor 方法) __init__() (data_juicer.core.executor.DefaultExecutor 方法) __init__() (data_juicer.core.executor.ExecutorBase 方法) __init__() (data_juicer.core.executor.ray_executor.RayExecutor 方法) __init__() (data_juicer.core.executor.ray_executor.TempDirManager 方法) __init__() (data_juicer.core.ExecutorBase 方法) __init__() (data_juicer.core.Exporter 方法) __init__() (data_juicer.core.exporter.Exporter 方法) __init__() (data_juicer.core.Monitor 方法) __init__() (data_juicer.core.monitor.Monitor 方法) __init__() (data_juicer.core.NestedDataset 方法) __init__() (data_juicer.core.Tracer 方法) __init__() (data_juicer.core.tracer.Tracer 方法) __init__() (data_juicer.download.downloader.DocumentDownloader 方法) __init__() (data_juicer.download.downloader.DocumentExtractor 方法) __init__() (data_juicer.download.downloader.DocumentIterator 方法) __init__() (data_juicer.download.wikipedia.WikipediaDownloader 方法) __init__() (data_juicer.download.wikipedia.WikipediaExtractor 方法) __init__() (data_juicer.download.wikipedia.WikipediaIterator 方法) __init__() (data_juicer.format.csv_formatter.CsvFormatter 方法) __init__() (data_juicer.format.CsvFormatter 方法) __init__() (data_juicer.format.empty_formatter.EmptyFormatter 方法) __init__() (data_juicer.format.empty_formatter.RayEmptyFormatter 方法) __init__() (data_juicer.format.EmptyFormatter 方法) __init__() (data_juicer.format.formatter.LocalFormatter 方法) __init__() (data_juicer.format.formatter.RemoteFormatter 方法) __init__() (data_juicer.format.json_formatter.JsonFormatter 方法) __init__() (data_juicer.format.JsonFormatter 方法) __init__() (data_juicer.format.LocalFormatter 方法) __init__() (data_juicer.format.parquet_formatter.ParquetFormatter 方法) __init__() (data_juicer.format.ParquetFormatter 方法) __init__() (data_juicer.format.RayEmptyFormatter 方法) __init__() (data_juicer.format.RemoteFormatter 方法) __init__() (data_juicer.format.text_formatter.TextFormatter 方法) __init__() (data_juicer.format.TextFormatter 方法) __init__() (data_juicer.format.tsv_formatter.TsvFormatter 方法) __init__() (data_juicer.format.TsvFormatter 方法) __init__() (data_juicer.ops.Aggregator 方法) __init__() (data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 方法) __init__() (data_juicer.ops.aggregator.EntityAttributeAggregator 方法) __init__() (data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 方法) __init__() (data_juicer.ops.aggregator.MetaTagsAggregator 方法) __init__() (data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 方法) __init__() (data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 方法) __init__() (data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 方法) __init__() (data_juicer.ops.aggregator.NestedAggregator 方法) __init__() (data_juicer.ops.base_op.Aggregator 方法) __init__() (data_juicer.ops.base_op.Deduplicator 方法) __init__() (data_juicer.ops.base_op.Filter 方法) __init__() (data_juicer.ops.base_op.Grouper 方法) __init__() (data_juicer.ops.base_op.Mapper 方法) __init__() (data_juicer.ops.base_op.OP 方法) __init__() (data_juicer.ops.base_op.Selector 方法) __init__() (data_juicer.ops.common.helper_func.UnionFind 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionRefine 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionReplace 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionReweight 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.LocalBlend 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.P2PCrossAttnProcessor 方法) __init__() (data_juicer.ops.common.prompt2prompt_pipeline.ScoreParams 方法) __init__() (data_juicer.ops.Deduplicator 方法) __init__() (data_juicer.ops.deduplicator.document_deduplicator.DocumentDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.document_minhash_deduplicator.DocumentMinhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.document_simhash_deduplicator.DocumentSimhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.DocumentDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.DocumentMinhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.DocumentSimhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.image_deduplicator.ImageDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ImageDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ray_basic_deduplicator.ActorBackend 方法) __init__() (data_juicer.ops.deduplicator.ray_basic_deduplicator.Backend 方法) __init__() (data_juicer.ops.deduplicator.ray_basic_deduplicator.DedupSet 方法) __init__() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RayBasicDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RedisBackend 方法) __init__() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) __init__() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.EdgeBuffer 方法) __init__() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.IdGenerator 方法) __init__() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ray_document_deduplicator.RayDocumentDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ray_image_deduplicator.RayImageDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.ray_video_deduplicator.RayVideoDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.RayBasicDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.RayDocumentDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.RayImageDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.RayVideoDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.video_deduplicator.VideoDeduplicator 方法) __init__() (data_juicer.ops.deduplicator.VideoDeduplicator 方法) __init__() (data_juicer.ops.Filter 方法) __init__() (data_juicer.ops.filter.alphanumeric_filter.AlphanumericFilter 方法) __init__() (data_juicer.ops.filter.AlphanumericFilter 方法) __init__() (data_juicer.ops.filter.audio_duration_filter.AudioDurationFilter 方法) __init__() (data_juicer.ops.filter.audio_nmf_snr_filter.AudioNMFSNRFilter 方法) __init__() (data_juicer.ops.filter.audio_size_filter.AudioSizeFilter 方法) __init__() (data_juicer.ops.filter.AudioDurationFilter 方法) __init__() (data_juicer.ops.filter.AudioNMFSNRFilter 方法) __init__() (data_juicer.ops.filter.AudioSizeFilter 方法) __init__() (data_juicer.ops.filter.average_line_length_filter.AverageLineLengthFilter 方法) __init__() (data_juicer.ops.filter.AverageLineLengthFilter 方法) __init__() (data_juicer.ops.filter.character_repetition_filter.CharacterRepetitionFilter 方法) __init__() (data_juicer.ops.filter.CharacterRepetitionFilter 方法) __init__() (data_juicer.ops.filter.flagged_words_filter.FlaggedWordFilter 方法) __init__() (data_juicer.ops.filter.FlaggedWordFilter 方法) __init__() (data_juicer.ops.filter.GeneralFieldFilter 方法) __init__() (data_juicer.ops.filter.image_aesthetics_filter.ImageAestheticsFilter 方法) __init__() (data_juicer.ops.filter.image_aspect_ratio_filter.ImageAspectRatioFilter 方法) __init__() (data_juicer.ops.filter.image_face_count_filter.ImageFaceCountFilter 方法) __init__() (data_juicer.ops.filter.image_face_ratio_filter.ImageFaceRatioFilter 方法) __init__() (data_juicer.ops.filter.image_nsfw_filter.ImageNSFWFilter 方法) __init__() (data_juicer.ops.filter.image_pair_similarity_filter.ImagePairSimilarityFilter 方法) __init__() (data_juicer.ops.filter.image_shape_filter.ImageShapeFilter 方法) __init__() (data_juicer.ops.filter.image_size_filter.ImageSizeFilter 方法) __init__() (data_juicer.ops.filter.image_text_matching_filter.ImageTextMatchingFilter 方法) __init__() (data_juicer.ops.filter.image_text_similarity_filter.ImageTextSimilarityFilter 方法) __init__() (data_juicer.ops.filter.image_watermark_filter.ImageWatermarkFilter 方法) __init__() (data_juicer.ops.filter.ImageAestheticsFilter 方法) __init__() (data_juicer.ops.filter.ImageAspectRatioFilter 方法) __init__() (data_juicer.ops.filter.ImageFaceCountFilter 方法) __init__() (data_juicer.ops.filter.ImageFaceRatioFilter 方法) __init__() (data_juicer.ops.filter.ImageNSFWFilter 方法) __init__() (data_juicer.ops.filter.ImagePairSimilarityFilter 方法) __init__() (data_juicer.ops.filter.ImageShapeFilter 方法) __init__() (data_juicer.ops.filter.ImageSizeFilter 方法) __init__() (data_juicer.ops.filter.ImageTextMatchingFilter 方法) __init__() (data_juicer.ops.filter.ImageTextSimilarityFilter 方法) __init__() (data_juicer.ops.filter.ImageWatermarkFilter 方法) __init__() (data_juicer.ops.filter.language_id_score_filter.LanguageIDScoreFilter 方法) __init__() (data_juicer.ops.filter.LanguageIDScoreFilter 方法) __init__() (data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 方法) __init__() (data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 方法) __init__() (data_juicer.ops.filter.LLMDifficultyScoreFilter 方法) __init__() (data_juicer.ops.filter.LLMQualityScoreFilter 方法) __init__() (data_juicer.ops.filter.maximum_line_length_filter.MaximumLineLengthFilter 方法) __init__() (data_juicer.ops.filter.MaximumLineLengthFilter 方法) __init__() (data_juicer.ops.filter.perplexity_filter.PerplexityFilter 方法) __init__() (data_juicer.ops.filter.PerplexityFilter 方法) __init__() (data_juicer.ops.filter.phrase_grounding_recall_filter.PhraseGroundingRecallFilter 方法) __init__() (data_juicer.ops.filter.PhraseGroundingRecallFilter 方法) __init__() (data_juicer.ops.filter.special_characters_filter.SpecialCharactersFilter 方法) __init__() (data_juicer.ops.filter.SpecialCharactersFilter 方法) __init__() (data_juicer.ops.filter.specified_field_filter.SpecifiedFieldFilter 方法) __init__() (data_juicer.ops.filter.specified_numeric_field_filter.SpecifiedNumericFieldFilter 方法) __init__() (data_juicer.ops.filter.SpecifiedFieldFilter 方法) __init__() (data_juicer.ops.filter.SpecifiedNumericFieldFilter 方法) __init__() (data_juicer.ops.filter.stopwords_filter.StopWordsFilter 方法) __init__() (data_juicer.ops.filter.StopWordsFilter 方法) __init__() (data_juicer.ops.filter.suffix_filter.SuffixFilter 方法) __init__() (data_juicer.ops.filter.SuffixFilter 方法) __init__() (data_juicer.ops.filter.text_action_filter.TextActionFilter 方法) __init__() (data_juicer.ops.filter.text_entity_dependency_filter.TextEntityDependencyFilter 方法) __init__() (data_juicer.ops.filter.text_length_filter.TextLengthFilter 方法) __init__() (data_juicer.ops.filter.text_pair_similarity_filter.TextPairSimilarityFilter 方法) __init__() (data_juicer.ops.filter.TextActionFilter 方法) __init__() (data_juicer.ops.filter.TextEntityDependencyFilter 方法) __init__() (data_juicer.ops.filter.TextLengthFilter 方法) __init__() (data_juicer.ops.filter.TextPairSimilarityFilter 方法) __init__() (data_juicer.ops.filter.token_num_filter.TokenNumFilter 方法) __init__() (data_juicer.ops.filter.TokenNumFilter 方法) __init__() (data_juicer.ops.filter.video_aesthetics_filter.VideoAestheticsFilter 方法) __init__() (data_juicer.ops.filter.video_aspect_ratio_filter.VideoAspectRatioFilter 方法) __init__() (data_juicer.ops.filter.video_duration_filter.VideoDurationFilter 方法) __init__() (data_juicer.ops.filter.video_frames_text_similarity_filter.VideoFramesTextSimilarityFilter 方法) __init__() (data_juicer.ops.filter.video_motion_score_filter.VideoMotionScoreFilter 方法) __init__() (data_juicer.ops.filter.video_motion_score_raft_filter.VideoMotionScoreRaftFilter 方法) __init__() (data_juicer.ops.filter.video_nsfw_filter.VideoNSFWFilter 方法) __init__() (data_juicer.ops.filter.video_ocr_area_ratio_filter.VideoOcrAreaRatioFilter 方法) __init__() (data_juicer.ops.filter.video_resolution_filter.VideoResolutionFilter 方法) __init__() (data_juicer.ops.filter.video_tagging_from_frames_filter.VideoTaggingFromFramesFilter 方法) __init__() (data_juicer.ops.filter.video_watermark_filter.VideoWatermarkFilter 方法) __init__() (data_juicer.ops.filter.VideoAestheticsFilter 方法) __init__() (data_juicer.ops.filter.VideoAspectRatioFilter 方法) __init__() (data_juicer.ops.filter.VideoDurationFilter 方法) __init__() (data_juicer.ops.filter.VideoFramesTextSimilarityFilter 方法) __init__() (data_juicer.ops.filter.VideoMotionScoreFilter 方法) __init__() (data_juicer.ops.filter.VideoMotionScoreRaftFilter 方法) __init__() (data_juicer.ops.filter.VideoNSFWFilter 方法) __init__() (data_juicer.ops.filter.VideoOcrAreaRatioFilter 方法) __init__() (data_juicer.ops.filter.VideoResolutionFilter 方法) __init__() (data_juicer.ops.filter.VideoTaggingFromFramesFilter 方法) __init__() (data_juicer.ops.filter.VideoWatermarkFilter 方法) __init__() (data_juicer.ops.filter.word_repetition_filter.WordRepetitionFilter 方法) __init__() (data_juicer.ops.filter.WordRepetitionFilter 方法) __init__() (data_juicer.ops.filter.words_num_filter.WordsNumFilter 方法) __init__() (data_juicer.ops.filter.WordsNumFilter 方法) __init__() (data_juicer.ops.Grouper 方法) __init__() (data_juicer.ops.grouper.key_value_grouper.KeyValueGrouper 方法) __init__() (data_juicer.ops.grouper.KeyValueGrouper 方法) __init__() (data_juicer.ops.grouper.naive_grouper.NaiveGrouper 方法) __init__() (data_juicer.ops.grouper.naive_reverse_grouper.NaiveReverseGrouper 方法) __init__() (data_juicer.ops.grouper.NaiveGrouper 方法) __init__() (data_juicer.ops.grouper.NaiveReverseGrouper 方法) __init__() (data_juicer.ops.Mapper 方法) __init__() (data_juicer.ops.mapper.annotation.annotation_mapper.BaseAnnotationMapper 方法) __init__() (data_juicer.ops.mapper.annotation.annotation_mapper.LabelStudioAnnotationMapper 方法) __init__() (data_juicer.ops.mapper.annotation.human_preference_annotation_mapper.HumanPreferenceAnnotationMapper 方法) __init__() (data_juicer.ops.mapper.audio_add_gaussian_noise_mapper.AudioAddGaussianNoiseMapper 方法) __init__() (data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper.AudioFFmpegWrappedMapper 方法) __init__() (data_juicer.ops.mapper.AudioAddGaussianNoiseMapper 方法) __init__() (data_juicer.ops.mapper.AudioFFmpegWrappedMapper 方法) __init__() (data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 方法) __init__() (data_juicer.ops.mapper.CalibrateQAMapper 方法) __init__() (data_juicer.ops.mapper.chinese_convert_mapper.ChineseConvertMapper 方法) __init__() (data_juicer.ops.mapper.ChineseConvertMapper 方法) __init__() (data_juicer.ops.mapper.clean_copyright_mapper.CleanCopyrightMapper 方法) __init__() (data_juicer.ops.mapper.clean_email_mapper.CleanEmailMapper 方法) __init__() (data_juicer.ops.mapper.clean_html_mapper.CleanHtmlMapper 方法) __init__() (data_juicer.ops.mapper.clean_ip_mapper.CleanIpMapper 方法) __init__() (data_juicer.ops.mapper.clean_links_mapper.CleanLinksMapper 方法) __init__() (data_juicer.ops.mapper.CleanCopyrightMapper 方法) __init__() (data_juicer.ops.mapper.CleanEmailMapper 方法) __init__() (data_juicer.ops.mapper.CleanHtmlMapper 方法) __init__() (data_juicer.ops.mapper.CleanIpMapper 方法) __init__() (data_juicer.ops.mapper.CleanLinksMapper 方法) __init__() (data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 方法) __init__() (data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 方法) __init__() (data_juicer.ops.mapper.DialogIntentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.DialogSentimentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.DialogSentimentIntensityMapper 方法) __init__() (data_juicer.ops.mapper.DialogTopicDetectionMapper 方法) __init__() (data_juicer.ops.mapper.Difference_Area_Generator_Mapper 方法) __init__() (data_juicer.ops.mapper.expand_macro_mapper.ExpandMacroMapper 方法) __init__() (data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 方法) __init__() (data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 方法) __init__() (data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 方法) __init__() (data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 方法) __init__() (data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 方法) __init__() (data_juicer.ops.mapper.extract_support_text_mapper.ExtractSupportTextMapper 方法) __init__() (data_juicer.ops.mapper.extract_tables_from_html_mapper.ExtractTablesFromHtmlMapper 方法) __init__() (data_juicer.ops.mapper.ExtractEntityAttributeMapper 方法) __init__() (data_juicer.ops.mapper.ExtractEntityRelationMapper 方法) __init__() (data_juicer.ops.mapper.ExtractEventMapper 方法) __init__() (data_juicer.ops.mapper.ExtractKeywordMapper 方法) __init__() (data_juicer.ops.mapper.ExtractNicknameMapper 方法) __init__() (data_juicer.ops.mapper.ExtractSupportTextMapper 方法) __init__() (data_juicer.ops.mapper.ExtractTablesFromHtmlMapper 方法) __init__() (data_juicer.ops.mapper.fix_unicode_mapper.FixUnicodeMapper 方法) __init__() (data_juicer.ops.mapper.FixUnicodeMapper 方法) __init__() (data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 方法) __init__() (data_juicer.ops.mapper.generate_qa_from_text_mapper.GenerateQAFromTextMapper 方法) __init__() (data_juicer.ops.mapper.GenerateQAFromExamplesMapper 方法) __init__() (data_juicer.ops.mapper.GenerateQAFromTextMapper 方法) __init__() (data_juicer.ops.mapper.HumanPreferenceAnnotationMapper 方法) __init__() (data_juicer.ops.mapper.image_blur_mapper.ImageBlurMapper 方法) __init__() (data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper.ImageCaptioningFromGPT4VMapper 方法) __init__() (data_juicer.ops.mapper.image_captioning_mapper.ImageCaptioningMapper 方法) __init__() (data_juicer.ops.mapper.image_diffusion_mapper.ImageDiffusionMapper 方法) __init__() (data_juicer.ops.mapper.image_face_blur_mapper.ImageFaceBlurMapper 方法) __init__() (data_juicer.ops.mapper.image_remove_background_mapper.ImageRemoveBackgroundMapper 方法) __init__() (data_juicer.ops.mapper.image_segment_mapper.ImageSegmentMapper 方法) __init__() (data_juicer.ops.mapper.image_tagging_mapper.ImageTaggingMapper 方法) __init__() (data_juicer.ops.mapper.ImageBlurMapper 方法) __init__() (data_juicer.ops.mapper.ImageCaptioningFromGPT4VMapper 方法) __init__() (data_juicer.ops.mapper.ImageCaptioningMapper 方法) __init__() (data_juicer.ops.mapper.ImageDiffusionMapper 方法) __init__() (data_juicer.ops.mapper.ImageFaceBlurMapper 方法) __init__() (data_juicer.ops.mapper.ImageRemoveBackgroundMapper 方法) __init__() (data_juicer.ops.mapper.ImageSegmentMapper 方法) __init__() (data_juicer.ops.mapper.ImageTaggingMapper 方法) __init__() (data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper.Difference_Area_Generator_Mapper 方法) __init__() (data_juicer.ops.mapper.imgdiff_difference_caption_generator_mapper.Difference_Caption_Generator_Mapper 方法) __init__() (data_juicer.ops.mapper.mllm_mapper.MllmMapper 方法) __init__() (data_juicer.ops.mapper.MllmMapper 方法) __init__() (data_juicer.ops.mapper.nlpaug_en_mapper.NlpaugEnMapper 方法) __init__() (data_juicer.ops.mapper.NlpaugEnMapper 方法) __init__() (data_juicer.ops.mapper.nlpcda_zh_mapper.NlpcdaZhMapper 方法) __init__() (data_juicer.ops.mapper.NlpcdaZhMapper 方法) __init__() (data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 方法) __init__() (data_juicer.ops.mapper.OptimizeQAMapper 方法) __init__() (data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 方法) __init__() (data_juicer.ops.mapper.PairPreferenceMapper 方法) __init__() (data_juicer.ops.mapper.punctuation_normalization_mapper.PunctuationNormalizationMapper 方法) __init__() (data_juicer.ops.mapper.PunctuationNormalizationMapper 方法) __init__() (data_juicer.ops.mapper.python_file_mapper.PythonFileMapper 方法) __init__() (data_juicer.ops.mapper.python_lambda_mapper.PythonLambdaMapper 方法) __init__() (data_juicer.ops.mapper.PythonFileMapper 方法) __init__() (data_juicer.ops.mapper.PythonLambdaMapper 方法) __init__() (data_juicer.ops.mapper.query_intent_detection_mapper.QueryIntentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.query_sentiment_detection_mapper.QuerySentimentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.query_topic_detection_mapper.QueryTopicDetectionMapper 方法) __init__() (data_juicer.ops.mapper.QueryIntentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.QuerySentimentDetectionMapper 方法) __init__() (data_juicer.ops.mapper.QueryTopicDetectionMapper 方法) __init__() (data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 方法) __init__() (data_juicer.ops.mapper.RelationIdentityMapper 方法) __init__() (data_juicer.ops.mapper.remove_bibliography_mapper.RemoveBibliographyMapper 方法) __init__() (data_juicer.ops.mapper.remove_comments_mapper.RemoveCommentsMapper 方法) __init__() (data_juicer.ops.mapper.remove_header_mapper.RemoveHeaderMapper 方法) __init__() (data_juicer.ops.mapper.remove_long_words_mapper.RemoveLongWordsMapper 方法) __init__() (data_juicer.ops.mapper.remove_non_chinese_character_mapper.RemoveNonChineseCharacterlMapper 方法) __init__() (data_juicer.ops.mapper.remove_repeat_sentences_mapper.RemoveRepeatSentencesMapper 方法) __init__() (data_juicer.ops.mapper.remove_specific_chars_mapper.RemoveSpecificCharsMapper 方法) __init__() (data_juicer.ops.mapper.remove_table_text_mapper.RemoveTableTextMapper 方法) __init__() (data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) __init__() (data_juicer.ops.mapper.RemoveBibliographyMapper 方法) __init__() (data_juicer.ops.mapper.RemoveCommentsMapper 方法) __init__() (data_juicer.ops.mapper.RemoveHeaderMapper 方法) __init__() (data_juicer.ops.mapper.RemoveLongWordsMapper 方法) __init__() (data_juicer.ops.mapper.RemoveNonChineseCharacterlMapper 方法) __init__() (data_juicer.ops.mapper.RemoveRepeatSentencesMapper 方法) __init__() (data_juicer.ops.mapper.RemoveSpecificCharsMapper 方法) __init__() (data_juicer.ops.mapper.RemoveTableTextMapper 方法) __init__() (data_juicer.ops.mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) __init__() (data_juicer.ops.mapper.replace_content_mapper.ReplaceContentMapper 方法) __init__() (data_juicer.ops.mapper.ReplaceContentMapper 方法) __init__() (data_juicer.ops.mapper.sdxl_prompt2prompt_mapper.SDXLPrompt2PromptMapper 方法) __init__() (data_juicer.ops.mapper.SDXLPrompt2PromptMapper 方法) __init__() (data_juicer.ops.mapper.sentence_augmentation_mapper.SentenceAugmentationMapper 方法) __init__() (data_juicer.ops.mapper.sentence_split_mapper.SentenceSplitMapper 方法) __init__() (data_juicer.ops.mapper.SentenceAugmentationMapper 方法) __init__() (data_juicer.ops.mapper.SentenceSplitMapper 方法) __init__() (data_juicer.ops.mapper.text_chunk_mapper.TextChunkMapper 方法) __init__() (data_juicer.ops.mapper.TextChunkMapper 方法) __init__() (data_juicer.ops.mapper.video_captioning_from_audio_mapper.VideoCaptioningFromAudioMapper 方法) __init__() (data_juicer.ops.mapper.video_captioning_from_frames_mapper.VideoCaptioningFromFramesMapper 方法) __init__() (data_juicer.ops.mapper.video_captioning_from_summarizer_mapper.VideoCaptioningFromSummarizerMapper 方法) __init__() (data_juicer.ops.mapper.video_captioning_from_video_mapper.VideoCaptioningFromVideoMapper 方法) __init__() (data_juicer.ops.mapper.video_extract_frames_mapper.VideoExtractFramesMapper 方法) __init__() (data_juicer.ops.mapper.video_face_blur_mapper.VideoFaceBlurMapper 方法) __init__() (data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper.VideoFFmpegWrappedMapper 方法) __init__() (data_juicer.ops.mapper.video_remove_watermark_mapper.VideoRemoveWatermarkMapper 方法) __init__() (data_juicer.ops.mapper.video_resize_aspect_ratio_mapper.VideoResizeAspectRatioMapper 方法) __init__() (data_juicer.ops.mapper.video_resize_resolution_mapper.VideoResizeResolutionMapper 方法) __init__() (data_juicer.ops.mapper.video_split_by_duration_mapper.VideoSplitByDurationMapper 方法) __init__() (data_juicer.ops.mapper.video_split_by_key_frame_mapper.VideoSplitByKeyFrameMapper 方法) __init__() (data_juicer.ops.mapper.video_split_by_scene_mapper.VideoSplitBySceneMapper 方法) __init__() (data_juicer.ops.mapper.video_tagging_from_audio_mapper.VideoTaggingFromAudioMapper 方法) __init__() (data_juicer.ops.mapper.video_tagging_from_frames_mapper.VideoTaggingFromFramesMapper 方法) __init__() (data_juicer.ops.mapper.VideoCaptioningFromAudioMapper 方法) __init__() (data_juicer.ops.mapper.VideoCaptioningFromFramesMapper 方法) __init__() (data_juicer.ops.mapper.VideoCaptioningFromSummarizerMapper 方法) __init__() (data_juicer.ops.mapper.VideoCaptioningFromVideoMapper 方法) __init__() (data_juicer.ops.mapper.VideoExtractFramesMapper 方法) __init__() (data_juicer.ops.mapper.VideoFaceBlurMapper 方法) __init__() (data_juicer.ops.mapper.VideoFFmpegWrappedMapper 方法) __init__() (data_juicer.ops.mapper.VideoRemoveWatermarkMapper 方法) __init__() (data_juicer.ops.mapper.VideoResizeAspectRatioMapper 方法) __init__() (data_juicer.ops.mapper.VideoResizeResolutionMapper 方法) __init__() (data_juicer.ops.mapper.VideoSplitByDurationMapper 方法) __init__() (data_juicer.ops.mapper.VideoSplitByKeyFrameMapper 方法) __init__() (data_juicer.ops.mapper.VideoSplitBySceneMapper 方法) __init__() (data_juicer.ops.mapper.VideoTaggingFromAudioMapper 方法) __init__() (data_juicer.ops.mapper.VideoTaggingFromFramesMapper 方法) __init__() (data_juicer.ops.mapper.whitespace_normalization_mapper.WhitespaceNormalizationMapper 方法) __init__() (data_juicer.ops.mapper.WhitespaceNormalizationMapper 方法) __init__() (data_juicer.ops.mixins.EventDrivenMixin 方法) __init__() (data_juicer.ops.mixins.NotificationMixin 方法) __init__() (data_juicer.ops.op_fusion.FusedFilter 方法) __init__() (data_juicer.ops.op_fusion.GeneralFusedOP 方法) __init__() (data_juicer.ops.Selector 方法) __init__() (data_juicer.ops.selector.frequency_specified_field_selector.FrequencySpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.FrequencySpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.random_selector.RandomSelector 方法) __init__() (data_juicer.ops.selector.RandomSelector 方法) __init__() (data_juicer.ops.selector.range_specified_field_selector.RangeSpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.RangeSpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.tags_specified_field_selector.TagsSpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.TagsSpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.topk_specified_field_selector.TopkSpecifiedFieldSelector 方法) __init__() (data_juicer.ops.selector.TopkSpecifiedFieldSelector 方法) __init__() (data_juicer.utils.cache_utils.DatasetCacheControl 方法) __init__() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) __init__() (data_juicer.utils.compress.CacheCompressManager 方法) __init__() (data_juicer.utils.compress.CompressManager 方法) __init__() (data_juicer.utils.fingerprint_utils.Hasher 方法) __init__() (data_juicer.utils.lazy_loader.LazyLoader 方法) __init__() (data_juicer.utils.logger_utils.StreamToLoguru 方法) __init__() (data_juicer.utils.model_utils.APIModel 方法) __init__() (data_juicer.utils.registry.Registry 方法) A ActorBackend(data_juicer.ops.deduplicator.ray_basic_deduplicator 中的类) adapt_workloads() (data_juicer.core.Adapter 方法) adapt_workloads() (data_juicer.core.adapter.Adapter 方法) Adapter(data_juicer.core 中的类) Adapter(data_juicer.core.adapter 中的类) add_column() (data_juicer.core.data.dj_dataset.NestedDataset 方法) add_column() (data_juicer.core.data.NestedDataset 方法) add_column() (data_juicer.core.NestedDataset 方法) add_key_value_pairs() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) add_message() (data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 方法) add_message() (data_juicer.ops.mapper.ExtractEntityRelationMapper 方法) add_parameters() (data_juicer.ops.base_op.OP 方法) add_same_content_to_new_column()(在 data_juicer.core.data 模块中) add_same_content_to_new_column()(在 data_juicer.core.data.dj_dataset 模块中) add_suffix_to_filename()(在 data_juicer.utils.file_utils 模块中) add_suffixes()(在 data_juicer.format.formatter 模块中) Aggregator(data_juicer.ops 中的类) Aggregator(data_juicer.ops.base_op 中的类) alnum_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) alpha_token_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) AlphanumericFilter(data_juicer.ops.filter 中的类) AlphanumericFilter(data_juicer.ops.filter.alphanumeric_filter 中的类) analyze() (data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis 方法) analyze() (data_juicer.analysis.ColumnWiseAnalysis 方法) analyze() (data_juicer.analysis.diversity_analysis.DiversityAnalysis 方法) analyze() (data_juicer.analysis.DiversityAnalysis 方法) analyze() (data_juicer.analysis.overall_analysis.OverallAnalysis 方法) analyze() (data_juicer.analysis.OverallAnalysis 方法) analyze_resource_util_list()(data_juicer.core.Monitor 静态方法) analyze_resource_util_list()(data_juicer.core.monitor.Monitor 静态方法) analyze_single_resource_util()(data_juicer.core.Monitor 静态方法) analyze_single_resource_util()(data_juicer.core.monitor.Monitor 静态方法) analyze_small_batch() (data_juicer.core.Adapter 方法) analyze_small_batch() (data_juicer.core.adapter.Adapter 方法) Analyzer(data_juicer.core 中的类) Analyzer(data_juicer.core.analyzer 中的类) APIModel(data_juicer.utils.model_utils 中的类) aspect_ratios(data_juicer.utils.constant.StatsKeysConstant 属性) assertDatasetEqual() (data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 方法) AttentionControlEdit(data_juicer.ops.common.prompt2prompt_pipeline 中的类) AttentionControl(data_juicer.ops.common.prompt2prompt_pipeline 中的类) AttentionRefine(data_juicer.ops.common.prompt2prompt_pipeline 中的类) AttentionReplace(data_juicer.ops.common.prompt2prompt_pipeline 中的类) AttentionReweight(data_juicer.ops.common.prompt2prompt_pipeline 中的类) AttentionStore(data_juicer.ops.common.prompt2prompt_pipeline 中的类) attribute_descriptions(data_juicer.utils.constant.MetaKeys 属性) attribute_summary() (data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 方法) attribute_summary() (data_juicer.ops.aggregator.EntityAttributeAggregator 方法) attribute_support_texts(data_juicer.utils.constant.MetaKeys 属性) attributes(data_juicer.utils.constant.MetaKeys 属性) audio_duration(data_juicer.utils.constant.StatsKeysConstant 属性) audio_nmf_snr(data_juicer.utils.constant.StatsKeysConstant 属性) audio_sizes(data_juicer.utils.constant.StatsKeysConstant 属性) AudioAddGaussianNoiseMapper(data_juicer.ops.mapper 中的类) AudioAddGaussianNoiseMapper(data_juicer.ops.mapper.audio_add_gaussian_noise_mapper 中的类) AudioDurationFilter(data_juicer.ops.filter 中的类) AudioDurationFilter(data_juicer.ops.filter.audio_duration_filter 中的类) AudioFFmpegWrappedMapper(data_juicer.ops.mapper 中的类) AudioFFmpegWrappedMapper(data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper 中的类) AudioNMFSNRFilter(data_juicer.ops.filter 中的类) AudioNMFSNRFilter(data_juicer.ops.filter.audio_nmf_snr_filter 中的类) AudioSizeFilter(data_juicer.ops.filter 中的类) AudioSizeFilter(data_juicer.ops.filter.audio_size_filter 中的类) audio(data_juicer.utils.mm_utils.SpecialTokens 属性) AV_STREAM_THREAD_TYPE()(在 data_juicer.utils.mm_utils 模块中) avaliable_detectors(data_juicer.ops.mapper.video_split_by_scene_mapper.VideoSplitBySceneMapper 属性) avaliable_detectors(data_juicer.ops.mapper.VideoSplitBySceneMapper 属性) AverageLineLengthFilter(data_juicer.ops.filter 中的类) AverageLineLengthFilter(data_juicer.ops.filter.average_line_length_filter 中的类) avg_line_length(data_juicer.utils.constant.StatsKeysConstant 属性) avg_split_string_list_under_limit()(在 data_juicer.utils.common_utils 模块中) B Backend(data_juicer.ops.deduplicator.ray_basic_deduplicator 中的类) balanced_union_find() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) BaseAnnotationMapper(data_juicer.ops.mapper.annotation.annotation_mapper 中的类) BaseCompressor(data_juicer.utils.compress 中的类) BaseConversationValidator(data_juicer.core.data.data_validator 中的类) BaseFormatter(data_juicer.format.formatter 中的类) batch_meta(data_juicer.utils.constant.Fields 属性) batch_size_strategy() (data_juicer.core.Adapter 方法) batch_size_strategy() (data_juicer.core.adapter.Adapter 方法) BatchMetaKeys(data_juicer.utils.constant 中的类) bbox_tag(data_juicer.utils.constant.MetaKeys 属性) between_steps() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 方法) between_steps() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 方法) BTSUnionFind(data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator 中的类) build_input() (data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 方法) build_input() (data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 方法) build_input() (data_juicer.ops.filter.LLMDifficultyScoreFilter 方法) build_input() (data_juicer.ops.filter.LLMQualityScoreFilter 方法) build_input() (data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 方法) build_input() (data_juicer.ops.mapper.CalibrateQAMapper 方法) build_input() (data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 方法) build_input() (data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 方法) build_input() (data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 方法) build_input() (data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 方法) build_input() (data_juicer.ops.mapper.DialogIntentDetectionMapper 方法) build_input() (data_juicer.ops.mapper.DialogSentimentDetectionMapper 方法) build_input() (data_juicer.ops.mapper.DialogSentimentIntensityMapper 方法) build_input() (data_juicer.ops.mapper.DialogTopicDetectionMapper 方法) build_input() (data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 方法) build_input() (data_juicer.ops.mapper.GenerateQAFromExamplesMapper 方法) build_input() (data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 方法) build_input() (data_juicer.ops.mapper.OptimizeQAMapper 方法) build_input() (data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 方法) build_input() (data_juicer.ops.mapper.PairPreferenceMapper 方法) C CacheCompressManager(data_juicer.utils.compress 中的类) calc_minhash() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) calc_minhash() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RayBasicDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.ray_document_deduplicator.RayDocumentDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.ray_image_deduplicator.RayImageDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.ray_video_deduplicator.RayVideoDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.RayBasicDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.RayDocumentDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.RayImageDeduplicator 方法) calculate_hash() (data_juicer.ops.deduplicator.RayVideoDeduplicator 方法) calculate_np()(在 data_juicer.utils.process_utils 模块中) calculate_resized_dimensions()(在 data_juicer.utils.mm_utils 模块中) CalibrateQAMapper(data_juicer.ops.mapper 中的类) CalibrateQAMapper(data_juicer.ops.mapper.calibrate_qa_mapper 中的类) CalibrateQueryMapper(data_juicer.ops.mapper 中的类) CalibrateQueryMapper(data_juicer.ops.mapper.calibrate_query_mapper 中的类) CalibrateResponseMapper(data_juicer.ops.mapper 中的类) CalibrateResponseMapper(data_juicer.ops.mapper.calibrate_response_mapper 中的类) call_gpt_vision_api()(在 data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper 模块中) catch_map_batches_exception()(在 data_juicer.ops.base_op 模块中) catch_map_single_exception()(在 data_juicer.ops.base_op 模块中) category_to_hist()(data_juicer.analysis.measure.RelatedTTestMeasure 静态方法) char_rep_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) CharacterRepetitionFilter(data_juicer.ops.filter 中的类) CharacterRepetitionFilter(data_juicer.ops.filter.character_repetition_filter 中的类) check_ckpt() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) check_inputs() (data_juicer.ops.common.prompt2prompt_pipeline.Prompt2PromptPipeline 方法) check_model()(在 data_juicer.utils.model_utils 模块中) check_ops_to_skip() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) check_packages()(data_juicer.utils.lazy_loader.LazyLoader 类方法) CheckpointManager(data_juicer.utils.ckpt_utils 中的类) ChineseConvertMapper(data_juicer.ops.mapper 中的类) ChineseConvertMapper(data_juicer.ops.mapper.chinese_convert_mapper 中的类) clean_nltk_cache()(在 data_juicer.utils.nltk_utils 模块中) CleanCopyrightMapper(data_juicer.ops.mapper 中的类) CleanCopyrightMapper(data_juicer.ops.mapper.clean_copyright_mapper 中的类) CleanEmailMapper(data_juicer.ops.mapper 中的类) CleanEmailMapper(data_juicer.ops.mapper.clean_email_mapper 中的类) CleanHtmlMapper(data_juicer.ops.mapper 中的类) CleanHtmlMapper(data_juicer.ops.mapper.clean_html_mapper 中的类) CleanIpMapper(data_juicer.ops.mapper 中的类) CleanIpMapper(data_juicer.ops.mapper.clean_ip_mapper 中的类) CleanLinksMapper(data_juicer.ops.mapper 中的类) CleanLinksMapper(data_juicer.ops.mapper.clean_links_mapper 中的类) cleanup_cache_files() (data_juicer.core.data.dj_dataset.NestedDataset 方法) cleanup_cache_files() (data_juicer.core.data.NestedDataset 方法) cleanup_cache_files() (data_juicer.core.NestedDataset 方法) cleanup_cache_files() (data_juicer.utils.compress.CacheCompressManager 方法) cleanup_compressed_cache_files()(在 data_juicer.utils.compress 模块中) clear() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.EdgeBuffer 方法) close_video()(在 data_juicer.utils.mm_utils 模块中) CodeDataValidator(data_juicer.core.data.data_validator 中的类) collect() (data_juicer.analysis.collector.TextTokenDistCollector 方法) column_types(data_juicer.core.data.schema.Schema 属性), [1] columns(data_juicer.core.data.schema.Schema 属性), [1] ColumnWiseAnalysis(data_juicer.analysis 中的类) ColumnWiseAnalysis(data_juicer.analysis.column_wise_analysis 中的类) communication() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) compare_text_index()(在 data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper 模块中) compress() (data_juicer.utils.compress.CacheCompressManager 方法) compress() (data_juicer.utils.compress.CompressManager 方法) compress()(data_juicer.utils.compress.BaseCompressor 静态方法) compress()(data_juicer.utils.compress.Compressor 类方法) compress()(data_juicer.utils.compress.GzipCompressor 静态方法) compress()(data_juicer.utils.compress.Lz4Compressor 静态方法) compress()(data_juicer.utils.compress.ZstdCompressor 静态方法) compress()(在 data_juicer.utils.compress 模块中) CompressionOff(data_juicer.utils.compress 中的类) CompressManager(data_juicer.utils.compress 中的类) compressors(data_juicer.utils.compress.Compressor 属性) Compressor(data_juicer.utils.compress 中的类) compute() (data_juicer.analysis.diversity_analysis.DiversityAnalysis 方法) compute() (data_juicer.analysis.DiversityAnalysis 方法) compute_flow() (data_juicer.ops.filter.video_motion_score_filter.VideoMotionScoreFilter 方法) compute_flow() (data_juicer.ops.filter.video_motion_score_raft_filter.VideoMotionScoreRaftFilter 方法) compute_flow() (data_juicer.ops.filter.VideoMotionScoreFilter 方法) compute_flow() (data_juicer.ops.filter.VideoMotionScoreRaftFilter 方法) compute_hash() (data_juicer.ops.base_op.Deduplicator 方法) compute_hash() (data_juicer.ops.Deduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.document_deduplicator.DocumentDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.document_minhash_deduplicator.DocumentMinhashDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.document_simhash_deduplicator.DocumentSimhashDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.DocumentDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.DocumentMinhashDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.DocumentSimhashDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.image_deduplicator.ImageDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.ImageDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.video_deduplicator.VideoDeduplicator 方法) compute_hash() (data_juicer.ops.deduplicator.VideoDeduplicator 方法) compute_nmf_snr()(在 data_juicer.ops.filter.audio_nmf_snr_filter 模块中) compute_stats_batched() (data_juicer.ops.base_op.Filter 方法) compute_stats_batched() (data_juicer.ops.Filter 方法) compute_stats_batched() (data_juicer.ops.filter.alphanumeric_filter.AlphanumericFilter 方法) compute_stats_batched() (data_juicer.ops.filter.AlphanumericFilter 方法) compute_stats_batched() (data_juicer.ops.filter.average_line_length_filter.AverageLineLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.AverageLineLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.character_repetition_filter.CharacterRepetitionFilter 方法) compute_stats_batched() (data_juicer.ops.filter.CharacterRepetitionFilter 方法) compute_stats_batched() (data_juicer.ops.filter.flagged_words_filter.FlaggedWordFilter 方法) compute_stats_batched() (data_juicer.ops.filter.FlaggedWordFilter 方法) compute_stats_batched() (data_juicer.ops.filter.image_aspect_ratio_filter.ImageAspectRatioFilter 方法) compute_stats_batched() (data_juicer.ops.filter.ImageAspectRatioFilter 方法) compute_stats_batched() (data_juicer.ops.filter.maximum_line_length_filter.MaximumLineLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.MaximumLineLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.perplexity_filter.PerplexityFilter 方法) compute_stats_batched() (data_juicer.ops.filter.PerplexityFilter 方法) compute_stats_batched() (data_juicer.ops.filter.special_characters_filter.SpecialCharactersFilter 方法) compute_stats_batched() (data_juicer.ops.filter.SpecialCharactersFilter 方法) compute_stats_batched() (data_juicer.ops.filter.text_length_filter.TextLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.TextLengthFilter 方法) compute_stats_batched() (data_juicer.ops.filter.word_repetition_filter.WordRepetitionFilter 方法) compute_stats_batched() (data_juicer.ops.filter.WordRepetitionFilter 方法) compute_stats_batched() (data_juicer.ops.filter.words_num_filter.WordsNumFilter 方法) compute_stats_batched() (data_juicer.ops.filter.WordsNumFilter 方法) compute_stats_batched() (data_juicer.ops.op_fusion.FusedFilter 方法) compute_stats_single() (data_juicer.ops.base_op.Filter 方法) compute_stats_single() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RayBasicDeduplicator 方法) compute_stats_single() (data_juicer.ops.deduplicator.RayBasicDeduplicator 方法) compute_stats_single() (data_juicer.ops.Filter 方法) compute_stats_single() (data_juicer.ops.filter.audio_duration_filter.AudioDurationFilter 方法) compute_stats_single() (data_juicer.ops.filter.audio_nmf_snr_filter.AudioNMFSNRFilter 方法) compute_stats_single() (data_juicer.ops.filter.audio_size_filter.AudioSizeFilter 方法) compute_stats_single() (data_juicer.ops.filter.AudioDurationFilter 方法) compute_stats_single() (data_juicer.ops.filter.AudioNMFSNRFilter 方法) compute_stats_single() (data_juicer.ops.filter.AudioSizeFilter 方法) compute_stats_single() (data_juicer.ops.filter.GeneralFieldFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_aesthetics_filter.ImageAestheticsFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_face_count_filter.ImageFaceCountFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_face_ratio_filter.ImageFaceRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_nsfw_filter.ImageNSFWFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_pair_similarity_filter.ImagePairSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_shape_filter.ImageShapeFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_size_filter.ImageSizeFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_text_matching_filter.ImageTextMatchingFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_text_similarity_filter.ImageTextSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.image_watermark_filter.ImageWatermarkFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageAestheticsFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageFaceCountFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageFaceRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageNSFWFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImagePairSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageShapeFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageSizeFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageTextMatchingFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageTextSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.ImageWatermarkFilter 方法) compute_stats_single() (data_juicer.ops.filter.language_id_score_filter.LanguageIDScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.LanguageIDScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.LLMDifficultyScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.LLMQualityScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.phrase_grounding_recall_filter.PhraseGroundingRecallFilter 方法) compute_stats_single() (data_juicer.ops.filter.PhraseGroundingRecallFilter 方法) compute_stats_single() (data_juicer.ops.filter.specified_field_filter.SpecifiedFieldFilter 方法) compute_stats_single() (data_juicer.ops.filter.specified_numeric_field_filter.SpecifiedNumericFieldFilter 方法) compute_stats_single() (data_juicer.ops.filter.SpecifiedFieldFilter 方法) compute_stats_single() (data_juicer.ops.filter.SpecifiedNumericFieldFilter 方法) compute_stats_single() (data_juicer.ops.filter.stopwords_filter.StopWordsFilter 方法) compute_stats_single() (data_juicer.ops.filter.StopWordsFilter 方法) compute_stats_single() (data_juicer.ops.filter.suffix_filter.SuffixFilter 方法) compute_stats_single() (data_juicer.ops.filter.SuffixFilter 方法) compute_stats_single() (data_juicer.ops.filter.text_action_filter.TextActionFilter 方法) compute_stats_single() (data_juicer.ops.filter.text_entity_dependency_filter.TextEntityDependencyFilter 方法) compute_stats_single() (data_juicer.ops.filter.text_pair_similarity_filter.TextPairSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.TextActionFilter 方法) compute_stats_single() (data_juicer.ops.filter.TextEntityDependencyFilter 方法) compute_stats_single() (data_juicer.ops.filter.TextPairSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.token_num_filter.TokenNumFilter 方法) compute_stats_single() (data_juicer.ops.filter.TokenNumFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_aesthetics_filter.VideoAestheticsFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_aspect_ratio_filter.VideoAspectRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_duration_filter.VideoDurationFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_frames_text_similarity_filter.VideoFramesTextSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_motion_score_filter.VideoMotionScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_nsfw_filter.VideoNSFWFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_ocr_area_ratio_filter.VideoOcrAreaRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_resolution_filter.VideoResolutionFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_tagging_from_frames_filter.VideoTaggingFromFramesFilter 方法) compute_stats_single() (data_juicer.ops.filter.video_watermark_filter.VideoWatermarkFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoAestheticsFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoAspectRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoDurationFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoFramesTextSimilarityFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoMotionScoreFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoNSFWFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoOcrAreaRatioFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoResolutionFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoTaggingFromFramesFilter 方法) compute_stats_single() (data_juicer.ops.filter.VideoWatermarkFilter 方法) config_backup()(在 data_juicer.config.config 模块中) CONFIG_VALIDATION_RULES(data_juicer.core.data.config_validator.ConfigValidator 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.DefaultArxivDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.DefaultCommonCrawlDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.DefaultHuggingfaceDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.DefaultLocalDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.DefaultWikiDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.RayHuggingfaceDataLoadStrategy 属性) CONFIG_VALIDATION_RULES(data_juicer.core.data.load_strategy.RayLocalJsonDataLoadStrategy 属性) ConfigValidationError ConfigValidator(data_juicer.core.data.config_validator 中的类) context(data_juicer.utils.constant.Fields 属性) convert_arrow_to_python()(在 data_juicer.ops.base_op 模块中) convert_dict_list_to_list_dict()(在 data_juicer.ops.base_op 模块中) convert_list_dict_to_dict_list()(在 data_juicer.ops.base_op 模块中) convert_to_absolute_paths()(在 data_juicer.core.data.ray_dataset 模块中) copy_data()(在 data_juicer.utils.file_utils 模块中) create_controller()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) create_directory_if_not_exists()(在 data_juicer.utils.file_utils 模块中) create_executor()(data_juicer.core.executor.ExecutorFactory 静态方法) create_executor()(data_juicer.core.executor.factory.ExecutorFactory 静态方法) create_executor()(data_juicer.core.ExecutorFactory 静态方法) create_physical_resource_alias()(在 data_juicer.utils.nltk_utils 模块中) create_replacer()(在 data_juicer.ops.mapper.video_split_by_duration_mapper 模块中) create_replacer()(在 data_juicer.ops.mapper.video_split_by_key_frame_mapper 模块中) CrossEntropyMeasure(data_juicer.analysis.measure 中的类) CsvFormatter(data_juicer.format 中的类) CsvFormatter(data_juicer.format.csv_formatter 中的类) cuda_device_count()(在 data_juicer 模块中) cut_video_by_seconds()(在 data_juicer.utils.mm_utils 模块中) D data_juicer module data_juicer.analysis module data_juicer.analysis.collector module data_juicer.analysis.column_wise_analysis module data_juicer.analysis.diversity_analysis module data_juicer.analysis.draw module data_juicer.analysis.measure module data_juicer.analysis.overall_analysis module data_juicer.config module data_juicer.config.config module data_juicer.core module data_juicer.core.adapter module data_juicer.core.analyzer module data_juicer.core.data module data_juicer.core.data.config_validator module data_juicer.core.data.data_validator module data_juicer.core.data.dataset_builder module data_juicer.core.data.dj_dataset module data_juicer.core.data.load_strategy module data_juicer.core.data.ray_dataset module data_juicer.core.data.schema module data_juicer.core.executor module data_juicer.core.executor.base module data_juicer.core.executor.default_executor module data_juicer.core.executor.factory module data_juicer.core.executor.ray_executor module data_juicer.core.exporter module data_juicer.core.monitor module data_juicer.core.tracer module data_juicer.download module data_juicer.download.commoncrawl module data_juicer.download.downloader module data_juicer.download.wikipedia module data_juicer.format module data_juicer.format.csv_formatter module data_juicer.format.empty_formatter module data_juicer.format.formatter module data_juicer.format.json_formatter module data_juicer.format.load module data_juicer.format.parquet_formatter module data_juicer.format.text_formatter module data_juicer.format.tsv_formatter module data_juicer.ops module data_juicer.ops.aggregator module data_juicer.ops.aggregator.entity_attribute_aggregator module data_juicer.ops.aggregator.meta_tags_aggregator module data_juicer.ops.aggregator.most_relevant_entities_aggregator module data_juicer.ops.aggregator.nested_aggregator module data_juicer.ops.base_op module data_juicer.ops.common module data_juicer.ops.common.helper_func module data_juicer.ops.common.prompt2prompt_pipeline module data_juicer.ops.common.special_characters module data_juicer.ops.deduplicator module data_juicer.ops.deduplicator.document_deduplicator module data_juicer.ops.deduplicator.document_minhash_deduplicator module data_juicer.ops.deduplicator.document_simhash_deduplicator module data_juicer.ops.deduplicator.image_deduplicator module data_juicer.ops.deduplicator.ray_basic_deduplicator module data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator module data_juicer.ops.deduplicator.ray_document_deduplicator module data_juicer.ops.deduplicator.ray_image_deduplicator module data_juicer.ops.deduplicator.ray_video_deduplicator module data_juicer.ops.deduplicator.video_deduplicator module data_juicer.ops.filter module data_juicer.ops.filter.alphanumeric_filter module data_juicer.ops.filter.audio_duration_filter module data_juicer.ops.filter.audio_nmf_snr_filter module data_juicer.ops.filter.audio_size_filter module data_juicer.ops.filter.average_line_length_filter module data_juicer.ops.filter.character_repetition_filter module data_juicer.ops.filter.flagged_words_filter module data_juicer.ops.filter.image_aesthetics_filter module data_juicer.ops.filter.image_aspect_ratio_filter module data_juicer.ops.filter.image_face_count_filter module data_juicer.ops.filter.image_face_ratio_filter module data_juicer.ops.filter.image_nsfw_filter module data_juicer.ops.filter.image_pair_similarity_filter module data_juicer.ops.filter.image_shape_filter module data_juicer.ops.filter.image_size_filter module data_juicer.ops.filter.image_text_matching_filter module data_juicer.ops.filter.image_text_similarity_filter module data_juicer.ops.filter.image_watermark_filter module data_juicer.ops.filter.language_id_score_filter module data_juicer.ops.filter.llm_difficulty_score_filter module data_juicer.ops.filter.llm_quality_score_filter module data_juicer.ops.filter.maximum_line_length_filter module data_juicer.ops.filter.perplexity_filter module data_juicer.ops.filter.phrase_grounding_recall_filter module data_juicer.ops.filter.special_characters_filter module data_juicer.ops.filter.specified_field_filter module data_juicer.ops.filter.specified_numeric_field_filter module data_juicer.ops.filter.stopwords_filter module data_juicer.ops.filter.suffix_filter module data_juicer.ops.filter.text_action_filter module data_juicer.ops.filter.text_entity_dependency_filter module data_juicer.ops.filter.text_length_filter module data_juicer.ops.filter.text_pair_similarity_filter module data_juicer.ops.filter.token_num_filter module data_juicer.ops.filter.video_aesthetics_filter module data_juicer.ops.filter.video_aspect_ratio_filter module data_juicer.ops.filter.video_duration_filter module data_juicer.ops.filter.video_frames_text_similarity_filter module data_juicer.ops.filter.video_motion_score_filter module data_juicer.ops.filter.video_motion_score_raft_filter module data_juicer.ops.filter.video_nsfw_filter module data_juicer.ops.filter.video_ocr_area_ratio_filter module data_juicer.ops.filter.video_resolution_filter module data_juicer.ops.filter.video_tagging_from_frames_filter module data_juicer.ops.filter.video_watermark_filter module data_juicer.ops.filter.word_repetition_filter module data_juicer.ops.filter.words_num_filter module data_juicer.ops.grouper module data_juicer.ops.grouper.key_value_grouper module data_juicer.ops.grouper.naive_grouper module data_juicer.ops.grouper.naive_reverse_grouper module data_juicer.ops.load module data_juicer.ops.mapper module data_juicer.ops.mapper.annotation module data_juicer.ops.mapper.annotation.annotation_mapper module data_juicer.ops.mapper.annotation.human_preference_annotation_mapper module data_juicer.ops.mapper.audio_add_gaussian_noise_mapper module data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper module data_juicer.ops.mapper.calibrate_qa_mapper module data_juicer.ops.mapper.calibrate_query_mapper module data_juicer.ops.mapper.calibrate_response_mapper module data_juicer.ops.mapper.chinese_convert_mapper module data_juicer.ops.mapper.clean_copyright_mapper module data_juicer.ops.mapper.clean_email_mapper module data_juicer.ops.mapper.clean_html_mapper module data_juicer.ops.mapper.clean_ip_mapper module data_juicer.ops.mapper.clean_links_mapper module data_juicer.ops.mapper.dialog_intent_detection_mapper module data_juicer.ops.mapper.dialog_sentiment_detection_mapper module data_juicer.ops.mapper.dialog_sentiment_intensity_mapper module data_juicer.ops.mapper.dialog_topic_detection_mapper module data_juicer.ops.mapper.expand_macro_mapper module data_juicer.ops.mapper.extract_entity_attribute_mapper module data_juicer.ops.mapper.extract_entity_relation_mapper module data_juicer.ops.mapper.extract_event_mapper module data_juicer.ops.mapper.extract_keyword_mapper module data_juicer.ops.mapper.extract_nickname_mapper module data_juicer.ops.mapper.extract_support_text_mapper module data_juicer.ops.mapper.extract_tables_from_html_mapper module data_juicer.ops.mapper.fix_unicode_mapper module data_juicer.ops.mapper.generate_qa_from_examples_mapper module data_juicer.ops.mapper.generate_qa_from_text_mapper module data_juicer.ops.mapper.image_blur_mapper module data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper module data_juicer.ops.mapper.image_captioning_mapper module data_juicer.ops.mapper.image_diffusion_mapper module data_juicer.ops.mapper.image_face_blur_mapper module data_juicer.ops.mapper.image_remove_background_mapper module data_juicer.ops.mapper.image_segment_mapper module data_juicer.ops.mapper.image_tagging_mapper module data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper module data_juicer.ops.mapper.imgdiff_difference_caption_generator_mapper module data_juicer.ops.mapper.mllm_mapper module data_juicer.ops.mapper.nlpaug_en_mapper module data_juicer.ops.mapper.nlpcda_zh_mapper module data_juicer.ops.mapper.optimize_qa_mapper module data_juicer.ops.mapper.optimize_query_mapper module data_juicer.ops.mapper.optimize_response_mapper module data_juicer.ops.mapper.pair_preference_mapper module data_juicer.ops.mapper.punctuation_normalization_mapper module data_juicer.ops.mapper.python_file_mapper module data_juicer.ops.mapper.python_lambda_mapper module data_juicer.ops.mapper.query_intent_detection_mapper module data_juicer.ops.mapper.query_sentiment_detection_mapper module data_juicer.ops.mapper.query_topic_detection_mapper module data_juicer.ops.mapper.relation_identity_mapper module data_juicer.ops.mapper.remove_bibliography_mapper module data_juicer.ops.mapper.remove_comments_mapper module data_juicer.ops.mapper.remove_header_mapper module data_juicer.ops.mapper.remove_long_words_mapper module data_juicer.ops.mapper.remove_non_chinese_character_mapper module data_juicer.ops.mapper.remove_repeat_sentences_mapper module data_juicer.ops.mapper.remove_specific_chars_mapper module data_juicer.ops.mapper.remove_table_text_mapper module data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper module data_juicer.ops.mapper.replace_content_mapper module data_juicer.ops.mapper.sdxl_prompt2prompt_mapper module data_juicer.ops.mapper.sentence_augmentation_mapper module data_juicer.ops.mapper.sentence_split_mapper module data_juicer.ops.mapper.text_chunk_mapper module data_juicer.ops.mapper.video_captioning_from_audio_mapper module data_juicer.ops.mapper.video_captioning_from_frames_mapper module data_juicer.ops.mapper.video_captioning_from_summarizer_mapper module data_juicer.ops.mapper.video_captioning_from_video_mapper module data_juicer.ops.mapper.video_extract_frames_mapper module data_juicer.ops.mapper.video_face_blur_mapper module data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper module data_juicer.ops.mapper.video_remove_watermark_mapper module data_juicer.ops.mapper.video_resize_aspect_ratio_mapper module data_juicer.ops.mapper.video_resize_resolution_mapper module data_juicer.ops.mapper.video_split_by_duration_mapper module data_juicer.ops.mapper.video_split_by_key_frame_mapper module data_juicer.ops.mapper.video_split_by_scene_mapper module data_juicer.ops.mapper.video_tagging_from_audio_mapper module data_juicer.ops.mapper.video_tagging_from_frames_mapper module data_juicer.ops.mapper.whitespace_normalization_mapper module data_juicer.ops.mixins module data_juicer.ops.op_fusion module data_juicer.ops.selector module data_juicer.ops.selector.frequency_specified_field_selector module data_juicer.ops.selector.random_selector module data_juicer.ops.selector.range_specified_field_selector module data_juicer.ops.selector.tags_specified_field_selector module data_juicer.ops.selector.topk_specified_field_selector module data_juicer.tools module data_juicer.utils module data_juicer.utils.asset_utils module data_juicer.utils.availability_utils module data_juicer.utils.cache_utils module data_juicer.utils.ckpt_utils module data_juicer.utils.common_utils module data_juicer.utils.compress module data_juicer.utils.constant module data_juicer.utils.file_utils module data_juicer.utils.fingerprint_utils module data_juicer.utils.lazy_loader module data_juicer.utils.logger_utils module data_juicer.utils.mm_utils module data_juicer.utils.model_utils module data_juicer.utils.nltk_utils module data_juicer.utils.process_utils module data_juicer.utils.registry module data_juicer.utils.resource_utils module data_juicer.utils.sample module data_juicer.utils.unittest_utils module data_source(data_juicer.core.data.load_strategy.StrategyKey 属性) data_type(data_juicer.core.data.load_strategy.StrategyKey 属性) DataJuicerFormatValidator(data_juicer.core.data.data_validator 中的类) DataJuicerTestCaseBase(data_juicer.utils.unittest_utils 中的类) DataLoadStrategyRegistry(data_juicer.core.data.load_strategy 中的类) DataLoadStrategy(data_juicer.core.data.load_strategy 中的类) dataset_cache_control()(在 data_juicer.utils.cache_utils 模块中) DatasetBuilder(data_juicer.core.data.dataset_builder 中的类) DatasetCacheControl(data_juicer.utils.cache_utils 中的类) DataValidationError DataValidatorRegistry(data_juicer.core.data.data_validator 中的类) DataValidator(data_juicer.core.data.data_validator 中的类) decompress() (data_juicer.utils.compress.CacheCompressManager 方法) decompress() (data_juicer.utils.compress.CompressManager 方法) decompress()(在 data_juicer.utils.compress 模块中) Deduplicator(data_juicer.ops 中的类) Deduplicator(data_juicer.ops.base_op 中的类) DedupSet(data_juicer.ops.deduplicator.ray_basic_deduplicator 中的类) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_ANALYSIS_PATTERN(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_ANALYSIS_TEMPLATE(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_ATTR_PATTERN_TEMPLATE(data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 属性) DEFAULT_ATTR_PATTERN_TEMPLATE(data_juicer.ops.mapper.ExtractEntityAttributeMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_CANDIDATES_TEMPLATE(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_COMPLETION_DELIMITER(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_COMPLETION_DELIMITER(data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 属性) DEFAULT_COMPLETION_DELIMITER(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_COMPLETION_DELIMITER(data_juicer.ops.mapper.ExtractKeywordMapper 属性) DEFAULT_CONTINUE_PROMPT(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_CONTINUE_PROMPT(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_DEMON_PATTERN(data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 属性) DEFAULT_DEMON_PATTERN(data_juicer.ops.mapper.ExtractEntityAttributeMapper 属性) DEFAULT_ENTITY_PATTERN(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_ENTITY_PATTERN(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_ENTITY_TYPES(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_ENTITY_TYPES(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_EXAMPLE_PROMPT(data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 属性) DEFAULT_EXAMPLE_PROMPT(data_juicer.ops.aggregator.EntityAttributeAggregator 属性) DEFAULT_EXAMPLE_TEMPLATE(data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_EXAMPLE_TEMPLATE(data_juicer.ops.mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_FIELD_TEMPLATE(data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 属性) DEFAULT_FIELD_TEMPLATE(data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 属性) DEFAULT_FIELD_TEMPLATE(data_juicer.ops.filter.LLMDifficultyScoreFilter 属性) DEFAULT_FIELD_TEMPLATE(data_juicer.ops.filter.LLMQualityScoreFilter 属性) DEFAULT_IF_LOOP_PROMPT(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_IF_LOOP_PROMPT(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.EntityAttributeAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.MetaTagsAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.aggregator.NestedAggregator 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.filter.LLMDifficultyScoreFilter 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.filter.LLMQualityScoreFilter 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.CalibrateQAMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.extract_support_text_mapper.ExtractSupportTextMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.ExtractEntityAttributeMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.ExtractEventMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.ExtractNicknameMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.ExtractSupportTextMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.OptimizeQAMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.PairPreferenceMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 属性) DEFAULT_INPUT_TEMPLATE(data_juicer.ops.mapper.RelationIdentityMapper 属性) DEFAULT_INTENSITY_PATTERN(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_INTENSITY_PATTERN(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_INTENSITY_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_INTENSITY_TEMPLATE(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_LABEL_CONFIG(data_juicer.ops.mapper.annotation.human_preference_annotation_mapper.HumanPreferenceAnnotationMapper 属性) DEFAULT_LABEL_CONFIG(data_juicer.ops.mapper.HumanPreferenceAnnotationMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_LABELS_PATTERN(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_LABELS_TEMPLATE(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_OUTPUT_PATTERN_TEMPLATE(data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 属性) DEFAULT_OUTPUT_PATTERN_TEMPLATE(data_juicer.ops.aggregator.EntityAttributeAggregator 属性) DEFAULT_OUTPUT_PATTERN_TEMPLATE(data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 属性) DEFAULT_OUTPUT_PATTERN_TEMPLATE(data_juicer.ops.mapper.RelationIdentityMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.aggregator.MetaTagsAggregator 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.CalibrateQAMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.ExtractEventMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.ExtractKeywordMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.ExtractNicknameMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.OptimizeQAMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 属性) DEFAULT_OUTPUT_PATTERN(data_juicer.ops.mapper.PairPreferenceMapper 属性) DEFAULT_PROMPT_TEMPLATE(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_PROMPT_TEMPLATE(data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 属性) DEFAULT_PROMPT_TEMPLATE(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_PROMPT_TEMPLATE(data_juicer.ops.mapper.ExtractKeywordMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.CalibrateQAMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 属性) DEFAULT_QA_PAIR_TEMPLATE(data_juicer.ops.mapper.OptimizeQAMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_QUERY_TEMPLATE(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_RECORD_DELIMITER(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_RECORD_DELIMITER(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_REFERENCE_TEMPLATE(data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 属性) DEFAULT_REFERENCE_TEMPLATE(data_juicer.ops.mapper.CalibrateQAMapper 属性) DEFAULT_RELATION_PATTERN(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_RELATION_PATTERN(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_RESPONSE_TEMPLATE(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_SUB_DOC_TEMPLATE(data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 属性) DEFAULT_SUB_DOC_TEMPLATE(data_juicer.ops.aggregator.NestedAggregator 属性) DEFAULT_SYSTEM_PROMPT_TEMPLATE(data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 属性) DEFAULT_SYSTEM_PROMPT_TEMPLATE(data_juicer.ops.mapper.ExtractEntityAttributeMapper 属性) DEFAULT_SYSTEM_PROMPT_TEMPLATE(data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 属性) DEFAULT_SYSTEM_PROMPT_TEMPLATE(data_juicer.ops.mapper.RelationIdentityMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.aggregator.MetaTagsAggregator 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.aggregator.NestedAggregator 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.filter.LLMDifficultyScoreFilter 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.filter.LLMQualityScoreFilter 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.calibrate_query_mapper.CalibrateQueryMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.calibrate_response_mapper.CalibrateResponseMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.CalibrateQAMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.CalibrateQueryMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.CalibrateResponseMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.DialogIntentDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.DialogSentimentDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.DialogSentimentIntensityMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.DialogTopicDetectionMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.extract_support_text_mapper.ExtractSupportTextMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.ExtractEventMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.ExtractNicknameMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.ExtractSupportTextMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.GenerateQAFromExamplesMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.optimize_query_mapper.OptimizeQueryMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.optimize_response_mapper.OptimizeResponseMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.OptimizeQAMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.OptimizeQueryMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.OptimizeResponseMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 属性) DEFAULT_SYSTEM_PROMPT(data_juicer.ops.mapper.PairPreferenceMapper 属性) DEFAULT_SYSTEM_TEMPLATE(data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 属性) DEFAULT_SYSTEM_TEMPLATE(data_juicer.ops.aggregator.EntityAttributeAggregator 属性) DEFAULT_SYSTEM_TEMPLATE(data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_SYSTEM_TEMPLATE(data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 属性) DEFAULT_TAG_TEMPLATE(data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 属性) DEFAULT_TAG_TEMPLATE(data_juicer.ops.aggregator.MetaTagsAggregator 属性) DEFAULT_TARGET_TAG_TEMPLATE(data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 属性) DEFAULT_TARGET_TAG_TEMPLATE(data_juicer.ops.aggregator.MetaTagsAggregator 属性) DEFAULT_TUPLE_DELIMITER(data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 属性) DEFAULT_TUPLE_DELIMITER(data_juicer.ops.mapper.ExtractEntityRelationMapper 属性) DefaultArxivDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultCommonCrawlDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultExecutor(data_juicer.core 中的类) DefaultExecutor(data_juicer.core.executor 中的类) DefaultExecutor(data_juicer.core.executor.default_executor 中的类) DefaultHuggingfaceDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultLocalDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultModelScopeDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) DefaultWikiDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) detect_faces()(在 data_juicer.utils.mm_utils 模块中) dialog_intent_labels_analysis(data_juicer.utils.constant.MetaKeys 属性) dialog_intent_labels(data_juicer.utils.constant.MetaKeys 属性) dialog_sentiment_intensity_analysis(data_juicer.utils.constant.MetaKeys 属性) dialog_sentiment_intensity(data_juicer.utils.constant.MetaKeys 属性) dialog_sentiment_labels_analysis(data_juicer.utils.constant.MetaKeys 属性) dialog_sentiment_labels(data_juicer.utils.constant.MetaKeys 属性) dialog_topic_labels_analysis(data_juicer.utils.constant.MetaKeys 属性) dialog_topic_labels(data_juicer.utils.constant.MetaKeys 属性) DialogIntentDetectionMapper(data_juicer.ops.mapper 中的类) DialogIntentDetectionMapper(data_juicer.ops.mapper.dialog_intent_detection_mapper 中的类) DialogSentimentDetectionMapper(data_juicer.ops.mapper 中的类) DialogSentimentDetectionMapper(data_juicer.ops.mapper.dialog_sentiment_detection_mapper 中的类) DialogSentimentIntensityMapper(data_juicer.ops.mapper 中的类) DialogSentimentIntensityMapper(data_juicer.ops.mapper.dialog_sentiment_intensity_mapper 中的类) DialogTopicDetectionMapper(data_juicer.ops.mapper 中的类) DialogTopicDetectionMapper(data_juicer.ops.mapper.dialog_topic_detection_mapper 中的类) dict_to_hash()(在 data_juicer.utils.common_utils 模块中) Difference_Area_Generator_Mapper(data_juicer.ops.mapper 中的类) Difference_Area_Generator_Mapper(data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper 中的类) Difference_Caption_Generator_Mapper(data_juicer.ops.mapper.imgdiff_difference_caption_generator_mapper 中的类) dispatch(data_juicer.utils.fingerprint_utils.Hasher 属性) display_config()(在 data_juicer.config.config 模块中) distribute_edge() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) DiversityAnalysis(data_juicer.analysis 中的类) DiversityAnalysis(data_juicer.analysis.diversity_analysis 中的类) dj_configs(data_juicer.utils.constant.JobRequiredKeys 属性) DJDataset(data_juicer.core.data 中的类) DJDataset(data_juicer.core.data.dj_dataset 中的类) DocumentDeduplicator(data_juicer.ops.deduplicator 中的类) DocumentDeduplicator(data_juicer.ops.deduplicator.document_deduplicator 中的类) DocumentDownloader(data_juicer.download.downloader 中的类) DocumentExtractor(data_juicer.download.downloader 中的类) DocumentIterator(data_juicer.download.downloader 中的类) DocumentMinhashDeduplicator(data_juicer.ops.deduplicator 中的类) DocumentMinhashDeduplicator(data_juicer.ops.deduplicator.document_minhash_deduplicator 中的类) DocumentSimhashDeduplicator(data_juicer.ops.deduplicator 中的类) DocumentSimhashDeduplicator(data_juicer.ops.deduplicator.document_simhash_deduplicator 中的类) download() (data_juicer.download.downloader.DocumentDownloader 方法) download() (data_juicer.download.wikipedia.WikipediaDownloader 方法) download_and_extract()(在 data_juicer.download.downloader 模块中) download_wikipedia()(在 data_juicer.download.wikipedia 模块中) draw_box() (data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis 方法) draw_box() (data_juicer.analysis.ColumnWiseAnalysis 方法) draw_heatmap()(在 data_juicer.analysis.draw 模块中) draw_hist() (data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis 方法) draw_hist() (data_juicer.analysis.ColumnWiseAnalysis 方法) draw_resource_util_graph()(data_juicer.core.Monitor 静态方法) draw_resource_util_graph()(data_juicer.core.monitor.Monitor 静态方法) draw_wordcloud() (data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis 方法) draw_wordcloud() (data_juicer.analysis.ColumnWiseAnalysis 方法) dup_idx() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) DYNAMIC_FIELDS(data_juicer.core.Monitor 属性) DYNAMIC_FIELDS(data_juicer.core.monitor.Monitor 属性) E edge_redistribution() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) EdgeBuffer(data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator 中的类) EMPTY_HASH_VALUE(data_juicer.ops.deduplicator.ray_basic_deduplicator.RayBasicDeduplicator 属性) EMPTY_HASH_VALUE(data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 属性) EMPTY_HASH_VALUE(data_juicer.ops.deduplicator.RayBasicDeduplicator 属性) EMPTY_HASH_VALUE(data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 属性) empty_history() (data_juicer.ops.base_op.OP 方法) EmptyControl(data_juicer.ops.common.prompt2prompt_pipeline 中的类) EmptyFormatter(data_juicer.format 中的类) EmptyFormatter(data_juicer.format.empty_formatter 中的类) ensure_nltk_resource()(在 data_juicer.utils.nltk_utils 模块中) entity_attribute(data_juicer.utils.constant.BatchMetaKeys 属性) entity_description(data_juicer.utils.constant.MetaKeys 属性) entity_name(data_juicer.utils.constant.MetaKeys 属性) entity_type(data_juicer.utils.constant.MetaKeys 属性) EntityAttributeAggregator(data_juicer.ops.aggregator 中的类) EntityAttributeAggregator(data_juicer.ops.aggregator.entity_attribute_aggregator 中的类) entity(data_juicer.utils.constant.MetaKeys 属性) EntropyMeasure(data_juicer.analysis.measure 中的类) eoc(data_juicer.utils.mm_utils.SpecialTokens 属性) event_description(data_juicer.utils.constant.MetaKeys 属性) EventDrivenMixin(data_juicer.ops.mixins 中的类) execute_and_probe()(data_juicer.core.Adapter 静态方法) execute_and_probe()(data_juicer.core.adapter.Adapter 静态方法) executor_type(data_juicer.core.data.load_strategy.StrategyKey 属性) ExecutorBase(data_juicer.core 中的类) ExecutorBase(data_juicer.core.executor 中的类) ExecutorBase(data_juicer.core.executor.base 中的类) ExecutorFactory(data_juicer.core 中的类) ExecutorFactory(data_juicer.core.executor 中的类) ExecutorFactory(data_juicer.core.executor.factory 中的类) expand_outdir_and_mkdir()(在 data_juicer.utils.file_utils 模块中) ExpandMacroMapper(data_juicer.ops.mapper.expand_macro_mapper 中的类) export() (data_juicer.core.Exporter 方法) export() (data_juicer.core.exporter.Exporter 方法) export_compute_stats() (data_juicer.core.Exporter 方法) export_compute_stats() (data_juicer.core.exporter.Exporter 方法) export_config()(在 data_juicer.config 模块中) export_config()(在 data_juicer.config.config 模块中) Exporter(data_juicer.core 中的类) Exporter(data_juicer.core.exporter 中的类) extra_configs(data_juicer.utils.constant.JobRequiredKeys 属性) extract() (data_juicer.download.downloader.DocumentExtractor 方法) extract() (data_juicer.download.wikipedia.WikipediaExtractor 方法) extract()(data_juicer.utils.compress.Extractor 类方法) extract_audio_from_video()(在 data_juicer.utils.mm_utils 模块中) extract_key_frames()(在 data_juicer.utils.mm_utils 模块中) extract_key_frames_by_seconds()(在 data_juicer.utils.mm_utils 模块中) extract_txt_from_docx()(在 data_juicer.format.text_formatter 模块中) extract_txt_from_pdf()(在 data_juicer.format.text_formatter 模块中) extract_video_frames_uniformly()(在 data_juicer.utils.mm_utils 模块中) extract_video_frames_uniformly_by_seconds()(在 data_juicer.utils.mm_utils 模块中) ExtractEntityAttributeMapper(data_juicer.ops.mapper 中的类) ExtractEntityAttributeMapper(data_juicer.ops.mapper.extract_entity_attribute_mapper 中的类) ExtractEntityRelationMapper(data_juicer.ops.mapper 中的类) ExtractEntityRelationMapper(data_juicer.ops.mapper.extract_entity_relation_mapper 中的类) ExtractEventMapper(data_juicer.ops.mapper 中的类) ExtractEventMapper(data_juicer.ops.mapper.extract_event_mapper 中的类) ExtractKeywordMapper(data_juicer.ops.mapper 中的类) ExtractKeywordMapper(data_juicer.ops.mapper.extract_keyword_mapper 中的类) ExtractNicknameMapper(data_juicer.ops.mapper 中的类) ExtractNicknameMapper(data_juicer.ops.mapper.extract_nickname_mapper 中的类) Extractor(data_juicer.utils.compress 中的类) ExtractSupportTextMapper(data_juicer.ops.mapper 中的类) ExtractSupportTextMapper(data_juicer.ops.mapper.extract_support_text_mapper 中的类) ExtractTablesFromHtmlMapper(data_juicer.ops.mapper 中的类) ExtractTablesFromHtmlMapper(data_juicer.ops.mapper.extract_tables_from_html_mapper 中的类) F face_counts(data_juicer.utils.constant.StatsKeysConstant 属性) face_detections(data_juicer.utils.constant.StatsKeysConstant 属性) face_ratios(data_juicer.utils.constant.StatsKeysConstant 属性) Fields(data_juicer.utils.constant 中的类) FileLock(data_juicer.utils.compress 中的类) fileno() (data_juicer.utils.logger_utils.StreamToLoguru 方法) filter() (data_juicer.core.data.dj_dataset.NestedDataset 方法) filter() (data_juicer.core.data.NestedDataset 方法) filter() (data_juicer.core.NestedDataset 方法) filter_batch()(在 data_juicer.core.data.ray_dataset 模块中) filter_with_union_find() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) filter_with_union_find() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) Filter(data_juicer.ops 中的类) Filter(data_juicer.ops.base_op 中的类) find() (data_juicer.ops.common.helper_func.UnionFind 方法) find() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) find_corresponding_test_file()(在 data_juicer.utils.unittest_utils 模块中) find_files_with_suffix()(在 data_juicer.utils.file_utils 模块中) find_noun_phrases()(在 data_juicer.ops.filter.phrase_grounding_recall_filter 模块中) find_root_verb_and_its_dobj()(在 data_juicer.analysis.diversity_analysis 模块中) find_root_verb_and_its_dobj_in_string()(在 data_juicer.analysis.diversity_analysis 模块中) FixUnicodeMapper(data_juicer.ops.mapper 中的类) FixUnicodeMapper(data_juicer.ops.mapper.fix_unicode_mapper 中的类) flagged_words_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) FlaggedWordFilter(data_juicer.ops.filter 中的类) FlaggedWordFilter(data_juicer.ops.filter.flagged_words_filter 中的类) flush() (data_juicer.utils.logger_utils.StreamToLoguru 方法) flush_key_value_pairs() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) follow_read()(在 data_juicer.utils.file_utils 模块中) format_cache_file_name() (data_juicer.utils.compress.CacheCompressManager 方法) forward() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 方法) forward() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit 方法) forward() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 方法) forward() (data_juicer.ops.common.prompt2prompt_pipeline.EmptyControl 方法) free_models()(在 data_juicer.utils.model_utils 模块中) FrequencySpecifiedFieldSelector(data_juicer.ops.selector 中的类) FrequencySpecifiedFieldSelector(data_juicer.ops.selector.frequency_specified_field_selector 中的类) from_dict()(data_juicer.core.data.dj_dataset.NestedDataset 类方法) from_dict()(data_juicer.core.data.NestedDataset 类方法) from_dict()(data_juicer.core.NestedDataset 类方法) fuse_filter_group()(在 data_juicer.ops.op_fusion 模块中) fuse_operators()(在 data_juicer.ops.op_fusion 模块中) FusedFilter(data_juicer.ops.op_fusion 中的类) G general_field_filter_condition(data_juicer.utils.constant.StatsKeysConstant 属性) GeneralFieldFilter(data_juicer.ops.filter 中的类) GeneralFusedOP(data_juicer.ops.op_fusion 中的类) generate_dataset() (data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 方法) generate_fingerprint()(在 data_juicer.utils.fingerprint_utils 模块中) GenerateQAFromExamplesMapper(data_juicer.ops.mapper 中的类) GenerateQAFromExamplesMapper(data_juicer.ops.mapper.generate_qa_from_examples_mapper 中的类) GenerateQAFromTextMapper(data_juicer.ops.mapper 中的类) GenerateQAFromTextMapper(data_juicer.ops.mapper.generate_qa_from_text_mapper 中的类) get() (data_juicer.core.data.dj_dataset.DJDataset 方法) get() (data_juicer.core.data.dj_dataset.NestedDataset 方法) get() (data_juicer.core.data.DJDataset 方法) get() (data_juicer.core.data.NestedDataset 方法) get() (data_juicer.core.data.ray_dataset.RayDataset 方法) get() (data_juicer.core.NestedDataset 方法) get() (data_juicer.utils.registry.Registry 方法) get_abs_path()(在 data_juicer.core.data.ray_dataset 模块中) get_access_log() (data_juicer.utils.constant.StatsKeysMeta 方法) get_aligned_sequences()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_all_annotations() (data_juicer.ops.mapper.annotation.annotation_mapper.LabelStudioAnnotationMapper 方法) get_all_dependencies()(data_juicer.utils.lazy_loader.LazyLoader 类方法) get_all_files_paths_under()(在 data_juicer.utils.file_utils 模块中) get_arxiv_urls()(在 data_juicer.download.downloader 模块中) get_average_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 方法) get_backup_model_link()(在 data_juicer.utils.model_utils 模块中) get_caller_name()(在 data_juicer.utils.logger_utils 模块中) get_column() (data_juicer.core.data.dj_dataset.DJDataset 方法) get_column() (data_juicer.core.data.dj_dataset.NestedDataset 方法) get_column() (data_juicer.core.data.DJDataset 方法) get_column() (data_juicer.core.data.NestedDataset 方法) get_column() (data_juicer.core.data.ray_dataset.RayDataset 方法) get_column() (data_juicer.core.NestedDataset 方法) get_cpu_count()(在 data_juicer.utils.resource_utils 模块中) get_cpu_utilization()(在 data_juicer.utils.resource_utils 模块中) get_decoded_frames_from_video()(在 data_juicer.utils.mm_utils 模块中) get_default_cfg()(在 data_juicer.config 模块中) get_default_cfg()(在 data_juicer.config.config 模块中) get_diff_files()(在 data_juicer.utils.unittest_utils 模块中) get_diversity()(在 data_juicer.analysis.diversity_analysis 模块中) get_edges() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.EdgeBuffer 方法) get_empty_store()(data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 静态方法) get_equalizer()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_file_size()(在 data_juicer.utils.mm_utils 模块中) get_hash_method()(在 data_juicer.ops.deduplicator.image_deduplicator 模块中) get_hash_method()(在 data_juicer.ops.deduplicator.ray_image_deduplicator 模块中) get_init_configs()(在 data_juicer.config 模块中) get_init_configs()(在 data_juicer.config.config 模块中) get_key_frame_seconds()(在 data_juicer.utils.mm_utils 模块中) get_left_process_list() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) get_log_file_path()(在 data_juicer.utils.logger_utils 模块中) get_mapper()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_matrix()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_min_cuda_memory()(在 data_juicer.utils.process_utils 模块中) get_model()(在 data_juicer.utils.model_utils 模块中) get_next_id() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.IdGenerator 方法) get_num_gpus()(在 data_juicer.core.data.ray_dataset 模块中) get_package_name()(data_juicer.utils.lazy_loader.LazyLoader 类方法) get_partial_test_cases()(在 data_juicer.utils.unittest_utils 模块中) get_reader() (data_juicer.ops.filter.video_ocr_area_ratio_filter.VideoOcrAreaRatioFilter 方法) get_reader() (data_juicer.ops.filter.VideoOcrAreaRatioFilter 方法) get_refinement_mapper()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_remote_classes()(在 data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator 模块中) get_remote_dedup_set()(在 data_juicer.ops.deduplicator.ray_basic_deduplicator 模块中) get_replacement_mapper()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_replacement_mapper_()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_row_col()(在 data_juicer.analysis.column_wise_analysis 模块中) get_sample_numbers()(在 data_juicer.core.data.dataset_builder 模块中) get_sentences_from_document()(在 data_juicer.ops.common 模块中) get_sentences_from_document()(在 data_juicer.ops.common.helper_func 模块中) get_special_tokens()(在 data_juicer.utils.mm_utils 模块中) get_split_key_frame() (data_juicer.ops.mapper.video_split_by_key_frame_mapper.VideoSplitByKeyFrameMapper 方法) get_split_key_frame() (data_juicer.ops.mapper.VideoSplitByKeyFrameMapper 方法) get_strategy_class()(data_juicer.core.data.load_strategy.DataLoadStrategyRegistry 类方法) get_text_chunks() (data_juicer.ops.mapper.text_chunk_mapper.TextChunkMapper 方法) get_text_chunks() (data_juicer.ops.mapper.TextChunkMapper 方法) get_time_words_attention_alpha()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_toml_file_path()(在 data_juicer.utils.lazy_loader 模块中) get_traceback_matrix()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_uv_lock_path()(在 data_juicer.utils.lazy_loader 模块中) get_validator()(data_juicer.core.data.data_validator.DataValidatorRegistry 类方法) get_video_duration()(在 data_juicer.utils.mm_utils 模块中) get_wikipedia_urls()(在 data_juicer.download.downloader 模块中) get_word_inds()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) get_words_from_document()(在 data_juicer.ops.common 模块中) get_words_from_document()(在 data_juicer.ops.common.helper_func 模块中) getvalue() (data_juicer.utils.logger_utils.StreamToLoguru 方法) GiB(data_juicer.core.Exporter 属性) GiB(data_juicer.core.exporter.Exporter 属性) global_align()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) Grouper(data_juicer.ops 中的类) Grouper(data_juicer.ops.base_op 中的类) GzipCompressor(data_juicer.utils.compress 中的类) H hash()(data_juicer.utils.fingerprint_utils.Hasher 类方法) hash_bytes()(data_juicer.utils.fingerprint_utils.Hasher 类方法) hash_default()(data_juicer.utils.fingerprint_utils.Hasher 类方法) Hasher(data_juicer.utils.fingerprint_utils 中的类) HashKeys(data_juicer.utils.constant 中的类) hash(data_juicer.utils.constant.HashKeys 属性) hexdigest() (data_juicer.utils.fingerprint_utils.Hasher 方法) HiddenPrints(data_juicer.utils.logger_utils 中的类) hook(data_juicer.utils.constant.JobRequiredKeys 属性) html_tables(data_juicer.utils.constant.MetaKeys 属性) HumanPreferenceAnnotationMapper(data_juicer.ops.mapper 中的类) HumanPreferenceAnnotationMapper(data_juicer.ops.mapper.annotation.human_preference_annotation_mapper 中的类) I IdGenerator(data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator 中的类) image_aesthetics_scores(data_juicer.utils.constant.StatsKeysConstant 属性) image_byte_to_base64()(在 data_juicer.utils.mm_utils 模块中) image_height(data_juicer.utils.constant.StatsKeysConstant 属性) image_nsfw_score(data_juicer.utils.constant.StatsKeysConstant 属性) image_pair_similarity(data_juicer.utils.constant.StatsKeysConstant 属性) image_path_to_base64()(在 data_juicer.utils.mm_utils 模块中) image_sizes(data_juicer.utils.constant.StatsKeysConstant 属性) image_tags(data_juicer.utils.constant.MetaKeys 属性) image_text_matching_score(data_juicer.utils.constant.StatsKeysConstant 属性) image_text_similarity(data_juicer.utils.constant.StatsKeysConstant 属性) image_watermark_prob(data_juicer.utils.constant.StatsKeysConstant 属性) image_width(data_juicer.utils.constant.StatsKeysConstant 属性) ImageAestheticsFilter(data_juicer.ops.filter 中的类) ImageAestheticsFilter(data_juicer.ops.filter.image_aesthetics_filter 中的类) ImageAspectRatioFilter(data_juicer.ops.filter 中的类) ImageAspectRatioFilter(data_juicer.ops.filter.image_aspect_ratio_filter 中的类) ImageBlurMapper(data_juicer.ops.mapper 中的类) ImageBlurMapper(data_juicer.ops.mapper.image_blur_mapper 中的类) ImageCaptioningFromGPT4VMapper(data_juicer.ops.mapper 中的类) ImageCaptioningFromGPT4VMapper(data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper 中的类) ImageCaptioningMapper(data_juicer.ops.mapper 中的类) ImageCaptioningMapper(data_juicer.ops.mapper.image_captioning_mapper 中的类) ImageDeduplicator(data_juicer.ops.deduplicator 中的类) ImageDeduplicator(data_juicer.ops.deduplicator.image_deduplicator 中的类) ImageDiffusionMapper(data_juicer.ops.mapper 中的类) ImageDiffusionMapper(data_juicer.ops.mapper.image_diffusion_mapper 中的类) ImageFaceBlurMapper(data_juicer.ops.mapper 中的类) ImageFaceBlurMapper(data_juicer.ops.mapper.image_face_blur_mapper 中的类) ImageFaceCountFilter(data_juicer.ops.filter 中的类) ImageFaceCountFilter(data_juicer.ops.filter.image_face_count_filter 中的类) ImageFaceRatioFilter(data_juicer.ops.filter 中的类) ImageFaceRatioFilter(data_juicer.ops.filter.image_face_ratio_filter 中的类) imagehash(data_juicer.utils.constant.HashKeys 属性) ImageNSFWFilter(data_juicer.ops.filter 中的类) ImageNSFWFilter(data_juicer.ops.filter.image_nsfw_filter 中的类) ImagePairSimilarityFilter(data_juicer.ops.filter 中的类) ImagePairSimilarityFilter(data_juicer.ops.filter.image_pair_similarity_filter 中的类) ImageRemoveBackgroundMapper(data_juicer.ops.mapper 中的类) ImageRemoveBackgroundMapper(data_juicer.ops.mapper.image_remove_background_mapper 中的类) ImageSegmentMapper(data_juicer.ops.mapper 中的类) ImageSegmentMapper(data_juicer.ops.mapper.image_segment_mapper 中的类) ImageShapeFilter(data_juicer.ops.filter 中的类) ImageShapeFilter(data_juicer.ops.filter.image_shape_filter 中的类) ImageSizeFilter(data_juicer.ops.filter 中的类) ImageSizeFilter(data_juicer.ops.filter.image_size_filter 中的类) ImageTaggingMapper(data_juicer.ops.mapper 中的类) ImageTaggingMapper(data_juicer.ops.mapper.image_tagging_mapper 中的类) ImageTextMatchingFilter(data_juicer.ops.filter 中的类) ImageTextMatchingFilter(data_juicer.ops.filter.image_text_matching_filter 中的类) ImageTextSimilarityFilter(data_juicer.ops.filter 中的类) ImageTextSimilarityFilter(data_juicer.ops.filter.image_text_similarity_filter 中的类) ImageWatermarkFilter(data_juicer.ops.filter 中的类) ImageWatermarkFilter(data_juicer.ops.filter.image_watermark_filter 中的类) image(data_juicer.utils.mm_utils.SpecialTokens 属性) init_configs()(在 data_juicer.config 模块中) init_configs()(在 data_juicer.config.config 模块中) init_setup_from_cfg()(在 data_juicer.config.config 模块中) insert_texts_after_placeholders()(在 data_juicer.utils.mm_utils 模块中) insight_mining() (data_juicer.core.Adapter 方法) insight_mining() (data_juicer.core.adapter.Adapter 方法) InterVars(data_juicer.utils.constant 中的类) iou()(在 data_juicer.utils.mm_utils 模块中) iou_filter()(在 data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper 模块中) is_absolute_path()(在 data_juicer.utils.file_utils 模块中) is_batched_op() (data_juicer.ops.base_op.OP 方法) is_cuda_available()(在 data_juicer 模块中) is_float()(在 data_juicer.utils.common_utils 模块中) is_noun()(在 data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper 模块中) is_number()(在 data_juicer.ops.filter.specified_numeric_field_filter 模块中) is_string_list()(在 data_juicer.utils.common_utils 模块中) is_unique() (data_juicer.ops.deduplicator.ray_basic_deduplicator.ActorBackend 方法) is_unique() (data_juicer.ops.deduplicator.ray_basic_deduplicator.Backend 方法) is_unique() (data_juicer.ops.deduplicator.ray_basic_deduplicator.DedupSet 方法) is_unique() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RedisBackend 方法) is_unique(data_juicer.utils.constant.HashKeys 属性) isatty() (data_juicer.utils.logger_utils.StreamToLoguru 方法) iterate() (data_juicer.download.downloader.DocumentIterator 方法) iterate() (data_juicer.download.wikipedia.WikipediaIterator 方法) J JobRequiredKeys(data_juicer.utils.constant 中的类) JSDivMeasure(data_juicer.analysis.measure 中的类) JsonFormatter(data_juicer.format 中的类) JsonFormatter(data_juicer.format.json_formatter 中的类) JSONStreamDatasource(data_juicer.core.data.ray_dataset 中的类) K KeyValueGrouper(data_juicer.ops.grouper 中的类) KeyValueGrouper(data_juicer.ops.grouper.key_value_grouper 中的类) keyword(data_juicer.utils.constant.MetaKeys 属性) KiB(data_juicer.core.Exporter 属性) KiB(data_juicer.core.exporter.Exporter 属性) KLDivMeasure(data_juicer.analysis.measure 中的类) L LabelStudioAnnotationMapper(data_juicer.ops.mapper.annotation.annotation_mapper 中的类) lang_score(data_juicer.utils.constant.StatsKeysConstant 属性) LanguageIDScoreFilter(data_juicer.ops.filter 中的类) LanguageIDScoreFilter(data_juicer.ops.filter.language_id_score_filter 中的类) lang(data_juicer.utils.constant.StatsKeysConstant 属性) LazyLoader(data_juicer.utils.lazy_loader 中的类) light_rag_extraction() (data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 方法) light_rag_extraction() (data_juicer.ops.mapper.ExtractEntityRelationMapper 方法) lines(data_juicer.utils.constant.InterVars 属性) list() (data_juicer.utils.registry.Registry 方法) llm_difficulty_record(data_juicer.utils.constant.StatsKeysConstant 属性) llm_difficulty_score(data_juicer.utils.constant.StatsKeysConstant 属性) llm_quality_record(data_juicer.utils.constant.StatsKeysConstant 属性) llm_quality_score(data_juicer.utils.constant.StatsKeysConstant 属性) LLMDifficultyScoreFilter(data_juicer.ops.filter 中的类) LLMDifficultyScoreFilter(data_juicer.ops.filter.llm_difficulty_score_filter 中的类) LLMQualityScoreFilter(data_juicer.ops.filter 中的类) LLMQualityScoreFilter(data_juicer.ops.filter.llm_quality_score_filter 中的类) load_audio()(在 data_juicer.utils.mm_utils 模块中) load_audios()(在 data_juicer.utils.mm_utils 模块中) load_ckpt() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) load_data() (data_juicer.core.data.load_strategy.DataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultArxivDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultCommonCrawlDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultHuggingfaceDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultLocalDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultModelScopeDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.DefaultWikiDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.RayDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.RayHuggingfaceDataLoadStrategy 方法) load_data() (data_juicer.core.data.load_strategy.RayLocalJsonDataLoadStrategy 方法) load_data_with_context()(在 data_juicer.utils.mm_utils 模块中) load_dataset() (data_juicer.core.data.dataset_builder.DatasetBuilder 方法) load_dataset() (data_juicer.format.empty_formatter.EmptyFormatter 方法) load_dataset() (data_juicer.format.empty_formatter.RayEmptyFormatter 方法) load_dataset() (data_juicer.format.EmptyFormatter 方法) load_dataset() (data_juicer.format.formatter.BaseFormatter 方法) load_dataset() (data_juicer.format.formatter.LocalFormatter 方法) load_dataset() (data_juicer.format.formatter.RemoteFormatter 方法) load_dataset() (data_juicer.format.LocalFormatter 方法) load_dataset() (data_juicer.format.RayEmptyFormatter 方法) load_dataset() (data_juicer.format.RemoteFormatter 方法) load_dataset() (data_juicer.format.text_formatter.TextFormatter 方法) load_dataset() (data_juicer.format.TextFormatter 方法) load_dataset_by_generated_config()(data_juicer.core.data.dataset_builder.DatasetBuilder 类方法) load_formatter()(在 data_juicer.format.load 模块中) load_from_disk()(data_juicer.core.data.dj_dataset.NestedDataset 静态方法) load_from_disk()(data_juicer.core.data.NestedDataset 静态方法) load_from_disk()(data_juicer.core.NestedDataset 静态方法) load_image()(在 data_juicer.utils.mm_utils 模块中) load_image_byte()(在 data_juicer.utils.mm_utils 模块中) load_images()(在 data_juicer.utils.mm_utils 模块中) load_images_byte()(在 data_juicer.utils.mm_utils 模块中) load_ops()(在 data_juicer.ops 模块中) load_ops()(在 data_juicer.ops.load 模块中) load_ops_with_stats_meta()(在 data_juicer.config.config 模块中) load_video()(在 data_juicer.utils.mm_utils 模块中) load_videos()(在 data_juicer.utils.mm_utils 模块中) load_words_asset()(在 data_juicer.utils.asset_utils 模块中) loaded_audios(data_juicer.utils.constant.InterVars 属性) loaded_images(data_juicer.utils.constant.InterVars 属性) loaded_videos(data_juicer.utils.constant.InterVars 属性) LocalBlend(data_juicer.ops.common.prompt2prompt_pipeline 中的类) LocalFormatter(data_juicer.format 中的类) LocalFormatter(data_juicer.format.formatter 中的类) Lz4Compressor(data_juicer.utils.compress 中的类) M main_entities(data_juicer.utils.constant.MetaKeys 属性) make_log_summarization()(在 data_juicer.utils.logger_utils 模块中) map() (data_juicer.core.data.dj_dataset.NestedDataset 方法) map() (data_juicer.core.data.dj_dataset.NestedDatasetDict 方法) map() (data_juicer.core.data.NestedDataset 方法) map() (data_juicer.core.NestedDataset 方法) map_hf_type_to_python()(data_juicer.core.data.schema.Schema 类方法) map_ray_type_to_python()(data_juicer.core.data.schema.Schema 类方法) Mapper(data_juicer.ops 中的类) Mapper(data_juicer.ops.base_op 中的类) matches() (data_juicer.core.data.load_strategy.StrategyKey 方法) MAX_BATCH_SIZE(data_juicer.core.Adapter 属性) MAX_BATCH_SIZE(data_juicer.core.adapter.Adapter 属性) max_line_length(data_juicer.utils.constant.StatsKeysConstant 属性) MaximumLineLengthFilter(data_juicer.ops.filter 中的类) MaximumLineLengthFilter(data_juicer.ops.filter.maximum_line_length_filter 中的类) measure() (data_juicer.analysis.measure.CrossEntropyMeasure 方法) measure() (data_juicer.analysis.measure.EntropyMeasure 方法) measure() (data_juicer.analysis.measure.JSDivMeasure 方法) measure() (data_juicer.analysis.measure.KLDivMeasure 方法) measure() (data_juicer.analysis.measure.Measure 方法) measure() (data_juicer.analysis.measure.RelatedTTestMeasure 方法) Measure(data_juicer.analysis.measure 中的类) merge() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) merge() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) merge_config()(在 data_juicer.config 模块中) merge_config()(在 data_juicer.config.config 模块中) merge_on_whitespace_tab_newline()(在 data_juicer.ops.common 模块中) merge_on_whitespace_tab_newline()(在 data_juicer.ops.common.helper_func 模块中) merge_op_batch() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) merge_op_batch() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) meta_map() (data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 方法) meta_map() (data_juicer.ops.aggregator.MetaTagsAggregator 方法) meta_name(data_juicer.utils.constant.JobRequiredKeys 属性) MetaKeys(data_juicer.utils.constant 中的类) MetaTagsAggregator(data_juicer.ops.aggregator 中的类) MetaTagsAggregator(data_juicer.ops.aggregator.meta_tags_aggregator 中的类) meta(data_juicer.utils.constant.Fields 属性) MiB(data_juicer.core.Exporter 属性) MiB(data_juicer.core.exporter.Exporter 属性) minhash(data_juicer.utils.constant.HashKeys 属性) mis_match_char() (data_juicer.ops.common.prompt2prompt_pipeline.ScoreParams 方法) MllmMapper(data_juicer.ops.mapper 中的类) MllmMapper(data_juicer.ops.mapper.mllm_mapper 中的类) module data_juicer data_juicer.analysis data_juicer.analysis.collector data_juicer.analysis.column_wise_analysis data_juicer.analysis.diversity_analysis data_juicer.analysis.draw data_juicer.analysis.measure data_juicer.analysis.overall_analysis data_juicer.config data_juicer.config.config data_juicer.core data_juicer.core.adapter data_juicer.core.analyzer data_juicer.core.data data_juicer.core.data.config_validator data_juicer.core.data.data_validator data_juicer.core.data.dataset_builder data_juicer.core.data.dj_dataset data_juicer.core.data.load_strategy data_juicer.core.data.ray_dataset data_juicer.core.data.schema data_juicer.core.executor data_juicer.core.executor.base data_juicer.core.executor.default_executor data_juicer.core.executor.factory data_juicer.core.executor.ray_executor data_juicer.core.exporter data_juicer.core.monitor data_juicer.core.tracer data_juicer.download data_juicer.download.commoncrawl data_juicer.download.downloader data_juicer.download.wikipedia data_juicer.format data_juicer.format.csv_formatter data_juicer.format.empty_formatter data_juicer.format.formatter data_juicer.format.json_formatter data_juicer.format.load data_juicer.format.parquet_formatter data_juicer.format.text_formatter data_juicer.format.tsv_formatter data_juicer.ops data_juicer.ops.aggregator data_juicer.ops.aggregator.entity_attribute_aggregator data_juicer.ops.aggregator.meta_tags_aggregator data_juicer.ops.aggregator.most_relevant_entities_aggregator data_juicer.ops.aggregator.nested_aggregator data_juicer.ops.base_op data_juicer.ops.common data_juicer.ops.common.helper_func data_juicer.ops.common.prompt2prompt_pipeline data_juicer.ops.common.special_characters data_juicer.ops.deduplicator data_juicer.ops.deduplicator.document_deduplicator data_juicer.ops.deduplicator.document_minhash_deduplicator data_juicer.ops.deduplicator.document_simhash_deduplicator data_juicer.ops.deduplicator.image_deduplicator data_juicer.ops.deduplicator.ray_basic_deduplicator data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator data_juicer.ops.deduplicator.ray_document_deduplicator data_juicer.ops.deduplicator.ray_image_deduplicator data_juicer.ops.deduplicator.ray_video_deduplicator data_juicer.ops.deduplicator.video_deduplicator data_juicer.ops.filter data_juicer.ops.filter.alphanumeric_filter data_juicer.ops.filter.audio_duration_filter data_juicer.ops.filter.audio_nmf_snr_filter data_juicer.ops.filter.audio_size_filter data_juicer.ops.filter.average_line_length_filter data_juicer.ops.filter.character_repetition_filter data_juicer.ops.filter.flagged_words_filter data_juicer.ops.filter.image_aesthetics_filter data_juicer.ops.filter.image_aspect_ratio_filter data_juicer.ops.filter.image_face_count_filter data_juicer.ops.filter.image_face_ratio_filter data_juicer.ops.filter.image_nsfw_filter data_juicer.ops.filter.image_pair_similarity_filter data_juicer.ops.filter.image_shape_filter data_juicer.ops.filter.image_size_filter data_juicer.ops.filter.image_text_matching_filter data_juicer.ops.filter.image_text_similarity_filter data_juicer.ops.filter.image_watermark_filter data_juicer.ops.filter.language_id_score_filter data_juicer.ops.filter.llm_difficulty_score_filter data_juicer.ops.filter.llm_quality_score_filter data_juicer.ops.filter.maximum_line_length_filter data_juicer.ops.filter.perplexity_filter data_juicer.ops.filter.phrase_grounding_recall_filter data_juicer.ops.filter.special_characters_filter data_juicer.ops.filter.specified_field_filter data_juicer.ops.filter.specified_numeric_field_filter data_juicer.ops.filter.stopwords_filter data_juicer.ops.filter.suffix_filter data_juicer.ops.filter.text_action_filter data_juicer.ops.filter.text_entity_dependency_filter data_juicer.ops.filter.text_length_filter data_juicer.ops.filter.text_pair_similarity_filter data_juicer.ops.filter.token_num_filter data_juicer.ops.filter.video_aesthetics_filter data_juicer.ops.filter.video_aspect_ratio_filter data_juicer.ops.filter.video_duration_filter data_juicer.ops.filter.video_frames_text_similarity_filter data_juicer.ops.filter.video_motion_score_filter data_juicer.ops.filter.video_motion_score_raft_filter data_juicer.ops.filter.video_nsfw_filter data_juicer.ops.filter.video_ocr_area_ratio_filter data_juicer.ops.filter.video_resolution_filter data_juicer.ops.filter.video_tagging_from_frames_filter data_juicer.ops.filter.video_watermark_filter data_juicer.ops.filter.word_repetition_filter data_juicer.ops.filter.words_num_filter data_juicer.ops.grouper data_juicer.ops.grouper.key_value_grouper data_juicer.ops.grouper.naive_grouper data_juicer.ops.grouper.naive_reverse_grouper data_juicer.ops.load data_juicer.ops.mapper data_juicer.ops.mapper.annotation data_juicer.ops.mapper.annotation.annotation_mapper data_juicer.ops.mapper.annotation.human_preference_annotation_mapper data_juicer.ops.mapper.audio_add_gaussian_noise_mapper data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper data_juicer.ops.mapper.calibrate_qa_mapper data_juicer.ops.mapper.calibrate_query_mapper data_juicer.ops.mapper.calibrate_response_mapper data_juicer.ops.mapper.chinese_convert_mapper data_juicer.ops.mapper.clean_copyright_mapper data_juicer.ops.mapper.clean_email_mapper data_juicer.ops.mapper.clean_html_mapper data_juicer.ops.mapper.clean_ip_mapper data_juicer.ops.mapper.clean_links_mapper data_juicer.ops.mapper.dialog_intent_detection_mapper data_juicer.ops.mapper.dialog_sentiment_detection_mapper data_juicer.ops.mapper.dialog_sentiment_intensity_mapper data_juicer.ops.mapper.dialog_topic_detection_mapper data_juicer.ops.mapper.expand_macro_mapper data_juicer.ops.mapper.extract_entity_attribute_mapper data_juicer.ops.mapper.extract_entity_relation_mapper data_juicer.ops.mapper.extract_event_mapper data_juicer.ops.mapper.extract_keyword_mapper data_juicer.ops.mapper.extract_nickname_mapper data_juicer.ops.mapper.extract_support_text_mapper data_juicer.ops.mapper.extract_tables_from_html_mapper data_juicer.ops.mapper.fix_unicode_mapper data_juicer.ops.mapper.generate_qa_from_examples_mapper data_juicer.ops.mapper.generate_qa_from_text_mapper data_juicer.ops.mapper.image_blur_mapper data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper data_juicer.ops.mapper.image_captioning_mapper data_juicer.ops.mapper.image_diffusion_mapper data_juicer.ops.mapper.image_face_blur_mapper data_juicer.ops.mapper.image_remove_background_mapper data_juicer.ops.mapper.image_segment_mapper data_juicer.ops.mapper.image_tagging_mapper data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper data_juicer.ops.mapper.imgdiff_difference_caption_generator_mapper data_juicer.ops.mapper.mllm_mapper data_juicer.ops.mapper.nlpaug_en_mapper data_juicer.ops.mapper.nlpcda_zh_mapper data_juicer.ops.mapper.optimize_qa_mapper data_juicer.ops.mapper.optimize_query_mapper data_juicer.ops.mapper.optimize_response_mapper data_juicer.ops.mapper.pair_preference_mapper data_juicer.ops.mapper.punctuation_normalization_mapper data_juicer.ops.mapper.python_file_mapper data_juicer.ops.mapper.python_lambda_mapper data_juicer.ops.mapper.query_intent_detection_mapper data_juicer.ops.mapper.query_sentiment_detection_mapper data_juicer.ops.mapper.query_topic_detection_mapper data_juicer.ops.mapper.relation_identity_mapper data_juicer.ops.mapper.remove_bibliography_mapper data_juicer.ops.mapper.remove_comments_mapper data_juicer.ops.mapper.remove_header_mapper data_juicer.ops.mapper.remove_long_words_mapper data_juicer.ops.mapper.remove_non_chinese_character_mapper data_juicer.ops.mapper.remove_repeat_sentences_mapper data_juicer.ops.mapper.remove_specific_chars_mapper data_juicer.ops.mapper.remove_table_text_mapper data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper data_juicer.ops.mapper.replace_content_mapper data_juicer.ops.mapper.sdxl_prompt2prompt_mapper data_juicer.ops.mapper.sentence_augmentation_mapper data_juicer.ops.mapper.sentence_split_mapper data_juicer.ops.mapper.text_chunk_mapper data_juicer.ops.mapper.video_captioning_from_audio_mapper data_juicer.ops.mapper.video_captioning_from_frames_mapper data_juicer.ops.mapper.video_captioning_from_summarizer_mapper data_juicer.ops.mapper.video_captioning_from_video_mapper data_juicer.ops.mapper.video_extract_frames_mapper data_juicer.ops.mapper.video_face_blur_mapper data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper data_juicer.ops.mapper.video_remove_watermark_mapper data_juicer.ops.mapper.video_resize_aspect_ratio_mapper data_juicer.ops.mapper.video_resize_resolution_mapper data_juicer.ops.mapper.video_split_by_duration_mapper data_juicer.ops.mapper.video_split_by_key_frame_mapper data_juicer.ops.mapper.video_split_by_scene_mapper data_juicer.ops.mapper.video_tagging_from_audio_mapper data_juicer.ops.mapper.video_tagging_from_frames_mapper data_juicer.ops.mapper.whitespace_normalization_mapper data_juicer.ops.mixins data_juicer.ops.op_fusion data_juicer.ops.selector data_juicer.ops.selector.frequency_specified_field_selector data_juicer.ops.selector.random_selector data_juicer.ops.selector.range_specified_field_selector data_juicer.ops.selector.tags_specified_field_selector data_juicer.ops.selector.topk_specified_field_selector data_juicer.tools data_juicer.utils data_juicer.utils.asset_utils data_juicer.utils.availability_utils data_juicer.utils.cache_utils data_juicer.utils.ckpt_utils data_juicer.utils.common_utils data_juicer.utils.compress data_juicer.utils.constant data_juicer.utils.file_utils data_juicer.utils.fingerprint_utils data_juicer.utils.lazy_loader data_juicer.utils.logger_utils data_juicer.utils.mm_utils data_juicer.utils.model_utils data_juicer.utils.nltk_utils data_juicer.utils.process_utils data_juicer.utils.registry data_juicer.utils.resource_utils data_juicer.utils.sample data_juicer.utils.unittest_utils modules(data_juicer.utils.registry.Registry 属性) monitor_all_resources() (data_juicer.core.Monitor 方法) monitor_all_resources() (data_juicer.core.monitor.Monitor 方法) monitor_current_resources()(data_juicer.core.Monitor 静态方法) monitor_current_resources()(data_juicer.core.monitor.Monitor 静态方法) monitor_func()(data_juicer.core.Monitor 静态方法) monitor_func()(data_juicer.core.monitor.Monitor 静态方法) Monitor(data_juicer.core 中的类) Monitor(data_juicer.core.monitor 中的类) most_relevant_entities(data_juicer.utils.constant.BatchMetaKeys 属性) MostRelevantEntitiesAggregator(data_juicer.ops.aggregator 中的类) MostRelevantEntitiesAggregator(data_juicer.ops.aggregator.most_relevant_entities_aggregator 中的类) multimodal_data_output_dir(data_juicer.utils.constant.Fields 属性) N NaiveGrouper(data_juicer.ops.grouper 中的类) NaiveGrouper(data_juicer.ops.grouper.naive_grouper 中的类) NaiveReverseGrouper(data_juicer.ops.grouper 中的类) NaiveReverseGrouper(data_juicer.ops.grouper.naive_reverse_grouper 中的类) namespace_to_arg_list()(在 data_juicer.config.config 模块中) name(data_juicer.analysis.measure.CrossEntropyMeasure 属性) name(data_juicer.analysis.measure.EntropyMeasure 属性) name(data_juicer.analysis.measure.JSDivMeasure 属性) name(data_juicer.analysis.measure.KLDivMeasure 属性) name(data_juicer.analysis.measure.Measure 属性) name(data_juicer.analysis.measure.RelatedTTestMeasure 属性) name(data_juicer.utils.registry.Registry 属性) nested_access()(在 data_juicer.utils.common_utils 模块中) nested_obj_factory()(在 data_juicer.core.data.dj_dataset 模块中) nested_query()(在 data_juicer.core.data.dj_dataset 模块中) NestedAggregator(data_juicer.ops.aggregator 中的类) NestedAggregator(data_juicer.ops.aggregator.nested_aggregator 中的类) NestedDatasetDict(data_juicer.core.data.dj_dataset 中的类) NestedDataset(data_juicer.core 中的类) NestedDataset(data_juicer.core.data 中的类) NestedDataset(data_juicer.core.data.dj_dataset 中的类) NestedQueryDict(data_juicer.core.data.dj_dataset 中的类) nickname(data_juicer.utils.constant.MetaKeys 属性) NlpaugEnMapper(data_juicer.ops.mapper 中的类) NlpaugEnMapper(data_juicer.ops.mapper.nlpaug_en_mapper 中的类) NlpcdaZhMapper(data_juicer.ops.mapper 中的类) NlpcdaZhMapper(data_juicer.ops.mapper.nlpcda_zh_mapper 中的类) NotificationMixin(data_juicer.ops.mixins 中的类) null_value(data_juicer.format.empty_formatter.EmptyFormatter 属性) null_value(data_juicer.format.empty_formatter.RayEmptyFormatter 属性) null_value(data_juicer.format.EmptyFormatter 属性) null_value(data_juicer.format.RayEmptyFormatter 属性) num_action(data_juicer.utils.constant.StatsKeysConstant 属性) num_dependency_edges(data_juicer.utils.constant.StatsKeysConstant 属性) num_token(data_juicer.utils.constant.StatsKeysConstant 属性) num_uncond_att_layers(data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 属性) num_words(data_juicer.utils.constant.StatsKeysConstant 属性) O optimal_param()(在 data_juicer.ops.deduplicator.document_minhash_deduplicator 模块中) OptimizeQAMapper(data_juicer.ops.mapper 中的类) OptimizeQAMapper(data_juicer.ops.mapper.optimize_qa_mapper 中的类) OptimizeQueryMapper(data_juicer.ops.mapper 中的类) OptimizeQueryMapper(data_juicer.ops.mapper.optimize_query_mapper 中的类) OptimizeResponseMapper(data_juicer.ops.mapper 中的类) OptimizeResponseMapper(data_juicer.ops.mapper.optimize_response_mapper 中的类) OP(data_juicer.ops.base_op 中的类) OverallAnalysis(data_juicer.analysis 中的类) OverallAnalysis(data_juicer.analysis.overall_analysis 中的类) P P2PCrossAttnProcessor(data_juicer.ops.common.prompt2prompt_pipeline 中的类) PairPreferenceMapper(data_juicer.ops.mapper 中的类) PairPreferenceMapper(data_juicer.ops.mapper.pair_preference_mapper 中的类) ParquetFormatter(data_juicer.format 中的类) ParquetFormatter(data_juicer.format.parquet_formatter 中的类) parse_cli_datapath()(在 data_juicer.core.data.dataset_builder 模块中) parse_output() (data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 方法) parse_output() (data_juicer.ops.aggregator.EntityAttributeAggregator 方法) parse_output() (data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 方法) parse_output() (data_juicer.ops.aggregator.MetaTagsAggregator 方法) parse_output() (data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 方法) parse_output() (data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 方法) parse_output() (data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 方法) parse_output() (data_juicer.ops.aggregator.NestedAggregator 方法) parse_output() (data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 方法) parse_output() (data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 方法) parse_output() (data_juicer.ops.filter.LLMDifficultyScoreFilter 方法) parse_output() (data_juicer.ops.filter.LLMQualityScoreFilter 方法) parse_output() (data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 方法) parse_output() (data_juicer.ops.mapper.calibrate_query_mapper.CalibrateQueryMapper 方法) parse_output() (data_juicer.ops.mapper.calibrate_response_mapper.CalibrateResponseMapper 方法) parse_output() (data_juicer.ops.mapper.CalibrateQAMapper 方法) parse_output() (data_juicer.ops.mapper.CalibrateQueryMapper 方法) parse_output() (data_juicer.ops.mapper.CalibrateResponseMapper 方法) parse_output() (data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 方法) parse_output() (data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.DialogIntentDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.DialogSentimentDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.DialogSentimentIntensityMapper 方法) parse_output() (data_juicer.ops.mapper.DialogTopicDetectionMapper 方法) parse_output() (data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 方法) parse_output() (data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 方法) parse_output() (data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 方法) parse_output() (data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 方法) parse_output() (data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 方法) parse_output() (data_juicer.ops.mapper.ExtractEntityAttributeMapper 方法) parse_output() (data_juicer.ops.mapper.ExtractEntityRelationMapper 方法) parse_output() (data_juicer.ops.mapper.ExtractEventMapper 方法) parse_output() (data_juicer.ops.mapper.ExtractKeywordMapper 方法) parse_output() (data_juicer.ops.mapper.ExtractNicknameMapper 方法) parse_output() (data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 方法) parse_output() (data_juicer.ops.mapper.generate_qa_from_text_mapper.GenerateQAFromTextMapper 方法) parse_output() (data_juicer.ops.mapper.GenerateQAFromExamplesMapper 方法) parse_output() (data_juicer.ops.mapper.GenerateQAFromTextMapper 方法) parse_output() (data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 方法) parse_output() (data_juicer.ops.mapper.optimize_query_mapper.OptimizeQueryMapper 方法) parse_output() (data_juicer.ops.mapper.optimize_response_mapper.OptimizeResponseMapper 方法) parse_output() (data_juicer.ops.mapper.OptimizeQAMapper 方法) parse_output() (data_juicer.ops.mapper.OptimizeQueryMapper 方法) parse_output() (data_juicer.ops.mapper.OptimizeResponseMapper 方法) parse_output() (data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 方法) parse_output() (data_juicer.ops.mapper.PairPreferenceMapper 方法) parse_output() (data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 方法) parse_output() (data_juicer.ops.mapper.RelationIdentityMapper 方法) parse_string_to_roi()(在 data_juicer.utils.mm_utils 模块中) patch_nltk_pickle_security()(在 data_juicer.utils.nltk_utils 模块中) PerplexityFilter(data_juicer.ops.filter 中的类) PerplexityFilter(data_juicer.ops.filter.perplexity_filter 中的类) perplexity(data_juicer.utils.constant.StatsKeysConstant 属性) phrase_grounding_recall(data_juicer.utils.constant.StatsKeysConstant 属性) PhraseGroundingRecallFilter(data_juicer.ops.filter 中的类) PhraseGroundingRecallFilter(data_juicer.ops.filter.phrase_grounding_recall_filter 中的类) pil_to_opencv()(在 data_juicer.utils.mm_utils 模块中) prepare_api_model()(在 data_juicer.utils.model_utils 模块中) prepare_converter()(在 data_juicer.ops.mapper.chinese_convert_mapper 模块中) prepare_diffusion_model()(在 data_juicer.utils.model_utils 模块中) prepare_fastsam_model()(在 data_juicer.utils.model_utils 模块中) prepare_fasttext_model()(在 data_juicer.utils.model_utils 模块中) prepare_huggingface_model()(在 data_juicer.utils.model_utils 模块中) prepare_kenlm_model()(在 data_juicer.utils.model_utils 模块中) prepare_model()(在 data_juicer.utils.model_utils 模块中) prepare_nltk_model()(在 data_juicer.utils.model_utils 模块中) prepare_nltk_pos_tagger()(在 data_juicer.utils.model_utils 模块中) prepare_opencv_classifier()(在 data_juicer.utils.model_utils 模块中) prepare_recognizeAnything_model()(在 data_juicer.utils.model_utils 模块中) prepare_sdxl_prompt2prompt()(在 data_juicer.utils.model_utils 模块中) prepare_sentencepiece_for_lang()(在 data_juicer.utils.model_utils 模块中) prepare_sentencepiece_model()(在 data_juicer.utils.model_utils 模块中) prepare_side_configs()(在 data_juicer.config 模块中) prepare_side_configs()(在 data_juicer.config.config 模块中) prepare_simple_aesthetics_model()(在 data_juicer.utils.model_utils 模块中) prepare_spacy_model()(在 data_juicer.utils.model_utils 模块中) prepare_video_blip_model()(在 data_juicer.utils.model_utils 模块中) prepare_vllm_model()(在 data_juicer.utils.model_utils 模块中) preprocess_dataset()(在 data_juicer.core.data.ray_dataset 模块中) probe_small_batch() (data_juicer.core.Adapter 方法) probe_small_batch() (data_juicer.core.adapter.Adapter 方法) process() (data_juicer.core.data.dj_dataset.DJDataset 方法) process() (data_juicer.core.data.dj_dataset.NestedDataset 方法) process() (data_juicer.core.data.DJDataset 方法) process() (data_juicer.core.data.NestedDataset 方法) process() (data_juicer.core.data.ray_dataset.RayDataset 方法) process() (data_juicer.core.NestedDataset 方法) process() (data_juicer.ops.base_op.Deduplicator 方法) process() (data_juicer.ops.base_op.Grouper 方法) process() (data_juicer.ops.base_op.OP 方法) process() (data_juicer.ops.base_op.Selector 方法) process() (data_juicer.ops.Deduplicator 方法) process() (data_juicer.ops.deduplicator.document_deduplicator.DocumentDeduplicator 方法) process() (data_juicer.ops.deduplicator.document_minhash_deduplicator.DocumentMinhashDeduplicator 方法) process() (data_juicer.ops.deduplicator.document_simhash_deduplicator.DocumentSimhashDeduplicator 方法) process() (data_juicer.ops.deduplicator.DocumentDeduplicator 方法) process() (data_juicer.ops.deduplicator.DocumentMinhashDeduplicator 方法) process() (data_juicer.ops.deduplicator.DocumentSimhashDeduplicator 方法) process() (data_juicer.ops.deduplicator.image_deduplicator.ImageDeduplicator 方法) process() (data_juicer.ops.deduplicator.ImageDeduplicator 方法) process() (data_juicer.ops.deduplicator.video_deduplicator.VideoDeduplicator 方法) process() (data_juicer.ops.deduplicator.VideoDeduplicator 方法) process() (data_juicer.ops.Grouper 方法) process() (data_juicer.ops.grouper.key_value_grouper.KeyValueGrouper 方法) process() (data_juicer.ops.grouper.KeyValueGrouper 方法) process() (data_juicer.ops.grouper.naive_grouper.NaiveGrouper 方法) process() (data_juicer.ops.grouper.naive_reverse_grouper.NaiveReverseGrouper 方法) process() (data_juicer.ops.grouper.NaiveGrouper 方法) process() (data_juicer.ops.grouper.NaiveReverseGrouper 方法) process() (data_juicer.ops.Selector 方法) process() (data_juicer.ops.selector.frequency_specified_field_selector.FrequencySpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.FrequencySpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.random_selector.RandomSelector 方法) process() (data_juicer.ops.selector.RandomSelector 方法) process() (data_juicer.ops.selector.range_specified_field_selector.RangeSpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.RangeSpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.tags_specified_field_selector.TagsSpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.TagsSpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.topk_specified_field_selector.TopkSpecifiedFieldSelector 方法) process() (data_juicer.ops.selector.TopkSpecifiedFieldSelector 方法) process_batched() (data_juicer.ops.base_op.Filter 方法) process_batched() (data_juicer.ops.base_op.Mapper 方法) process_batched() (data_juicer.ops.Filter 方法) process_batched() (data_juicer.ops.filter.alphanumeric_filter.AlphanumericFilter 方法) process_batched() (data_juicer.ops.filter.AlphanumericFilter 方法) process_batched() (data_juicer.ops.filter.average_line_length_filter.AverageLineLengthFilter 方法) process_batched() (data_juicer.ops.filter.AverageLineLengthFilter 方法) process_batched() (data_juicer.ops.filter.character_repetition_filter.CharacterRepetitionFilter 方法) process_batched() (data_juicer.ops.filter.CharacterRepetitionFilter 方法) process_batched() (data_juicer.ops.filter.flagged_words_filter.FlaggedWordFilter 方法) process_batched() (data_juicer.ops.filter.FlaggedWordFilter 方法) process_batched() (data_juicer.ops.filter.image_aspect_ratio_filter.ImageAspectRatioFilter 方法) process_batched() (data_juicer.ops.filter.ImageAspectRatioFilter 方法) process_batched() (data_juicer.ops.filter.maximum_line_length_filter.MaximumLineLengthFilter 方法) process_batched() (data_juicer.ops.filter.MaximumLineLengthFilter 方法) process_batched() (data_juicer.ops.filter.perplexity_filter.PerplexityFilter 方法) process_batched() (data_juicer.ops.filter.PerplexityFilter 方法) process_batched() (data_juicer.ops.filter.special_characters_filter.SpecialCharactersFilter 方法) process_batched() (data_juicer.ops.filter.SpecialCharactersFilter 方法) process_batched() (data_juicer.ops.filter.text_length_filter.TextLengthFilter 方法) process_batched() (data_juicer.ops.filter.TextLengthFilter 方法) process_batched() (data_juicer.ops.filter.word_repetition_filter.WordRepetitionFilter 方法) process_batched() (data_juicer.ops.filter.WordRepetitionFilter 方法) process_batched() (data_juicer.ops.filter.words_num_filter.WordsNumFilter 方法) process_batched() (data_juicer.ops.filter.WordsNumFilter 方法) process_batched() (data_juicer.ops.Mapper 方法) process_batched() (data_juicer.ops.mapper.annotation.annotation_mapper.BaseAnnotationMapper 方法) process_batched() (data_juicer.ops.mapper.chinese_convert_mapper.ChineseConvertMapper 方法) process_batched() (data_juicer.ops.mapper.ChineseConvertMapper 方法) process_batched() (data_juicer.ops.mapper.clean_copyright_mapper.CleanCopyrightMapper 方法) process_batched() (data_juicer.ops.mapper.clean_email_mapper.CleanEmailMapper 方法) process_batched() (data_juicer.ops.mapper.clean_html_mapper.CleanHtmlMapper 方法) process_batched() (data_juicer.ops.mapper.clean_ip_mapper.CleanIpMapper 方法) process_batched() (data_juicer.ops.mapper.clean_links_mapper.CleanLinksMapper 方法) process_batched() (data_juicer.ops.mapper.CleanCopyrightMapper 方法) process_batched() (data_juicer.ops.mapper.CleanEmailMapper 方法) process_batched() (data_juicer.ops.mapper.CleanHtmlMapper 方法) process_batched() (data_juicer.ops.mapper.CleanIpMapper 方法) process_batched() (data_juicer.ops.mapper.CleanLinksMapper 方法) process_batched() (data_juicer.ops.mapper.expand_macro_mapper.ExpandMacroMapper 方法) process_batched() (data_juicer.ops.mapper.extract_event_mapper.ExtractEventMapper 方法) process_batched() (data_juicer.ops.mapper.ExtractEventMapper 方法) process_batched() (data_juicer.ops.mapper.fix_unicode_mapper.FixUnicodeMapper 方法) process_batched() (data_juicer.ops.mapper.FixUnicodeMapper 方法) process_batched() (data_juicer.ops.mapper.generate_qa_from_text_mapper.GenerateQAFromTextMapper 方法) process_batched() (data_juicer.ops.mapper.GenerateQAFromTextMapper 方法) process_batched() (data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper.ImageCaptioningFromGPT4VMapper 方法) process_batched() (data_juicer.ops.mapper.image_captioning_mapper.ImageCaptioningMapper 方法) process_batched() (data_juicer.ops.mapper.image_diffusion_mapper.ImageDiffusionMapper 方法) process_batched() (data_juicer.ops.mapper.ImageCaptioningFromGPT4VMapper 方法) process_batched() (data_juicer.ops.mapper.ImageCaptioningMapper 方法) process_batched() (data_juicer.ops.mapper.ImageDiffusionMapper 方法) process_batched() (data_juicer.ops.mapper.nlpaug_en_mapper.NlpaugEnMapper 方法) process_batched() (data_juicer.ops.mapper.NlpaugEnMapper 方法) process_batched() (data_juicer.ops.mapper.nlpcda_zh_mapper.NlpcdaZhMapper 方法) process_batched() (data_juicer.ops.mapper.NlpcdaZhMapper 方法) process_batched() (data_juicer.ops.mapper.punctuation_normalization_mapper.PunctuationNormalizationMapper 方法) process_batched() (data_juicer.ops.mapper.PunctuationNormalizationMapper 方法) process_batched() (data_juicer.ops.mapper.python_file_mapper.PythonFileMapper 方法) process_batched() (data_juicer.ops.mapper.python_lambda_mapper.PythonLambdaMapper 方法) process_batched() (data_juicer.ops.mapper.PythonFileMapper 方法) process_batched() (data_juicer.ops.mapper.PythonLambdaMapper 方法) process_batched() (data_juicer.ops.mapper.query_intent_detection_mapper.QueryIntentDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.query_sentiment_detection_mapper.QuerySentimentDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.query_topic_detection_mapper.QueryTopicDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.QueryIntentDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.QuerySentimentDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.QueryTopicDetectionMapper 方法) process_batched() (data_juicer.ops.mapper.remove_bibliography_mapper.RemoveBibliographyMapper 方法) process_batched() (data_juicer.ops.mapper.remove_comments_mapper.RemoveCommentsMapper 方法) process_batched() (data_juicer.ops.mapper.remove_header_mapper.RemoveHeaderMapper 方法) process_batched() (data_juicer.ops.mapper.remove_long_words_mapper.RemoveLongWordsMapper 方法) process_batched() (data_juicer.ops.mapper.remove_non_chinese_character_mapper.RemoveNonChineseCharacterlMapper 方法) process_batched() (data_juicer.ops.mapper.remove_repeat_sentences_mapper.RemoveRepeatSentencesMapper 方法) process_batched() (data_juicer.ops.mapper.remove_specific_chars_mapper.RemoveSpecificCharsMapper 方法) process_batched() (data_juicer.ops.mapper.remove_table_text_mapper.RemoveTableTextMapper 方法) process_batched() (data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveBibliographyMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveCommentsMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveHeaderMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveLongWordsMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveNonChineseCharacterlMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveRepeatSentencesMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveSpecificCharsMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveTableTextMapper 方法) process_batched() (data_juicer.ops.mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) process_batched() (data_juicer.ops.mapper.replace_content_mapper.ReplaceContentMapper 方法) process_batched() (data_juicer.ops.mapper.ReplaceContentMapper 方法) process_batched() (data_juicer.ops.mapper.sentence_split_mapper.SentenceSplitMapper 方法) process_batched() (data_juicer.ops.mapper.SentenceSplitMapper 方法) process_batched() (data_juicer.ops.mapper.text_chunk_mapper.TextChunkMapper 方法) process_batched() (data_juicer.ops.mapper.TextChunkMapper 方法) process_batched() (data_juicer.ops.mapper.video_captioning_from_audio_mapper.VideoCaptioningFromAudioMapper 方法) process_batched() (data_juicer.ops.mapper.video_captioning_from_frames_mapper.VideoCaptioningFromFramesMapper 方法) process_batched() (data_juicer.ops.mapper.video_captioning_from_summarizer_mapper.VideoCaptioningFromSummarizerMapper 方法) process_batched() (data_juicer.ops.mapper.video_captioning_from_video_mapper.VideoCaptioningFromVideoMapper 方法) process_batched() (data_juicer.ops.mapper.video_split_by_duration_mapper.VideoSplitByDurationMapper 方法) process_batched() (data_juicer.ops.mapper.video_split_by_key_frame_mapper.VideoSplitByKeyFrameMapper 方法) process_batched() (data_juicer.ops.mapper.VideoCaptioningFromAudioMapper 方法) process_batched() (data_juicer.ops.mapper.VideoCaptioningFromFramesMapper 方法) process_batched() (data_juicer.ops.mapper.VideoCaptioningFromSummarizerMapper 方法) process_batched() (data_juicer.ops.mapper.VideoCaptioningFromVideoMapper 方法) process_batched() (data_juicer.ops.mapper.VideoSplitByDurationMapper 方法) process_batched() (data_juicer.ops.mapper.VideoSplitByKeyFrameMapper 方法) process_batched() (data_juicer.ops.mapper.whitespace_normalization_mapper.WhitespaceNormalizationMapper 方法) process_batched() (data_juicer.ops.mapper.WhitespaceNormalizationMapper 方法) process_batched() (data_juicer.ops.op_fusion.FusedFilter 方法) process_batched() (data_juicer.ops.op_fusion.GeneralFusedOP 方法) process_each_frame()(在 data_juicer.utils.mm_utils 模块中) process_single() (data_juicer.ops.Aggregator 方法) process_single() (data_juicer.ops.aggregator.entity_attribute_aggregator.EntityAttributeAggregator 方法) process_single() (data_juicer.ops.aggregator.EntityAttributeAggregator 方法) process_single() (data_juicer.ops.aggregator.meta_tags_aggregator.MetaTagsAggregator 方法) process_single() (data_juicer.ops.aggregator.MetaTagsAggregator 方法) process_single() (data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 方法) process_single() (data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 方法) process_single() (data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 方法) process_single() (data_juicer.ops.aggregator.NestedAggregator 方法) process_single() (data_juicer.ops.base_op.Aggregator 方法) process_single() (data_juicer.ops.base_op.Filter 方法) process_single() (data_juicer.ops.base_op.Mapper 方法) process_single() (data_juicer.ops.deduplicator.ray_basic_deduplicator.RayBasicDeduplicator 方法) process_single() (data_juicer.ops.deduplicator.RayBasicDeduplicator 方法) process_single() (data_juicer.ops.Filter 方法) process_single() (data_juicer.ops.filter.audio_duration_filter.AudioDurationFilter 方法) process_single() (data_juicer.ops.filter.audio_nmf_snr_filter.AudioNMFSNRFilter 方法) process_single() (data_juicer.ops.filter.audio_size_filter.AudioSizeFilter 方法) process_single() (data_juicer.ops.filter.AudioDurationFilter 方法) process_single() (data_juicer.ops.filter.AudioNMFSNRFilter 方法) process_single() (data_juicer.ops.filter.AudioSizeFilter 方法) process_single() (data_juicer.ops.filter.GeneralFieldFilter 方法) process_single() (data_juicer.ops.filter.image_aesthetics_filter.ImageAestheticsFilter 方法) process_single() (data_juicer.ops.filter.image_face_count_filter.ImageFaceCountFilter 方法) process_single() (data_juicer.ops.filter.image_face_ratio_filter.ImageFaceRatioFilter 方法) process_single() (data_juicer.ops.filter.image_nsfw_filter.ImageNSFWFilter 方法) process_single() (data_juicer.ops.filter.image_pair_similarity_filter.ImagePairSimilarityFilter 方法) process_single() (data_juicer.ops.filter.image_shape_filter.ImageShapeFilter 方法) process_single() (data_juicer.ops.filter.image_size_filter.ImageSizeFilter 方法) process_single() (data_juicer.ops.filter.image_text_matching_filter.ImageTextMatchingFilter 方法) process_single() (data_juicer.ops.filter.image_text_similarity_filter.ImageTextSimilarityFilter 方法) process_single() (data_juicer.ops.filter.image_watermark_filter.ImageWatermarkFilter 方法) process_single() (data_juicer.ops.filter.ImageAestheticsFilter 方法) process_single() (data_juicer.ops.filter.ImageFaceCountFilter 方法) process_single() (data_juicer.ops.filter.ImageFaceRatioFilter 方法) process_single() (data_juicer.ops.filter.ImageNSFWFilter 方法) process_single() (data_juicer.ops.filter.ImagePairSimilarityFilter 方法) process_single() (data_juicer.ops.filter.ImageShapeFilter 方法) process_single() (data_juicer.ops.filter.ImageSizeFilter 方法) process_single() (data_juicer.ops.filter.ImageTextMatchingFilter 方法) process_single() (data_juicer.ops.filter.ImageTextSimilarityFilter 方法) process_single() (data_juicer.ops.filter.ImageWatermarkFilter 方法) process_single() (data_juicer.ops.filter.language_id_score_filter.LanguageIDScoreFilter 方法) process_single() (data_juicer.ops.filter.LanguageIDScoreFilter 方法) process_single() (data_juicer.ops.filter.llm_difficulty_score_filter.LLMDifficultyScoreFilter 方法) process_single() (data_juicer.ops.filter.llm_quality_score_filter.LLMQualityScoreFilter 方法) process_single() (data_juicer.ops.filter.LLMDifficultyScoreFilter 方法) process_single() (data_juicer.ops.filter.LLMQualityScoreFilter 方法) process_single() (data_juicer.ops.filter.phrase_grounding_recall_filter.PhraseGroundingRecallFilter 方法) process_single() (data_juicer.ops.filter.PhraseGroundingRecallFilter 方法) process_single() (data_juicer.ops.filter.specified_field_filter.SpecifiedFieldFilter 方法) process_single() (data_juicer.ops.filter.specified_numeric_field_filter.SpecifiedNumericFieldFilter 方法) process_single() (data_juicer.ops.filter.SpecifiedFieldFilter 方法) process_single() (data_juicer.ops.filter.SpecifiedNumericFieldFilter 方法) process_single() (data_juicer.ops.filter.stopwords_filter.StopWordsFilter 方法) process_single() (data_juicer.ops.filter.StopWordsFilter 方法) process_single() (data_juicer.ops.filter.suffix_filter.SuffixFilter 方法) process_single() (data_juicer.ops.filter.SuffixFilter 方法) process_single() (data_juicer.ops.filter.text_action_filter.TextActionFilter 方法) process_single() (data_juicer.ops.filter.text_entity_dependency_filter.TextEntityDependencyFilter 方法) process_single() (data_juicer.ops.filter.text_pair_similarity_filter.TextPairSimilarityFilter 方法) process_single() (data_juicer.ops.filter.TextActionFilter 方法) process_single() (data_juicer.ops.filter.TextEntityDependencyFilter 方法) process_single() (data_juicer.ops.filter.TextPairSimilarityFilter 方法) process_single() (data_juicer.ops.filter.token_num_filter.TokenNumFilter 方法) process_single() (data_juicer.ops.filter.TokenNumFilter 方法) process_single() (data_juicer.ops.filter.video_aesthetics_filter.VideoAestheticsFilter 方法) process_single() (data_juicer.ops.filter.video_aspect_ratio_filter.VideoAspectRatioFilter 方法) process_single() (data_juicer.ops.filter.video_duration_filter.VideoDurationFilter 方法) process_single() (data_juicer.ops.filter.video_frames_text_similarity_filter.VideoFramesTextSimilarityFilter 方法) process_single() (data_juicer.ops.filter.video_motion_score_filter.VideoMotionScoreFilter 方法) process_single() (data_juicer.ops.filter.video_nsfw_filter.VideoNSFWFilter 方法) process_single() (data_juicer.ops.filter.video_ocr_area_ratio_filter.VideoOcrAreaRatioFilter 方法) process_single() (data_juicer.ops.filter.video_resolution_filter.VideoResolutionFilter 方法) process_single() (data_juicer.ops.filter.video_tagging_from_frames_filter.VideoTaggingFromFramesFilter 方法) process_single() (data_juicer.ops.filter.video_watermark_filter.VideoWatermarkFilter 方法) process_single() (data_juicer.ops.filter.VideoAestheticsFilter 方法) process_single() (data_juicer.ops.filter.VideoAspectRatioFilter 方法) process_single() (data_juicer.ops.filter.VideoDurationFilter 方法) process_single() (data_juicer.ops.filter.VideoFramesTextSimilarityFilter 方法) process_single() (data_juicer.ops.filter.VideoMotionScoreFilter 方法) process_single() (data_juicer.ops.filter.VideoNSFWFilter 方法) process_single() (data_juicer.ops.filter.VideoOcrAreaRatioFilter 方法) process_single() (data_juicer.ops.filter.VideoResolutionFilter 方法) process_single() (data_juicer.ops.filter.VideoTaggingFromFramesFilter 方法) process_single() (data_juicer.ops.filter.VideoWatermarkFilter 方法) process_single() (data_juicer.ops.Mapper 方法) process_single() (data_juicer.ops.mapper.audio_add_gaussian_noise_mapper.AudioAddGaussianNoiseMapper 方法) process_single() (data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper.AudioFFmpegWrappedMapper 方法) process_single() (data_juicer.ops.mapper.AudioAddGaussianNoiseMapper 方法) process_single() (data_juicer.ops.mapper.AudioFFmpegWrappedMapper 方法) process_single() (data_juicer.ops.mapper.calibrate_qa_mapper.CalibrateQAMapper 方法) process_single() (data_juicer.ops.mapper.CalibrateQAMapper 方法) process_single() (data_juicer.ops.mapper.dialog_intent_detection_mapper.DialogIntentDetectionMapper 方法) process_single() (data_juicer.ops.mapper.dialog_sentiment_detection_mapper.DialogSentimentDetectionMapper 方法) process_single() (data_juicer.ops.mapper.dialog_sentiment_intensity_mapper.DialogSentimentIntensityMapper 方法) process_single() (data_juicer.ops.mapper.dialog_topic_detection_mapper.DialogTopicDetectionMapper 方法) process_single() (data_juicer.ops.mapper.DialogIntentDetectionMapper 方法) process_single() (data_juicer.ops.mapper.DialogSentimentDetectionMapper 方法) process_single() (data_juicer.ops.mapper.DialogSentimentIntensityMapper 方法) process_single() (data_juicer.ops.mapper.DialogTopicDetectionMapper 方法) process_single() (data_juicer.ops.mapper.Difference_Area_Generator_Mapper 方法) process_single() (data_juicer.ops.mapper.extract_entity_attribute_mapper.ExtractEntityAttributeMapper 方法) process_single() (data_juicer.ops.mapper.extract_entity_relation_mapper.ExtractEntityRelationMapper 方法) process_single() (data_juicer.ops.mapper.extract_keyword_mapper.ExtractKeywordMapper 方法) process_single() (data_juicer.ops.mapper.extract_nickname_mapper.ExtractNicknameMapper 方法) process_single() (data_juicer.ops.mapper.extract_support_text_mapper.ExtractSupportTextMapper 方法) process_single() (data_juicer.ops.mapper.extract_tables_from_html_mapper.ExtractTablesFromHtmlMapper 方法) process_single() (data_juicer.ops.mapper.ExtractEntityAttributeMapper 方法) process_single() (data_juicer.ops.mapper.ExtractEntityRelationMapper 方法) process_single() (data_juicer.ops.mapper.ExtractKeywordMapper 方法) process_single() (data_juicer.ops.mapper.ExtractNicknameMapper 方法) process_single() (data_juicer.ops.mapper.ExtractSupportTextMapper 方法) process_single() (data_juicer.ops.mapper.ExtractTablesFromHtmlMapper 方法) process_single() (data_juicer.ops.mapper.generate_qa_from_examples_mapper.GenerateQAFromExamplesMapper 方法) process_single() (data_juicer.ops.mapper.GenerateQAFromExamplesMapper 方法) process_single() (data_juicer.ops.mapper.image_blur_mapper.ImageBlurMapper 方法) process_single() (data_juicer.ops.mapper.image_face_blur_mapper.ImageFaceBlurMapper 方法) process_single() (data_juicer.ops.mapper.image_remove_background_mapper.ImageRemoveBackgroundMapper 方法) process_single() (data_juicer.ops.mapper.image_segment_mapper.ImageSegmentMapper 方法) process_single() (data_juicer.ops.mapper.image_tagging_mapper.ImageTaggingMapper 方法) process_single() (data_juicer.ops.mapper.ImageBlurMapper 方法) process_single() (data_juicer.ops.mapper.ImageFaceBlurMapper 方法) process_single() (data_juicer.ops.mapper.ImageRemoveBackgroundMapper 方法) process_single() (data_juicer.ops.mapper.ImageSegmentMapper 方法) process_single() (data_juicer.ops.mapper.ImageTaggingMapper 方法) process_single() (data_juicer.ops.mapper.imgdiff_difference_area_generator_mapper.Difference_Area_Generator_Mapper 方法) process_single() (data_juicer.ops.mapper.imgdiff_difference_caption_generator_mapper.Difference_Caption_Generator_Mapper 方法) process_single() (data_juicer.ops.mapper.mllm_mapper.MllmMapper 方法) process_single() (data_juicer.ops.mapper.MllmMapper 方法) process_single() (data_juicer.ops.mapper.optimize_qa_mapper.OptimizeQAMapper 方法) process_single() (data_juicer.ops.mapper.OptimizeQAMapper 方法) process_single() (data_juicer.ops.mapper.pair_preference_mapper.PairPreferenceMapper 方法) process_single() (data_juicer.ops.mapper.PairPreferenceMapper 方法) process_single() (data_juicer.ops.mapper.python_file_mapper.PythonFileMapper 方法) process_single() (data_juicer.ops.mapper.python_lambda_mapper.PythonLambdaMapper 方法) process_single() (data_juicer.ops.mapper.PythonFileMapper 方法) process_single() (data_juicer.ops.mapper.PythonLambdaMapper 方法) process_single() (data_juicer.ops.mapper.relation_identity_mapper.RelationIdentityMapper 方法) process_single() (data_juicer.ops.mapper.RelationIdentityMapper 方法) process_single() (data_juicer.ops.mapper.sdxl_prompt2prompt_mapper.SDXLPrompt2PromptMapper 方法) process_single() (data_juicer.ops.mapper.SDXLPrompt2PromptMapper 方法) process_single() (data_juicer.ops.mapper.sentence_augmentation_mapper.SentenceAugmentationMapper 方法) process_single() (data_juicer.ops.mapper.SentenceAugmentationMapper 方法) process_single() (data_juicer.ops.mapper.video_extract_frames_mapper.VideoExtractFramesMapper 方法) process_single() (data_juicer.ops.mapper.video_face_blur_mapper.VideoFaceBlurMapper 方法) process_single() (data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper.VideoFFmpegWrappedMapper 方法) process_single() (data_juicer.ops.mapper.video_remove_watermark_mapper.VideoRemoveWatermarkMapper 方法) process_single() (data_juicer.ops.mapper.video_resize_aspect_ratio_mapper.VideoResizeAspectRatioMapper 方法) process_single() (data_juicer.ops.mapper.video_resize_resolution_mapper.VideoResizeResolutionMapper 方法) process_single() (data_juicer.ops.mapper.video_split_by_scene_mapper.VideoSplitBySceneMapper 方法) process_single() (data_juicer.ops.mapper.video_tagging_from_audio_mapper.VideoTaggingFromAudioMapper 方法) process_single() (data_juicer.ops.mapper.video_tagging_from_frames_mapper.VideoTaggingFromFramesMapper 方法) process_single() (data_juicer.ops.mapper.VideoExtractFramesMapper 方法) process_single() (data_juicer.ops.mapper.VideoFaceBlurMapper 方法) process_single() (data_juicer.ops.mapper.VideoFFmpegWrappedMapper 方法) process_single() (data_juicer.ops.mapper.VideoRemoveWatermarkMapper 方法) process_single() (data_juicer.ops.mapper.VideoResizeAspectRatioMapper 方法) process_single() (data_juicer.ops.mapper.VideoResizeResolutionMapper 方法) process_single() (data_juicer.ops.mapper.VideoSplitBySceneMapper 方法) process_single() (data_juicer.ops.mapper.VideoTaggingFromAudioMapper 方法) process_single() (data_juicer.ops.mapper.VideoTaggingFromFramesMapper 方法) Prompt2PromptPipeline(data_juicer.ops.common.prompt2prompt_pipeline 中的类) PunctuationNormalizationMapper(data_juicer.ops.mapper 中的类) PunctuationNormalizationMapper(data_juicer.ops.mapper.punctuation_normalization_mapper 中的类) PythonFileMapper(data_juicer.ops.mapper 中的类) PythonFileMapper(data_juicer.ops.mapper.python_file_mapper 中的类) PythonLambdaMapper(data_juicer.ops.mapper 中的类) PythonLambdaMapper(data_juicer.ops.mapper.python_lambda_mapper 中的类) Q query_cuda_info()(在 data_juicer.utils.resource_utils 模块中) query_intent_label(data_juicer.utils.constant.MetaKeys 属性) query_intent_score(data_juicer.utils.constant.MetaKeys 属性) query_mem_info()(在 data_juicer.utils.resource_utils 模块中) query_most_relevant_entities() (data_juicer.ops.aggregator.most_relevant_entities_aggregator.MostRelevantEntitiesAggregator 方法) query_most_relevant_entities() (data_juicer.ops.aggregator.MostRelevantEntitiesAggregator 方法) query_sentiment_label(data_juicer.utils.constant.MetaKeys 属性) query_sentiment_score(data_juicer.utils.constant.MetaKeys 属性) query_topic_label(data_juicer.utils.constant.MetaKeys 属性) query_topic_score(data_juicer.utils.constant.MetaKeys 属性) QueryIntentDetectionMapper(data_juicer.ops.mapper 中的类) QueryIntentDetectionMapper(data_juicer.ops.mapper.query_intent_detection_mapper 中的类) QuerySentimentDetectionMapper(data_juicer.ops.mapper 中的类) QuerySentimentDetectionMapper(data_juicer.ops.mapper.query_sentiment_detection_mapper 中的类) QueryTopicDetectionMapper(data_juicer.ops.mapper 中的类) QueryTopicDetectionMapper(data_juicer.ops.mapper.query_topic_detection_mapper 中的类) R random_sample()(在 data_juicer.utils.sample 模块中) RandomSelector(data_juicer.ops.selector 中的类) RandomSelector(data_juicer.ops.selector.random_selector 中的类) RangeSpecifiedFieldSelector(data_juicer.ops.selector 中的类) RangeSpecifiedFieldSelector(data_juicer.ops.selector.range_specified_field_selector 中的类) RayBasicDeduplicator(data_juicer.ops.deduplicator 中的类) RayBasicDeduplicator(data_juicer.ops.deduplicator.ray_basic_deduplicator 中的类) RayBTSMinhashDeduplicator(data_juicer.ops.deduplicator 中的类) RayBTSMinhashDeduplicator(data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator 中的类) RayDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) RayDataset(data_juicer.core.data.ray_dataset 中的类) RayDocumentDeduplicator(data_juicer.ops.deduplicator 中的类) RayDocumentDeduplicator(data_juicer.ops.deduplicator.ray_document_deduplicator 中的类) RayEmptyFormatter(data_juicer.format 中的类) RayEmptyFormatter(data_juicer.format.empty_formatter 中的类) RayExecutor(data_juicer.core.executor.ray_executor 中的类) RayHuggingfaceDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) RayImageDeduplicator(data_juicer.ops.deduplicator 中的类) RayImageDeduplicator(data_juicer.ops.deduplicator.ray_image_deduplicator 中的类) RayLocalJsonDataLoadStrategy(data_juicer.core.data.load_strategy 中的类) RayVideoDeduplicator(data_juicer.ops.deduplicator 中的类) RayVideoDeduplicator(data_juicer.ops.deduplicator.ray_video_deduplicator 中的类) read_json()(data_juicer.core.data.ray_dataset.RayDataset 类方法) read_json_stream()(在 data_juicer.core.data.ray_dataset 模块中) read_single_partition()(在 data_juicer.utils.file_utils 模块中) rebalancing() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) record() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) recursive_summary() (data_juicer.ops.aggregator.nested_aggregator.NestedAggregator 方法) recursive_summary() (data_juicer.ops.aggregator.NestedAggregator 方法) recursively_chunk() (data_juicer.ops.mapper.text_chunk_mapper.TextChunkMapper 方法) recursively_chunk() (data_juicer.ops.mapper.TextChunkMapper 方法) redirect_sys_output()(在 data_juicer.utils.logger_utils 模块中) RedisBackend(data_juicer.ops.deduplicator.ray_basic_deduplicator 中的类) refine_single_column() (data_juicer.analysis.overall_analysis.OverallAnalysis 方法) refine_single_column() (data_juicer.analysis.OverallAnalysis 方法) refined_words(data_juicer.utils.constant.InterVars 属性) register()(data_juicer.core.data.data_validator.DataValidatorRegistry 类方法) register()(data_juicer.core.data.load_strategy.DataLoadStrategyRegistry 类方法) register_attention_control() (data_juicer.ops.common.prompt2prompt_pipeline.Prompt2PromptPipeline 方法) register_event_handler() (data_juicer.ops.mixins.EventDrivenMixin 方法) register_module() (data_juicer.utils.registry.Registry 方法) Registry(data_juicer.utils.registry 中的类) RelatedTTestMeasure(data_juicer.analysis.measure 中的类) relation_description(data_juicer.utils.constant.MetaKeys 属性) relation_keywords(data_juicer.utils.constant.MetaKeys 属性) relation_strength(data_juicer.utils.constant.MetaKeys 属性) RelationIdentityMapper(data_juicer.ops.mapper 中的类) RelationIdentityMapper(data_juicer.ops.mapper.relation_identity_mapper 中的类) relation(data_juicer.utils.constant.MetaKeys 属性) relevant_characters(data_juicer.utils.constant.MetaKeys 属性) RemoteFormatter(data_juicer.format 中的类) RemoteFormatter(data_juicer.format.formatter 中的类) remove_columns() (data_juicer.core.data.dj_dataset.NestedDataset 方法) remove_columns() (data_juicer.core.data.NestedDataset 方法) remove_columns() (data_juicer.core.NestedDataset 方法) remove_extra_parameters() (data_juicer.ops.base_op.OP 方法) remove_non_special_tokens()(在 data_juicer.utils.mm_utils 模块中) remove_punctuation()(在 data_juicer.ops.filter.phrase_grounding_recall_filter 模块中) remove_special_tokens()(在 data_juicer.utils.mm_utils 模块中) RemoveBibliographyMapper(data_juicer.ops.mapper 中的类) RemoveBibliographyMapper(data_juicer.ops.mapper.remove_bibliography_mapper 中的类) RemoveCommentsMapper(data_juicer.ops.mapper 中的类) RemoveCommentsMapper(data_juicer.ops.mapper.remove_comments_mapper 中的类) RemoveHeaderMapper(data_juicer.ops.mapper 中的类) RemoveHeaderMapper(data_juicer.ops.mapper.remove_header_mapper 中的类) RemoveLongWordsMapper(data_juicer.ops.mapper 中的类) RemoveLongWordsMapper(data_juicer.ops.mapper.remove_long_words_mapper 中的类) RemoveNonChineseCharacterlMapper(data_juicer.ops.mapper 中的类) RemoveNonChineseCharacterlMapper(data_juicer.ops.mapper.remove_non_chinese_character_mapper 中的类) RemoveRepeatSentencesMapper(data_juicer.ops.mapper 中的类) RemoveRepeatSentencesMapper(data_juicer.ops.mapper.remove_repeat_sentences_mapper 中的类) RemoveSpecificCharsMapper(data_juicer.ops.mapper 中的类) RemoveSpecificCharsMapper(data_juicer.ops.mapper.remove_specific_chars_mapper 中的类) RemoveTableTextMapper(data_juicer.ops.mapper 中的类) RemoveTableTextMapper(data_juicer.ops.mapper.remove_table_text_mapper 中的类) RemoveWordsWithIncorrectSubstringsMapper(data_juicer.ops.mapper 中的类) RemoveWordsWithIncorrectSubstringsMapper(data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper 中的类) replace_cross_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit 方法) replace_cross_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionRefine 方法) replace_cross_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionReplace 方法) replace_cross_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionReweight 方法) replace_func()(在 data_juicer.ops.mapper.video_split_by_scene_mapper 模块中) replace_self_attention() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit 方法) ReplaceContentMapper(data_juicer.ops.mapper 中的类) ReplaceContentMapper(data_juicer.ops.mapper.replace_content_mapper 中的类) RequiredFieldsValidator(data_juicer.core.data.data_validator 中的类) rescale()(在 data_juicer.ops.mapper.video_resize_aspect_ratio_mapper 模块中) rescale_noise_cfg()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) reset() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 方法) reset() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore 方法) reset_dependencies_cache()(data_juicer.utils.lazy_loader.LazyLoader 类方法) resource_monitor()(在 data_juicer.core.monitor 模块中) rewrite_cli_datapath()(在 data_juicer.core.data.dataset_builder 模块中) role_relation(data_juicer.utils.constant.MetaKeys 属性) run() (data_juicer.core.Analyzer 方法) run() (data_juicer.core.analyzer.Analyzer 方法) run() (data_juicer.core.DefaultExecutor 方法) run() (data_juicer.core.executor.base.ExecutorBase 方法) run() (data_juicer.core.executor.default_executor.DefaultExecutor 方法) run() (data_juicer.core.executor.DefaultExecutor 方法) run() (data_juicer.core.executor.ExecutorBase 方法) run() (data_juicer.core.executor.ray_executor.RayExecutor 方法) run() (data_juicer.core.ExecutorBase 方法) run() (data_juicer.ops.Aggregator 方法) run() (data_juicer.ops.base_op.Aggregator 方法) run() (data_juicer.ops.base_op.Deduplicator 方法) run() (data_juicer.ops.base_op.Filter 方法) run() (data_juicer.ops.base_op.Grouper 方法) run() (data_juicer.ops.base_op.Mapper 方法) run() (data_juicer.ops.base_op.OP 方法) run() (data_juicer.ops.base_op.Selector 方法) run() (data_juicer.ops.Deduplicator 方法) run() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.RayBTSMinhashDeduplicator 方法) run() (data_juicer.ops.deduplicator.RayBTSMinhashDeduplicator 方法) run() (data_juicer.ops.Filter 方法) run() (data_juicer.ops.Grouper 方法) run() (data_juicer.ops.Mapper 方法) run() (data_juicer.ops.op_fusion.GeneralFusedOP 方法) run() (data_juicer.ops.Selector 方法) run_ner()(在 data_juicer.ops.filter.phrase_grounding_recall_filter 模块中) run_single_op() (data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 方法) runtime_np() (data_juicer.ops.base_op.OP 方法) S sample_data() (data_juicer.core.DefaultExecutor 方法) sample_data() (data_juicer.core.executor.default_executor.DefaultExecutor 方法) sample_data() (data_juicer.core.executor.DefaultExecutor 方法) sampled_frames(data_juicer.utils.constant.InterVars 属性) save_ckpt() (data_juicer.utils.ckpt_utils.CheckpointManager 方法) schema() (data_juicer.core.data.dj_dataset.DJDataset 方法) schema() (data_juicer.core.data.dj_dataset.NestedDataset 方法) schema() (data_juicer.core.data.DJDataset 方法) schema() (data_juicer.core.data.NestedDataset 方法) schema() (data_juicer.core.data.ray_dataset.RayDataset 方法) schema() (data_juicer.core.NestedDataset 方法) Schema(data_juicer.core.data.schema 中的类) ScoreParams(data_juicer.ops.common.prompt2prompt_pipeline 中的类) SDXLPrompt2PromptMapper(data_juicer.ops.mapper 中的类) SDXLPrompt2PromptMapper(data_juicer.ops.mapper.sdxl_prompt2prompt_mapper 中的类) select() (data_juicer.core.data.dj_dataset.NestedDataset 方法) select() (data_juicer.core.data.NestedDataset 方法) select() (data_juicer.core.NestedDataset 方法) select_columns() (data_juicer.core.data.dj_dataset.NestedDataset 方法) select_columns() (data_juicer.core.data.NestedDataset 方法) select_columns() (data_juicer.core.NestedDataset 方法) Selector(data_juicer.ops 中的类) Selector(data_juicer.ops.base_op 中的类) send_notification() (data_juicer.ops.mixins.NotificationMixin 方法) SentenceAugmentationMapper(data_juicer.ops.mapper 中的类) SentenceAugmentationMapper(data_juicer.ops.mapper.sentence_augmentation_mapper 中的类) SentenceSplitMapper(data_juicer.ops.mapper 中的类) SentenceSplitMapper(data_juicer.ops.mapper.sentence_split_mapper 中的类) separate_signal_noise()(在 data_juicer.ops.filter.audio_nmf_snr_filter 模块中) set_clear_model_flag()(在 data_juicer.utils.unittest_utils 模块中) set_dataset_to_absolute_path()(在 data_juicer.core.data.ray_dataset 模块中) set_edge_buffer() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) set_edges() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.EdgeBuffer 方法) setup_logger()(在 data_juicer.utils.logger_utils 模块中) setup_model() (data_juicer.ops.filter.video_motion_score_filter.VideoMotionScoreFilter 方法) setup_model() (data_juicer.ops.filter.video_motion_score_raft_filter.VideoMotionScoreRaftFilter 方法) setup_model() (data_juicer.ops.filter.VideoMotionScoreFilter 方法) setup_model() (data_juicer.ops.filter.VideoMotionScoreRaftFilter 方法) setup_mp()(在 data_juicer.utils.process_utils 模块中) setup_project() (data_juicer.ops.mapper.annotation.annotation_mapper.LabelStudioAnnotationMapper 方法) setup_resource_aliases()(在 data_juicer.utils.nltk_utils 模块中) setUpClass()(data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 类方法) sha1_hash32()(在 data_juicer.ops.deduplicator.document_minhash_deduplicator 模块中) should_keep_long_word() (data_juicer.ops.mapper.remove_long_words_mapper.RemoveLongWordsMapper 方法) should_keep_long_word() (data_juicer.ops.mapper.RemoveLongWordsMapper 方法) should_keep_word_with_incorrect_substrings() (data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) should_keep_word_with_incorrect_substrings() (data_juicer.ops.mapper.RemoveWordsWithIncorrectSubstringsMapper 方法) simhash(data_juicer.utils.constant.HashKeys 属性) single_partition_write_with_filename()(在 data_juicer.utils.file_utils 模块中) size_to_bytes()(在 data_juicer.utils.mm_utils 模块中) sort_op_by_types_and_names()(在 data_juicer.config.config 模块中) source_entity(data_juicer.utils.constant.MetaKeys 属性) source_file(data_juicer.utils.constant.Fields 属性) special_char_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) SpecialCharactersFilter(data_juicer.ops.filter 中的类) SpecialCharactersFilter(data_juicer.ops.filter.special_characters_filter 中的类) SpecialTokens(data_juicer.utils.mm_utils 中的类) SpecifiedFieldFilter(data_juicer.ops.filter 中的类) SpecifiedFieldFilter(data_juicer.ops.filter.specified_field_filter 中的类) SpecifiedNumericFieldFilter(data_juicer.ops.filter 中的类) SpecifiedNumericFieldFilter(data_juicer.ops.filter.specified_numeric_field_filter 中的类) split_on_newline_tab_whitespace()(在 data_juicer.ops.common 模块中) split_on_newline_tab_whitespace()(在 data_juicer.ops.common.helper_func 模块中) split_on_whitespace()(在 data_juicer.ops.common 模块中) split_on_whitespace()(在 data_juicer.ops.common.helper_func 模块中) split_sentence()(在 data_juicer.ops.mapper.remove_repeat_sentences_mapper 模块中) split_text_by_punctuation()(在 data_juicer.ops.common 模块中) split_text_by_punctuation()(在 data_juicer.ops.common.helper_func 模块中) split_videos_by_duration() (data_juicer.ops.mapper.video_split_by_duration_mapper.VideoSplitByDurationMapper 方法) split_videos_by_duration() (data_juicer.ops.mapper.VideoSplitByDurationMapper 方法) squeeze() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) start_polling() (data_juicer.ops.mixins.EventDrivenMixin 方法) stats_to_hist()(data_juicer.analysis.measure.RelatedTTestMeasure 静态方法) stats_to_number()(在 data_juicer.utils.common_utils 模块中) StatsKeysConstant(data_juicer.utils.constant 中的类) StatsKeysMeta(data_juicer.utils.constant 中的类) StatsKeys(data_juicer.utils.constant 中的类) stats(data_juicer.utils.constant.Fields 属性) step_callback() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl 方法) step_callback() (data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit 方法) stop_all_polling() (data_juicer.ops.mixins.EventDrivenMixin 方法) stop_polling() (data_juicer.ops.mixins.EventDrivenMixin 方法) stopwords_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) StopWordsFilter(data_juicer.ops.filter 中的类) StopWordsFilter(data_juicer.ops.filter.stopwords_filter 中的类) StrategyKey(data_juicer.core.data.load_strategy 中的类) STRATEGY(data_juicer.ops.mapper.video_resize_aspect_ratio_mapper.VideoResizeAspectRatioMapper 属性) STRATEGY(data_juicer.ops.mapper.VideoResizeAspectRatioMapper 属性) StreamToLoguru(data_juicer.utils.logger_utils 中的类) strip()(在 data_juicer.ops.common 模块中) strip()(在 data_juicer.ops.common.helper_func 模块中) SUFFIXES(data_juicer.format.csv_formatter.CsvFormatter 属性) SUFFIXES(data_juicer.format.CsvFormatter 属性) SUFFIXES(data_juicer.format.empty_formatter.EmptyFormatter 属性) SUFFIXES(data_juicer.format.empty_formatter.RayEmptyFormatter 属性) SUFFIXES(data_juicer.format.EmptyFormatter 属性) SUFFIXES(data_juicer.format.json_formatter.JsonFormatter 属性) SUFFIXES(data_juicer.format.JsonFormatter 属性) SUFFIXES(data_juicer.format.parquet_formatter.ParquetFormatter 属性) SUFFIXES(data_juicer.format.ParquetFormatter 属性) SUFFIXES(data_juicer.format.RayEmptyFormatter 属性) SUFFIXES(data_juicer.format.text_formatter.TextFormatter 属性) SUFFIXES(data_juicer.format.TextFormatter 属性) SUFFIXES(data_juicer.format.tsv_formatter.TsvFormatter 属性) SUFFIXES(data_juicer.format.TsvFormatter 属性) SuffixFilter(data_juicer.ops.filter 中的类) SuffixFilter(data_juicer.ops.filter.suffix_filter 中的类) suffix(data_juicer.utils.constant.Fields 属性) support_text(data_juicer.utils.constant.MetaKeys 属性) SwiftMessagesValidator(data_juicer.core.data.data_validator 中的类) T TagsSpecifiedFieldSelector(data_juicer.ops.selector 中的类) TagsSpecifiedFieldSelector(data_juicer.ops.selector.tags_specified_field_selector 中的类) take_batch()(data_juicer.core.Adapter 静态方法) take_batch()(data_juicer.core.adapter.Adapter 静态方法) target_entity(data_juicer.utils.constant.MetaKeys 属性) tearDown()(data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 类方法) tearDownClass()(data_juicer.utils.unittest_utils.DataJuicerTestCaseBase 类方法) TempDirManager(data_juicer.core.executor.ray_executor 中的类) TEST_TAG()(在 data_juicer.utils.unittest_utils 模块中) text_len(data_juicer.utils.constant.StatsKeysConstant 属性) text_pair_similarity(data_juicer.utils.constant.StatsKeysConstant 属性) TextActionFilter(data_juicer.ops.filter 中的类) TextActionFilter(data_juicer.ops.filter.text_action_filter 中的类) TextChunkMapper(data_juicer.ops.mapper 中的类) TextChunkMapper(data_juicer.ops.mapper.text_chunk_mapper 中的类) TextEntityDependencyFilter(data_juicer.ops.filter 中的类) TextEntityDependencyFilter(data_juicer.ops.filter.text_entity_dependency_filter 中的类) TextFormatter(data_juicer.format 中的类) TextFormatter(data_juicer.format.text_formatter 中的类) TextLengthFilter(data_juicer.ops.filter 中的类) TextLengthFilter(data_juicer.ops.filter.text_length_filter 中的类) TextPairSimilarityFilter(data_juicer.ops.filter 中的类) TextPairSimilarityFilter(data_juicer.ops.filter.text_pair_similarity_filter 中的类) TextTokenDistCollector(data_juicer.analysis.collector 中的类) TiB(data_juicer.core.Exporter 属性) TiB(data_juicer.core.exporter.Exporter 属性) timecode_string_to_seconds()(在 data_juicer.utils.mm_utils 模块中) to_json()(data_juicer.core.Exporter 静态方法) to_json()(data_juicer.core.exporter.Exporter 静态方法) to_jsonl()(data_juicer.core.Exporter 静态方法) to_jsonl()(data_juicer.core.exporter.Exporter 静态方法) to_parquet()(data_juicer.core.Exporter 静态方法) to_parquet()(data_juicer.core.exporter.Exporter 静态方法) TokenNumFilter(data_juicer.ops.filter 中的类) TokenNumFilter(data_juicer.ops.filter.token_num_filter 中的类) TopkSpecifiedFieldSelector(data_juicer.ops.selector 中的类) TopkSpecifiedFieldSelector(data_juicer.ops.selector.topk_specified_field_selector 中的类) trace_batch_mapper() (data_juicer.core.Tracer 方法) trace_batch_mapper() (data_juicer.core.tracer.Tracer 方法) trace_deduplicator() (data_juicer.core.Tracer 方法) trace_deduplicator() (data_juicer.core.tracer.Tracer 方法) trace_filter() (data_juicer.core.Tracer 方法) trace_filter() (data_juicer.core.tracer.Tracer 方法) trace_mapper() (data_juicer.core.Tracer 方法) trace_mapper() (data_juicer.core.tracer.Tracer 方法) Tracer(data_juicer.core 中的类) Tracer(data_juicer.core.tracer 中的类) transfer_data_dir()(在 data_juicer.utils.file_utils 模块中) transfer_filename()(在 data_juicer.utils.file_utils 模块中) triangle_area()(在 data_juicer.ops.filter.video_ocr_area_ratio_filter 模块中) trigger_event() (data_juicer.ops.mixins.EventDrivenMixin 方法) TsvFormatter(data_juicer.format 中的类) TsvFormatter(data_juicer.format.tsv_formatter 中的类) U uid(data_juicer.utils.constant.HashKeys 属性) unify_format()(在 data_juicer.format.formatter 模块中) union() (data_juicer.ops.common.helper_func.UnionFind 方法) union() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) union_list() (data_juicer.ops.deduplicator.ray_bts_minhash_deduplicator.BTSUnionFind 方法) UnionFind(data_juicer.ops.common.helper_func 中的类) update() (data_juicer.utils.fingerprint_utils.Hasher 方法) update_alpha_time_word()(在 data_juicer.ops.common.prompt2prompt_pipeline 模块中) update_args() (data_juicer.core.data.dj_dataset.NestedDataset 方法) update_args() (data_juicer.core.data.NestedDataset 方法) update_args() (data_juicer.core.NestedDataset 方法) update_ds_cache_dir_and_related_vars()(在 data_juicer.config.config 模块中) update_fingerprint()(在 data_juicer.utils.fingerprint_utils 模块中) update_op_attr()(在 data_juicer.config.config 模块中) update_op_process()(在 data_juicer.config.config 模块中) update_sampling_params()(在 data_juicer.utils.model_utils 模块中) use_cuda() (data_juicer.ops.base_op.OP 方法) V validate() (data_juicer.core.data.data_validator.BaseConversationValidator 方法) validate() (data_juicer.core.data.data_validator.CodeDataValidator 方法) validate() (data_juicer.core.data.data_validator.DataValidator 方法) validate() (data_juicer.core.data.data_validator.RequiredFieldsValidator 方法) validate_config() (data_juicer.core.data.config_validator.ConfigValidator 方法) validate_conversation() (data_juicer.core.data.data_validator.BaseConversationValidator 方法) validate_conversation() (data_juicer.core.data.data_validator.DataJuicerFormatValidator 方法) validate_conversation() (data_juicer.core.data.data_validator.SwiftMessagesValidator 方法) validate_snapshot_format()(在 data_juicer.download.downloader 模块中) video_aesthetic_score(data_juicer.utils.constant.StatsKeysConstant 属性) video_aspect_ratios(data_juicer.utils.constant.StatsKeysConstant 属性) video_audio_tags(data_juicer.utils.constant.MetaKeys 属性) video_duration(data_juicer.utils.constant.StatsKeysConstant 属性) video_frame_tags(data_juicer.utils.constant.MetaKeys 属性) video_frames_aesthetics_score(data_juicer.utils.constant.StatsKeysConstant 属性) video_frames_text_similarity(data_juicer.utils.constant.StatsKeysConstant 属性) video_frames(data_juicer.utils.constant.MetaKeys 属性) video_height(data_juicer.utils.constant.StatsKeysConstant 属性) video_motion_score(data_juicer.utils.constant.StatsKeysConstant 属性) video_nsfw_score(data_juicer.utils.constant.StatsKeysConstant 属性) video_ocr_area_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) video_watermark_prob(data_juicer.utils.constant.StatsKeysConstant 属性) video_width(data_juicer.utils.constant.StatsKeysConstant 属性) VideoAestheticsFilter(data_juicer.ops.filter 中的类) VideoAestheticsFilter(data_juicer.ops.filter.video_aesthetics_filter 中的类) VideoAspectRatioFilter(data_juicer.ops.filter 中的类) VideoAspectRatioFilter(data_juicer.ops.filter.video_aspect_ratio_filter 中的类) VideoCaptioningFromAudioMapper(data_juicer.ops.mapper 中的类) VideoCaptioningFromAudioMapper(data_juicer.ops.mapper.video_captioning_from_audio_mapper 中的类) VideoCaptioningFromFramesMapper(data_juicer.ops.mapper 中的类) VideoCaptioningFromFramesMapper(data_juicer.ops.mapper.video_captioning_from_frames_mapper 中的类) VideoCaptioningFromSummarizerMapper(data_juicer.ops.mapper 中的类) VideoCaptioningFromSummarizerMapper(data_juicer.ops.mapper.video_captioning_from_summarizer_mapper 中的类) VideoCaptioningFromVideoMapper(data_juicer.ops.mapper 中的类) VideoCaptioningFromVideoMapper(data_juicer.ops.mapper.video_captioning_from_video_mapper 中的类) VideoCapture()(在 data_juicer.ops.filter.video_motion_score_filter 模块中) VideoDeduplicator(data_juicer.ops.deduplicator 中的类) VideoDeduplicator(data_juicer.ops.deduplicator.video_deduplicator 中的类) VideoDurationFilter(data_juicer.ops.filter 中的类) VideoDurationFilter(data_juicer.ops.filter.video_duration_filter 中的类) VideoExtractFramesMapper(data_juicer.ops.mapper 中的类) VideoExtractFramesMapper(data_juicer.ops.mapper.video_extract_frames_mapper 中的类) VideoFaceBlurMapper(data_juicer.ops.mapper 中的类) VideoFaceBlurMapper(data_juicer.ops.mapper.video_face_blur_mapper 中的类) VideoFFmpegWrappedMapper(data_juicer.ops.mapper 中的类) VideoFFmpegWrappedMapper(data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper 中的类) VideoFramesTextSimilarityFilter(data_juicer.ops.filter 中的类) VideoFramesTextSimilarityFilter(data_juicer.ops.filter.video_frames_text_similarity_filter 中的类) videohash(data_juicer.utils.constant.HashKeys 属性) VideoMotionScoreFilter(data_juicer.ops.filter 中的类) VideoMotionScoreFilter(data_juicer.ops.filter.video_motion_score_filter 中的类) VideoMotionScoreRaftFilter(data_juicer.ops.filter 中的类) VideoMotionScoreRaftFilter(data_juicer.ops.filter.video_motion_score_raft_filter 中的类) VideoNSFWFilter(data_juicer.ops.filter 中的类) VideoNSFWFilter(data_juicer.ops.filter.video_nsfw_filter 中的类) VideoOcrAreaRatioFilter(data_juicer.ops.filter 中的类) VideoOcrAreaRatioFilter(data_juicer.ops.filter.video_ocr_area_ratio_filter 中的类) VideoRemoveWatermarkMapper(data_juicer.ops.mapper 中的类) VideoRemoveWatermarkMapper(data_juicer.ops.mapper.video_remove_watermark_mapper 中的类) VideoResizeAspectRatioMapper(data_juicer.ops.mapper 中的类) VideoResizeAspectRatioMapper(data_juicer.ops.mapper.video_resize_aspect_ratio_mapper 中的类) VideoResizeResolutionMapper(data_juicer.ops.mapper 中的类) VideoResizeResolutionMapper(data_juicer.ops.mapper.video_resize_resolution_mapper 中的类) VideoResolutionFilter(data_juicer.ops.filter 中的类) VideoResolutionFilter(data_juicer.ops.filter.video_resolution_filter 中的类) VideoSplitByDurationMapper(data_juicer.ops.mapper 中的类) VideoSplitByDurationMapper(data_juicer.ops.mapper.video_split_by_duration_mapper 中的类) VideoSplitByKeyFrameMapper(data_juicer.ops.mapper 中的类) VideoSplitByKeyFrameMapper(data_juicer.ops.mapper.video_split_by_key_frame_mapper 中的类) VideoSplitBySceneMapper(data_juicer.ops.mapper 中的类) VideoSplitBySceneMapper(data_juicer.ops.mapper.video_split_by_scene_mapper 中的类) VideoTaggingFromAudioMapper(data_juicer.ops.mapper 中的类) VideoTaggingFromAudioMapper(data_juicer.ops.mapper.video_tagging_from_audio_mapper 中的类) VideoTaggingFromFramesFilter(data_juicer.ops.filter 中的类) VideoTaggingFromFramesFilter(data_juicer.ops.filter.video_tagging_from_frames_filter 中的类) VideoTaggingFromFramesMapper(data_juicer.ops.mapper 中的类) VideoTaggingFromFramesMapper(data_juicer.ops.mapper.video_tagging_from_frames_mapper 中的类) VideoWatermarkFilter(data_juicer.ops.filter 中的类) VideoWatermarkFilter(data_juicer.ops.filter.video_watermark_filter 中的类) video(data_juicer.utils.mm_utils.SpecialTokens 属性) W wait_for_completion() (data_juicer.ops.mixins.EventDrivenMixin 方法) WhitespaceNormalizationMapper(data_juicer.ops.mapper 中的类) WhitespaceNormalizationMapper(data_juicer.ops.mapper.whitespace_normalization_mapper 中的类) WikipediaDownloader(data_juicer.download.wikipedia 中的类) WikipediaExtractor(data_juicer.download.wikipedia 中的类) WikipediaIterator(data_juicer.download.wikipedia 中的类) word_rep_ratio(data_juicer.utils.constant.StatsKeysConstant 属性) WordRepetitionFilter(data_juicer.ops.filter 中的类) WordRepetitionFilter(data_juicer.ops.filter.word_repetition_filter 中的类) words_augmentation()(在 data_juicer.ops.common 模块中) words_augmentation()(在 data_juicer.ops.common.helper_func 模块中) words_refinement()(在 data_juicer.ops.common 模块中) words_refinement()(在 data_juicer.ops.common.helper_func 模块中) WordsNumFilter(data_juicer.ops.filter 中的类) WordsNumFilter(data_juicer.ops.filter.words_num_filter 中的类) words(data_juicer.utils.constant.InterVars 属性) wrap_func_with_nested_access()(在 data_juicer.core.data 模块中) wrap_func_with_nested_access()(在 data_juicer.core.data.dj_dataset 模块中) write() (data_juicer.utils.logger_utils.StreamToLoguru 方法) Z ZstdCompressor(data_juicer.utils.compress 中的类)