data_juicer.ops.common.prompt2prompt_pipeline module

data_juicer.ops.common.prompt2prompt_pipeline.rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0)[source]

Rescale noise_cfg according to guidance_rescale. Based on findings of [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).

See Section 3.4 of the paper.

class data_juicer.ops.common.prompt2prompt_pipeline.Prompt2PromptPipeline(vae: AutoencoderKL, text_encoder: CLIPTextModel, text_encoder_2: CLIPTextModelWithProjection, tokenizer: CLIPTokenizer, tokenizer_2: CLIPTokenizer, unet: UNet2DConditionModel, scheduler: KarrasDiffusionSchedulers, image_encoder: CLIPVisionModelWithProjection = None, feature_extractor: CLIPImageProcessor = None, force_zeros_for_empty_prompt: bool = True, add_watermarker: bool | None = None)[source]

Bases: StableDiffusionXLPipeline

Prompt-to-Prompt pipeline for text-to-image generation using Stable Diffusion. This model inherits from [StableDiffusionXLPipeline]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.).

Args:

vae ([AutoencoderKL]):

Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.

text_encoder ([CLIPTextModel]):

Frozen text-encoder. Stable Diffusion uses the text portion of [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.

tokenizer (CLIPTokenizer):

Tokenizer of class [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).

unet ([UNet2DConditionModel]):

Conditional U-Net architecture to denoise the encoded image latents.

scheduler ([SchedulerMixin]):

A scheduler to be used in combination with unet to denoise the encoded image latents. Can be one of [DDIMScheduler], [LMSDiscreteScheduler], or [PNDMScheduler].

safety_checker ([StableDiffusionSafetyChecker]):

Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.

feature_extractor ([CLIPFeatureExtractor]):

Model that extracts features from generated images to be used as inputs for the safety_checker.

check_inputs(prompt, prompt_2, height, width, callback_steps, negative_prompt=None, negative_prompt_2=None, prompt_embeds=None, negative_prompt_embeds=None, pooled_prompt_embeds=None, negative_pooled_prompt_embeds=None)[source]
register_attention_control(controller)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.P2PCrossAttnProcessor(controller, place_in_unet)[source]

Bases: object

__init__(controller, place_in_unet)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionControl(attn_res=None)[source]

Bases: ABC

step_callback(x_t)[source]
between_steps()[source]
property num_uncond_att_layers
abstractmethod forward(attn, is_cross: bool, place_in_unet: str)[source]
reset()[source]
__init__(attn_res=None)[source]
data_juicer.ops.common.prompt2prompt_pipeline.create_controller(prompts: List[str], cross_attention_kwargs: Dict, num_inference_steps: int, tokenizer, device, attn_res) → AttentionControl[source]
class data_juicer.ops.common.prompt2prompt_pipeline.EmptyControl(attn_res=None)[source]

Bases: AttentionControl

forward(attn, is_cross: bool, place_in_unet: str)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionStore(attn_res=None)[source]

Bases: AttentionControl

static get_empty_store()[source]
forward(attn, is_cross: bool, place_in_unet: str)[source]
between_steps()[source]
get_average_attention()[source]
reset()[source]
__init__(attn_res=None)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.LocalBlend(prompts: List[str], words: [List[List[str]]], tokenizer, device, threshold=0.3, attn_res=None)[source]

Bases: object

__init__(prompts: List[str], words: [List[List[str]]], tokenizer, device, threshold=0.3, attn_res=None)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionControlEdit(prompts, num_steps: int, cross_replace_steps: float | Tuple[float, float] | Dict[str, Tuple[float, float]], self_replace_steps: float | Tuple[float, float], local_blend: LocalBlend | None, tokenizer, device, attn_res=None)[source]

Bases: AttentionStore, ABC

step_callback(x_t)[source]
replace_self_attention(attn_base, att_replace)[source]
abstractmethod replace_cross_attention(attn_base, att_replace)[source]
forward(attn, is_cross: bool, place_in_unet: str)[source]
__init__(prompts, num_steps: int, cross_replace_steps: float | Tuple[float, float] | Dict[str, Tuple[float, float]], self_replace_steps: float | Tuple[float, float], local_blend: LocalBlend | None, tokenizer, device, attn_res=None)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionReplace(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, local_blend: LocalBlend | None = None, tokenizer=None, device=None, attn_res=None)[source]

Bases: AttentionControlEdit

replace_cross_attention(attn_base, att_replace)[source]
__init__(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, local_blend: LocalBlend | None = None, tokenizer=None, device=None, attn_res=None)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionRefine(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, local_blend: LocalBlend | None = None, tokenizer=None, device=None, attn_res=None)[source]

Bases: AttentionControlEdit

replace_cross_attention(attn_base, att_replace)[source]
__init__(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, local_blend: LocalBlend | None = None, tokenizer=None, device=None, attn_res=None)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.AttentionReweight(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, equalizer, local_blend: LocalBlend | None = None, controller: AttentionControlEdit | None = None, tokenizer=None, device=None, attn_res=None)[source]

Bases: AttentionControlEdit

replace_cross_attention(attn_base, att_replace)[source]
__init__(prompts, num_steps: int, cross_replace_steps: float, self_replace_steps: float, equalizer, local_blend: LocalBlend | None = None, controller: AttentionControlEdit | None = None, tokenizer=None, device=None, attn_res=None)[source]
data_juicer.ops.common.prompt2prompt_pipeline.update_alpha_time_word(alpha, bounds: float | Tuple[float, float], prompt_ind: int, word_inds: Tensor | None = None)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_time_words_attention_alpha(prompts, num_steps, cross_replace_steps: float | Dict[str, Tuple[float, float]], tokenizer, max_num_words=77)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_word_inds(text: str, word_place: int, tokenizer)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_replacement_mapper_(x: str, y: str, tokenizer, max_len=77)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_replacement_mapper(prompts, tokenizer, max_len=77)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_equalizer(text: str, word_select: int | Tuple[int, ...], values: List[float] | Tuple[float, ...], tokenizer)[source]
class data_juicer.ops.common.prompt2prompt_pipeline.ScoreParams(gap, match, mismatch)[source]

Bases: object

__init__(gap, match, mismatch)[source]
mis_match_char(x, y)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_matrix(size_x, size_y, gap)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_traceback_matrix(size_x, size_y)[source]
data_juicer.ops.common.prompt2prompt_pipeline.global_align(x, y, score)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_aligned_sequences(x, y, trace_back)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_mapper(x: str, y: str, tokenizer, max_len=77)[source]
data_juicer.ops.common.prompt2prompt_pipeline.get_refinement_mapper(prompts, tokenizer, max_len=77)[source]