import spaces
import torch
import gradio as gr
from diffusers import StableDiffusion3InstructPix2PixPipeline
from PIL import Image, ImageOps
from transformers.utils.hub import move_cache

# Migrate any old-style Hugging Face cache to the current layout (one call suffices).
move_cache()

pipe = StableDiffusion3InstructPix2PixPipeline.from_pretrained(
    "bpathir1/RefEdit-SD3", torch_dtype=torch.float16
).to("cuda")


@spaces.GPU(duration=120)
def generate(image, prompt, num_inference_steps=50, image_guidance_scale=1.5, guidance_scale=7.5, seed=255):
    generator = torch.manual_seed(int(seed))
    # Center-crop and resize the input to the 512x512 resolution the model expects.
    img = ImageOps.fit(image.convert("RGB"), (512, 512), method=Image.LANCZOS, centering=(0.5, 0.5))
    # mask_img=None selects free-form (mask-free) editing.
    return pipe(
        prompt,
        image=img,
        mask_img=None,
        num_inference_steps=num_inference_steps,
        image_guidance_scale=image_guidance_scale,
        guidance_scale=guidance_scale,
        generator=generator,
    ).images[0]


# Examples: [input image, prompt, steps, image guidance scale, text guidance scale, seed]
# (mask-related entries have been removed).
example_lists = [
    ['UltraEdit/images/example_images/1-input.png', "Add a moon in the sky", 20, 1.5, 12.5, 255],
    ['UltraEdit/images/example_images/2-input.png', "Add cherry blossoms", 20, 1.5, 12.5, 255],
    ['UltraEdit/images/example_images/3-input.png', "Please dress her in a short purple wedding dress adorned with white floral embroidery.", 20, 1.5, 7.5, 255],
    ['UltraEdit/images/example_images/4-input.png', "Give her a chief's headdress.", 20, 1.5, 7.5, 24555],
]
# Shallow-copy the examples for the Gradio `examples` argument.
mask_ex_list = [list(exp) for exp in example_lists]

# Plain image input; the mask-drawing editor has been removed, so a simple
# gr.Image is used (gr.Image takes no `transforms` argument -- that keyword
# belongs to gr.ImageEditor).
image_input = gr.Image(type="pil", label="Input Image")
prompt_input = gr.Textbox(label="Prompt")
num_inference_steps_input = gr.Slider(minimum=1, maximum=100, value=50, label="Number of Inference Steps")
image_guidance_scale_input = gr.Slider(minimum=0.0, maximum=2.5, value=1.5, label="Image Guidance Scale")
guidance_scale_input = gr.Slider(minimum=0.0, maximum=17.5, value=12.5, label="Guidance Scale")
seed_input = gr.Textbox(value="255", label="Random Seed")

inputs = [image_input, prompt_input, num_inference_steps_input, image_guidance_scale_input, guidance_scale_input, seed_input]
# `generate` returns a single image, so expose a single output component.
outputs = gr.Image(label="Generated Image")

article_html = """

<h1>🖼️ UltraEdit for Fine-Grained Image Editing</h1>

Haozhe Zhao<sup>1*</sup>, Xiaojian Ma<sup>2*</sup>, Liang Chen<sup>1</sup>, Shuzheng Si<sup>1</sup>, Rujie Wu<sup>1</sup>, Kaikai An<sup>1</sup>, Peiyu Yu<sup>3</sup>, Minjia Zhang<sup>4</sup>, Qing Li<sup>2</sup>, Baobao Chang<sup>2</sup>

<sup>1</sup>Peking University, <sup>2</sup>BIGAI, <sup>3</sup>UCLA, <sup>4</sup>UIUC

Dataset (4M) | Dataset (500k) | 🔗 Page | Checkpoint | GitHub

UltraEdit is a dataset designed for fine-grained, instruction-based image editing. It contains over 4 million free-form image editing samples and more than 100,000 region-based image editing samples, automatically generated with real images as anchors.

This demo performs image editing with a Stable Diffusion 3 model trained on this dataset. The model supports both free-form (without mask) and region-based (with mask) editing; this demo performs free-form editing. Use the sliders to adjust the number of inference steps and the guidance scales, and provide a seed for reproducibility. An image guidance scale of 1.5 with a text guidance scale of 7.5 (free-form) or 12.5 (region-based) is a good starting point.

Usage Instructions: Upload an image and provide an editing prompt. The edit is applied to the whole image (free-form editing); the input is center-cropped and resized to 512×512 before editing.

""" html = '''
Limitations:
''' demo = gr.Interface( fn=generate, inputs=inputs, outputs=outputs, description=article_html, article=html, examples=mask_ex_list, cache_examples = True, live = False ) demo.queue().launch()
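# For reference, the pipeline can also be invoked directly, without the Gradio
# UI. A minimal sketch mirroring `generate` above ("input.png" is a
# hypothetical local file; `mask_img` is assumed to be this custom pipeline's
# keyword for an optional region mask, with None meaning free-form editing):
#
#     img = ImageOps.fit(Image.open("input.png").convert("RGB"), (512, 512),
#                        method=Image.LANCZOS, centering=(0.5, 0.5))
#     edited = pipe(
#         "Add a moon in the sky",
#         image=img,
#         mask_img=None,
#         num_inference_steps=50,
#         image_guidance_scale=1.5,
#         guidance_scale=7.5,
#         generator=torch.manual_seed(255),
#     ).images[0]
#     edited.save("edited.png")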