bimsarapathiraja committed on
Commit
00e1a69
·
1 Parent(s): 9643ad3

Change desc

Browse files
UltraEdit/images/example_images/3ppl2.jpg ADDED
UltraEdit/images/example_images/4ppl1.jpg ADDED
UltraEdit/images/example_images/4ppl2.jpg ADDED
UltraEdit/images/example_images/bowls1.jpg ADDED
UltraEdit/images/example_images/cat1.jpg ADDED
UltraEdit/images/example_images/cat2.jpg ADDED
app.py CHANGED
@@ -40,10 +40,12 @@ def generate(image, prompt, num_inference_steps=50, image_guidance_scale=1.5, gu
40
 
41
  # Update the example list to remove mask-related entries
42
  example_lists = [
43
- ['UltraEdit/images/example_images/1-input.png', "Add a moon in the sky", 20, 1.5, 12.5, 255],
44
- ['UltraEdit/images/example_images/2-input.png', "Add cherry blossoms", 20, 1.5, 12.5, 255],
45
- ['UltraEdit/images/example_images/3-input.png', "Please dress her in a short purple wedding dress adorned with white floral embroidery.", 20, 1.5, 7.5, 255],
46
- ['UltraEdit/images/example_images/4-input.png', "Give her a chief's headdress.", 20, 1.5, 7.5, 24555]
 
 
47
  ]
48
 
49
  # Update the mask_ex_list to reflect the new example list structure
@@ -67,66 +69,57 @@ outputs = [gr.Image(label="Generated Image")]
67
  article_html = """
68
  <div style="text-align: center; max-width: 1000px; margin: 20px auto; font-family: Arial, sans-serif;">
69
  <h2 style="font-weight: 900; font-size: 2.5rem; margin-bottom: 0.5rem;">
70
- 🖼️ RefEdit-SD3 for Fine-Grained Image Editing
71
  </h2>
72
  <div style="margin-bottom: 1rem;">
73
  <h3 style="font-weight: 500; font-size: 1.25rem; margin: 0;"></h3>
74
  <p style="font-weight: 400; font-size: 1rem; margin: 0.5rem 0;">
75
- Haozhe Zhao<sup>1*</sup>, Xiaojian Ma<sup>2*</sup>, Liang Chen<sup>1</sup>, Shuzheng Si<sup>1</sup>, Rujie Wu<sup>1</sup>,
76
- Kaikai An<sup>1</sup>, Peiyu Yu<sup>3</sup>, Minjia Zhang<sup>4</sup>, Qing Li<sup>2</sup>, Baobao Chang<sup>2</sup>
77
  </p>
78
  <p style="font-weight: 400; font-size: 1rem; margin: 0;">
79
- <sup>1</sup>Peking University, <sup>2</sup>BIGAI, <sup>3</sup>UCLA, <sup>4</sup>UIUC
80
  </p>
81
  </div>
82
  <div style="margin: 1rem 0; display: flex; justify-content: center; gap: 1.5rem; flex-wrap: wrap;">
83
- <a href="https://huggingface.co/datasets/BleachNick/UltraEdit" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
84
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Dataset_4M" style="height: 20px; vertical-align: middle;"> Dataset
85
  </a>
86
- <a href="https://huggingface.co/datasets/BleachNick/UltraEdit_500k" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
87
- <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Dataset_500k" style="height: 20px; vertical-align: middle;"> Dataset_500k
88
- </a>
89
- <a href="https://ultra-editing.github.io/" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
90
  <span style="font-size: 20px; vertical-align: middle;">🔗</span> Page
91
  </a>
92
- <a href="https://huggingface.co/BleachNick/SD3_UltraEdit_w_mask" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
93
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Checkpoint" style="height: 20px; vertical-align: middle;"> Checkpoint
94
  </a>
95
- <a href="https://github.com/HaozheZhao/UltraEdit" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
96
  <img src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub" style="height: 20px; vertical-align: middle;"> GitHub
97
  </a>
98
  </div>
99
  <div style="text-align: left; margin: 0 auto; font-size: 1rem; line-height: 1.5;">
100
  <p>
101
- <b>UltraEdit</b> is a dataset designed for fine-grained, instruction-based image editing. It contains over 4 million free-form image editing samples and more than 100,000 region-based image editing samples, automatically generated with real images as anchors.
102
- </p>
103
- <p>
104
- This demo allows you to perform image editing using the <a href="https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers" style="color: blue; text-decoration: none;">Stable Diffusion 3</a> model trained with this extensive dataset. It supports both free-form (without mask) and region-based (with mask) image editing. Use the sliders to adjust the inference steps and guidance scales, and provide a seed for reproducibility. The image guidance scale of 1.5 and text guidance scale of 7.5 / 12.5 is a good start for free-form/region-based image editing.
105
  </p>
106
- <p>
107
- <b>Usage Instructions:</b> You need to upload the images and prompts for editing. Use the pen tool to mark the areas you want to edit. If no region is marked, it will resort to free-form editing.
108
- </p>
109
  </div>
110
  </div>
111
  """
112
- html = '''
113
- <div style="text-align: left; margin-top: 2rem; font-size: 0.85rem; color: gray;">
114
- <b>Limitations:</b>
115
- <ul>
116
- <li>We have not conducted any NSFW checks;</li>
117
- <li>Due to the bias of the generated models, the model performance is still weak when dealing with high-frequency information such as <b>human facial expressions or text in the images</b>;</li>
118
- <li>We unified the free-form and region-based image editing by adding an extra channel of the mask image to the dataset. When doing free-form image editing, the network receives a blank mask.</li>
119
- <li>The generation result is sensitive to the guidance scale. For text guidance, based on experience, free-form image editing will perform better with a relatively low guidance score (7.5 or lower), while region-based image editing will perform better with a higher guidance score.</li>
120
- </ul>
121
- </div>
122
- '''
123
 
124
  demo = gr.Interface(
125
  fn=generate,
126
  inputs=inputs,
127
  outputs=outputs,
128
  description=article_html,
129
- article=html,
130
  examples=mask_ex_list,
131
  cache_examples = True,
132
  live = False
 
40
 
41
  # Update the example list to remove mask-related entries
42
  example_lists = [
43
+ ['UltraEdit/images/example_images/4ppl2.jpg', "Add a flower on the t-shirt of the guy in the middle with dark jeans", 50, 1.5, 7.5, 255],
44
+ ['UltraEdit/images/example_images/cat2.jpg', "Add a green scarf to the right cat", 50, 1.5, 7.5, 255],
45
+ ['UltraEdit/images/example_images/3ppl2.jpg', "Add a flower bunch to the person with a red jacket", 50, 1.5, 7.5, 255],
46
+ ['UltraEdit/images/example_images/4ppl1.jpg', "Let the rightmost person wear a golden dress", 50, 1.5, 7.5, 255],
47
+ ['UltraEdit/images/example_images/bowls1.jpg', "Remove the bowl with some leaves in the middle", 50, 1.5, 7.5, 255],
48
+ ['UltraEdit/images/example_images/cat1.jpg', "Can we have a dog instead of the cat looking at the camera?", 50, 1.5, 7.5, 255],
49
  ]
50
 
51
  # Update the mask_ex_list to reflect the new example list structure
 
69
  article_html = """
70
  <div style="text-align: center; max-width: 1000px; margin: 20px auto; font-family: Arial, sans-serif;">
71
  <h2 style="font-weight: 900; font-size: 2.5rem; margin-bottom: 0.5rem;">
72
+ RefEdit-SD3 for Fine-Grained Image Editing
73
  </h2>
74
  <div style="margin-bottom: 1rem;">
75
  <h3 style="font-weight: 500; font-size: 1.25rem; margin: 0;"></h3>
76
  <p style="font-weight: 400; font-size: 1rem; margin: 0.5rem 0;">
77
+ Bimsara Pathiraja<sup>1*</sup>, Maitreya Patel<sup>2*</sup>, Shivam Singh, Yezhou Yang, Chitta Baral
 
78
  </p>
79
  <p style="font-weight: 400; font-size: 1rem; margin: 0;">
80
+ Arizona State University
81
  </p>
82
  </div>
83
  <div style="margin: 1rem 0; display: flex; justify-content: center; gap: 1.5rem; flex-wrap: wrap;">
84
+ <a href="https://huggingface.co/datasets/bpathir1/RefEdit" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
85
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Dataset_4M" style="height: 20px; vertical-align: middle;"> Dataset
86
  </a>
87
+
88
+ <a href="https://refedit.vercel.app/" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
 
 
89
  <span style="font-size: 20px; vertical-align: middle;">🔗</span> Page
90
  </a>
91
+ <a href="https://huggingface.co/bpathir1/RefEdit-SD3" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
92
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Checkpoint" style="height: 20px; vertical-align: middle;"> Checkpoint
93
  </a>
94
+ <a href="https://github.com/bimsarapathiraja/refedit_private" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
95
  <img src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub" style="height: 20px; vertical-align: middle;"> GitHub
96
  </a>
97
  </div>
98
  <div style="text-align: left; margin: 0 auto; font-size: 1rem; line-height: 1.5;">
99
  <p>
100
+ <b>RefEdit</b> is a benchmark and method for improving instruction-based image editing models for referring expressions.
 
 
 
101
  </p>
 
 
 
102
  </div>
103
  </div>
104
  """
105
+ # html = '''
106
+ # <div style="text-align: left; margin-top: 2rem; font-size: 0.85rem; color: gray;">
107
+ # <b>Limitations:</b>
108
+ # <ul>
109
+ # <li>We have not conducted any NSFW checks;</li>
110
+ # <li>Due to the bias of the generated models, the model performance is still weak when dealing with high-frequency information such as <b>human facial expressions or text in the images</b>;</li>
111
+ # <li>We unified the free-form and region-based image editing by adding an extra channel of the mask image to the dataset. When doing free-form image editing, the network receives a blank mask.</li>
112
+ # <li>The generation result is sensitive to the guidance scale. For text guidance, based on experience, free-form image editing will perform better with a relatively low guidance score (7.5 or lower), while region-based image editing will perform better with a higher guidance score.</li>
113
+ # </ul>
114
+ # </div>
115
+ # '''
116
 
117
  demo = gr.Interface(
118
  fn=generate,
119
  inputs=inputs,
120
  outputs=outputs,
121
  description=article_html,
122
+ # article=html,
123
  examples=mask_ex_list,
124
  cache_examples = True,
125
  live = False