alexnasa committed on
Commit
d73c075
·
verified ·
1 Parent(s): 87c1890

Update app.py

Files changed (1)
  1. app.py +150 -58
app.py CHANGED
@@ -3,32 +3,105 @@ import subprocess
 import os
 import shutil
 from pathlib import Path
-from PIL import Image
+from PIL import Image, ImageDraw
 import spaces
 
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------------------
 # CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------------------
 
 INPUT_DIR = "samples"
 OUTPUT_DIR = "inference_results/coz_vlmprompt"
 
-# -----------------------------------------------------------------------------
-# HELPER FUNCTION TO RUN INFERENCE AND RETURN THE OUTPUT IMAGE PATHS
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------------------
+# HELPER: Resize & center-crop to 512, preserving aspect ratio
+# ------------------------------------------------------------------
 
+def resize_and_center_crop(img: Image.Image, size: int) -> Image.Image:
+    """
+    Resize the input PIL image so that its shorter side == `size`,
+    then center-crop to exactly (size x size).
+    """
+    w, h = img.size
+    scale = size / min(w, h)
+    new_w, new_h = int(w * scale), int(h * scale)
+    img = img.resize((new_w, new_h), Image.LANCZOS)
+
+    left = (new_w - size) // 2
+    top = (new_h - size) // 2
+    return img.crop((left, top, left + size, top + size))
+
+
+# ------------------------------------------------------------------
+# HELPER: Draw four concentric, centered rectangles on a 512×512 image
+# ------------------------------------------------------------------
+
+def make_preview_with_boxes(image_path: str, scale_option: str) -> Image.Image:
+    """
+    1) Open the uploaded image from disk.
+    2) Resize & center-crop it to exactly 512×512.
+    3) Depending on scale_option ("1x", "2x", "4x"), compute four rectangle sizes:
+         - "1x": [512, 512, 512, 512]
+         - "2x": [256, 128, 64, 32]
+         - "4x": [128, 64, 32, 16]
+    4) Draw each of those four rectangles (outline only), all centered.
+    5) Return the modified PIL image.
+    """
+    try:
+        orig = Image.open(image_path).convert("RGB")
+    except Exception as e:
+        # If something fails, return a plain 512×512 gray image as fallback
+        fallback = Image.new("RGB", (512, 512), (200, 200, 200))
+        draw = ImageDraw.Draw(fallback)
+        draw.text((20, 20), f"Error:\n{e}", fill="red")
+        return fallback
+
+    # 1. Resize & center-crop to 512×512
+    base = resize_and_center_crop(orig, 512)  # now `base.size == (512, 512)`
+
+    # 2. Determine the four box sizes
+    scale_int = int(scale_option.replace("x", ""))  # e.g. "2x" -> 2
+    if scale_int == 1:
+        sizes = [512, 512, 512, 512]
+    else:
+        # For scale=2: sizes = [512//2, 512//(2*2), 512//(2*4), 512//(2*8)] -> [256, 128, 64, 32]
+        # For scale=4: sizes = [512//4, 512//(4*2), 512//(4*4), 512//(4*8)] -> [128, 64, 32, 16]
+        sizes = [512 // (scale_int * (2 ** i)) for i in range(4)]
+
+    draw = ImageDraw.Draw(base)
+
+    # 3. Outline color cycle (you can change these or use just one color)
+    colors = ["red", "lime", "cyan", "yellow"]
+    width = 3  # thickness of each rectangle's outline
+
+    for idx, s in enumerate(sizes):
+        # Compute top-left corner so that box is centered in 512×512
+        x0 = (512 - s) // 2
+        y0 = (512 - s) // 2
+        x1 = x0 + s
+        y1 = y0 + s
+        draw.rectangle([(x0, y0), (x1, y1)], outline=colors[idx % len(colors)], width=width)
+
+    return base
+
+
+# ------------------------------------------------------------------
+# HELPER FUNCTIONS FOR INFERENCE & CAPTION (unchanged from your original)
+# ------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def run_with_upload(uploaded_image_path, upscale_option):
     """
     1) Clear INPUT_DIR
     2) Save the uploaded file as input.png in INPUT_DIR
-    3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1", "2", or "4"
+    3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1", "2", "4"
     4) Call inference_coz.py with `--upscale <that_value>`
     5) Return the FOUR output-PNG file-paths as a Python list, so that Gradio's Gallery
-       can display them and we can click on each one later.
+       can display them.
     """
+    # ————————————————————————————————————————————————————————————
+    # (Copy-paste exactly your existing code here; no changes needed)
+    # ————————————————————————————————————————————————————————————
 
-    # 1) Make sure INPUT_DIR exists; if it does, delete everything inside.
     os.makedirs(INPUT_DIR, exist_ok=True)
     for fn in os.listdir(INPUT_DIR):
         full_path = os.path.join(INPUT_DIR, fn)
@@ -40,7 +113,6 @@ def run_with_upload(uploaded_image_path, upscale_option):
         except Exception as e:
             print(f"Warning: could not delete {full_path}: {e}")
 
-    # 2) Copy the uploaded image into INPUT_DIR.
     if uploaded_image_path is None:
         return []
     try:
@@ -55,7 +127,6 @@ def run_with_upload(uploaded_image_path, upscale_option):
         print(f"Error: could not save as PNG: {e}")
         return []
 
-    # 3) Build and run your inference_coz.py command.
     upscale_value = upscale_option.replace("x", "")  # e.g. "2x" → "2"
     cmd = [
         "python", "inference_coz.py",
@@ -76,52 +147,34 @@ def run_with_upload(uploaded_image_path, upscale_option):
         print("Inference failed:", err)
         return []
 
-    # -------------------------------------------------------------------------
-    # 4) After inference, gather the four numbered PNGs and return their paths
-    # -------------------------------------------------------------------------
     per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
-    # We expect 1.png, 2.png, 3.png, 4.png in that folder
     expected_files = [
         os.path.join(per_sample_dir, f"{i}.png")
         for i in range(1, 5)
     ]
-
-    # Verify they exist; if any is missing, return an empty list
    for fp in expected_files:
         if not os.path.isfile(fp):
             print(f"Warning: expected file not found: {fp}")
             return []
-
-    # Return the list of file-paths (strings). Gradio's Gallery will display them.
     return expected_files
 
 
-# -----------------------------------------------------------------------------
-# HELPER: Given a selected image PATH, read the matching .txt in .../txt/
-# -----------------------------------------------------------------------------
-
 def get_caption(src_gallery, evt: gr.SelectData):
-    selected_image_path = src_gallery[evt.index][0]
-
     """
-    Gradio will pass in something like '/full/path/to/inference_results/coz_vlmprompt/per-sample/input/2.png'.
-    We want to replace '2.png' → '2.txt' and look under '.../per-sample/input/txt/2.txt'.
-    Return the text contents (or a default message if not found).
+    Given a clicked-on image in the gallery, read the corresponding .txt in
+    .../per-sample/input/txt and return its contents.
     """
-    if not selected_image_path or not os.path.isfile(selected_image_path):
+    if not src_gallery or not os.path.isfile(src_gallery[evt.index][0]):
         return "No caption available."
 
-    # Extract just the base name, e.g. '2.png' → '2'
-    base = os.path.basename(selected_image_path)   # e.g. '2.png'
-    stem = os.path.splitext(base)[0]               # e.g. '2'
-
-    # Construct the .txt filename under the 'txt' subdirectory:
+    selected_image_path = src_gallery[evt.index][0]
+    base = os.path.basename(selected_image_path)   # e.g. "2.png"
+    stem = os.path.splitext(base)[0]               # e.g. "2"
     txt_folder = os.path.join(OUTPUT_DIR, "per-sample", "input", "txt")
     txt_path = os.path.join(txt_folder, f"{int(stem) - 1}.txt")
 
     if not os.path.isfile(txt_path):
         return f"Caption file not found: {int(stem) - 1}.txt"
-
     try:
         with open(txt_path, "r", encoding="utf-8") as f:
             caption = f.read().strip()
@@ -130,9 +183,9 @@ def get_caption(src_gallery, evt: gr.SelectData):
         return f"Error reading caption: {e}"
 
 
-# -------------------------------------------------------------
-# BUILD THE GRADIO INTERFACE
-# -------------------------------------------------------------
+# ------------------------------------------------------------------
+# BUILD THE GRADIO INTERFACE (with updated callbacks)
+# ------------------------------------------------------------------
 
 css = """
 #col-container {
@@ -178,40 +231,79 @@ with gr.Blocks(css=css) as demo:
         # 3) Button to launch inference
         run_button = gr.Button("Chain-of-Zoom it")
 
+        # 4) Show the 512×512 preview with four centered rectangles
+        preview_with_box = gr.Image(
+            label="Preview (512×512 with centered boxes)",
+            type="pil",          # we'll return a PIL.Image from our function
+            interactive=False
+        )
+
+
     with gr.Column():
+        # 5) Gallery to display multiple output images
+        output_gallery = gr.Gallery(
+            label="Inference Results",
+            show_label=True,
+            elem_id="gallery",
+            columns=[2], rows=[2]
+        )
+
+        # 6) Textbox under the gallery for showing captions
+        caption_text = gr.Textbox(
+            label="Caption",
+            lines=4,
+            placeholder="Click on any image above to see its caption here."
+        )
+
+    # ------------------------------------------------------------------
+    # CALLBACK #1: Whenever the user uploads or changes the radio, update preview
+    # ------------------------------------------------------------------
+
+    def update_preview(img_path, scale_opt):
+        """
+        If there's no image uploaded yet, return None (Gradio will show blank).
+        Otherwise, draw the resized 512×512 + four boxes and return it.
+        """
+        if img_path is None:
+            return None
+        return make_preview_with_boxes(img_path, scale_opt)
+
+    # When the user uploads a new file:
+    upload_image.change(
+        fn=update_preview,
+        inputs=[upload_image, upscale_radio],
+        outputs=[preview_with_box]
+    )
+
+    # Also trigger preview redraw if they switch 1×/2×/4× after uploading:
+    upscale_radio.change(
+        fn=update_preview,
+        inputs=[upload_image, upscale_radio],
+        outputs=[preview_with_box]
+    )
+
+    # ------------------------------------------------------------------
+    # CALLBACK #2: When "Chain-of-Zoom it" is clicked, run inference
+    # ------------------------------------------------------------------
 
-        # 4) Gallery to display multiple output images
-        output_gallery = gr.Gallery(
-            label="Inference Results",
-            show_label=True,
-            elem_id="gallery",
-            columns=[2], rows=[2]
-        )
-
-
-        # 5) Textbox under the gallery for showing captions
-        caption_text = gr.Textbox(
-            label="Caption",
-            lines=4,
-            placeholder="Click on any image above to see its caption here."
-        )
-
-    # Wire the button: when clicked, call run_with_upload(...) → output_gallery
     run_button.click(
         fn=run_with_upload,
         inputs=[upload_image, upscale_radio],
         outputs=[output_gallery]
     )
 
-    # Wire gallery clicks: when an image is clicked, run get_caption(...) → caption_text
+    # ------------------------------------------------------------------
+    # CALLBACK #3: When an image in the gallery is clicked, show its caption
+    # ------------------------------------------------------------------
+
     output_gallery.select(
         fn=get_caption,
         inputs=[output_gallery],
        outputs=[caption_text]
     )
 
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------------------
 # START THE GRADIO SERVER
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------------------
 
 demo.launch(share=True)
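
As a quick sanity check of the box-size formula added in make_preview_with_boxes above, the short sketch below reproduces the values listed in its docstring for each upscale option; the helper name box_sizes is illustrative only and does not appear in the commit.

# Illustrative sketch (not part of app.py): mirrors the size computation
# used by make_preview_with_boxes for the three upscale options.
def box_sizes(scale_option: str, base: int = 512) -> list[int]:
    scale = int(scale_option.replace("x", ""))
    if scale == 1:
        return [base] * 4
    return [base // (scale * (2 ** i)) for i in range(4)]

print(box_sizes("1x"))  # [512, 512, 512, 512]
print(box_sizes("2x"))  # [256, 128, 64, 32]
print(box_sizes("4x"))  # [128, 64, 32, 16]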