Spaces:

alexnasa
/

Chain-of-Zoom

Running on Zero

App Files Files Community

alexnasa committed on May 31

Commit

d73c075

verified ·

1 Parent(s): 87c1890

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -58

app.py CHANGED Viewed

@@ -3,32 +3,105 @@ import subprocess
 import os
 import shutil
 from pathlib import Path
-from PIL import Image
 import spaces
-# -----------------------------------------------------------------------------
 # CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
-# -----------------------------------------------------------------------------
 INPUT_DIR   = "samples"
 OUTPUT_DIR  = "inference_results/coz_vlmprompt"
-# -----------------------------------------------------------------------------
-# HELPER FUNCTION TO RUN INFERENCE AND RETURN THE OUTPUT IMAGE PATHS
-# -----------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def run_with_upload(uploaded_image_path, upscale_option):
     """
     1) Clear INPUT_DIR
     2) Save the uploaded file as input.png in INPUT_DIR
-    3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1", "2", or "4"
     4) Call inference_coz.py with `--upscale <that_value>`
     5) Return the FOUR output‐PNG file‐paths as a Python list, so that Gradio's Gallery
-       can display them and we can click on each one later.
     """
-    # 1) Make sure INPUT_DIR exists; if it does, delete everything inside.
     os.makedirs(INPUT_DIR, exist_ok=True)
     for fn in os.listdir(INPUT_DIR):
         full_path = os.path.join(INPUT_DIR, fn)
@@ -40,7 +113,6 @@ def run_with_upload(uploaded_image_path, upscale_option):
         except Exception as e:
             print(f"Warning: could not delete {full_path}: {e}")
-    # 2) Copy the uploaded image into INPUT_DIR.
     if uploaded_image_path is None:
         return []
     try:
@@ -55,7 +127,6 @@ def run_with_upload(uploaded_image_path, upscale_option):
         print(f"Error: could not save as PNG: {e}")
         return []
-    # 3) Build and run your inference_coz.py command.
     upscale_value = upscale_option.replace("x", "")  # e.g. "2x" → "2"
     cmd = [
         "python", "inference_coz.py",
@@ -76,52 +147,34 @@ def run_with_upload(uploaded_image_path, upscale_option):
         print("Inference failed:", err)
         return []
-    # -------------------------------------------------------------------------
-    # 4) After inference, gather the four numbered PNGs and return their paths
-    # -------------------------------------------------------------------------
     per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
-    # We expect 1.png, 2.png, 3.png, 4.png in that folder
     expected_files = [
         os.path.join(per_sample_dir, f"{i}.png")
         for i in range(1, 5)
     ]
-    # Verify they exist; if any is missing, return an empty list
     for fp in expected_files:
         if not os.path.isfile(fp):
             print(f"Warning: expected file not found: {fp}")
             return []
-    # Return the list of file‐paths (strings). Gradio's Gallery will display them.
     return expected_files
-# -----------------------------------------------------------------------------
-# HELPER: Given a selected image PATH, read the matching .txt in .../txt/
-# -----------------------------------------------------------------------------
 def get_caption(src_gallery, evt: gr.SelectData):
-    selected_image_path = src_gallery[evt.index][0]
     """
-    Gradio will pass in something like '/full/path/to/inference_results/coz_vlmprompt/per-sample/input/2.png'.
-    We want to replace '2.png' → '2.txt' and look under '.../per-sample/input/txt/2.txt'.
-    Return the text contents (or a default message if not found).
     """
-    if not selected_image_path or not os.path.isfile(selected_image_path):
         return "No caption available."
-    # Extract just the base name, e.g. '2.png' → '2'
-    base = os.path.basename(selected_image_path)             # e.g. '2.png'
-    stem = os.path.splitext(base)[0]                         # e.g. '2'
-    # Construct the .txt filename under the 'txt' subdirectory:
     txt_folder = os.path.join(OUTPUT_DIR, "per-sample", "input", "txt")
     txt_path = os.path.join(txt_folder, f"{int(stem) - 1}.txt")
     if not os.path.isfile(txt_path):
         return f"Caption file not found: {int(stem) - 1}.txt"
     try:
         with open(txt_path, "r", encoding="utf-8") as f:
             caption = f.read().strip()
@@ -130,9 +183,9 @@ def get_caption(src_gallery, evt: gr.SelectData):
         return f"Error reading caption: {e}"
-# -------------------------------------------------------------
-# BUILD THE GRADIO INTERFACE
-# -------------------------------------------------------------
 css = """
 #col-container {
@@ -178,40 +231,79 @@ with gr.Blocks(css=css) as demo:
                 # 3) Button to launch inference
                 run_button = gr.Button("Chain-of-Zoom it")
             with gr.Column():
-              # 4) Gallery to display multiple output images
-              output_gallery = gr.Gallery(
-                  label="Inference Results",
-                  show_label=True,
-                  elem_id="gallery",
-                  columns=[2], rows=[2]
-              )
-              # 5) Textbox under the gallery for showing captions
-              caption_text = gr.Textbox(
-                  label="Caption",
-                  lines=4,
-                  placeholder="Click on any image above to see its caption here."
-              )
-        # Wire the button: when clicked, call run_with_upload(...) → output_gallery
         run_button.click(
             fn=run_with_upload,
             inputs=[upload_image, upscale_radio],
             outputs=[output_gallery]
         )
-        # Wire gallery clicks: when an image is clicked, run get_caption(...) → caption_text
         output_gallery.select(
             fn=get_caption,
             inputs=[output_gallery],
             outputs=[caption_text]
         )
-# -----------------------------------------------------------------------------
 # START THE GRADIO SERVER
-# -----------------------------------------------------------------------------
 demo.launch(share=True)

 import os
 import shutil
 from pathlib import Path
+from PIL import Image, ImageDraw
 import spaces
+# ------------------------------------------------------------------
 # CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
+# ------------------------------------------------------------------
 INPUT_DIR   = "samples"
 OUTPUT_DIR  = "inference_results/coz_vlmprompt"
+# ------------------------------------------------------------------
+# HELPER: Resize & center-crop to 512, preserving aspect ratio
+# ------------------------------------------------------------------
+def resize_and_center_crop(img: Image.Image, size: int) -> Image.Image:
+    """
+    Resize the input PIL image so that its shorter side == `size`,
+    then center-crop to exactly (size x size).
+    """
+    w, h = img.size
+    scale = size / min(w, h)
+    new_w, new_h = int(w * scale), int(h * scale)
+    img = img.resize((new_w, new_h), Image.LANCZOS)
+    left = (new_w - size) // 2
+    top  = (new_h - size) // 2
+    return img.crop((left, top, left + size, top + size))
+# ------------------------------------------------------------------
+# HELPER: Draw four concentric, centered rectangles on a 512×512 image
+# ------------------------------------------------------------------
+def make_preview_with_boxes(image_path: str, scale_option: str) -> Image.Image:
+    """
+    1) Open the uploaded image from disk.
+    2) Resize & center-crop it to exactly 512×512.
+    3) Depending on scale_option ("1x","2x","4x"), compute four rectangle sizes:
+       - "1x": [512, 512, 512, 512]
+       - "2x": [256, 128, 64, 32]
+       - "4x": [128, 64, 32, 16]
+    4) Draw each of those four rectangles (outline only), all centered.
+    5) Return the modified PIL image.
+    """
+    try:
+        orig = Image.open(image_path).convert("RGB")
+    except Exception as e:
+        # If something fails, return a plain 512×512 gray image as fallback
+        fallback = Image.new("RGB", (512, 512), (200, 200, 200))
+        draw = ImageDraw.Draw(fallback)
+        draw.text((20, 20), f"Error:\n{e}", fill="red")
+        return fallback
+    # 1. Resize & center-crop to 512×512
+    base = resize_and_center_crop(orig, 512)  # now `base.size == (512,512)`
+    # 2. Determine the four box sizes
+    scale_int = int(scale_option.replace("x", ""))  # e.g. "2x" -> 2
+    if scale_int == 1:
+        sizes = [512, 512, 512, 512]
+    else:
+        # For scale=2: sizes = [512//2, 512//(2*2), 512//(2*4), 512//(2*8)] -> [256,128,64,32]
+        # For scale=4: sizes = [512//4, 512//(4*2), 512//(4*4), 512//(4*8)] -> [128,64,32,16]
+        sizes = [512 // (scale_int * (2 ** i)) for i in range(4)]
+    draw = ImageDraw.Draw(base)
+    # 3. Outline color cycle (you can change these or use just one color)
+    colors = ["red", "lime", "cyan", "yellow"]
+    width = 3  # thickness of each rectangle’s outline
+    for idx, s in enumerate(sizes):
+        # Compute top-left corner so that box is centered in 512×512
+        x0 = (512 - s) // 2
+        y0 = (512 - s) // 2
+        x1 = x0 + s
+        y1 = y0 + s
+        draw.rectangle([(x0, y0), (x1, y1)], outline=colors[idx % len(colors)], width=width)
+    return base
+# ------------------------------------------------------------------
+# HELPER FUNCTIONS FOR INFERENCE & CAPTION (unchanged from your original)
+# ------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def run_with_upload(uploaded_image_path, upscale_option):
     """
     1) Clear INPUT_DIR
     2) Save the uploaded file as input.png in INPUT_DIR
+    3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1","2","4"
     4) Call inference_coz.py with `--upscale <that_value>`
     5) Return the FOUR output‐PNG file‐paths as a Python list, so that Gradio's Gallery
+       can display them.
     """
+    # ————————————————————————————————————————————————————————————
+    # (Copy‐paste exactly your existing code here; no changes needed)
+    # ————————————————————————————————————————————————————————————
     os.makedirs(INPUT_DIR, exist_ok=True)
     for fn in os.listdir(INPUT_DIR):
         full_path = os.path.join(INPUT_DIR, fn)
         except Exception as e:
             print(f"Warning: could not delete {full_path}: {e}")
     if uploaded_image_path is None:
         return []
     try:
         print(f"Error: could not save as PNG: {e}")
         return []
     upscale_value = upscale_option.replace("x", "")  # e.g. "2x" → "2"
     cmd = [
         "python", "inference_coz.py",
         print("Inference failed:", err)
         return []
     per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
     expected_files = [
         os.path.join(per_sample_dir, f"{i}.png")
         for i in range(1, 5)
     ]
     for fp in expected_files:
         if not os.path.isfile(fp):
             print(f"Warning: expected file not found: {fp}")
             return []
     return expected_files
 def get_caption(src_gallery, evt: gr.SelectData):
     """
+    Given a clicked‐on image in the gallery, read the corresponding .txt in
+    .../per-sample/input/txt and return its contents.
     """
+    if not src_gallery or not os.path.isfile(src_gallery[evt.index][0]):
         return "No caption available."
+    selected_image_path = src_gallery[evt.index][0]
+    base = os.path.basename(selected_image_path)  # e.g. "2.png"
+    stem = os.path.splitext(base)[0]              # e.g. "2"
     txt_folder = os.path.join(OUTPUT_DIR, "per-sample", "input", "txt")
     txt_path = os.path.join(txt_folder, f"{int(stem) - 1}.txt")
     if not os.path.isfile(txt_path):
         return f"Caption file not found: {int(stem) - 1}.txt"
     try:
         with open(txt_path, "r", encoding="utf-8") as f:
             caption = f.read().strip()
         return f"Error reading caption: {e}"
+# ------------------------------------------------------------------
+# BUILD THE GRADIO INTERFACE (with updated callbacks)
+# ------------------------------------------------------------------
 css = """
 #col-container {
                 # 3) Button to launch inference
                 run_button = gr.Button("Chain-of-Zoom it")
+                # 4) Show the 512×512 preview with four centered rectangles
+                preview_with_box = gr.Image(
+                    label="Preview (512×512 with centered boxes)",
+                    type="pil",        # we’ll return a PIL.Image from our function
+                    interactive=False
+                )
             with gr.Column():
+                # 5) Gallery to display multiple output images
+                output_gallery = gr.Gallery(
+                    label="Inference Results",
+                    show_label=True,
+                    elem_id="gallery",
+                    columns=[2], rows=[2]
+                )
+                # 6) Textbox under the gallery for showing captions
+                caption_text = gr.Textbox(
+                    label="Caption",
+                    lines=4,
+                    placeholder="Click on any image above to see its caption here."
+                )
+        # ------------------------------------------------------------------
+        # CALLBACK #1: Whenever the user uploads or changes the radio, update preview
+        # ------------------------------------------------------------------
+        def update_preview(img_path, scale_opt):
+            """
+            If there's no image uploaded yet, return None (Gradio will show blank).
+            Otherwise, draw the resized 512×512 + four boxes and return it.
+            """
+            if img_path is None:
+                return None
+            return make_preview_with_boxes(img_path, scale_opt)
+        # When the user uploads a new file:
+        upload_image.change(
+            fn=update_preview,
+            inputs=[upload_image, upscale_radio],
+            outputs=[preview_with_box]
+        )
+        # Also trigger preview redraw if they switch 1×/2×/4× after uploading:
+        upscale_radio.change(
+            fn=update_preview,
+            inputs=[upload_image, upscale_radio],
+            outputs=[preview_with_box]
+        )
+        # ------------------------------------------------------------------
+        # CALLBACK #2: When “Chain-of-Zoom it” is clicked, run inference
+        # ------------------------------------------------------------------
         run_button.click(
             fn=run_with_upload,
             inputs=[upload_image, upscale_radio],
             outputs=[output_gallery]
         )
+        # ------------------------------------------------------------------
+        # CALLBACK #3: When an image in the gallery is clicked, show its caption
+        # ------------------------------------------------------------------
         output_gallery.select(
             fn=get_caption,
             inputs=[output_gallery],
             outputs=[caption_text]
         )
+# ------------------------------------------------------------------
 # START THE GRADIO SERVER
+# ------------------------------------------------------------------
 demo.launch(share=True)