import gradio as gr import subprocess import os import shutil from pathlib import Path import spaces # import the updated recursive_multiscale_sr that expects a list of centers from inference_coz_single import recursive_multiscale_sr from PIL import Image, ImageDraw # ------------------------------------------------------------------ # CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE # ------------------------------------------------------------------ INPUT_DIR = "samples" OUTPUT_DIR = "inference_results/coz_vlmprompt" # ------------------------------------------------------------------ # HELPER: Resize & center-crop to 512, preserving aspect ratio # ------------------------------------------------------------------ def resize_and_center_crop(img: Image.Image, size: int) -> Image.Image: """ Resize the input PIL image so that its shorter side == `size`, then center-crop to exactly (size x size). """ w, h = img.size scale = size / min(w, h) new_w, new_h = int(w * scale), int(h * scale) img = img.resize((new_w, new_h), Image.LANCZOS) left = (new_w - size) // 2 top = (new_h - size) // 2 return img.crop((left, top, left + size, top + size)) # ------------------------------------------------------------------ # HELPER: Draw four true “nested” rectangles, matching the SR logic # ------------------------------------------------------------------ def make_preview_with_boxes( image_path: str, scale_option: str, cx_norm: float, cy_norm: float, ) -> Image.Image: """ 1) Open the uploaded image, resize & center-crop to 512×512. 2) Let scale_int = int(scale_option.replace("x","")). Then the four nested crop‐sizes (in pixels) are: size[0] = 512 / (scale_int^1), size[1] = 512 / (scale_int^2), size[2] = 512 / (scale_int^3), size[3] = 512 / (scale_int^4). 3) Iteratively compute each crop’s top-left in “original 512×512” space: - Start with prev_tl = (0,0), prev_size = 512. - For i in [0..3]: center_abs_x = prev_tl_x + cx_norm * prev_size center_abs_y = prev_tl_y + cy_norm * prev_size unc_x0 = center_abs_x - (size[i]/2) unc_y0 = center_abs_y - (size[i]/2) clamp x0 ∈ [prev_tl_x, prev_tl_x + prev_size - size[i]] y0 ∈ [prev_tl_y, prev_tl_y + prev_size - size[i]] Draw a rectangle from (x0, y0) to (x0 + size[i], y0 + size[i]). Then set prev_tl = (x0, y0), prev_size = size[i]. 4) Return the PIL image with those four truly nested outlines. """ try: orig = Image.open(image_path).convert("RGB") except Exception as e: # On error, return a gray 512×512 with the error text fallback = Image.new("RGB", (512, 512), (200, 200, 200)) draw = ImageDraw.Draw(fallback) draw.text((20, 20), f"Error:\n{e}", fill="red") return fallback # 1) Resize & center-crop to 512×512 base = resize_and_center_crop(orig, 512) # 2) Compute the four nested crop‐sizes scale_int = int(scale_option.replace("x", "")) # e.g. "4x" → 4 if scale_int <= 1: # If 1×, then all “nested” sizes are 512 (no real nesting) sizes = [512, 512, 512, 512] else: sizes = [ 512 // (scale_int ** (i + 1)) for i in range(4) ] # e.g. if scale_int=4 → sizes = [128, 32, 8, 2] draw = ImageDraw.Draw(base) colors = ["red", "lime", "cyan", "yellow"] width = 3 # 3) Iteratively compute nested rectangles prev_tl_x, prev_tl_y = 0.0, 0.0 prev_size = 512.0 for idx, crop_size in enumerate(sizes): # 3.a) Where is the “normalized center” in this current 512×512 region? center_abs_x = prev_tl_x + (cx_norm * prev_size) center_abs_y = prev_tl_y + (cy_norm * prev_size) # 3.b) Unclamped top-left for this crop unc_x0 = center_abs_x - (crop_size / 2.0) unc_y0 = center_abs_y - (crop_size / 2.0) # 3.c) Clamp so the crop window stays inside [prev_tl .. prev_tl + prev_size] min_x0 = prev_tl_x max_x0 = prev_tl_x + prev_size - crop_size min_y0 = prev_tl_y max_y0 = prev_tl_y + prev_size - crop_size x0 = max(min_x0, min(unc_x0, max_x0)) y0 = max(min_y0, min(unc_y0, max_y0)) x1 = x0 + crop_size y1 = y0 + crop_size # Draw the rectangle (cast to int for pixels) draw.rectangle( [(int(x0), int(y0)), (int(x1), int(y1))], outline=colors[idx % len(colors)], width=width ) # 3.d) Update for the next iteration prev_tl_x, prev_tl_y = x0, y0 prev_size = crop_size return base # ------------------------------------------------------------------ # HELPER FUNCTION FOR INFERENCE (build a list of identical centers) # ------------------------------------------------------------------ @spaces.GPU() def run_with_upload( uploaded_image_path: str, upscale_option: str, cx_norm: float, cy_norm: float, ): """ Perform chain-of-zoom super-resolution on a given image, using recursive multi-scale upscaling centered on a specific point. This function enhances a given image by progressively zooming into a specific point, using a recursive deep super-resolution model. Args: uploaded_image_path (str): Path to the input image file on disk. upscale_option (str): The desired upscale factor as a string. Valid options are "1x", "2x", and "4x". - "1x" means no upscaling. - "2x" means 2× enlargement per zoom step. - "4x" means 4× enlargement per zoom step. cx_norm (float): Normalized X-coordinate (0 to 1) of the zoom center. cy_norm (float): Normalized Y-coordinate (0 to 1) of the zoom center. Returns: list[PIL.Image.Image]: A list of progressively zoomed-in and super-resolved images at each recursion step (typically 4), centered around the user-specified point. Note: The center point is repeated for each recursion level to maintain consistency during zooming. This function uses a modified version of the `recursive_multiscale_sr` pipeline for inference. """ if uploaded_image_path is None: return [] upscale_value = int(upscale_option.replace("x", "")) rec_num = 4 # match the SR pipeline’s default recursion depth centers = [(cx_norm, cy_norm)] * rec_num # Call the modified SR function sr_list, _ = recursive_multiscale_sr( uploaded_image_path, upscale=upscale_value, rec_num=rec_num, centers=centers, ) # Return the list of PIL images (Gradio Gallery expects a list) return sr_list # ------------------------------------------------------------------ # BUILD THE GRADIO INTERFACE (two sliders + correct preview) # ------------------------------------------------------------------ css = """ #col-container { margin: 0 auto; max-width: 1024px; } """ with gr.Blocks(css=css) as demo: gr.HTML( """
Extreme Super-Resolution via Scale Autoregression and Preference Alignment