# Gradio demo: segmentation-based and depth-based background blur.
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter, ImageOps
import numpy as np
import cv2
# Initialize models with fixed choices
# Pipelines are loaded once at import time so every request reuses them.
# NOTE(review): downloads model weights on first run — requires network access.
segmentation_model = pipeline("image-segmentation", model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
depth_estimator = pipeline("depth-estimation", model="Intel/zoedepth-nyu-kitti")
def lens_blur(image, radius):
    """Apply a disc-kernel (bokeh-style) lens blur to a PIL image.

    Args:
        image: PIL.Image (any mode cv2.split can handle, typically RGB).
        radius: blur radius in pixels; values < 1 return the image unchanged.

    Returns:
        A new PIL.Image with the circular-aperture blur applied, or the
        original image object when radius < 1.
    """
    if radius < 1:
        return image
    # The UI slider uses step=1, but guard against a float radius anyway:
    # kernel dimensions must be integral.
    radius = int(radius)
    img_np = np.array(image)
    # Build a normalized circular (disc) kernel in one vectorized pass
    # instead of a Python double loop over every kernel cell.
    kernel_size = 2 * radius + 1
    yy, xx = np.ogrid[:kernel_size, :kernel_size]
    disc = ((yy - radius) ** 2 + (xx - radius) ** 2) <= radius ** 2
    kernel = disc.astype(np.float32)
    # The disc always contains at least the center pixel, so sum >= 1.
    kernel /= kernel.sum()
    # Convolve each channel independently; ddepth=-1 keeps the source depth.
    blurred_channels = [cv2.filter2D(ch, -1, kernel) for ch in cv2.split(img_np)]
    blurred_img = cv2.merge(blurred_channels)
    return Image.fromarray(blurred_img)
def process_image(input_image, method, blur_intensity, blur_type):
    """
    Process the input image using one of two methods:
    1. Segmented Background Blur:
       - Uses segmentation to extract a foreground mask.
       - Applies the selected blur (Gaussian or Lens) to the background.
       - Composites the final image.
    2. Depth-based Variable Blur:
       - Uses depth estimation to generate a depth map.
       - Normalizes the depth map to be used as a blending mask.
       - Blends a fully blurred version (using the selected blur) with the original image.

    Args:
        input_image: PIL.Image; converted to RGB internally.
        method: "Segmented Background Blur" or "Depth-based Variable Blur";
            any other value returns the image unprocessed.
        blur_intensity: maximum blur radius in pixels.
        blur_type: "Gaussian Blur" or "Lens Blur"; anything else falls
            back to Gaussian (matches the original dispatch).

    Returns:
        (output_image, mask_image): the composited result and the mask used
        (binary for segmentation, normalized depth for depth-based).
    """
    # Ensure image is in RGB mode
    input_image = input_image.convert("RGB")

    # Select blur backend; unknown values fall back to Gaussian.
    if blur_type == "Lens Blur":
        blur_fn = lens_blur
    else:
        blur_fn = lambda img, rad: img.filter(ImageFilter.GaussianBlur(radius=rad))

    if method == "Segmented Background Blur":
        results = segmentation_model(input_image)
        # Assume the last result is the main foreground object.
        # NOTE(review): segment ordering is model-defined — confirm the last
        # entry really is the subject for the chosen segformer checkpoint.
        foreground_mask = results[-1]["mask"].convert("L")
        # Threshold to a binary mask: foreground=255, background=0.
        binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)
        # Guard: Image.composite raises if mask and image sizes differ.
        if binary_mask.size != input_image.size:
            binary_mask = binary_mask.resize(input_image.size)
        blurred_background = blur_fn(input_image, blur_intensity)
        # Keep foreground pixels, use the blurred background elsewhere.
        output_image = Image.composite(input_image, blurred_background, binary_mask)
        mask_image = binary_mask
    elif method == "Depth-based Variable Blur":
        depth_results = depth_estimator(input_image)
        depth_map = depth_results["depth"]
        # Normalize depth to [0, 255]; epsilon avoids 0/0 on a flat map.
        depth_array = np.array(depth_map).astype(np.float32)
        norm = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
        normalized_depth = (norm * 255).astype(np.uint8)
        mask_image = Image.fromarray(normalized_depth)
        # Guard: some depth models emit maps at model resolution; resize so
        # the broadcasted blend below cannot fail on a shape mismatch.
        if mask_image.size != input_image.size:
            mask_image = mask_image.resize(input_image.size)
            normalized_depth = np.array(mask_image)
        blurred_image = blur_fn(input_image, blur_intensity)
        orig_np = np.array(input_image).astype(np.float32)
        blur_np = np.array(blurred_image).astype(np.float32)
        # Per-pixel blend weight: 0 = original, 1 = fully blurred
        # (greater depth value => more blur).
        alpha = normalized_depth[..., np.newaxis] / 255.0
        blended_np = (1 - alpha) * orig_np + alpha * blur_np
        blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
        output_image = Image.fromarray(blended_np)
    else:
        # Unknown method: pass the image through untouched.
        output_image = input_image
        mask_image = input_image.convert("L")
    return output_image, mask_image
# ---------------------------------------------------------------------------
# Gradio front-end: a controls column feeds process_image on button click,
# and the result plus the mask that produced it land in the output column.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Image Processing App: Segmentation & Depth-based Blur")
    with gr.Row():
        with gr.Column():
            image_in = gr.Image(type="pil", label="Input Image")
            method_choice = gr.Radio(
                choices=["Segmented Background Blur", "Depth-based Variable Blur"],
                value="Segmented Background Blur",
                label="Processing Method",
            )
            intensity = gr.Slider(
                minimum=1,
                maximum=30,
                step=1,
                value=15,
                label="Blur Intensity (Maximum Blur Radius)",
            )
            blur_kind = gr.Dropdown(
                choices=["Gaussian Blur", "Lens Blur"],
                value="Gaussian Blur",
                label="Blur Type",
            )
            process_btn = gr.Button("Process Image")
        with gr.Column():
            result_view = gr.Image(label="Output Image")
            mask_view = gr.Image(label="Mask")

    # Wire the button to the processing routine.
    process_btn.click(
        fn=process_image,
        inputs=[image_in, method_choice, intensity, blur_kind],
        outputs=[result_view, mask_view],
    )

# Launch the app
demo.launch()