File size: 6,061 Bytes
11a270e
 
52f5763
11a270e
52f5763
 
c580a28
 
 
52f5763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c580a28
52f5763
c580a28
52f5763
c580a28
52f5763
c580a28
52f5763
841589e
c580a28
52f5763
841589e
c580a28
52f5763
 
 
 
c580a28
52f5763
 
 
 
 
 
 
 
 
c580a28
52f5763
 
c580a28
52f5763
 
c580a28
52f5763
 
c580a28
52f5763
 
 
 
 
 
 
 
 
 
 
 
 
 
c580a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52f5763
 
 
 
 
 
841589e
52f5763
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter, ImageOps
import numpy as np
import cv2

# Build both Hugging Face pipelines at module import; the checkpoints are hard-coded.
segmentation_model = pipeline(
    task="image-segmentation",
    model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024",
)
depth_estimator = pipeline(
    task="depth-estimation",
    model="Intel/zoedepth-nyu-kitti",
)

def lens_blur(image, radius):
    """
    Apply a lens-style (bokeh) blur by convolving with a flat circular kernel.

    Args:
        image: PIL image (or any object ``np.array`` can convert into an
            H x W or H x W x C array) to blur.
        radius: Radius of the circular kernel in pixels. Fractional values
            are truncated to an integer; anything below 1 disables the blur.

    Returns:
        The ``image`` object itself, untouched, when the radius is below 1;
        otherwise a new PIL image created from the filtered array.
    """
    # Callers such as UI sliders may pass a float; the kernel shape below
    # must be built from a real integer or NumPy raises TypeError.
    radius = int(radius)
    if radius < 1:
        return image

    # Convert PIL image to a NumPy array for OpenCV
    img_np = np.array(image)

    # Build the disk kernel in one vectorized step: a tap is inside the disk
    # when its squared distance from the center is at most radius squared.
    offsets = np.arange(-radius, radius + 1)
    inside_disk = (
        offsets[:, np.newaxis] ** 2 + offsets[np.newaxis, :] ** 2 <= radius ** 2
    )
    kernel = inside_disk.astype(np.float32)

    # Normalize so the taps sum to 1 and overall brightness is preserved.
    # The center tap is always inside the disk, so the sum is never zero.
    kernel /= kernel.sum()

    # filter2D processes every channel of a multi-channel array independently,
    # so no manual split/merge is needed. ddepth=-1 keeps the input dtype.
    blurred_img = cv2.filter2D(img_np, -1, kernel)

    # Convert back to PIL image
    return Image.fromarray(blurred_img)

def process_image(input_image, method, blur_intensity, blur_type):
    """
    Blur an image using either a segmentation mask or a depth map.

    Methods:

    1. "Segmented Background Blur":
       - Runs the segmentation pipeline and takes the mask of its last result.
       - Thresholds that mask to a binary (0/255) image.
       - Blurs the whole image, then composites the original back in wherever
         the binary mask is set.

    2. "Depth-based Variable Blur":
       - Runs the depth pipeline and min-max normalizes its "depth" output
         to [0, 255].
       - Blends the original with a fully blurred copy, using the normalized
         depth as the per-pixel weight of the blurred copy.

    Any other ``method`` value returns the RGB-converted image unchanged,
    with its grayscale version as the mask.

    Args:
        input_image: PIL image to process, or None when nothing was supplied.
        method: One of the method names listed above.
        blur_intensity: Blur radius forwarded to the selected blur function.
        blur_type: "Lens Blur" selects ``lens_blur``; every other value
            (including "Gaussian Blur") uses PIL's Gaussian blur.

    Returns:
        A ``(output_image, mask_image)`` tuple: the processed image and the
        mask used (binary for segmentation, normalized depth for depth-based).
        Returns ``(None, None)`` when ``input_image`` is None.
    """
    # No image supplied (e.g. the button was clicked before an upload):
    # return empty outputs instead of failing on ``None.convert``.
    if input_image is None:
        return None, None

    # Ensure image is in RGB mode
    input_image = input_image.convert("RGB")

    # Lens blur is the only non-Gaussian option; unknown types fall back to
    # Gaussian, which is what the original if/elif/else chain amounted to.
    if blur_type == "Lens Blur":
        blur_fn = lens_blur
    else:
        def blur_fn(img, rad):
            return img.filter(ImageFilter.GaussianBlur(radius=rad))

    if method == "Segmented Background Blur":
        # Use segmentation to obtain a foreground mask
        results = segmentation_model(input_image)
        # Assume the last result is the main foreground object
        foreground_mask = results[-1]["mask"].convert("L")
        # Threshold to create a binary mask
        binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)

        # Blur the whole frame; the composite below restores the foreground
        blurred_background = blur_fn(input_image, blur_intensity)

        # Keep original pixels where the mask is set, blurred pixels elsewhere
        output_image = Image.composite(input_image, blurred_background, binary_mask)
        mask_image = binary_mask

    elif method == "Depth-based Variable Blur":
        # Generate depth map
        depth_map = depth_estimator(input_image)["depth"]

        # Guard: the blend below needs the mask and the image to share the
        # same width and height, so resize the depth map if they differ.
        if depth_map.size != input_image.size:
            depth_map = depth_map.resize(input_image.size)

        # Min-max normalize to [0, 255]; the epsilon avoids division by zero
        # when the depth map is constant.
        depth_array = np.array(depth_map).astype(np.float32)
        depth_range = depth_array.max() - depth_array.min() + 1e-8
        norm = (depth_array - depth_array.min()) / depth_range
        normalized_depth = (norm * 255).astype(np.uint8)
        mask_image = Image.fromarray(normalized_depth)

        # Create fully blurred version using the selected blur function
        blurred_image = blur_fn(input_image, blur_intensity)

        # Convert images to float arrays for blending
        orig_np = np.array(input_image).astype(np.float32)
        blur_np = np.array(blurred_image).astype(np.float32)
        # Add a trailing axis so the mask broadcasts over the color channels
        alpha = normalized_depth[..., np.newaxis] / 255.0

        # Blend pixels: alpha 0 = original; alpha 1 = fully blurred
        blended_np = (1 - alpha) * orig_np + alpha * blur_np
        blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
        output_image = Image.fromarray(blended_np)

    else:
        # Unknown method: pass the image through untouched
        output_image = input_image
        mask_image = input_image.convert("L")

    return output_image, mask_image

# Assemble the Gradio UI: one column of controls, one column of results
with gr.Blocks() as demo:
    gr.Markdown("## Image Processing App: Segmentation & Depth-based Blur")

    with gr.Row():
        # First column: image upload and processing options
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            method = gr.Radio(
                choices=["Segmented Background Blur", "Depth-based Variable Blur"],
                value="Segmented Background Blur",
                label="Processing Method",
            )
            blur_intensity = gr.Slider(
                minimum=1,
                maximum=30,
                step=1,
                value=15,
                label="Blur Intensity (Maximum Blur Radius)",
            )
            blur_type = gr.Dropdown(
                choices=["Gaussian Blur", "Lens Blur"],
                value="Gaussian Blur",
                label="Blur Type",
            )
            run_button = gr.Button("Process Image")

        # Second column: processed image and the mask that produced it
        with gr.Column():
            output_image = gr.Image(label="Output Image")
            mask_output = gr.Image(label="Mask")

    # Clicking the button runs process_image on the current control values
    run_button.click(
        fn=process_image,
        inputs=[input_image, method, blur_intensity, blur_type],
        outputs=[output_image, mask_output],
    )

# Start the Gradio server
demo.launch()