import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
from transformers import SegformerForSemanticSegmentation, AutoImageProcessor

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and processor once
processor = AutoImageProcessor.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512").to(device)
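# Loading once at import time keeps per-request latency low; from_pretrained
# already returns the model in eval mode, so no explicit model.eval() is needed.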

def process(room_img, tile_img):
    room_img = room_img.convert("RGB")
    tile_img = tile_img.convert("RGB")
    room_np = np.array(room_img)

    # Run semantic segmentation on the room image
    inputs = processor(images=room_img, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    segmentation = outputs.logits.argmax(dim=1).squeeze().cpu().numpy()
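    # Resize the label map back to the room's resolution; INTER_NEAREST keeps
    # the discrete class IDs intact (bilinear would invent in-between labels)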
    segmentation_resized = cv2.resize(segmentation.astype(np.uint8), (room_np.shape[1], room_np.shape[0]), interpolation=cv2.INTER_NEAREST)

    # Mask for floor (ADE20K class index 3)
    floor_class_index = 3
    mask_bin = (segmentation_resized == floor_class_index).astype(np.uint8)

    # Largest contour of the floor mask; bail out early with the unmodified
    # inputs if no usable floor region was found
    contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return room_img, Image.fromarray(mask_bin * 255), tile_img, room_img
    contour = max(contours, key=cv2.contourArea)
    if len(contour) < 4:
        return room_img, Image.fromarray(mask_bin * 255), tile_img, room_img
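
    # Approximate the floor's perspective from its bounding box: the bottom
    # corners are pushed outward by half the box height while the top edge
    # stays fixed, so straight tile lines converge toward the back of the
    # room. A more faithful warp would estimate the actual floor plane.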
    x, y, w, h = cv2.boundingRect(contour)
    src_pts = np.array([[x, y + h], [x + w, y + h], [x + w, y], [x, y]], dtype=np.float32)
    offset = h * 0.5
    dst_pts = np.array([[x - offset, y + h], [x + w + offset, y + h], [x + w, y], [x, y]], dtype=np.float32)
    H = cv2.getPerspectiveTransform(src_pts, dst_pts)

    # Resize the tile so roughly ten fit across the room's width,
    # keeping the tile's aspect ratio
    target_tile_width = room_np.shape[1] // 10
    tile_aspect_ratio = tile_img.height / tile_img.width
    target_tile_height = int(target_tile_width * tile_aspect_ratio)
    tile_resized = tile_img.resize((target_tile_width, target_tile_height), Image.LANCZOS)
    tile_np = np.array(tile_resized)
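
    # Repeat the tile across the whole frame; the +2 overshoot ensures the
    # crop to room size never leaves an uncovered edge before warping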
    tile_h, tile_w = tile_np.shape[:2]
    room_h, room_w = room_np.shape[:2]
    reps_y = room_h // tile_h + 2
    reps_x = room_w // tile_w + 2
    tiled_texture = np.tile(tile_np, (reps_y, reps_x, 1))[:room_h, :room_w]
    warped_texture = cv2.warpPerspective(tiled_texture, H, (room_w, room_h))
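
    # Lighting transfer: multiply the tile colors by the room's grayscale
    # luminance so the photo's shadows and highlights carry over onto the
    # new floor; the 1.2 gain offsets the overall darkening this causes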
    room_float = room_np.astype(np.float32) / 255.0
    texture_float = warped_texture.astype(np.float32) / 255.0
    room_gray = cv2.cvtColor(room_float, cv2.COLOR_RGB2GRAY)
    lighting = np.stack([room_gray] * 3, axis=-1)
    lighting = np.clip(lighting * 1.2, 0, 1)
    lit_texture = np.clip(texture_float * lighting, 0, 1)
    mask_3ch = np.stack([mask_bin] * 3, axis=-1)
    blended = np.where(mask_3ch == 1, lit_texture, room_float)
    blended_img = (blended * 255).astype(np.uint8)

    return Image.fromarray(room_np), Image.fromarray(mask_bin * 255), Image.fromarray(warped_texture), Image.fromarray(blended_img)

demo = gr.Interface(
    fn=process,
    inputs=[gr.Image(label="Room Image", type="pil"), gr.Image(label="Tile Image", type="pil")],
    outputs=[
        gr.Image(label="Original Room"),
        gr.Image(label="Floor Mask"),
        gr.Image(label="Warped Texture"),
        gr.Image(label="Final Overlay"),
    ],
    title="Room Floor Tiler",
    description="Upload a room image and a tile texture. The floor is automatically detected and overlaid with your selected tile using SegFormer and perspective warping.",
)

if __name__ == "__main__":
    demo.launch()