|
import torch |
|
import gradio as gr |
|
import numpy as np |
|
import cv2 |
|
from PIL import Image |
|
|
|
# Pick the CUDA device when torch reports one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the "DPT_Large" entry point from the intel-isl/MiDaS hub repository,
# move the module to the chosen device and switch it to evaluation mode.
midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large").to(device).eval()

# The hub repository's "transforms" entry point exposes the preprocessing
# callables; the DPT one is the transform applied to images before inference.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.dpt_transform
|
|
|
def _scale_to_uint8(values):
    """Min-max rescale a numeric array to ``uint8`` values in ``[0, 255]``.

    A constant (or non-finite) input has no usable range; it maps to an
    all-zero array instead of dividing by zero.
    """
    low = values.min()
    span = values.max() - low
    # ``not span > 0`` is also true when span is NaN.
    if not span > 0:
        return np.zeros(values.shape, dtype=np.uint8)
    return ((values - low) / span * 255).astype(np.uint8)


def predict_depth(image):
    """Run the module-level MiDaS model on ``image`` and return its depth map.

    Args:
        image: A ``PIL.Image.Image``, or any array that ``Image.fromarray``
            accepts. It is converted to three-channel RGB before inference.

    Returns:
        A single-channel 8-bit ``PIL.Image.Image`` with the same height and
        width as the input, holding the model prediction min-max rescaled to
        the range 0-255.

    Raises:
        ValueError: If ``image`` is ``None`` (e.g. nothing was uploaded).
    """
    if image is None:
        raise ValueError("predict_depth() requires an image, got None")

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Normalise every input mode (grayscale, RGBA, palette, ...) to RGB so the
    # transform always receives an (H, W, 3) array.
    rgb = np.array(image.convert("RGB"))
    height, width = rgb.shape[:2]

    input_tensor = transform(rgb).to(device)
    # Add a batch dimension only if the transform returned an unbatched sample.
    if input_tensor.dim() == 3:
        input_batch = input_tensor.unsqueeze(0)
    else:
        input_batch = input_tensor

    with torch.no_grad():
        prediction = midas(input_batch)
        # Resize the prediction back to the input resolution. Indexing with
        # [0, 0] drops exactly the batch and channel axes, so an image whose
        # height or width is 1 still yields a 2-D map (a bare squeeze() would
        # collapse those axes too).
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=(height, width),
            mode="bicubic",
            align_corners=False,
        )[0, 0]

    depth_map = prediction.cpu().numpy()
    return Image.fromarray(_scale_to_uint8(depth_map))
|
|
|
|
|
# Gradio front end: one PIL image in, routed through predict_depth, one PIL
# image out.
iface = gr.Interface(
    title="MiDaS Depth Estimation",
    description="Drop img -> depth map",
    fn=predict_depth,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)


# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()