Spaces:

huntrezz
/

RealtimeMonocularDepth

Runtime error

File size: 1,336 Bytes

f8b3886
 
 
0143794
e8486cb
f8b3886
 
79684c1
f8b3886
 
 
 
79684c1
e8486cb
 
79684c1
f8b3886
e8486cb
 
f8b3886
79684c1
e8486cb
79684c1
 
 
e8486cb
d4f8b39
79684c1
f8b3886
e8486cb
 
79684c1
e8486cb
79684c1
 
e8486cb
f8b3886
e8486cb

import cv2
import torch
import numpy as np
from transformers import DPTForDepthEstimation, DPTImageProcessor
import gradio as gr

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The weights are read from the current directory only (no hub download) and
# are loaded in half precision before being moved to the chosen device.
# NOTE(review): float16 is requested even when the device is the CPU —
# confirm the installed PyTorch build supports the required half-precision
# CPU kernels, or load in float32 there.
model = DPTForDepthEstimation.from_pretrained(
    "./",
    local_files_only=True,
    torch_dtype=torch.float16,
)
model = model.to(device)

# Preprocessing settings are taken from the Intel/dpt-swinv2-tiny-256 checkpoint.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")

# Inferno lookup table built once from a 0..255 ramp; it is passed back to
# cv2.applyColorMap as a user colour map when colouring each depth frame.
color_map = cv2.applyColorMap(
    np.arange(256, dtype=np.uint8),
    cv2.COLORMAP_INFERNO,
)

@torch.inference_mode()
def process_frame(image):
    """Convert one webcam frame into an Inferno-coloured relative depth map.

    Args:
        image: Frame delivered by the Gradio image input. Gradio passes
            numpy frames in RGB channel order; it passes ``None`` when no
            frame is available (for example before the webcam starts).

    Returns:
        A uint8 colour image (RGB order, ready for a Gradio image output)
        of the min-max normalised depth prediction, or ``None`` when
        ``image`` is ``None``.
    """
    # A streaming input can fire without a frame; there is nothing to do.
    if image is None:
        return None

    # The frame is already RGB, so it is only downscaled here. Swapping
    # channels (BGR<->RGB) at this point would feed the model BGR data.
    resized_frame = cv2.resize(image, (128, 128), interpolation=cv2.INTER_AREA)

    inputs = processor(images=resized_frame, return_tensors="pt")
    # Follow the precision the model was actually loaded with instead of
    # hard-coding float16, and leave any non-float tensors un-cast.
    inputs = {
        key: (
            value.to(device=device, dtype=model.dtype)
            if value.is_floating_point()
            else value.to(device)
        )
        for key, value in inputs.items()
    }

    predicted_depth = model(**inputs).predicted_depth
    # Normalise in float32 so half-precision outputs do not lose range.
    depth_map = predicted_depth.squeeze().float().cpu().numpy()

    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    if depth_range > 0:
        depth_map = (depth_map - depth_min) / depth_range
    else:
        # Flat (or non-finite) prediction: avoid dividing by zero / NaN.
        depth_map = np.zeros_like(depth_map)
    depth_map = (depth_map * 255).astype(np.uint8)

    # OpenCV colour maps are in BGR order; Gradio displays RGB.
    depth_map_bgr = cv2.applyColorMap(depth_map, color_map)
    return cv2.cvtColor(depth_map_bgr, cv2.COLOR_BGR2RGB)

def _webcam_input():
    """Build the streaming webcam image input for the installed Gradio.

    Newer Gradio releases take ``sources=[...]`` on ``gr.Image`` while 3.x
    releases take the singular ``source=...``; the constructor signature is
    inspected so the keyword that actually exists is used.
    """
    import inspect

    image_params = inspect.signature(gr.Image.__init__).parameters
    if "sources" in image_params:
        return gr.Image(sources=["webcam"], streaming=True)
    return gr.Image(source="webcam", streaming=True)


# ``live=True`` re-runs ``process_frame`` as new frames arrive from the
# stream. ``gr.Interface`` has no documented ``refresh_rate`` argument, so it
# is not passed.
interface = gr.Interface(
    fn=process_frame,
    inputs=_webcam_input(),
    outputs="image",
    live=True,
)

interface.launch()