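# Webcam depth demo: DPT (SwinV2-tiny) monocular depth estimation, pruned and
# dynamically quantized for lighter inference, rendered as an Open3D point
# cloud, and served through a live Gradio interface.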
import cv2
import torch
import numpy as np
from transformers import DPTForDepthEstimation, DPTImageProcessor
import gradio as gr
import torch.nn.utils.prune as prune
import open3d as o3d
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float32)
model.eval()
# Apply global unstructured pruning
parameters_to_prune = [
    (module, "weight")
    for module in model.modules()
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear))
]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.4,  # prune 40% of weights by global L1 magnitude
)
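# Make the pruning permanent by baking the masks into the weights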
for module, _ in parameters_to_prune:
    prune.remove(module, "weight")
# Dynamic quantization supports Linear layers (Conv2d is silently skipped)
# and the resulting modules run on CPU only, so quantize only when no GPU
# is available.
if device.type == "cpu":
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
model = model.to(device)
processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
color_map = torch.from_numpy(color_map).to(device)
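# Precomputed inferno colormap; currently unused below, but handy for
# colorizing the depth map directly instead of rendering a point cloud.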
def preprocess_image(image):
    # Let the DPT processor handle resizing to the model's expected 256x256
    # input and normalization; an arbitrary manual resize (e.g. 128x128) can
    # break the SwinV2 backbone's window partitioning.
    inputs = processor(images=image, return_tensors="pt")
    return inputs["pixel_values"].to(device)
def create_point_cloud(depth_map, color_image):
    # Back-project each pixel with a simplified pinhole model centered on the image
    rows, cols = depth_map.shape
    c, r = np.meshgrid(np.arange(cols), np.arange(rows), sparse=True)
    valid = (depth_map > 0) & (depth_map < 1000)
    z = np.where(valid, depth_map, 0)
    x = np.where(valid, z * (c - cols / 2) / cols, 0)
    y = np.where(valid, z * (r - rows / 2) / rows, 0)
    points = np.dstack((x, y, z)).reshape(-1, 3)
    colors = color_image.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors / 255.0)
    return pcd
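# The returned cloud can also be written out for offline inspection, e.g.
# o3d.io.write_point_cloud("frame.ply", pcd)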
@torch.inference_mode()
def process_frame(image):
    if image is None:
        return None
    preprocessed = preprocess_image(image)
    predicted_depth = model(preprocessed).predicted_depth
    depth_map = predicted_depth.squeeze().cpu().numpy()
    # Normalize depth map to [0, 1] (epsilon guards against a constant map)
    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
    # Resize the color frame to the depth map's shape so points and colors align
    color = cv2.resize(image, (depth_map.shape[1], depth_map.shape[0]))
    pcd = create_point_cloud(depth_map, color)
    # Render the point cloud in a hidden window (still requires an OpenGL context)
    vis = o3d.visualization.Visualizer()
    vis.create_window(visible=False)
    vis.add_geometry(pcd)
    vis.poll_events()
    vis.update_renderer()
    # Capture the rendering as a float buffer; do_render=True forces a draw
    # since the window is not shown
    buffer = vis.capture_screen_float_buffer(do_render=True)
    vis.destroy_window()
    # Convert the float buffer to a uint8 numpy image
    point_cloud_image = (np.asarray(buffer) * 255).astype(np.uint8)
    return point_cloud_image
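# Note: the legacy Visualizer above needs a display; on a headless server
# (e.g. a Hugging Face Space) it is a common source of runtime errors. A
# minimal alternative sketch, assuming a recent Open3D build that ships the
# rendering module (OffscreenRenderer, MaterialRecord):
#
#   renderer = o3d.visualization.rendering.OffscreenRenderer(640, 480)
#   material = o3d.visualization.rendering.MaterialRecord()
#   material.shader = "defaultUnlit"
#   renderer.scene.add_geometry("pcd", pcd, material)
#   point_cloud_image = np.asarray(renderer.render_to_image())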
interface = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),  # sources expects a list
    outputs="image",
    live=True,
)
interface.launch()
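# Tip: interface.launch(share=True) serves the demo on a temporary public URL.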