import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
import open3d as o3d
from pathlib import Path
# Load model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
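# Optional performance tweak (an assumption, not part of the original Space): inference
# runs on CPU by default here. If a GPU is available, the model and the encoded inputs
# could be moved to it, e.g.:
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)
#   # ...and later: encoding = {k: v.to(device) for k, v in encoding.items()}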
def process_image(image_path):
    """Predict a depth map with DPT and reconstruct a 3D mesh from it."""
    image_path = Path(image_path) if isinstance(image_path, str) else image_path
    try:
        image_raw = Image.open(image_path).convert("RGB")
    except Exception as e:
        raise gr.Error(f"Error loading image: {e}")

    # Resize to 800 px width while maintaining the aspect ratio
    image = image_raw.resize(
        (800, int(800 * image_raw.size[1] / image_raw.size[0])),
        Image.Resampling.LANCZOS,
    )

    # Run depth estimation
    encoding = feature_extractor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate the prediction back to the image resolution and normalize to 8-bit
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    output = prediction.cpu().numpy()
    if np.max(output) > 0:
        depth_image = (output * 255 / np.max(output)).astype("uint8")
    else:
        depth_image = np.zeros_like(output, dtype="uint8")  # Handle an all-zero prediction

    # Retry with a coarser Poisson octree depth if the reconstruction fails
    try:
        gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
    except Exception:
        gltf_path = create_3d_obj(np.array(image), depth_image, image_path, depth=8)
    return [Image.fromarray(depth_image), gltf_path]
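# Hypothetical standalone usage sketch (assumes a local "example.jpg"; not part of the UI flow):
#
#   depth_img, gltf_file = process_image("example.jpg")
#   depth_img.save("example_depth.png")   # 8-bit depth visualization
#   print(gltf_file)                      # path to the reconstructed ./example.gltf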
def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
    """Back-project the RGB-D pair into a point cloud and mesh it with Poisson reconstruction."""
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False)

    # Simple pinhole camera: 500 px focal length, principal point at the image center
    w, h = depth_image.shape[1], depth_image.shape[0]
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

    # Back-project to a point cloud and estimate consistently oriented normals
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
    pcd.orient_normals_towards_camera_location(camera_location=np.array([0., 0., 1000.]))

    # Poisson surface reconstruction, then simplify and crop to the point cloud's bounds
    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
        mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
    mesh = mesh_raw.simplify_vertex_clustering(voxel_size=voxel_size)
    bbox = pcd.get_axis_aligned_bounding_box()
    mesh_crop = mesh.crop(bbox)

    # Export as glTF next to the working directory, named after the input image
    gltf_path = f'./{image_path.stem}.gltf'
    o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
    return gltf_path
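# Note on the `depth` parameter: it is the octree depth used by Open3D's Poisson
# reconstruction, not the depth map itself. Higher values yield finer meshes at the
# cost of memory and time, which is why process_image retries with depth=8 on failure.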
title = "Zero-shot Depth Estimation with DPT + 3D Point Cloud"
description = "DPT model predicts depth from an image, followed by 3D Point Cloud reconstruction."
with gr.Blocks() as iface:
gr.Markdown("# Zero-shot Depth Estimation with DPT + 3D Point Cloud")
with gr.Row():
image_input = gr.Image(type="filepath", label="Input Image")
depth_output = gr.Image(label="Predicted Depth", type="pil")
gltf_output = gr.File(label="Download 3D gLTF")
# Embed an iframe for previewing the .gltf
with gr.Row():
gr.HTML('<iframe id="gltf-viewer" width="100%" height="400px"></iframe>')
def update_gltf_viewer(image_path):
gltf_path = process_image(image_path)[1]
iframe_html = f'''
<script>
document.getElementById('gltf-viewer').src = 'https://gltf-viewer.donmccurdy.com/?url=file://{gltf_path}';
</script>
'''
return process_image(image_path)[0], gltf_path, iframe_html
image_input.change(update_gltf_viewer, inputs=[image_input], outputs=[depth_output, gltf_output, gr.HTML()])
iface.launch()