mac9087 committed
Commit 6a5c502 · verified · 1 Parent(s): 03bce9d

Update app.py

Files changed (1): app.py (+94, -36)
app.py CHANGED
@@ -14,7 +14,9 @@ from huggingface_hub import snapshot_download
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from diffusers import DiffusionPipeline
+from transformers import pipeline
+from scipy.ndimage import gaussian_filter
+import open3d as o3d
 
 # Force CPU usage
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -49,13 +51,13 @@ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
 processing_jobs = {}
 
 # Global model variables
-triposg_pipeline = None
+depth_pipeline = None
 model_loaded = False
 model_loading = False
 
 # Configuration for processing
-TIMEOUT_SECONDS = 240 # 4 minutes max for TripoSG on CPU
-MAX_DIMENSION = 512 # TripoSG expects 512x512
+TIMEOUT_SECONDS = 240 # 4 minutes max for Depth-Anything on CPU
+MAX_DIMENSION = 512 # Depth-Anything expects 512x512
 
 # TimeoutError for handling timeouts
 class TimeoutError(Exception):
@@ -94,30 +96,30 @@ def process_with_timeout(function, args, timeout):
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
-# Image preprocessing for TripoSG (512x512, no background removal)
+# Image preprocessing for Depth-Anything (512x512, no background removal)
 def preprocess_image(image_path):
     with Image.open(image_path) as img:
         img = img.convert("RGB")
-        # TripoSG requires 512x512
+        # Depth-Anything requires 512x512
         img = img.resize((512, 512), Image.LANCZOS)
     return img
 
 def load_model():
-    global triposg_pipeline, model_loaded, model_loading
+    global depth_pipeline, model_loaded, model_loading
 
     if model_loaded:
-        return triposg_pipeline
+        return depth_pipeline
 
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return triposg_pipeline
+        return depth_pipeline
 
     try:
         model_loading = True
         print("Starting model loading...")
 
-        model_name = "VAST-AI/TripoSG"
+        model_name = "LiheYoung/depth-anything-small-hf"
 
         # Download model with retry mechanism
         max_retries = 3
@@ -138,18 +140,18 @@ def load_model():
             else:
                 raise
 
-        # Load TripoSG pipeline
-        triposg_pipeline = DiffusionPipeline.from_pretrained(
-            model_name,
+        # Load Depth-Anything pipeline
+        depth_pipeline = pipeline(
+            "depth-estimation",
+            model=model_name,
             cache_dir=CACHE_DIR,
-            torch_dtype=torch.float32, # Use float32 for CPU
-            custom_pipeline="VAST-AI/TripoSG",
+            device=-1, # Force CPU
+            torch_dtype=torch.float32,
         )
-        triposg_pipeline.to("cpu") # Explicitly move to CPU
 
         model_loaded = True
         print("Model loaded successfully on CPU")
-        return triposg_pipeline
+        return depth_pipeline
 
     except Exception as e:
         print(f"Error loading model: {str(e)}")
@@ -158,11 +160,76 @@ def load_model():
     finally:
         model_loading = False
 
+def depth_to_point_cloud(depth_map, image, detail_level):
+    # Parameters based on detail level
+    downsample_factors = {'low': 4, 'medium': 2, 'high': 1}
+    downsample = downsample_factors[detail_level]
+
+    # Convert image and depth to numpy
+    img_array = np.array(image)
+    depth_array = np.array(depth_map)
+
+    # Downsample for performance
+    if downsample > 1:
+        depth_array = depth_array[::downsample, ::downsample]
+        img_array = img_array[::downsample, ::downsample]
+
+    # Normalize depth
+    depth_array = gaussian_filter(depth_array, sigma=1)
+    depth_array = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
+
+    # Create point cloud
+    h, w = depth_array.shape
+    x, y = np.meshgrid(np.arange(w), np.arange(h))
+
+    # Simple camera intrinsics (assumed focal length)
+    fx = fy = w * 0.5
+    cx, cy = w / 2, h / 2
+
+    # Convert to 3D coordinates
+    z = depth_array
+    x = (x - cx) * z / fx
+    y = (y - cy) * z / fy
+
+    points = np.stack([x, y, z], axis=-1).reshape(-1, 3)
+    colors = img_array.reshape(-1, 3) / 255.0
+
+    # Filter out invalid points (e.g., background)
+    mask = (z.reshape(-1) > 0.1) & (z.reshape(-1) < 0.9)
+    points = points[mask]
+    colors = colors[mask]
+
+    # Create Open3D point cloud
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points)
+    pcd.colors = o3d.utility.Vector3dVector(colors)
+
+    # Estimate normals
+    pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
+
+    # Poisson surface reconstruction
+    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+        pcd, depth=8 if detail_level == 'high' else 6
+    )
+
+    # Convert to trimesh
+    vertices = np.asarray(mesh.vertices)
+    faces = np.asarray(mesh.triangles)
+    vertex_colors = np.asarray(mesh.vertex_colors)
+
+    trimesh_mesh = trimesh.Trimesh(
+        vertices=vertices,
+        faces=faces,
+        vertex_colors=vertex_colors
+    )
+
+    return trimesh_mesh
+
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "TripoSG",
+        "model": "Depth-Anything",
         "device": "cpu"
     }), 200
 
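
Note: the new depth_to_point_cloud() helper is the core of this change: depth map -> colored point cloud -> Poisson reconstruction -> trimesh. A usage sketch under the assumption that the RGB image and the depth map are both 512x512; the file names are placeholders:

from PIL import Image

rgb = Image.open("sample.jpg").convert("RGB").resize((512, 512))
depth = Image.open("depth_preview.png")  # e.g. the "depth" image from the pipeline sketch above

mesh = depth_to_point_cloud(depth, rgb, detail_level="medium")
mesh.export("output.glb")  # trimesh infers the GLB format from the file extension
print(f"vertices: {len(mesh.vertices)}, faces: {len(mesh.faces)}")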
 
@@ -259,23 +326,14 @@ def convert_image_to_3d():
 
     try:
         def generate_3d():
-            # Adjust settings based on detail level
-            num_steps = {'low': 20, 'medium': 50, 'high': 75}
-            faces = {'low': 3000, 'medium': 5000, 'high': 8000}
-
-            # Convert image to tensor
-            img_array = np.array(image) / 255.0
-            img_tensor = torch.from_numpy(img_array).permute(2, 0, 1).float()
-
-            # Generate mesh with TripoSG
-            with torch.no_grad():
-                mesh = pipeline(
-                    img_tensor.unsqueeze(0),
-                    num_inference_steps=num_steps[detail_level],
-                    num_faces=faces[detail_level],
-                    guidance_scale=7.5,
-                    seed=12345
-                ).meshes[0]
+            # Generate depth map
+            with torch.no_grad():
+                depth_output = pipeline(image)
+
+            depth_map = depth_output["depth"]
+
+            # Convert depth to mesh
+            mesh = depth_to_point_cloud(depth_map, image, detail_level)
             return mesh
 
         mesh, error = process_with_timeout(generate_3d, [], TIMEOUT_SECONDS)
@@ -414,7 +472,7 @@ def model_info(job_id):
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Image to 3D API (TripoSG)",
+        "message": "Image to 3D API (Depth-Anything)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
@@ -424,9 +482,9 @@ def index():
         ],
         "parameters": {
             "output_format": "glb",
-            "detail_level": "low, medium, or high - controls inference steps and mesh faces"
+            "detail_level": "low, medium, or high - controls point cloud density"
         },
-        "description": "This API creates full 3D models from 2D images using TripoSG. Images should have transparent backgrounds."
+        "description": "This API creates 3D models from 2D images using Depth-Anything depth estimation. Images should have transparent backgrounds."
     }), 200
 
 if __name__ == '__main__':
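
Note: for completeness, a client-side sketch against the endpoints listed above. The multipart field names ("image", "detail_level", "output_format"), the response keys, and the host/port are assumptions, since the request-parsing code is outside this diff:

import requests

BASE = "http://localhost:7860"  # assumed host/port

with open("sample.jpg", "rb") as f:
    resp = requests.post(
        f"{BASE}/convert",
        files={"image": f},  # assumed field name
        data={"detail_level": "medium", "output_format": "glb"},
    )
job_id = resp.json().get("job_id")  # assumed response key
print(requests.get(f"{BASE}/progress/{job_id}").json())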
 