Spaces:

mac9087
/

rightnight

Sleeping

App Files Files Community

mac9087 committed on Apr 25

Commit

64188d6

verified ·

1 Parent(s): ffe4279

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -156

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import os
 import torch
 import time
@@ -15,15 +14,12 @@ from huggingface_hub import snapshot_download
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from transformers import pipeline
-from scipy.ndimage import gaussian_filter
-import open3d as o3d
 import cv2
 # Force CPU usage
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 torch.set_default_device("cpu")
-# Patch PyTorch to disable CUDA initialization
 torch.cuda.is_available = lambda: False
 torch.cuda.device_count = lambda: 0
@@ -36,12 +32,12 @@ RESULTS_FOLDER = '/tmp/results'
 CACHE_DIR = '/tmp/huggingface'
 ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
-# Create necessary directories
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(RESULTS_FOLDER, exist_ok=True)
 os.makedirs(CACHE_DIR, exist_ok=True)
-# Set Hugging Face cache environment variables
 os.environ['HF_HOME'] = CACHE_DIR
 os.environ['TRANSFORMERS_CACHE'] = os.path.join(CACHE_DIR, 'transformers')
 os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
@@ -49,23 +45,21 @@ os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
-# Job tracking dictionary
 processing_jobs = {}
-# Global model variables
-depth_pipeline = None
 model_loaded = False
 model_loading = False
-# Configuration for processing
-TIMEOUT_SECONDS = 240  # 4 minutes max for Depth-Anything on CPU
-MAX_DIMENSION = 512  # Depth-Anything expects 512x512
-# TimeoutError for handling timeouts
 class TimeoutError(Exception):
     pass
-# Thread-safe timeout implementation
 def process_with_timeout(function, args, timeout):
     result = [None]
     error = [None]
@@ -81,7 +75,6 @@ def process_with_timeout(function, args, timeout):
     thread = threading.Thread(target=target)
     thread.daemon = True
     thread.start()
     thread.join(timeout)
     if not completed[0]:
@@ -98,76 +91,44 @@ def process_with_timeout(function, args, timeout):
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-# Image preprocessing: Remove background using cv2
 def preprocess_image(image_path):
     try:
-        # Load image
         with Image.open(image_path) as img:
-            # Convert to RGB or handle transparency
             if img.mode == 'RGBA':
-                # Use alpha channel as initial mask
-                img_array = np.array(img)
-                alpha = img_array[:, :, 3]
-                img_rgb = img_array[:, :, :3]
-            else:
-                img_rgb = np.array(img.convert('RGB'))
-                alpha = None
-            # Resize to 512x512
-            img_rgb = cv2.resize(img_rgb, (512, 512), interpolation=cv2.INTER_LANCZOS4)
-            # Convert to grayscale
-            gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
-            # Adaptive thresholding for initial mask
-            thresh = cv2.adaptiveThreshold(
-                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
-            )
-            # If alpha channel exists, combine with threshold
-            if alpha is not None:
-                alpha_resized = cv2.resize(alpha, (512, 512), interpolation=cv2.INTER_LANCZOS4)
-                thresh = cv2.bitwise_and(thresh, alpha_resized)
-            # Refine with GrabCut
-            mask = np.zeros((512, 512), np.uint8)
-            mask[thresh == 255] = cv2.GC_PR_FGD  # Probable foreground
-            mask[thresh == 0] = cv2.GC_PR_BGD   # Probable background
-            bgdModel = np.zeros((1, 65), np.float64)
-            fgdModel = np.zeros((1, 65), np.float64)
-            rect = (10, 10, 492, 492)  # ROI for GrabCut
-            cv2.grabCut(img_rgb, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK)
-            # Create final mask (foreground = 1, background = 0)
-            mask2 = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')
-            # Apply mask to image
-            img_foreground = cv2.bitwise_and(img_rgb, img_rgb, mask=mask2)
-            return Image.fromarray(img_foreground)
     except Exception as e:
         raise Exception(f"Error preprocessing image: {str(e)}")
 def load_model():
-    global depth_pipeline, model_loaded, model_loading
     if model_loaded:
-        return depth_pipeline
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return depth_pipeline
     try:
         model_loading = True
-        print("Starting model loading...")
-        model_name = "LiheYoung/depth-anything-small-hf"
-        # Download model with retry mechanism
         max_retries = 3
         retry_delay = 5
         for attempt in range(max_retries):
@@ -180,24 +141,23 @@ def load_model():
                 break
             except Exception as e:
                 if attempt < max_retries - 1:
-                    print(f"Download attempt {attempt+1} failed: {str(e)}. Retrying in {retry_delay} seconds...")
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
                     raise
-        # Load Depth-Anything pipeline
-        depth_pipeline = pipeline(
-            "depth-estimation",
-            model=model_name,
             cache_dir=CACHE_DIR,
-            device=-1,  # Force CPU
             torch_dtype=torch.float32,
         )
         model_loaded = True
-        print("Model loaded successfully on CPU")
-        return depth_pipeline
     except Exception as e:
         print(f"Error loading model: {str(e)}")
@@ -206,79 +166,42 @@ def load_model():
     finally:
         model_loading = False
-def depth_to_point_cloud(depth_map, image, detail_level):
-    # Parameters based on detail level
-    downsample_factors = {'low': 4, 'medium': 2, 'high': 1}
-    downsample = downsample_factors[detail_level]
-    # Convert image and depth to numpy
-    img_array = np.array(image)
-    depth_array = np.array(depth_map)
-    # Downsample for performance
-    if downsample > 1:
-        depth_array = depth_array[::downsample, ::downsample]
-        img_array = img_array[::downsample, ::downsample]
-    # Normalize depth
-    depth_array = gaussian_filter(depth_array, sigma=1)
-    depth_array = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
-    # Create point cloud
-    h, w = depth_array.shape
-    x, y = np.meshgrid(np.arange(w), np.arange(h))
-    # Camera intrinsics (assumed focal length)
-    fx = fy = w * 0.5
-    cx, cy = w / 2, h / 2
-    # Convert to 3D coordinates (Z-up for Unity)
-    z = depth_array
-    x = (x - cx) * z / fx
-    y = -(y - cy) * z / fy  # Flip y-axis to correct orientation
-    points = np.stack([x, y, z], axis=-1).reshape(-1, 3)
-    colors = img_array.reshape(-1, 3) / 255.0
-    # Filter out invalid points (tighter range for foreground)
-    mask = (z.reshape(-1) > 0.2) & (z.reshape(-1) < 0.8)
-    points = points[mask]
-    colors = colors[mask]
-    # Create Open3D point cloud
-    pcd = o3d.geometry.PointCloud()
-    pcd.points = o3d.utility.Vector3dVector(points)
-    pcd.colors = o3d.utility.Vector3dVector(colors)
-    # Estimate normals
-    pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
-    # Poisson surface reconstruction
-    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-        pcd, depth=8 if detail_level == 'high' else 6
-    )
-    # Convert to trimesh
-    vertices = np.asarray(mesh.vertices)
-    faces = np.asarray(mesh.triangles)
-    vertex_colors = np.asarray(mesh.vertex_colors)
-    trimesh_mesh = trimesh.Trimesh(
-        vertices=vertices,
-        faces=faces,
-        vertex_colors=vertex_colors
-    )
-    # Rotate mesh to correct orientation (180 degrees around X-axis)
-    trimesh_mesh.apply_transform(trimesh.transformations.rotation_matrix(np.pi, [1, 0, 0]))
-    return trimesh_mesh
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "Depth-Anything",
         "device": "cpu"
     }), 200
@@ -375,15 +298,7 @@ def convert_image_to_3d():
             try:
                 def generate_3d():
-                    # Generate depth map
-                    with torch.no_grad():
-                        depth_output = pipeline(image)
-                    depth_map = depth_output["depth"]
-                    # Convert depth to mesh
-                    mesh = depth_to_point_cloud(depth_map, image, detail_level)
-                    return mesh
                 mesh, error = process_with_timeout(generate_3d, [], TIMEOUT_SECONDS)
@@ -397,7 +312,7 @@ def convert_image_to_3d():
                 processing_jobs[job_id]['progress'] = 80
-                # Export as GLB or OBJ
                 file_path = os.path.join(output_dir, f"model.{output_format}")
                 mesh.export(file_path, file_type=output_format)
@@ -406,7 +321,7 @@ def convert_image_to_3d():
                 processing_jobs[job_id]['status'] = 'completed'
                 processing_jobs[job_id]['progress'] = 100
-                print(f"Job {job_id} completed successfully")
             except Exception as e:
                 error_details = traceback.format_exc()
@@ -527,7 +442,7 @@ def model_info(job_id):
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Image to 3D API (Depth-Anything)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
@@ -537,12 +452,12 @@ def index():
         ],
         "parameters": {
             "output_format": "glb or obj",
-            "detail_level": "low, medium, or high - controls point cloud density"
         },
-        "description": "This API creates 3D models from 2D images using Depth-Anything depth estimation. Images should have transparent backgrounds for best results."
     }), 200
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port)

 import os
 import torch
 import time
 from flask_cors import CORS
 import numpy as np
 import trimesh
+from diffusers import DiffusionPipeline
 import cv2
 # Force CPU usage
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 torch.set_default_device("cpu")
 torch.cuda.is_available = lambda: False
 torch.cuda.device_count = lambda: 0
 CACHE_DIR = '/tmp/huggingface'
 ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
+# Create directories
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(RESULTS_FOLDER, exist_ok=True)
 os.makedirs(CACHE_DIR, exist_ok=True)
+# Set Hugging Face cache
 os.environ['HF_HOME'] = CACHE_DIR
 os.environ['TRANSFORMERS_CACHE'] = os.path.join(CACHE_DIR, 'transformers')
 os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
+# Job tracking: maps job_id -> per-job state dict (the convert handler writes its 'status' and 'progress' keys)
 processing_jobs = {}
+# Global model: pipeline handle assigned by load_model(); stays None until a load succeeds
+tripo_pipeline = None
 model_loaded = False
 model_loading = False
+# Configuration
+TIMEOUT_SECONDS = 300  # seconds (5 minutes) passed to process_with_timeout for one TripoSG generation
+MAX_DIMENSION = 256  # TripoSG works with smaller images; NOTE(review): preprocess_image hard-codes 256 instead of reading this - confirm
 class TimeoutError(Exception):
     """Application-level timeout error.

     NOTE(review): this name shadows Python's builtin ``TimeoutError`` (an
     ``OSError`` subclass) for the rest of the module; this class derives
     directly from ``Exception``. Presumably raised by
     ``process_with_timeout`` when the worker thread does not finish in
     time - confirm against the full function body.
     """
 def process_with_timeout(function, args, timeout):
     result = [None]
     error = [None]
     thread = threading.Thread(target=target)
     thread.daemon = True
     thread.start()
     thread.join(timeout)
     if not completed[0]:
 def allowed_file(filename):
     """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

     The extension is the text after the last dot, compared
     case-insensitively. A name without any dot is rejected.
     """
     if '.' not in filename:
         return False
     extension = filename.rsplit('.', 1)[1]
     return extension.lower() in ALLOWED_EXTENSIONS
+# Image preprocessing
 def preprocess_image(image_path):
     """Load an image and prepare it as a 256x256 RGB input for the model.

     The image is converted to RGB, resized to 256x256 with Lanczos
     resampling, and every pixel whose grayscale value is 10 or lower is
     set to black through a binary mask.

     Args:
         image_path: Anything ``Image.open`` accepts (typically a file path).

     Returns:
         A new PIL image built from the masked 256x256x3 array.

     Raises:
         Exception: Any failure is re-raised with an
             "Error preprocessing image: ..." message; the original
             exception is attached as ``__cause__``.
     """
     try:
         with Image.open(image_path) as img:
             # Convert every non-RGB mode, not only RGBA. Grayscale ('L'),
             # palette ('P'), 'LA' and CMYK uploads would otherwise reach
             # cv2.cvtColor(..., COLOR_RGB2GRAY) with the wrong channel
             # count and fail.
             if img.mode != 'RGB':
                 img = img.convert('RGB')
             # Resize to 256x256
             img = img.resize((256, 256), Image.LANCZOS)
             # Basic cv2 cleanup: keep pixels brighter than the threshold,
             # zero out the rest.
             img_array = np.array(img)
             gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
             _, mask = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
             img_array = cv2.bitwise_and(img_array, img_array, mask=mask)
             return Image.fromarray(img_array)
     except Exception as e:
         # Chain the cause so the original traceback is not lost.
         raise Exception(f"Error preprocessing image: {str(e)}") from e
 def load_model():
+    global tripo_pipeline, model_loaded, model_loading
     if model_loaded:
+        return tripo_pipeline
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
+        return tripo_pipeline
     try:
         model_loading = True
+        print("Loading TripoSG model...")
+        model_name = "tripo3d/tripo-sg-3d"
+        # Download model
         max_retries = 3
         retry_delay = 5
         for attempt in range(max_retries):
                 break
             except Exception as e:
                 if attempt < max_retries - 1:
+                    print(f"Download attempt {attempt+1} failed: {str(e)}. Retrying...")
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
                     raise
+        # Load TripoSG pipeline
+        tripo_pipeline = DiffusionPipeline.from_pretrained(
+            model_name,
             cache_dir=CACHE_DIR,
             torch_dtype=torch.float32,
         )
+        tripo_pipeline.to("cpu")
         model_loaded = True
+        print("TripoSG loaded successfully on CPU")
+        return tripo_pipeline
     except Exception as e:
         print(f"Error loading model: {str(e)}")
     finally:
         model_loading = False
+def generate_3d_model(image, detail_level):
+    try:
+        # Parameters
+        num_steps = {'low': 20, 'medium': 30, 'high': 40}
+        steps = num_steps[detail_level]
+        # Generate 3D model
+        with torch.no_grad():
+            result = tripo_pipeline(image, num_inference_steps=steps)
+        # Extract mesh
+        mesh = result.meshes[0]
+        # Convert to trimesh
+        vertices = np.array(mesh.vertices)
+        faces = np.array(mesh.faces)
+        vertex_colors = np.array(mesh.vertex_colors) if mesh.vertex_colors is not None else None
+        trimesh_mesh = trimesh.Trimesh(
+            vertices=vertices,
+            faces=faces,
+            vertex_colors=vertex_colors
+        )
+        # Rotate for Unity Z-up
+        trimesh_mesh.apply_transform(trimesh.transformations.rotation_matrix(np.pi, [1, 0, 0]))
+        return trimesh_mesh
+    except Exception as e:
+        raise Exception(f"Error generating 3D model: {str(e)}")
 @app.route('/health', methods=['GET'])
 def health_check():
     """Answer GET /health with a fixed JSON payload and HTTP 200.

     The payload is static; this handler does not inspect whether the
     model has actually been loaded.
     """
     payload = {
         "status": "healthy",
         "model": "TripoSG",
         "device": "cpu"
     }
     return jsonify(payload), 200
             try:
                 def generate_3d():
+                    return generate_3d_model(image, detail_level)
                 mesh, error = process_with_timeout(generate_3d, [], TIMEOUT_SECONDS)
                 processing_jobs[job_id]['progress'] = 80
+                # Export
                 file_path = os.path.join(output_dir, f"model.{output_format}")
                 mesh.export(file_path, file_type=output_format)
                 processing_jobs[job_id]['status'] = 'completed'
                 processing_jobs[job_id]['progress'] = 100
+                print(f"Job {job_id} completed")
             except Exception as e:
                 error_details = traceback.format_exc()
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
+        "message": "Image to 3D API (TripoSG)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
         ],
         "parameters": {
             "output_format": "glb or obj",
+            "detail_level": "low, medium, or high - controls inference steps"
         },
+        "description": "Creates 3D models from 2D images using TripoSG. Use transparent PNGs for best results."
     }), 200
 if __name__ == '__main__':
     # cleanup_old_jobs is defined outside this view; presumably it purges
     # stale job data before the server starts - confirm.
     cleanup_old_jobs()
     # Port comes from the PORT environment variable, defaulting to 7860.
     port = int(os.getenv('PORT', 7860))
     # 0.0.0.0 binds on all interfaces.
     app.run(port=port, host='0.0.0.0')