mac9087 committed
Commit d706f08 · verified · Parent: da0c0da

Update app.py

Files changed (1):
  1. app.py +92 -410

app.py CHANGED
@@ -37,7 +37,7 @@ os.environ['HF_HOME'] = CACHE_DIR
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
 
- # Job track
  processing_jobs = {}
 
  # Model variables
@@ -47,7 +47,7 @@ depth_anything_processor = None
  model_loaded = False
  model_loading = False
 
- TIMEOUT_SECONDS = 300  # Increased timeout for better processing
  MAX_DIMENSION = 518
 
  class TimeoutError(Exception):
@@ -84,77 +84,50 @@ def process_with_timeout(function, args, timeout):
  def allowed_file(filename):
      return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
- def remove_background(img):
-     """
-     Remove background from image using OpenCV
-     """
-     # Store original mode
-     original_mode = img.mode
-
-     # Convert to RGBA if not already
-     if img.mode != 'RGBA':
-         img = img.convert('RGBA')
-
-     # Convert to numpy array
-     img_array = np.array(img)
-
-     # Create a mask with alpha channel
-     if img_array.shape[2] == 4:
-         # If image already has alpha channel, use it
-         alpha = img_array[:, :, 3]
-         if np.all(alpha == 255):  # If alpha is all 255, it's not transparent
-             alpha = None
-     else:
-         alpha = None
-
-     # If no alpha channel or all opaque, we need to create a mask
-     if alpha is None:
-         # Convert to RGB for processing
-         img_rgb = cv2.cvtColor(img_array[:, :, :3], cv2.COLOR_RGB2BGR)
-
-         # Create a blank mask
-         mask = np.zeros(img_rgb.shape[:2], np.uint8)
-
-         # Approximate background with GrabCut algorithm
-         bgd_model = np.zeros((1, 65), np.float64)
-         fgd_model = np.zeros((1, 65), np.float64)
-
-         # Define rectangle for initial segmentation (use most of the image)
-         h, w = img_rgb.shape[:2]
-         margin = min(h, w) // 10
-         rect = (margin, margin, w - 2*margin, h - 2*margin)
-
-         try:
-             # Apply GrabCut
-             cv2.grabCut(img_rgb, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
-
-             # Create binary mask
-             mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
-
-             # Refine the mask with morphological operations
-             kernel = np.ones((5, 5), np.uint8)
-             mask2 = cv2.morphologyEx(mask2, cv2.MORPH_CLOSE, kernel)
-             mask2 = cv2.morphologyEx(mask2, cv2.MORPH_OPEN, kernel)
-
-             # Create alpha channel
-             alpha = mask2 * 255
-         except Exception as e:
-             print(f"GrabCut failed: {str(e)}. Using simple thresholding instead.")
-             # Fallback to simpler method if GrabCut fails
-             gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
-             _, alpha = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
-
-     # Create RGBA image
-     result = np.zeros((img_array.shape[0], img_array.shape[1], 4), dtype=np.uint8)
-     result[:, :, :3] = img_array[:, :, :3]
-     result[:, :, 3] = alpha
-
-     # Convert back to PIL Image
-     return Image.fromarray(result)
-
- def preprocess_image(image_path, remove_bg=True):
      with Image.open(image_path) as img:
          img = img.convert("RGB")
 
          if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
@@ -166,42 +139,21 @@ def preprocess_image(image_path, remove_bg=True):
              new_width = int(img.width * (MAX_DIMENSION / img.height))
          img = img.resize((new_width, new_height), Image.LANCZOS)
 
-         # Enhanced contrast and brightness adjustment
          img_array = np.array(img)
          if len(img_array.shape) == 3 and img_array.shape[2] == 3:
-             # Enhance contrast with CLAHE
              lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
              l, a, b = cv2.split(lab)
-             clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
              cl = clahe.apply(l)
             enhanced_lab = cv2.merge((cl, a, b))
             img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
-
-             # Additional brightness adjustment
-             alpha = 1.1  # Contrast control (1.0-3.0)
-             beta = 5  # Brightness control (0-100)
-             img_array = cv2.convertScaleAbs(img_array, alpha=alpha, beta=beta)
-
             img = Image.fromarray(img_array)
 
-         # Background removal if requested
-         if remove_bg:
-             img = remove_background(img)
-
-             # Save original image with alpha for texture
-             processed_img = img
-
-             # For model processing, we need to save an RGB copy
-             model_img = Image.new('RGB', img.size, (255, 255, 255))
-             model_img.paste(img, mask=img.split()[3])  # Use alpha as mask
-
-             # Return both processed image (with alpha) and model image (RGB)
-             return processed_img
-
         return img
 
-
-
  def load_models():
      global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
 
@@ -217,13 +169,11 @@ def load_models():
      model_loading = True
      print("Loading models...")
 
-     # Authenticate with Hugging Face
      hf_token = os.environ.get('HF_TOKEN')
      if hf_token:
          login(token=hf_token)
          print("Authenticated with Hugging Face token")
 
-     # DPT-Large
      dpt_model_name = "Intel/dpt-large"
      max_retries = 3
      retry_delay = 5
@@ -254,7 +204,6 @@ def load_models():
              print("DPT-Large loaded")
              gc.collect()
 
-     # Depth Anything
      da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
      for attempt in range(max_retries):
          try:
@@ -300,9 +249,6 @@ def load_models():
          model_loading = False
 
  def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
-     """
-     Improved depth map fusion with better edge preservation and depth control
-     """
      if isinstance(dpt_depth, Image.Image):
          dpt_depth = np.array(dpt_depth)
      if isinstance(da_depth, torch.Tensor):
@@ -315,170 +261,78 @@ def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
      if dpt_depth.shape != da_depth.shape:
          da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
 
-     # Better normalization with more robust percentiles
-     p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [2, 98])
-     p_low_da, p_high_da = np.percentile(da_depth, [2, 98])
      dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
      da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
 
-     # Apply bilateral filter for edge-preserving smoothing
-     dpt_depth_smooth = cv2.bilateralFilter((dpt_depth * 255).astype(np.uint8), 9, 75, 75) / 255.0
-     da_depth_smooth = cv2.bilateralFilter((da_depth * 255).astype(np.uint8), 9, 75, 75) / 255.0
-
-     # Detect edges more precisely using Canny with auto thresholds
-     edges_dpt = cv2.Canny(
-         (dpt_depth * 255).astype(np.uint8),
-         int(np.mean(dpt_depth * 255) * 0.66),
-         int(np.mean(dpt_depth * 255) * 1.33)
-     )
-     edges_da = cv2.Canny(
-         (da_depth * 255).astype(np.uint8),
-         int(np.mean(da_depth * 255) * 0.66),
-         int(np.mean(da_depth * 255) * 1.33)
-     )
-
-     # Combine edge maps
-     combined_edges = np.maximum(edges_dpt, edges_da)
-     edge_mask = gaussian_filter(combined_edges / 255.0, sigma=1.0)
-
      if detail_level == 'high':
-         # For high detail, we use more of the DA model at edges and more DPT for flat areas
          weight_da = 0.6
         dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
         da_weight = gaussian_filter(edge_mask, sigma=1.0)
-
-         # Adaptive depth scaling - reduce extreme depth values
-         depth_scale = np.ones_like(dpt_depth)
-         depth_scale = np.where(da_depth > 0.8, 0.8, depth_scale)  # Limit maximum depth
-
-         fused_depth = (dpt_weight * dpt_depth_smooth +
-                        da_weight * da_depth_smooth * weight_da * depth_scale +
-                        (1 - weight_da) * dpt_depth_smooth * (1 - da_weight))
-     elif detail_level == 'medium':
-         # For medium detail, balanced approach
-         weight_da = 0.45
-         # More aggressive depth limitation
-         depth_scale = np.ones_like(dpt_depth)
-         depth_scale = np.where(da_depth > 0.75, 0.75 / da_depth, depth_scale)  # Limit maximum depth
-
-         fused_depth = ((1 - weight_da) * dpt_depth_smooth +
-                        weight_da * da_depth_smooth * depth_scale)
      else:
-         # For low detail, simpler approach with more smoothing
-         weight_da = 0.3
-         fused_depth = (1 - weight_da) * gaussian_filter(dpt_depth_smooth, sigma=0.5) + weight_da * gaussian_filter(da_depth_smooth, sigma=0.5)
 
-     # Final cleanup
      fused_depth = np.clip(fused_depth, 0, 1)
-
-     # Apply depth compression to avoid extreme depth values
-     fused_depth = np.power(fused_depth, 0.85)  # Compress depth range
-
     return fused_depth
 
  def enhance_depth_map(depth_map, detail_level='medium'):
-     """
-     Enhanced depth map processing with better depth control
-     """
      enhanced_depth = depth_map.copy().astype(np.float32)
-
-     # More robust percentile clipping
-     p_low, p_high = np.percentile(enhanced_depth, [2, 98])
      enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
      enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
 
      if detail_level == 'high':
-         # Apply bilateral filter for edge-preserving smoothing
-         enhanced_depth_smooth = cv2.bilateralFilter(
-             (enhanced_depth * 255).astype(np.uint8), 7, 50, 50
-         ).astype(np.float32) / 255.0
-
-         # Enhance edges
-         edges = cv2.Canny((enhanced_depth * 255).astype(np.uint8), 50, 150)
-         edge_mask = gaussian_filter(edges / 255.0, sigma=1.0)
-
-         # Detail enhancement through unsharp masking
-         blurred = gaussian_filter(enhanced_depth, sigma=1.5)
-         detail_mask = enhanced_depth - blurred
-         enhanced_depth = enhanced_depth_smooth + 1.2 * detail_mask * (1 - edge_mask)
-
-         # Compression for better depth control
-         enhanced_depth = np.power(enhanced_depth, 0.85)
-     elif detail_level == 'medium':
-         # Medium detail processing
-         enhanced_depth_smooth = cv2.bilateralFilter(
-             (enhanced_depth * 255).astype(np.uint8), 5, 40, 40
-         ).astype(np.float32) / 255.0
-
          blurred = gaussian_filter(enhanced_depth, sigma=1.0)
-         detail_mask = enhanced_depth - blurred
-         enhanced_depth = enhanced_depth_smooth + 0.7 * detail_mask
-         enhanced_depth = np.power(enhanced_depth, 0.9)  # Milder compression
      else:
-         # Low detail - more smoothing
-         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.8)
-         enhanced_depth = np.power(enhanced_depth, 0.95)  # Light compression
 
-     # Final normalization
      enhanced_depth = np.clip(enhanced_depth, 0, 1)
      return enhanced_depth
 
  def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
-     """
-     Improved mesh creation with better vertex distribution and depth control
-     """
-     # Apply enhanced depth processing
      enhanced_depth = enhance_depth_map(depth_map, detail_level)
-
-     # Get dimensions
      h, w = enhanced_depth.shape
-
-     # Create grid coordinates
      x = np.linspace(0, w-1, resolution)
      y = np.linspace(0, h-1, resolution)
      x_grid, y_grid = np.meshgrid(x, y)
 
-     # Use bicubic interpolation for smoother depth
      interp_func = interpolate.RectBivariateSpline(
          np.arange(h), np.arange(w), enhanced_depth, kx=3, ky=3
      )
      z_values = interp_func(y, x, grid=True)
 
-     # Enhanced edge preservation for high detail
      if detail_level == 'high':
          dx = np.gradient(z_values, axis=1)
          dy = np.gradient(z_values, axis=0)
          gradient_magnitude = np.sqrt(dx**2 + dy**2)
-
-         # Limit excessive depth at edges
-         max_gradient = np.percentile(gradient_magnitude, 95)
-         edge_factor = np.clip(gradient_magnitude / max_gradient, 0, 1)
-         edge_depth_limit = np.clip(0.2 - edge_factor * 0.1, 0, 0.2)
-
-         # Apply depth limiting at high-gradient areas
-         z_values = z_values - edge_factor * edge_depth_limit
 
-     # Better normalization for z values
-     z_min, z_max = np.percentile(z_values, [2, 98])
-     z_values = (z_values - z_min) / (z_max - z_min) if z_max > z_min else z_values
 
-     # Adaptive depth scaling based on detail level
-     if detail_level == 'high':
-         z_scaling = 1.8  # Reduced from 2.5
-     elif detail_level == 'medium':
-         z_scaling = 1.5  # Reduced from 2.0
-     else:
-         z_scaling = 1.2  # Reduced from 1.5
-
-     # Apply depth compression to avoid extreme values
-     z_values = np.power(z_values, 0.85) * z_scaling
-
-     # Create 3D coordinates
-     x_grid = (x_grid / w - 0.5) * 2.0
-     y_grid = (y_grid / h - 0.5) * 2.0
      vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
 
-     # Create faces with improved topology
      faces = []
      for i in range(resolution-1):
          for j in range(resolution-1):
@@ -486,18 +340,12 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
              p2 = i * resolution + (j + 1)
              p3 = (i + 1) * resolution + j
              p4 = (i + 1) * resolution + (j + 1)
-
-             # Check face orientation for better topology
              v1 = vertices[p1]
              v2 = vertices[p2]
              v3 = vertices[p3]
              v4 = vertices[p4]
-
-             # Calculate normals
             norm1 = np.cross(v2-v1, v4-v1)
             norm2 = np.cross(v4-v3, v1-v3)
-
-             # Check if faces should be flipped
             if np.dot(norm1, norm2) >= 0:
                 faces.append([p1, p2, p4])
                 faces.append([p1, p4, p3])
@@ -505,40 +353,12 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
                 faces.append([p1, p2, p3])
                 faces.append([p2, p4, p3])
 
-     # Check if we have valid faces before creating the mesh
-     if len(faces) == 0:
-         # Create a simple square mesh as fallback
-         faces = [[0, 1, 2], [1, 3, 2]]
-
      faces = np.array(faces)
-
-     # Ensure we have at least one vertex and face
-     if len(vertices) == 0 or len(faces) == 0:
-         # Create a minimal mesh to avoid errors
-         vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]])
-         faces = np.array([[0, 1, 2], [1, 3, 2]])
-
-     # Check for out-of-bounds indexes
-     max_vertex_idx = len(vertices) - 1
-     valid_faces = []
-     for face in faces:
-         if np.all(face <= max_vertex_idx):
-             valid_faces.append(face)
-
-     if len(valid_faces) == 0:
-         # Create a minimal mesh to avoid errors
-         vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]])
-         faces = np.array([[0, 1, 2], [1, 3, 2]])
-     else:
-         faces = np.array(valid_faces)
-
      mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
 
-     # Apply vertex colors from image
      if image:
          img_array = np.array(image)
          vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
-
          for i in range(resolution):
              for j in range(resolution):
                  img_x = j * (img_array.shape[1] - 1) / (resolution - 1)
@@ -547,79 +367,30 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
                  x1, y1 = min(x0 + 1, img_array.shape[1] - 1), min(y0 + 1, img_array.shape[0] - 1)
                  wx = img_x - x0
                  wy = img_y - y0
-
                  vertex_idx = i * resolution + j
-
-                 # Skip if vertex index is out of range
-                 if vertex_idx >= len(vertices):
-                     continue
-
-                 # Handle RGBA images
                  if len(img_array.shape) == 3 and img_array.shape[2] == 4:
-                     # Direct copy of RGBA values with bilinear interpolation
                      for c in range(4):
-                         vertex_colors[vertex_idx, c] = int(
-                             (1-wx)*(1-wy)*img_array[y0, x0, c] +
-                             wx*(1-wy)*img_array[y0, x1, c] +
-                             (1-wx)*wy*img_array[y1, x0, c] +
-                             wx*wy*img_array[y1, x1, c]
-                         )
-                 # Handle RGB images - add full opacity
-                 elif len(img_array.shape) == 3 and img_array.shape[2] == 3:
-                     for c in range(3):
-                         vertex_colors[vertex_idx, c] = int(
-                             (1-wx)*(1-wy)*img_array[y0, x0, c] +
-                             wx*(1-wy)*img_array[y0, x1, c] +
-                             (1-wx)*wy*img_array[y1, x0, c] +
-                             wx*wy*img_array[y1, x1, c]
-                         )
-                     vertex_colors[vertex_idx, 3] = 255
-                 # Handle grayscale images - convert to RGB with full opacity
                  else:
-                     gray = int(
-                         (1-wx)*(1-wy)*img_array[y0, x0] +
-                         wx*(1-wy)*img_array[y0, x1] +
-                         (1-wx)*wy*img_array[y1, x0] +
-                         wx*wy*img_array[y1, x1]
-                     )
-                     vertex_colors[vertex_idx, :3] = [gray, gray, gray]
                      vertex_colors[vertex_idx, 3] = 255
-
          mesh.visual.vertex_colors = vertex_colors
 
-     try:
-         # Apply smoothing for non-high detail levels
-         if detail_level != 'high':
-             mesh = mesh.smoothed(method='laplacian', iterations=1)
-
-         # Try to fix normals but catch any errors
-         try:
-             mesh.fix_normals()
-         except Exception as e:
-             print(f"Warning: Could not fix normals: {str(e)}")
-             # Compute face normals manually if fix_normals fails
-             mesh.face_normals = trimesh.geometry.triangles_normals(
-                 mesh.triangles
-             )
-     except Exception as e:
-         print(f"Warning: Error in mesh post-processing: {str(e)}")
-
      return mesh
 
  @app.route('/health', methods=['GET'])
  def health_check():
      return jsonify({
          "status": "healthy",
          "model": "DPT-Large + Depth Anything",
-         "device": "cpu",
-         "version": "1.1.0"  # Added version indicator
      }), 200
 
  @app.route('/progress/<job_id>', methods=['GET'])
@@ -671,14 +442,12 @@ def convert_image_to_3d():
          output_format = request.form.get('output_format', 'glb').lower()
          detail_level = request.form.get('detail_level', 'medium').lower()
          texture_quality = request.form.get('texture_quality', 'medium').lower()
-         remove_bg = request.form.get('remove_background', 'true').lower() == 'true'
      except ValueError:
          return jsonify({"error": "Invalid parameter values"}), 400
 
      if output_format not in ['obj', 'glb']:
          return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400
 
-     # Adjust resolution based on detail level
      if detail_level == 'high':
          mesh_resolution = min(int(mesh_resolution * 1.5), 150)
      elif detail_level == 'low':
@@ -708,13 +477,9 @@ def convert_image_to_3d():
 
      try:
          processing_jobs[job_id]['progress'] = 5
-         image = preprocess_image(filepath, remove_bg=remove_bg)
          processing_jobs[job_id]['progress'] = 10
 
-         # Save the processed image for debugging if needed
-         debug_img_path = os.path.join(output_dir, "processed_input.png")
-         image.save(debug_img_path, format="PNG")
-
         try:
             dpt_model, da_model, da_processor = load_models()
             processing_jobs[job_id]['progress'] = 30
@@ -726,24 +491,11 @@ def convert_image_to_3d():
              try:
                  def estimate_depth():
                      with torch.no_grad():
-                         # Make sure image is in RGB format for the models
-                         rgb_image = image
-                         if rgb_image.mode == 'RGBA':
-                             # Convert RGBA to RGB for model processing
-                             rgb_image = Image.new('RGB', image.size, (255, 255, 255))
-                             rgb_image.paste(image, mask=image.split()[3])  # Use alpha channel as mask
-
-                         # DPT-Large
-                         dpt_result = dpt_model(rgb_image)
                          dpt_depth = dpt_result["depth"]
-                         processing_jobs[job_id]['progress'] = 40
-
-                         # Depth Anything (if loaded)
                          if da_model and da_processor:
-                             inputs = da_processor(images=rgb_image, return_tensors="pt")  # Use RGB image here
                              inputs = {k: v.to("cpu") for k, v in inputs.items()}
                              outputs = da_model(**inputs)
                              da_depth = outputs.predicted_depth.squeeze()
@@ -753,27 +505,15 @@ def convert_image_to_3d():
                                  mode='bicubic',
                                  align_corners=False
                              ).squeeze()
-                             processing_jobs[job_id]['progress'] = 50
-
-                             # Improved fusion of depth maps
                              fused_depth = fuse_depth_maps(dpt_depth, da_depth, detail_level)
                          else:
-                             # Just use DPT with enhanced processing if Depth Anything is not available
                              fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
                              if len(fused_depth.shape) > 2:
                                  fused_depth = np.mean(fused_depth, axis=2)
-                             # Apply more conservative normalization
-                             p_low, p_high = np.percentile(fused_depth, [2, 98])
                              fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
-                             # Apply compression to limit extreme depths
-                             fused_depth = np.power(fused_depth, 0.85)
-
-                         # Save depth map for debugging
-                         depth_debug_path = os.path.join(output_dir, "depth_map.png")
-                         cv2.imwrite(depth_debug_path, (fused_depth * 255).astype(np.uint8))
-
                          return fused_depth
-
 
                  fused_depth, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
 
@@ -822,7 +562,6 @@ def convert_image_to_3d():
 
              processing_jobs[job_id]['status'] = 'completed'
              processing_jobs[job_id]['progress'] = 100
-             processing_jobs[job_id]['completed_at'] = time.time()
              print(f"Job {job_id} completed")
 
          except Exception as e:
@@ -890,58 +629,6 @@ def preview_model(job_id):
 
      return jsonify({"error": "File not found"}), 404
 
- @app.route('/debug/<job_id>', methods=['GET'])
- def debug_processing(job_id):
-     """New endpoint to provide debug info about processing"""
-     if job_id not in processing_jobs:
-         return jsonify({"error": "Job not found"}), 404
-
-     job = processing_jobs[job_id]
-     output_dir = os.path.join(RESULTS_FOLDER, job_id)
-
-     debug_info = {
-         "job_status": job['status'],
-         "progress": job['progress'],
-         "created_at": job.get('created_at'),
-         "completed_at": job.get('completed_at'),
-         "processing_time": job.get('completed_at', time.time()) - job.get('created_at', time.time()) if job.get('created_at') else None,
-         "error": job.get('error'),
-         "output_format": job.get('output_format'),
-         "available_files": []
-     }
-
-     # List available debug files
-     if os.path.exists(output_dir):
-         for file in os.listdir(output_dir):
-             file_path = os.path.join(output_dir, file)
-             if os.path.isfile(file_path):
-                 debug_info["available_files"].append({
-                     "filename": file,
-                     "size": os.path.getsize(file_path),
-                     "url": f"/files/{job_id}/{file}"
-                 })
-
-     return jsonify(debug_info), 200
-
- @app.route('/files/<job_id>/<filename>', methods=['GET'])
- def get_job_file(job_id, filename):
-     """Access debug files from processing"""
-     if job_id not in processing_jobs:
-         return jsonify({"error": "Job not found"}), 404
-
-     file_path = os.path.join(RESULTS_FOLDER, job_id, filename)
-     if not os.path.exists(file_path) or not os.path.isfile(file_path):
-         return jsonify({"error": "File not found"}), 404
-
-     # Determine MIME type
-     mimetype = "application/octet-stream"
-     if filename.endswith(".png"):
-         mimetype = "image/png"
-     elif filename.endswith(".jpg") or filename.endswith(".jpeg"):
-         mimetype = "image/jpeg"
-
-     return send_file(file_path, mimetype=mimetype)
-
  def cleanup_old_jobs():
      current_time = time.time()
      job_ids_to_remove = []
@@ -1002,35 +689,30 @@ def model_info(job_id):
          "preview_url": job['preview_url'],
          "model_stats": model_stats,
          "created_at": job.get('created_at'),
-         "completed_at": job.get('completed_at'),
-         "processing_time": job.get('completed_at', 0) - job.get('created_at', 0) if job.get('completed_at') and job.get('created_at') else None
      }), 200
 
  @app.route('/', methods=['GET'])
  def index():
      return jsonify({
-         "message": "Enhanced Image to 3D API (DPT-Large + Depth Anything)",
-         "version": "1.1.0",
          "endpoints": [
              "/convert",
              "/progress/<job_id>",
              "/download/<job_id>",
              "/preview/<job_id>",
-             "/model-info/<job_id>",
-             "/debug/<job_id>",  # New debug endpoint
-             "/health"
         ],
         "parameters": {
             "mesh_resolution": "Integer (50-150)",
             "output_format": "obj or glb",
             "detail_level": "low, medium, or high",
-             "texture_quality": "low, medium, or high",
-             "remove_background": "true or false (default: true)"
         },
-         "description": "Creates high-quality 3D models from 2D images with improved depth estimation and background removal."
     }), 200
 
  if __name__ == '__main__':
      cleanup_old_jobs()
      port = int(os.environ.get('PORT', 7860))
-     app.run(host='0.0.0.0', port=port)
 
37
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
38
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
39
 
40
+ # Job tracking
41
  processing_jobs = {}
42
 
43
  # Model variables
 
47
  model_loaded = False
48
  model_loading = False
49
 
50
+ TIMEOUT_SECONDS = 240
51
  MAX_DIMENSION = 518
52
 
53
  class TimeoutError(Exception):
 
84
  def allowed_file(filename):
85
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
86
 
87
+ def remove_background(image):
88
+ """Remove background using OpenCV GrabCut algorithm with improved precision"""
89
+ img_array = np.array(image)
 
 
 
90
 
91
+ # Convert to RGB if image has alpha channel
92
+ if img_array.shape[2] == 4:
93
+ img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
94
 
95
+ # Create mask for GrabCut
96
+ mask = np.zeros(img_array.shape[:2], np.uint8)
97
+ bgdModel = np.zeros((1, 65), np.float64)
98
+ fgdModel = np.zeros((1, 65), np.float64)
99
 
100
+ # Define a tighter rectangle for foreground, adjusting based on image content
101
+ height, width = img_array.shape[:2]
102
+ rect = (int(width * 0.1), int(height * 0.1), int(width * 0.8), int(height * 0.8))
 
 
 
 
 
103
 
104
+ # Run GrabCut with multiple iterations for better accuracy
105
+ cv2.grabCut(img_array, mask, rect, bgdModel, fgdModel, 10, cv2.GC_INIT_WITH_RECT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ # Refine mask using edge detection to preserve subject edges
108
+ mask2 = np.where((mask == cv2.GC_PR_FGD) | (mask == cv2.GC_FGD), 1, 0).astype('uint8')
109
+ edges = cv2.Canny(mask2 * 255, 50, 150)
110
+ mask2 = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
111
+ mask2 = cv2.erode(mask2, np.ones((3, 3), np.uint8), iterations=1)
112
 
113
+ # Apply mask to image
114
+ result = img_array * mask2[:, :, np.newaxis]
115
+
116
+ # Create alpha channel
117
+ alpha = mask2 * 255
118
+ result = np.dstack((result, alpha))
119
+
120
+ return Image.fromarray(result, 'RGBA')
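
A minimal sketch of driving the new remove_background on its own, assuming the imports already present in app.py; the file paths are invented for illustration and are not part of the commit:

    # Hypothetical smoke test for remove_background; "input.jpg" and
    # "cutout.png" are invented paths, not from this commit.
    from PIL import Image

    img = Image.open("input.jpg").convert("RGB")
    cutout = remove_background(img)   # RGBA result; alpha > 0 marks kept pixels
    cutout.save("cutout.png")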
 
+ def preprocess_image(image_path):
      with Image.open(image_path) as img:
+         # Handle PNG transparency
+         if img.mode == 'RGBA':
+             # Create white background
+             background = Image.new('RGB', img.size, (255, 255, 255))
+             background.paste(img, mask=img.split()[3])
+             img = background
+
          img = img.convert("RGB")
 
          if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
 
              new_width = int(img.width * (MAX_DIMENSION / img.height))
          img = img.resize((new_width, new_height), Image.LANCZOS)
 
+         # Remove background
+         img = remove_background(img)
+
          img_array = np.array(img)
          if len(img_array.shape) == 3 and img_array.shape[2] == 3:
              lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
              l, a, b = cv2.split(lab)
+             clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
              cl = clahe.apply(l)
              enhanced_lab = cv2.merge((cl, a, b))
              img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
              img = Image.fromarray(img_array)
 
          return img
 
  def load_models():
      global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
 
      model_loading = True
      print("Loading models...")
 
      hf_token = os.environ.get('HF_TOKEN')
      if hf_token:
          login(token=hf_token)
          print("Authenticated with Hugging Face token")
 
      dpt_model_name = "Intel/dpt-large"
      max_retries = 3
      retry_delay = 5
 
              print("DPT-Large loaded")
              gc.collect()
 
      da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
      for attempt in range(max_retries):
          try:
 
          model_loading = False
 
  def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
      if isinstance(dpt_depth, Image.Image):
          dpt_depth = np.array(dpt_depth)
      if isinstance(da_depth, torch.Tensor):
 
      if dpt_depth.shape != da_depth.shape:
          da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
 
+     p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [5, 95])
+     p_low_da, p_high_da = np.percentile(da_depth, [5, 95])
      dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
      da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
 
      if detail_level == 'high':
          weight_da = 0.6
+         edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
+         edge_mask = (edges > 0).astype(np.float32)
          dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
          da_weight = gaussian_filter(edge_mask, sigma=1.0)
+         fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
      else:
+         weight_da = 0.4 if detail_level == 'medium' else 0.2
+         fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
 
      fused_depth = np.clip(fused_depth, 0, 1)
      return fused_depth
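
To make the non-high fusion weights concrete: for detail_level='medium' the blend above reduces to fused = 0.6·dpt + 0.4·da elementwise. A tiny numeric check (the array values are invented):

    import numpy as np

    # Medium detail: weight_da = 0.4, so fused = 0.6 * dpt + 0.4 * da.
    dpt = np.array([[0.0, 0.5], [1.0, 0.25]])
    da  = np.array([[0.2, 0.5], [0.8, 0.75]])
    fused = (1 - 0.4) * dpt + 0.4 * da
    print(fused)  # [[0.08, 0.5], [0.92, 0.45]]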
 
  def enhance_depth_map(depth_map, detail_level='medium'):
      enhanced_depth = depth_map.copy().astype(np.float32)
+     p_low, p_high = np.percentile(enhanced_depth, [5, 95])
      enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
      enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
 
      if detail_level == 'high':
          blurred = gaussian_filter(enhanced_depth, sigma=1.0)
+         mask = enhanced_depth - blurred
+         enhanced_depth = enhanced_depth + 1.0 * mask
+         smooth1 = gaussian_filter(enhanced_depth, sigma=0.3)
+         smooth2 = gaussian_filter(enhanced_depth, sigma=1.5)
+         edge_mask = enhanced_depth - smooth2
+         enhanced_depth = smooth1 + 0.8 * edge_mask  # Reduced enhancement
+     elif detail_level == 'medium':
+         blurred = gaussian_filter(enhanced_depth, sigma=0.7)
+         mask = enhanced_depth - blurred
+         enhanced_depth = enhanced_depth + 0.6 * mask
+         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.4)
      else:
+         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)
 
      enhanced_depth = np.clip(enhanced_depth, 0, 1)
      return enhanced_depth
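
The high and medium branches above are unsharp masking applied to the depth map: subtract a Gaussian-blurred copy to isolate fine detail, then add the detail back scaled. A one-dimensional toy illustration (values invented, not from the commit):

    import numpy as np
    from scipy.ndimage import gaussian_filter

    depth = np.array([0.2, 0.2, 0.8, 0.8], dtype=np.float32)  # a step edge
    blurred = gaussian_filter(depth, sigma=0.7)
    detail = depth - blurred              # negative left of the edge, positive right
    sharpened = depth + 0.6 * detail      # overshoot on both sides steepens the edge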
 
  def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
      enhanced_depth = enhance_depth_map(depth_map, detail_level)
      h, w = enhanced_depth.shape
      x = np.linspace(0, w-1, resolution)
      y = np.linspace(0, h-1, resolution)
      x_grid, y_grid = np.meshgrid(x, y)
 
      interp_func = interpolate.RectBivariateSpline(
          np.arange(h), np.arange(w), enhanced_depth, kx=3, ky=3
      )
      z_values = interp_func(y, x, grid=True)
 
      if detail_level == 'high':
          dx = np.gradient(z_values, axis=1)
          dy = np.gradient(z_values, axis=0)
          gradient_magnitude = np.sqrt(dx**2 + dy**2)
+         edge_mask = np.clip(gradient_magnitude * 2, 0, 0.1)
+         z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=0.5))
 
+     z_min, z_max = np.percentile(z_values, [10, 90])
+     z_values = np.clip((z_values - z_min) / (z_max - z_min), 0, 1) if z_max > z_min else z_values
+     z_scaling = 1.5 if detail_level == 'high' else 1.2 if detail_level == 'medium' else 1.0
+     z_values = z_values * z_scaling
 
+     x_grid = (x_grid / w - 0.5) * 1.5
+     y_grid = (y_grid / h - 0.5) * 1.5
      vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
 
      faces = []
      for i in range(resolution-1):
          for j in range(resolution-1):
 
              p2 = i * resolution + (j + 1)
              p3 = (i + 1) * resolution + j
              p4 = (i + 1) * resolution + (j + 1)
              v1 = vertices[p1]
              v2 = vertices[p2]
              v3 = vertices[p3]
              v4 = vertices[p4]
              norm1 = np.cross(v2-v1, v4-v1)
              norm2 = np.cross(v4-v3, v1-v3)
              if np.dot(norm1, norm2) >= 0:
                  faces.append([p1, p2, p4])
                  faces.append([p1, p4, p3])
 
                  faces.append([p1, p2, p3])
                  faces.append([p2, p4, p3])
 
      faces = np.array(faces)
      mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
 
      if image:
          img_array = np.array(image)
          vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
          for i in range(resolution):
              for j in range(resolution):
                  img_x = j * (img_array.shape[1] - 1) / (resolution - 1)
 
                  x1, y1 = min(x0 + 1, img_array.shape[1] - 1), min(y0 + 1, img_array.shape[0] - 1)
                  wx = img_x - x0
                  wy = img_y - y0
                  vertex_idx = i * resolution + j
                  if len(img_array.shape) == 3 and img_array.shape[2] == 4:
                      for c in range(4):
+                         vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
+                                                            wx*(1-wy)*img_array[y0, x1, c] +
+                                                            (1-wx)*wy*img_array[y1, x0, c] +
+                                                            wx*wy*img_array[y1, x1, c])
                  else:
+                     r, g, b = img_array[y0, x0]
+                     vertex_colors[vertex_idx, :3] = [r, g, b]
                      vertex_colors[vertex_idx, 3] = 255
          mesh.visual.vertex_colors = vertex_colors
 
+     if detail_level != 'high':
+         mesh = mesh.smoothed(method='laplacian', iterations=1)
+     mesh.fix_normals()
 
      return mesh
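
A hedged end-to-end sketch of exercising depth_to_mesh with a synthetic depth ramp and no texture image; the 64×64 ramp and output filename are invented, and the smoothing/normal-fixing behavior inside the function depends on the installed trimesh version:

    import numpy as np

    # Synthetic 64x64 depth ramp; image=None skips vertex coloring.
    depth = np.tile(np.linspace(0.0, 1.0, 64, dtype=np.float32), (64, 1))
    mesh = depth_to_mesh(depth, image=None, resolution=50, detail_level='high')
    mesh.export("ramp.glb")  # trimesh infers the format from the extension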
 
  @app.route('/health', methods=['GET'])
  def health_check():
      return jsonify({
          "status": "healthy",
          "model": "DPT-Large + Depth Anything",
+         "device": "cpu"
      }), 200
 
  @app.route('/progress/<job_id>', methods=['GET'])
 
          output_format = request.form.get('output_format', 'glb').lower()
          detail_level = request.form.get('detail_level', 'medium').lower()
          texture_quality = request.form.get('texture_quality', 'medium').lower()
      except ValueError:
          return jsonify({"error": "Invalid parameter values"}), 400
 
      if output_format not in ['obj', 'glb']:
          return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400
 
      if detail_level == 'high':
          mesh_resolution = min(int(mesh_resolution * 1.5), 150)
      elif detail_level == 'low':
 
      try:
          processing_jobs[job_id]['progress'] = 5
+         image = preprocess_image(filepath)
          processing_jobs[job_id]['progress'] = 10
 
          try:
              dpt_model, da_model, da_processor = load_models()
              processing_jobs[job_id]['progress'] = 30
 
              try:
                  def estimate_depth():
                      with torch.no_grad():
+                         dpt_result = dpt_model(image)
                          dpt_depth = dpt_result["depth"]
+
                          if da_model and da_processor:
+                             inputs = da_processor(images=image, return_tensors="pt")
                              inputs = {k: v.to("cpu") for k, v in inputs.items()}
                              outputs = da_model(**inputs)
                              da_depth = outputs.predicted_depth.squeeze()
 
                                  mode='bicubic',
                                  align_corners=False
                              ).squeeze()
                              fused_depth = fuse_depth_maps(dpt_depth, da_depth, detail_level)
                          else:
                              fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
                              if len(fused_depth.shape) > 2:
                                  fused_depth = np.mean(fused_depth, axis=2)
+                             p_low, p_high = np.percentile(fused_depth, [5, 95])
                              fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
+
                          return fused_depth
 
                  fused_depth, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
 
              processing_jobs[job_id]['status'] = 'completed'
              processing_jobs[job_id]['progress'] = 100
              print(f"Job {job_id} completed")
 
          except Exception as e:
 
      return jsonify({"error": "File not found"}), 404
 
  def cleanup_old_jobs():
      current_time = time.time()
      job_ids_to_remove = []
 
          "preview_url": job['preview_url'],
          "model_stats": model_stats,
          "created_at": job.get('created_at'),
+         "completed_at": job.get('completed_at')
      }), 200
 
  @app.route('/', methods=['GET'])
  def index():
      return jsonify({
+         "message": "Image to 3D API (DPT-Large + Depth Anything)",
          "endpoints": [
              "/convert",
              "/progress/<job_id>",
              "/download/<job_id>",
              "/preview/<job_id>",
+             "/model-info/<job_id>"
          ],
          "parameters": {
              "mesh_resolution": "Integer (50-150)",
              "output_format": "obj or glb",
              "detail_level": "low, medium, or high",
+             "texture_quality": "low, medium, or high"
          },
+         "description": "Creates high-quality 3D models from 2D images using DPT-Large and Depth Anything."
      }), 200
 
  if __name__ == '__main__':
      cleanup_old_jobs()
      port = int(os.environ.get('PORT', 7860))
+     app.run(host='0.0.0.0', port=port)
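
For reference, a hypothetical client for the endpoints listed in index(), using the host and port from app.run above. The upload field name and the response keys are assumptions, since neither appears in this diff:

    import requests

    with open("photo.jpg", "rb") as f:          # invented sample image
        resp = requests.post(
            "http://localhost:7860/convert",
            files={"image": f},                 # field name assumed
            data={
                "mesh_resolution": 100,
                "output_format": "glb",
                "detail_level": "medium",
                "texture_quality": "medium",
            },
        )
    job_id = resp.json().get("job_id")          # response key assumed
    print(requests.get(f"http://localhost:7860/progress/{job_id}").json())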