Simultaneous-Segmented-Depth-Prediction

Paused

App Files Files Community

Alessio Grancini committed on Feb 13

Commit

7ac5451

verified ·

1 Parent(s): 3ece440

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -47

app.py CHANGED Viewed

@@ -148,69 +148,65 @@ def get_camera_matrix(depth_estimator):
 @spaces.GPU
 def get_detection_data(image):
     """Run segmentation and depth estimation on a Base64-encoded image.

     Args:
         image: Base64 string holding the encoded input image.

     Returns:
         A dict with ``detections``, ``depth_map``, ``segmentation`` and
         ``image_size`` on success, or a dict with a single ``error`` key when
         the input is not a string, cannot be decoded, the outputs cannot be
         encoded, or any other exception is raised.
     """
     def _to_bgr_array(encoded):
         """Decode a Base64 string into a BGR NumPy array; return None on failure."""
         try:
             print(f"🔍 Received Base64 String (Truncated): {encoded[:50]}...")  # Debugging
             raw_bytes = base64.b64decode(encoded)
             rgb_pixels = np.array(Image.open(BytesIO(raw_bytes)))
             # OpenCV works on BGR channel order.
             return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
         except Exception as exc:
             print(f"🚨 Error decoding base64 image: {exc}")
             return None
     def _to_png_base64(frame):
         """Encode a NumPy image as a Base64 PNG string; return None on failure."""
         try:
             _status, png_buffer = cv2.imencode('.png', frame)
             return base64.b64encode(png_buffer).decode("utf-8")
         except Exception as exc:
             print(f"🚨 Error encoding image to Base64: {exc}")
             return None
     try:
         # Guard clause: only Base64 strings are accepted.
         if not isinstance(image, str):
             print("🚨 Error: Expected Base64 string but received:", type(image))
             return {"error": "Invalid input format. Expected Base64-encoded image."}
         frame = _to_bgr_array(image)
         if frame is None:
             return {"error": "Base64 decoding failed. Ensure correct encoding."}
         frame = utils.resize(frame)
         height, width = frame.shape[:2]
         # Segmentation first, then depth, both on the resized frame.
         segmented_frame, objects_data = img_seg.predict(frame)
         depthmap, depth_colormap = depth_estimator.make_prediction(frame)
         # Both outputs are encoded before either result is inspected.
         segmentation_payload = _to_png_base64(segmented_frame)
         depth_payload = _to_png_base64(depth_colormap)
         if None in (segmentation_payload, depth_payload):
             return {"error": "Failed to encode output images."}
         result = {
             "detections": objects_data,  # passed through unchanged
             "depth_map": depth_payload,
             "segmentation": segmentation_payload,
             "image_size": {"width": width, "height": height},
         }
         return result
     except Exception as exc:
         print(f"🚨 Error in get_detection_data: {str(exc)}")
         return {"error": str(exc)}
 def cancel():
     """Request cancellation by setting the module-level ``CANCEL_PROCESSING`` flag to True."""
     # Without the ``global`` declaration the assignment below would only bind
     # a function-local name and the module-level flag would never change.
     # NOTE(review): assumes other code in this module reads CANCEL_PROCESSING — confirm.
     global CANCEL_PROCESSING
     CANCEL_PROCESSING = True

 @spaces.GPU
 def get_detection_data(image_data):
     """Run segmentation and depth estimation on a Base64-encoded image.

     Args:
         image_data: Either a Base64 string, or a dict whose ``'data'`` entry
             holds that string. A leading ``data:...;base64,`` URL prefix is
             accepted and stripped.

     Returns:
         On success, a dict with:
           * ``detections``: one dict per detected object with ``class_id``,
             ``class_name``, ``center`` (``x``/``y``), ``depth`` and ``color``.
           * ``depth_map`` / ``segmentation``: Base64-encoded PNG images.
           * ``camera_matrix``: ``fx``, ``fy``, ``cx``, ``cy`` read from
             ``depth_estimator``.
         On failure, a dict with a single ``error`` key; no exception
         propagates to the caller.
     """
     # Half-size of the square window sampled around each object centre.
     half_window = 10
     try:
         # Handle both string and dict input formats.
         image = image_data.get('data', '') if isinstance(image_data, dict) else image_data
         if not isinstance(image, str):
             return {"error": f"Invalid input format. Expected string or dict with 'data' key, got {type(image)}"}
         try:
             img = _decode_base64_to_bgr(image)
         except Exception as e:
             return {"error": f"Base64 decoding failed: {str(e)}"}
         image = utils.resize(img)
         image_segmentation, objects_data = img_seg.predict(image)
         depthmap, depth_colormap = depth_estimator.make_prediction(image)
         # Prepare structured response with spatial data.
         processed_objects = []
         for cls_id, cls_name, center, _mask, color in objects_data:
             center_x, center_y = center[0], center[1]
             # Clamp the lower bounds so objects near the top/left edge do not
             # produce negative coordinates.
             # NOTE(review): presumably depth_at_center takes [x1, y1, x2, y2] — confirm.
             window = [
                 max(0, center_x - half_window),
                 max(0, center_y - half_window),
                 center_x + half_window,
                 center_y + half_window,
             ]
             depth_value = depth_at_center(depthmap, window)
             processed_objects.append({
                 "class_id": int(cls_id),
                 "class_name": cls_name,
                 "center": {"x": float(center_x), "y": float(center_y)},
                 "depth": float(depth_value),
                 "color": [int(c) for c in color]
             })
         # Encoding is done by a helper in this block so the function does not
         # depend on an encoder defined outside it.
         return {
             "detections": processed_objects,
             "depth_map": _encode_png_base64(depth_colormap),
             "segmentation": _encode_png_base64(image_segmentation),
             "camera_matrix": {
                 "fx": depth_estimator.fx_depth,
                 "fy": depth_estimator.fy_depth,
                 "cx": depth_estimator.cx_depth,
                 "cy": depth_estimator.cy_depth
             }
         }
     except Exception as e:
         print(f"🚨 Error in get_detection_data: {str(e)}")
         return {"error": str(e)}
 def _decode_base64_to_bgr(payload):
     """Decode a Base64 (or data-URL) image string into a 3-channel BGR NumPy array."""
     payload = payload.strip()
     if payload.startswith("data:"):
         # Keep only the part after the first comma of a ``data:`` URL.
         payload = payload.partition(",")[2]
     raw_bytes = base64.b64decode(payload)
     # Force 3-channel RGB so RGBA, greyscale and palette images do not break
     # the RGB->BGR conversion below.
     rgb_pixels = np.array(Image.open(BytesIO(raw_bytes)).convert("RGB"))
     return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
 def _encode_png_base64(frame):
     """Encode a NumPy image as a Base64 PNG string; raise ValueError if encoding fails."""
     encoded_ok, png_buffer = cv2.imencode(".png", frame)
     if not encoded_ok:
         raise ValueError("Failed to encode output image as PNG")
     return base64.b64encode(png_buffer).decode("utf-8")
 def cancel():
     """Request cancellation by setting the module-level ``CANCEL_PROCESSING`` flag to True."""
     # Without the ``global`` declaration the assignment below would only bind
     # a function-local name and the module-level flag would never change.
     # NOTE(review): assumes other code in this module reads CANCEL_PROCESSING — confirm.
     global CANCEL_PROCESSING
     CANCEL_PROCESSING = True