# Spaces: DawnC / VisionScout (Running on Zero)
# File size: 12,603 Bytes
# e6a18b7


import logging
import traceback
import numpy as np
from typing import Dict, List, Any, Optional, Tuple

logger = logging.getLogger(__name__)


class SceneViewpointAnalyzer:
    """
    Detect the viewpoint of a scene and recognise viewpoint-related patterns.

    The analyzer works on a list of detected-object dicts and focuses on
    telling viewpoints apart (aerial, eye level, ...) and on spotting special
    spatial patterns such as crosswalk intersections or multi-directional
    pedestrian flow.

    The object dicts are read through the keys ``class_id``,
    ``normalized_center`` (an ``[x, y]`` pair) and ``normalized_area``.
    The thresholds below are class attributes so that a subclass can tune
    them without touching the detection logic.
    """

    # Viewpoint reported when nothing better is known or when analysis fails.
    DEFAULT_VIEWPOINT = "eye_level"
    # ``class_id`` value that this analyzer treats as a pedestrian.
    PEDESTRIAN_CLASS_ID = 0
    # Minimum number of pedestrians before any pattern analysis is attempted.
    MIN_PEDESTRIANS_FOR_PATTERNS = 4
    # Minimum number of points needed for a meaningful cross pattern.
    MIN_POINTS_FOR_CROSS = 8
    # Minimum number of points needed to analyse movement directions.
    MIN_POINTS_FOR_DIRECTIONS = 6
    # Max distance from a centre line (normalized units) to count as "near".
    CENTER_TOLERANCE = 0.1
    # Points required near each centre line to call the layout a cross.
    MIN_POINTS_NEAR_CENTER = 3
    # Coordinate spread (fraction of the image) that counts as real movement.
    MOVEMENT_RANGE_THRESHOLD = 0.4
    # Relative size variance below which object sizes count as consistent.
    SIZE_VARIATION_THRESHOLD = 0.3
    # Minimum object counts for the individual aerial-view checks.
    MIN_OBJECTS_FOR_SIZE_PATTERN = 5
    MIN_OBJECTS_FOR_SIZE_INDICATOR = 3
    MIN_OBJECTS_FOR_GRID = 6
    MIN_GRID_LINES = 3
    HIGH_DENSITY_OBJECT_COUNT = 8
    # Confidence starts at the base value; each detected pattern adds a bonus.
    # The order is the order in which the bonuses are added.
    BASE_CONFIDENCE = 0.5
    CONFIDENCE_BONUSES = (
        ("crosswalk_intersection", 0.3),
        ("consistent_object_size", 0.2),
        ("multi_directional_movement", 0.1),
    )

    def __init__(self, enhanced_scene_describer=None):
        """
        Initialise the scene viewpoint analyzer.

        Args:
            enhanced_scene_describer: Optional describer instance. When it
                exposes a ``_detect_viewpoint`` method, that method supplies
                the basic viewpoint guess in ``detect_scene_viewpoint``.
        """
        self.enhanced_scene_describer = enhanced_scene_describer
        logger.info("SceneViewpointAnalyzer initialized successfully")

    def detect_viewpoint(self, detected_objects: List[Dict]) -> str:
        """
        Detect the viewpoint type of the image.

        Args:
            detected_objects: List of detected-object dicts.

        Returns:
            str: The detected viewpoint, ``"eye_level"`` on any failure.
        """
        try:
            viewpoint_info = self.detect_scene_viewpoint(detected_objects)
            return viewpoint_info.get("viewpoint", self.DEFAULT_VIEWPOINT)
        except Exception as e:
            logger.warning(f"Error detecting viewpoint: {str(e)}")
            return self.DEFAULT_VIEWPOINT

    def get_viewpoint_confidence(self, detected_objects: List[Dict]) -> Tuple[str, float]:
        """
        Return the detected viewpoint together with a confidence score.

        The confidence starts at ``BASE_CONFIDENCE`` and grows with every
        detected pattern listed in ``CONFIDENCE_BONUSES``; it is capped at 1.0.

        Args:
            detected_objects: List of detected-object dicts.

        Returns:
            Tuple[str, float]: ``(viewpoint, confidence)``;
            ``("eye_level", 0.5)`` on any failure.
        """
        try:
            viewpoint_info = self.detect_scene_viewpoint(detected_objects)
            viewpoint = viewpoint_info.get("viewpoint", self.DEFAULT_VIEWPOINT)
            patterns = viewpoint_info.get("patterns", [])

            confidence = self.BASE_CONFIDENCE
            for pattern, bonus in self.CONFIDENCE_BONUSES:
                if pattern in patterns:
                    confidence += bonus

            return viewpoint, min(confidence, 1.0)

        except Exception as e:
            logger.error(f"Error getting viewpoint confidence: {str(e)}")
            return "eye_level", 0.5

    def detect_scene_viewpoint(self, detected_objects: List[Dict]) -> Dict:
        """
        Detect the scene viewpoint and recognise special scene patterns.

        Args:
            detected_objects: List of detected-object dicts.

        Returns:
            Dict with the keys ``"viewpoint"`` (str) and ``"patterns"``
            (list of pattern names, in detection order). On empty input or
            on any error the result is
            ``{"viewpoint": "eye_level", "patterns": []}``.
        """
        try:
            if not detected_objects:
                logger.warning("No detected objects provided for viewpoint detection")
                return {"viewpoint": self.DEFAULT_VIEWPOINT, "patterns": []}

            patterns = []

            # Centres of all pedestrians. Pedestrians without a centre are
            # skipped so that one incomplete detection cannot abort the
            # whole analysis (including the size check further down).
            pedestrian_positions = [
                obj["normalized_center"]
                for obj in detected_objects
                if obj.get("class_id") == self.PEDESTRIAN_CLASS_ID
                and obj.get("normalized_center") is not None
            ]

            # Pattern analysis needs a minimum crowd; the two helpers apply
            # their own, stricter minimum point counts on top of this gate.
            if len(pedestrian_positions) >= self.MIN_PEDESTRIANS_FOR_PATTERNS:
                # A cross-shaped layout typically shows up in aerial views
                # of crosswalk intersections.
                if self._detect_cross_pattern(pedestrian_positions):
                    patterns.append("crosswalk_intersection")

                # Pedestrians spread along more than one axis.
                directions = self._analyze_movement_directions(pedestrian_positions)
                if len(directions) >= 2:
                    patterns.append("multi_directional_movement")

            # In aerial views object sizes tend to be more uniform because
            # all objects are at a similar distance from the camera.
            if len(detected_objects) >= self.MIN_OBJECTS_FOR_SIZE_PATTERN:
                sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
                variation = self._relative_size_variance(sizes)
                # ``None`` means there is no usable area data; that must not
                # be mistaken for "all objects have the same size".
                if variation is not None and variation < self.SIZE_VARIATION_THRESHOLD:
                    patterns.append("consistent_object_size")

            # Basic viewpoint guess, delegated to the describer if available.
            viewpoint = self.DEFAULT_VIEWPOINT
            describer = self.enhanced_scene_describer
            if describer and hasattr(describer, '_detect_viewpoint'):
                viewpoint = describer._detect_viewpoint(detected_objects)

            # A detected crosswalk intersection overrides the basic guess.
            if "crosswalk_intersection" in patterns:
                viewpoint = "aerial"

            result = {
                "viewpoint": viewpoint,
                "patterns": patterns
            }

            logger.info(f"Viewpoint detection completed: {viewpoint}, patterns: {patterns}")
            return result

        except Exception as e:
            logger.error(f"Error in scene viewpoint detection: {str(e)}")
            logger.error(traceback.format_exc())
            return {"viewpoint": self.DEFAULT_VIEWPOINT, "patterns": []}

    @staticmethod
    def _relative_size_variance(sizes: List[float]) -> Optional[float]:
        """
        Return the variance of ``sizes`` normalised by the squared mean.

        Dividing by the squared mean makes the value independent of the
        absolute object size.

        Args:
            sizes: Object areas.

        Returns:
            The normalised variance as a plain ``float``, or ``None`` when
            the list is empty or its mean is not positive (no usable data).
        """
        if len(sizes) == 0:
            return None
        mean_size = float(np.mean(sizes))
        if not mean_size > 0:
            return None
        return float(np.var(sizes)) / mean_size ** 2

    def _detect_cross_pattern(self, positions: List[List[float]]) -> bool:
        """
        Detect a cross-shaped arrangement in the given positions.

        Such a layout typically appears in aerial views of intersections,
        where pedestrians line up along both centre lines.

        Args:
            positions: List of positions ``[[x1, y1], [x2, y2], ...]``.

        Returns:
            True when enough points lie near both the vertical and the
            horizontal centre line; False otherwise, with too few points,
            or on any error.
        """
        try:
            if len(positions) < self.MIN_POINTS_FOR_CROSS:
                return False

            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]

            # The coordinate means define where the two centre lines are.
            x_mean = np.mean(x_coords)
            y_mean = np.mean(y_coords)

            tolerance = self.CENTER_TOLERANCE
            near_x_center = sum(1 for x in x_coords if abs(x - x_mean) < tolerance)
            near_y_center = sum(1 for y in y_coords if abs(y - y_mean) < tolerance)

            # Both centre lines need enough points clustered around them.
            is_cross_pattern = (
                near_x_center >= self.MIN_POINTS_NEAR_CENTER
                and near_y_center >= self.MIN_POINTS_NEAR_CENTER
            )

            if is_cross_pattern:
                logger.info(f"Cross pattern detected with {near_x_center} points near vertical center and {near_y_center} points near horizontal center")

            return is_cross_pattern

        except Exception as e:
            logger.error(f"Error detecting cross pattern: {str(e)}")
            logger.error(traceback.format_exc())
            return False

    def _analyze_movement_directions(self, positions: List[List[float]]) -> List[str]:
        """
        Infer the main movement directions from the spread of positions.

        A direction is reported when the coordinates along that axis span
        more than ``MOVEMENT_RANGE_THRESHOLD`` of the normalized image.

        Args:
            positions: List of positions ``[[x1, y1], [x2, y2], ...]``.

        Returns:
            List containing ``"horizontal"`` and/or ``"vertical"`` (in that
            order); empty with too few points or on any error.
        """
        try:
            if len(positions) < self.MIN_POINTS_FOR_DIRECTIONS:
                return []

            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]

            x_range = max(x_coords) - min(x_coords)
            y_range = max(y_coords) - min(y_coords)

            directions = []

            if x_range > self.MOVEMENT_RANGE_THRESHOLD:
                directions.append("horizontal")
                logger.debug(f"Horizontal movement detected with range: {x_range:.3f}")

            if y_range > self.MOVEMENT_RANGE_THRESHOLD:
                directions.append("vertical")
                logger.debug(f"Vertical movement detected with range: {y_range:.3f}")

            logger.info(f"Movement directions analyzed: {directions}")
            return directions

        except Exception as e:
            logger.error(f"Error analyzing movement directions: {str(e)}")
            logger.error(traceback.format_exc())
            return []

    @staticmethod
    def _empty_aerial_indicators() -> Dict:
        """Return a fresh indicator dict with every check negative."""
        return {
            "consistent_sizing": False,
            "grid_like_distribution": False,
            "high_object_density": False,
            "aerial_score": 0.0
        }

    def detect_aerial_view_indicators(self, detected_objects: List[Dict]) -> Dict:
        """
        Collect indicators that suggest an aerial (top-down) viewpoint.

        Args:
            detected_objects: List of detected-object dicts.

        Returns:
            Dict with the boolean flags ``"consistent_sizing"``,
            ``"grid_like_distribution"`` and ``"high_object_density"`` plus
            the float ``"aerial_score"`` (0.4 + 0.4 + 0.2 for the three
            flags). All values are plain Python ``bool``/``float``. Empty
            input or any error yields all-False flags and a score of 0.0.
        """
        try:
            indicators = self._empty_aerial_indicators()

            if not detected_objects:
                return indicators

            # Size consistency: aerial views usually show similar sizes.
            sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
            if len(sizes) >= self.MIN_OBJECTS_FOR_SIZE_INDICATOR:
                variation = self._relative_size_variance(sizes)
                indicators["consistent_sizing"] = (
                    variation is not None
                    and variation < self.SIZE_VARIATION_THRESHOLD
                )

            # Simplified grid check (e.g. a parking lot seen from above):
            # count distinct row/column positions at 0.1 precision.
            positions = [obj.get("normalized_center", [0.5, 0.5]) for obj in detected_objects]
            if len(positions) >= self.MIN_OBJECTS_FOR_GRID:
                x_unique = len({round(pos[0], 1) for pos in positions})
                y_unique = len({round(pos[1], 1) for pos in positions})
                indicators["grid_like_distribution"] = (
                    x_unique >= self.MIN_GRID_LINES
                    and y_unique >= self.MIN_GRID_LINES
                )

            # Object density: aerial views usually contain more objects.
            indicators["high_object_density"] = (
                len(detected_objects) >= self.HIGH_DENSITY_OBJECT_COUNT
            )

            # Weighted sum of the three indicators.
            score = 0.0
            if indicators["consistent_sizing"]:
                score += 0.4
            if indicators["grid_like_distribution"]:
                score += 0.4
            if indicators["high_object_density"]:
                score += 0.2

            indicators["aerial_score"] = score

            logger.info(f"Aerial view indicators: score={score:.2f}, consistent_sizing={indicators['consistent_sizing']}, grid_distribution={indicators['grid_like_distribution']}, high_density={indicators['high_object_density']}")
            return indicators

        except Exception as e:
            logger.error(f"Error detecting aerial view indicators: {str(e)}")
            logger.error(traceback.format_exc())
            return self._empty_aerial_indicators()