# VisionScout / scene_viewpoint_analyzer.py
import logging
import traceback
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
logger = logging.getLogger(__name__)
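
# Illustrative example of the detection dict shape this module expects, inferred
# from the keys accessed in the methods below (an assumption; the upstream
# detector may supply additional fields, and the values here are made up):
#
#     {
#         "class_id": 0,                      # COCO class id (0 = person)
#         "normalized_center": [0.52, 0.48],  # (x, y) in 0-1 image coordinates
#         "normalized_area": 0.015,           # bounding-box area / image area
#     }
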
class SceneViewpointAnalyzer:
"""
負責場景視角檢測和模式識別
專注於檢測場景視角(俯視、平視等)並識別特殊場景模式(如十字路口、人流方向等)
提供詳細的場景空間分析和視角相關的場景理解功能
"""
def __init__(self, enhanced_scene_describer=None):
"""
初始化場景視角分析器
Args:
enhanced_scene_describer: 增強場景描述器實例,用於基本視角檢測
"""
try:
self.enhanced_scene_describer = enhanced_scene_describer
logger.info("SceneViewpointAnalyzer initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize SceneViewpointAnalyzer: {str(e)}")
logger.error(traceback.format_exc())
raise
def detect_viewpoint(self, detected_objects: List[Dict]) -> str:
"""
檢測圖像視角類型
Args:
detected_objects: 檢測到的物件列表
Returns:
str: 檢測到的視角類型
"""
try:
            # Use the internal scene viewpoint detection method
viewpoint_info = self.detect_scene_viewpoint(detected_objects)
return viewpoint_info.get("viewpoint", "eye_level")
except Exception as e:
logger.warning(f"Error detecting viewpoint: {str(e)}")
return "eye_level"
def get_viewpoint_confidence(self, detected_objects: List[Dict]) -> Tuple[str, float]:
"""
獲取視角檢測結果及其信心度
Args:
detected_objects: 檢測到的物件列表
Returns:
Tuple[str, float]: (視角類型, 信心度)
"""
try:
viewpoint_info = self.detect_scene_viewpoint(detected_objects)
viewpoint = viewpoint_info.get("viewpoint", "eye_level")
            # Compute confidence from the detected patterns
            patterns = viewpoint_info.get("patterns", [])
            confidence = 0.5  # base confidence
if "crosswalk_intersection" in patterns:
confidence += 0.3
if "consistent_object_size" in patterns:
confidence += 0.2
if "multi_directional_movement" in patterns:
confidence += 0.1
confidence = min(confidence, 1.0)
return viewpoint, confidence
except Exception as e:
logger.error(f"Error getting viewpoint confidence: {str(e)}")
return "eye_level", 0.5
def detect_scene_viewpoint(self, detected_objects: List[Dict]) -> Dict:
"""
檢測場景視角並識別特殊場景模式
Args:
detected_objects: 檢測到的物件列表
Returns:
包含視角和場景模式資訊的字典
"""
try:
if not detected_objects:
logger.warning("No detected objects provided for viewpoint detection")
return {"viewpoint": "eye_level", "patterns": []}
            # Extract information from object positions
            patterns = []

            # Detect pedestrian position patterns - filter out all pedestrian objects
            pedestrian_objs = [obj for obj in detected_objects if obj.get("class_id") == 0]

            # Require at least 4 pedestrians before attempting pattern analysis
            # (the cross-pattern check itself needs 8 or more points to fire)
            if len(pedestrian_objs) >= 4:
                # Extract the pedestrians' normalized center coordinates for pattern analysis
                pedestrian_positions = [obj["normalized_center"] for obj in pedestrian_objs]

                # Detect a cross pattern - typically seen in aerial views of crosswalk intersections
                if self._detect_cross_pattern(pedestrian_positions):
                    patterns.append("crosswalk_intersection")

                # Detect multi-directional pedestrian flow - check whether pedestrians
                # spread along more than one direction
                directions = self._analyze_movement_directions(pedestrian_positions)
                if len(directions) >= 2:
                    patterns.append("multi_directional_movement")

            # Check object size consistency - in aerial views object sizes tend to be
            # more uniform because distances are roughly even, unlike ground-level
            # views where near/far differences dominate
            if len(detected_objects) >= 5:
                sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
                # Normalize the variance by the squared mean so it is not dominated by scale
                size_variance = np.var(sizes) / (np.mean(sizes) ** 2) if np.mean(sizes) > 0 else 0
                # Low variance means uniform sizes, which suggests an aerial viewpoint
                if size_variance < 0.3:
                    patterns.append("consistent_object_size")

            # Basic viewpoint detection - delegate to the enhanced scene describer if available
            viewpoint = "eye_level"  # default
            if self.enhanced_scene_describer and hasattr(self.enhanced_scene_describer, '_detect_viewpoint'):
                viewpoint = self.enhanced_scene_describer._detect_viewpoint(detected_objects)

            # Refine the viewpoint using the detected patterns: if a crosswalk
            # intersection was found but the viewpoint is not aerial, the pattern
            # evidence takes precedence
            if "crosswalk_intersection" in patterns and viewpoint != "aerial":
                viewpoint = "aerial"
result = {
"viewpoint": viewpoint,
"patterns": patterns
}
logger.info(f"Viewpoint detection completed: {viewpoint}, patterns: {patterns}")
return result
except Exception as e:
logger.error(f"Error in scene viewpoint detection: {str(e)}")
logger.error(traceback.format_exc())
return {"viewpoint": "eye_level", "patterns": []}
def _detect_cross_pattern(self, positions: List[List[float]]) -> bool:
"""
檢測位置中的十字交叉模式
這種模式通常出現在十字路口的俯視圖中,行人分布呈現十字形
Args:
positions: 位置列表 [[x1, y1], [x2, y2], ...]
Returns:
是否檢測到十字交叉模式
"""
try:
            if len(positions) < 8:  # need enough points to form a meaningful cross pattern
                return False

            # Extract x and y coordinates for analysis
            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]

            # Compute the coordinate means, which define the center lines
            x_mean = np.mean(x_coords)
            y_mean = np.mean(y_coords)

            # Count points close to each center line; if enough points cluster
            # around both the vertical and horizontal center lines, the layout
            # likely forms a cross
            near_x_center = sum(1 for x in x_coords if abs(x - x_mean) < 0.1)  # tolerate 10% deviation
            near_y_center = sum(1 for y in y_coords if abs(y - y_mean) < 0.1)  # tolerate 10% deviation

            # Cross-pattern criterion: enough points cluster along both center lines
            is_cross_pattern = near_x_center >= 3 and near_y_center >= 3
if is_cross_pattern:
logger.info(f"Cross pattern detected with {near_x_center} points near vertical center and {near_y_center} points near horizontal center")
return is_cross_pattern
except Exception as e:
logger.error(f"Error detecting cross pattern: {str(e)}")
logger.error(traceback.format_exc())
return False
def _analyze_movement_directions(self, positions: List[List[float]]) -> List[str]:
"""
分析位置中的移動方向
通過分析座標分布範圍來推斷主要的移動方向
Args:
positions: 位置列表 [[x1, y1], [x2, y2], ...]
Returns:
檢測到的主要方向列表
"""
try:
            if len(positions) < 6:  # need enough points to analyze directionality
                return []

            # Extract x and y coordinates
            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]
            directions = []

            # Horizontal movement analysis (left/right): the x-coordinate range
            # measures how spread out the points are horizontally
            x_range = max(x_coords) - min(x_coords)

            # Vertical movement analysis (up/down): the y-coordinate range
            # measures how spread out the points are vertically
            y_range = max(y_coords) - min(y_coords)

            # A sufficiently large range indicates clear movement or spread along
            # that axis; 40% of the image extent is treated as meaningful
if x_range > 0.4:
directions.append("horizontal")
logger.debug(f"Horizontal movement detected with range: {x_range:.3f}")
if y_range > 0.4:
directions.append("vertical")
logger.debug(f"Vertical movement detected with range: {y_range:.3f}")
logger.info(f"Movement directions analyzed: {directions}")
return directions
except Exception as e:
logger.error(f"Error analyzing movement directions: {str(e)}")
logger.error(traceback.format_exc())
return []
def detect_aerial_view_indicators(self, detected_objects: List[Dict]) -> Dict:
"""
檢測俯視角度的指標
分析物件分布特徵來判斷是否為俯視角度
Args:
detected_objects: 檢測到的物件列表
Returns:
包含俯視角度指標的字典
"""
try:
indicators = {
"consistent_sizing": False,
"grid_like_distribution": False,
"high_object_density": False,
"aerial_score": 0.0
}
if not detected_objects:
return indicators
            # Check object size consistency
            sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
            if len(sizes) >= 3:
                size_variance = np.var(sizes) / (np.mean(sizes) ** 2) if np.mean(sizes) > 0 else 1
                # Aerial views usually show fairly uniform object sizes
                indicators["consistent_sizing"] = size_variance < 0.3

            # Check for a grid-like distribution (e.g. an aerial view of a parking lot)
            positions = [obj.get("normalized_center", [0.5, 0.5]) for obj in detected_objects]
            if len(positions) >= 6:
                # Simplified grid detection: look for regular row/column placement
                x_coords = [pos[0] for pos in positions]
                y_coords = [pos[1] for pos in positions]
                # Check whether the coordinates fall into a roughly regular grid
                x_unique = len(set([round(x, 1) for x in x_coords]))  # rounded to 0.1 precision
                y_unique = len(set([round(y, 1) for y in y_coords]))
                # Several distinct regular positions along both x and y suggests a grid layout
                indicators["grid_like_distribution"] = x_unique >= 3 and y_unique >= 3

            # Check object density - aerial views usually capture more objects
            total_objects = len(detected_objects)
            indicators["high_object_density"] = total_objects >= 8

            # Compute the aerial-view score
score = 0
if indicators["consistent_sizing"]:
score += 0.4
if indicators["grid_like_distribution"]:
score += 0.4
if indicators["high_object_density"]:
score += 0.2
indicators["aerial_score"] = score
logger.info(f"Aerial view indicators: score={score:.2f}, consistent_sizing={indicators['consistent_sizing']}, grid_distribution={indicators['grid_like_distribution']}, high_density={indicators['high_object_density']}")
return indicators
except Exception as e:
logger.error(f"Error detecting aerial view indicators: {str(e)}")
logger.error(traceback.format_exc())
return {
"consistent_sizing": False,
"grid_like_distribution": False,
"high_object_density": False,
"aerial_score": 0.0
}
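

# Minimal usage sketch (illustrative only): builds a few synthetic detection
# dicts with the keys this module reads ("class_id", "normalized_center",
# "normalized_area") and runs the analyzer without an enhanced scene describer,
# so the base viewpoint falls back to "eye_level" unless a crosswalk-style
# pattern overrides it. The values below are assumptions, not real detections.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Eight pedestrians placed along the vertical and horizontal center lines,
    # mimicking a crosswalk intersection seen from above.
    demo_objects = [
        {"class_id": 0, "normalized_center": [0.5, y], "normalized_area": 0.01}
        for y in (0.1, 0.3, 0.7, 0.9)
    ] + [
        {"class_id": 0, "normalized_center": [x, 0.5], "normalized_area": 0.01}
        for x in (0.1, 0.3, 0.7, 0.9)
    ]

    analyzer = SceneViewpointAnalyzer()
    print(analyzer.detect_scene_viewpoint(demo_objects))
    print(analyzer.get_viewpoint_confidence(demo_objects))
    print(analyzer.detect_aerial_view_indicators(demo_objects))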