Spaces:
Running
on
Zero
Running
on
Zero
File size: 12,603 Bytes
e6a18b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
import logging
import traceback
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class SceneViewpointAnalyzer:
    """Detect the viewpoint of a scene and recognise spatial patterns.

    The analyzer works on a list of detected-object dictionaries and looks at
    how the objects are laid out (pedestrian positions, object sizes, object
    count) to recognise patterns such as a crosswalk intersection or
    multi-directional pedestrian flow, and to decide whether the scene is
    seen from above ("aerial") or from the default "eye_level" viewpoint.

    Keys read from each object dictionary (all optional):
        class_id: Compared against ``PEDESTRIAN_CLASS_ID``.
        normalized_center: ``[x, y]`` centre of the object. Presumably in the
            0-1 range -- the distance thresholds below assume that; confirm
            against the producer of these dictionaries.
        normalized_area: Area of the object (same normalisation caveat).
    """

    # Viewpoint reported when nothing indicates otherwise.
    DEFAULT_VIEWPOINT = "eye_level"
    # ``class_id`` value that is treated as a pedestrian.
    PEDESTRIAN_CLASS_ID = 0
    # Minimum pedestrians (with a known centre) before pattern analysis runs.
    MIN_PEDESTRIANS_FOR_PATTERNS = 4
    # Minimum objects before size consistency is evaluated for patterns.
    MIN_OBJECTS_FOR_SIZE_CHECK = 5
    # Relative variance (var / mean^2) below which sizes count as consistent.
    SIZE_CONSISTENCY_THRESHOLD = 0.3
    # Minimum points needed for a meaningful cross shape.
    MIN_POINTS_FOR_CROSS = 8
    # Maximum distance from the mean coordinate to count as "on the axis".
    CENTER_TOLERANCE = 0.1
    # Points required near each axis for a cross pattern.
    MIN_POINTS_NEAR_CENTER = 3
    # Minimum points needed before movement directions are analysed.
    MIN_POINTS_FOR_DIRECTIONS = 6
    # Coordinate spread above which an axis counts as a movement direction.
    MOVEMENT_RANGE_THRESHOLD = 0.4
    # Object count from which a scene counts as densely populated.
    HIGH_DENSITY_OBJECT_COUNT = 8
    # Base confidence and the bonus each detected pattern adds to it.
    BASE_CONFIDENCE = 0.5
    PATTERN_CONFIDENCE_BONUS = (
        ("crosswalk_intersection", 0.3),
        ("consistent_object_size", 0.2),
        ("multi_directional_movement", 0.1),
    )

    def __init__(self, enhanced_scene_describer=None):
        """Initialise the analyzer.

        Args:
            enhanced_scene_describer: Optional object used for the base
                viewpoint decision. It is only consulted when it exposes a
                ``_detect_viewpoint(detected_objects)`` method.
        """
        self.enhanced_scene_describer = enhanced_scene_describer
        logger.info("SceneViewpointAnalyzer initialized successfully")

    def detect_viewpoint(self, detected_objects: List[Dict]) -> str:
        """Return the viewpoint type detected for the given objects.

        Args:
            detected_objects: List of detected-object dictionaries.

        Returns:
            The viewpoint name; "eye_level" when detection fails.
        """
        try:
            viewpoint_info = self.detect_scene_viewpoint(detected_objects)
            return viewpoint_info.get("viewpoint", self.DEFAULT_VIEWPOINT)
        except Exception as e:
            logger.warning("Error detecting viewpoint: %s", e)
            return self.DEFAULT_VIEWPOINT

    def get_viewpoint_confidence(self, detected_objects: List[Dict]) -> Tuple[str, float]:
        """Return the detected viewpoint together with a confidence value.

        The confidence starts at ``BASE_CONFIDENCE`` and grows with every
        recognised pattern, capped at 1.0.

        Args:
            detected_objects: List of detected-object dictionaries.

        Returns:
            ``(viewpoint, confidence)``; ``("eye_level", 0.5)`` on failure.
        """
        try:
            viewpoint_info = self.detect_scene_viewpoint(detected_objects)
            viewpoint = viewpoint_info.get("viewpoint", self.DEFAULT_VIEWPOINT)
            patterns = viewpoint_info.get("patterns", [])

            confidence = self.BASE_CONFIDENCE
            for pattern_name, bonus in self.PATTERN_CONFIDENCE_BONUS:
                if pattern_name in patterns:
                    confidence += bonus

            return viewpoint, min(confidence, 1.0)
        except Exception as e:
            logger.error("Error getting viewpoint confidence: %s", e)
            return self.DEFAULT_VIEWPOINT, 0.5

    def detect_scene_viewpoint(self, detected_objects: List[Dict]) -> Dict:
        """Detect the viewpoint and recognise special scene patterns.

        Args:
            detected_objects: List of detected-object dictionaries.

        Returns:
            A dictionary with the keys ``"viewpoint"`` (str) and
            ``"patterns"`` (list of pattern names). On empty input or an
            unexpected error the result is "eye_level" with no patterns.
        """
        try:
            if not detected_objects:
                logger.warning("No detected objects provided for viewpoint detection")
                return {"viewpoint": self.DEFAULT_VIEWPOINT, "patterns": []}

            patterns: List[str] = []

            # Only pedestrians with a known centre can take part in position
            # analysis; skipping the others keeps one incomplete detection
            # from aborting the whole analysis.
            pedestrian_positions = [
                obj["normalized_center"]
                for obj in detected_objects
                if obj.get("class_id") == self.PEDESTRIAN_CLASS_ID
                and obj.get("normalized_center") is not None
            ]

            if len(pedestrian_positions) >= self.MIN_PEDESTRIANS_FOR_PATTERNS:
                # A cross-shaped crowd is typical for a crosswalk
                # intersection seen from above.
                if self._detect_cross_pattern(pedestrian_positions):
                    patterns.append("crosswalk_intersection")

                # Pedestrians spread along more than one axis.
                directions = self._analyze_movement_directions(pedestrian_positions)
                if len(directions) >= 2:
                    patterns.append("multi_directional_movement")

            # Seen from above, objects are at a similar distance and so tend
            # to have similar sizes, unlike a ground-level view.
            if len(detected_objects) >= self.MIN_OBJECTS_FOR_SIZE_CHECK:
                sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
                relative_variance = self._relative_size_variance(sizes)
                # None means there is no usable size information, which must
                # not be mistaken for "all sizes are equal".
                if (
                    relative_variance is not None
                    and relative_variance < self.SIZE_CONSISTENCY_THRESHOLD
                ):
                    patterns.append("consistent_object_size")

            viewpoint = self._base_viewpoint(detected_objects)

            # A crosswalk intersection pattern overrides a non-aerial base
            # decision.
            if "crosswalk_intersection" in patterns and viewpoint != "aerial":
                viewpoint = "aerial"

            logger.info("Viewpoint detection completed: %s, patterns: %s", viewpoint, patterns)
            return {"viewpoint": viewpoint, "patterns": patterns}
        except Exception as e:
            logger.exception("Error in scene viewpoint detection: %s", e)
            return {"viewpoint": self.DEFAULT_VIEWPOINT, "patterns": []}

    def _base_viewpoint(self, detected_objects: List[Dict]) -> str:
        """Ask the optional scene describer for the base viewpoint.

        A failure inside the describer falls back to the default viewpoint
        so that independently computed patterns are not discarded.
        """
        describer = self.enhanced_scene_describer
        if not (describer and hasattr(describer, "_detect_viewpoint")):
            return self.DEFAULT_VIEWPOINT
        try:
            return describer._detect_viewpoint(detected_objects)
        except Exception as e:
            logger.warning("Base viewpoint detection failed, using default: %s", e)
            return self.DEFAULT_VIEWPOINT

    @staticmethod
    def _relative_size_variance(sizes: List[float]) -> Optional[float]:
        """Return ``var / mean**2`` of ``sizes``, or None if it is undefined.

        Dividing by the squared mean makes the value independent of the
        overall object scale. None is returned for empty input or when the
        mean is not positive (no usable size information).
        """
        if not sizes:
            return None
        values = np.asarray(sizes, dtype=float)
        mean_size = values.mean()
        if not mean_size > 0:
            return None
        return float(values.var() / mean_size ** 2)

    def _detect_cross_pattern(self, positions: List[List[float]]) -> bool:
        """Check whether the positions form a cross shape.

        Such a layout typically shows up when an intersection is seen from
        above and pedestrians line up along two perpendicular axes.

        Args:
            positions: Positions as ``[[x1, y1], [x2, y2], ...]``.

        Returns:
            True when enough points cluster around both the mean x and the
            mean y coordinate; False otherwise or on error.
        """
        try:
            if len(positions) < self.MIN_POINTS_FOR_CROSS:
                return False

            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]

            # The mean coordinates define the two centre lines of the cross.
            x_mean = np.mean(x_coords)
            y_mean = np.mean(y_coords)

            tolerance = self.CENTER_TOLERANCE
            near_x_center = sum(1 for x in x_coords if abs(x - x_mean) < tolerance)
            near_y_center = sum(1 for y in y_coords if abs(y - y_mean) < tolerance)

            required = self.MIN_POINTS_NEAR_CENTER
            is_cross_pattern = near_x_center >= required and near_y_center >= required
            if is_cross_pattern:
                logger.info(
                    "Cross pattern detected with %d points near vertical center "
                    "and %d points near horizontal center",
                    near_x_center,
                    near_y_center,
                )
            return is_cross_pattern
        except Exception as e:
            logger.exception("Error detecting cross pattern: %s", e)
            return False

    def _analyze_movement_directions(self, positions: List[List[float]]) -> List[str]:
        """Infer the main movement directions from the spread of positions.

        An axis counts as a direction when the positions span more than
        ``MOVEMENT_RANGE_THRESHOLD`` along it.

        Args:
            positions: Positions as ``[[x1, y1], [x2, y2], ...]``.

        Returns:
            A list containing "horizontal" and/or "vertical"; empty when
            there are too few points or on error.
        """
        try:
            if len(positions) < self.MIN_POINTS_FOR_DIRECTIONS:
                return []

            x_coords = [pos[0] for pos in positions]
            y_coords = [pos[1] for pos in positions]
            x_range = max(x_coords) - min(x_coords)
            y_range = max(y_coords) - min(y_coords)

            directions = []
            if x_range > self.MOVEMENT_RANGE_THRESHOLD:
                directions.append("horizontal")
                logger.debug("Horizontal movement detected with range: %.3f", x_range)
            if y_range > self.MOVEMENT_RANGE_THRESHOLD:
                directions.append("vertical")
                logger.debug("Vertical movement detected with range: %.3f", y_range)

            logger.info("Movement directions analyzed: %s", directions)
            return directions
        except Exception as e:
            logger.exception("Error analyzing movement directions: %s", e)
            return []

    @staticmethod
    def _blank_aerial_indicators() -> Dict:
        """Return a fresh indicator dictionary with nothing detected."""
        return {
            "consistent_sizing": False,
            "grid_like_distribution": False,
            "high_object_density": False,
            "aerial_score": 0.0,
        }

    def detect_aerial_view_indicators(self, detected_objects: List[Dict]) -> Dict:
        """Collect indicators that suggest the scene is seen from above.

        Args:
            detected_objects: List of detected-object dictionaries.

        Returns:
            A dictionary with the boolean flags ``consistent_sizing``,
            ``grid_like_distribution`` and ``high_object_density`` plus the
            combined ``aerial_score`` (0.4 + 0.4 + 0.2 for the three flags).
            All flags are False and the score is 0.0 on empty input or error.
        """
        try:
            indicators = self._blank_aerial_indicators()
            if not detected_objects:
                return indicators

            # Size consistency: objects seen from above tend to be similar
            # in size.
            sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
            if len(sizes) >= 3:
                relative_variance = self._relative_size_variance(sizes)
                # bool() keeps the flag a plain Python bool, not numpy.bool_.
                indicators["consistent_sizing"] = bool(
                    relative_variance is not None
                    and relative_variance < self.SIZE_CONSISTENCY_THRESHOLD
                )

            # Simplified grid check (e.g. a car park seen from above):
            # coordinates are bucketed to one decimal place and at least
            # three distinct buckets are required on each axis.
            positions = [obj.get("normalized_center", [0.5, 0.5]) for obj in detected_objects]
            if len(positions) >= 6:
                x_unique = len({round(pos[0], 1) for pos in positions})
                y_unique = len({round(pos[1], 1) for pos in positions})
                indicators["grid_like_distribution"] = x_unique >= 3 and y_unique >= 3

            # Object density: the source treats a high object count as a
            # hint of an aerial view.
            indicators["high_object_density"] = (
                len(detected_objects) >= self.HIGH_DENSITY_OBJECT_COUNT
            )

            score = 0.0
            if indicators["consistent_sizing"]:
                score += 0.4
            if indicators["grid_like_distribution"]:
                score += 0.4
            if indicators["high_object_density"]:
                score += 0.2
            indicators["aerial_score"] = score

            logger.info(
                "Aerial view indicators: score=%.2f, consistent_sizing=%s, "
                "grid_distribution=%s, high_density=%s",
                score,
                indicators["consistent_sizing"],
                indicators["grid_like_distribution"],
                indicators["high_object_density"],
            )
            return indicators
        except Exception as e:
            logger.exception("Error detecting aerial view indicators: %s", e)
            return self._blank_aerial_indicators()
|