"""Landmark detection helpers built on top of YOLO detections and CLIP."""

import logging
import re
import traceback
from typing import Any, Dict, List, Optional, Tuple

import numpy as np

# Import failures are recorded instead of raised so that the fallbacks in
# LandmarkProcessingManager (empty landmark tables, "classifier cannot be
# initialized" path) are actually reachable.
_IMPORT_ERRORS: Dict[str, ImportError] = {}

try:
    from PIL import Image
except ImportError as _exc:
    # Only process_unknown_objects needs Pillow; it checks for None.
    _IMPORT_ERRORS["PIL"] = _exc
    Image = None

try:
    from clip_zero_shot_classifier import CLIPZeroShotClassifier
except ImportError as _exc:
    _IMPORT_ERRORS["CLIPZeroShotClassifier"] = _exc
    CLIPZeroShotClassifier = None

try:
    from landmark_activities import LANDMARK_ACTIVITIES
except ImportError as _exc:
    _IMPORT_ERRORS["LANDMARK_ACTIVITIES"] = _exc
    LANDMARK_ACTIVITIES = None

try:
    from landmark_data import ALL_LANDMARKS
except ImportError as _exc:
    _IMPORT_ERRORS["ALL_LANDMARKS"] = _exc
    ALL_LANDMARKS = None


# Detector class IDs that process_unknown_objects treats as "building-like".
# NOTE(review): if the detector uses the COCO label map these IDs are not
# buildings (e.g. 11 would be "stop sign") - confirm against the model in use.
_BUILDING_CLASS_IDS = frozenset({11, 12, 13, *range(53, 66)})

# Base confidence threshold handed to the classifier and scaled per type.
_BASE_LANDMARK_THRESHOLD = 0.25

# (landmark type, type threshold, ID keywords) used when neither the
# classifier nor this object can determine the landmark type.
_LANDMARK_TYPE_KEYWORDS = (
    ("natural", 0.6,
     ("mountain", "canyon", "waterfall", "lake", "river", "natural")),
    ("skyscraper", 0.4, ("skyscraper", "building", "tower", "tall")),
    ("monument", 0.5, ("monument", "memorial", "statue", "historical")),
)

# A run of capitalised words ("Space Needle"); matched case-sensitively so
# ordinary lowercase prose is never mistaken for a proper name.
_PROPER_NAME = r"[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*"
# Same, but allowing "City, Country" style commas.
_PLACE_NAME = r"[A-Z][a-zA-Z]*(?:,?\s+[A-Z][a-zA-Z]*)*"

# Generic landmark phrasings and their neutral replacements, applied in
# order. Fixed phrases are case-insensitive via scoped (?i:...) groups or
# re.IGNORECASE; the proper-name parts stay case-sensitive on purpose.
_GENERIC_LANDMARK_PATTERNS = [
    # Landmark + place
    (re.compile(r"(?i:an iconic structure in) " + _PLACE_NAME),
     "an urban structure"),
    (re.compile(r"(?i:a famous (?:monument|tower|landmark) in) "
                + _PLACE_NAME),
     "an urban structure"),
    (re.compile(r"\b(?i:the) " + _PROPER_NAME + r" Tower"), "the tower"),
    (re.compile(r"\b(?i:the) " + _PROPER_NAME + r" Building"),
     "the building"),
    (re.compile(r"the CN Tower", re.IGNORECASE), "the tower"),
    (re.compile(r"\b" + _PROPER_NAME + r" Tower"), "tall structure"),
    # Position relative to a landmark
    (re.compile(r"(?i:(?:centered|built|located|positioned) around the) "
                + _PROPER_NAME + r" (?:Tower|Monument|Landmark)"),
     "located in this area"),
    # Activities at a landmark
    (re.compile(r"(?i:(sightseeing|guided tours|cultural tourism) "
                r"(?:at|around|near) )"
                r"(?:(?i:this landmark)|(?i:the) " + _PROPER_NAME + r")"),
     r"\1 in this area"),
    # Generic landmark adjectives
    (re.compile(r"this (?:famous|iconic|historic|well-known) "
                r"(?:landmark|monument|tower|structure)", re.IGNORECASE),
     "this urban structure"),
    (re.compile(r"landmark scene", re.IGNORECASE), "urban scene"),
    (re.compile(r"tourist destination", re.IGNORECASE), "urban area"),
    (re.compile(r"tourist attraction", re.IGNORECASE), "urban area"),
]


class LandmarkProcessingManager:
    """
    Handle all landmark-related detection and post-processing.

    Covers landmark recognition for regions YOLO could not identify,
    construction of landmark objects, and scrubbing landmark references
    from generated text.
    """

    def __init__(self, enable_landmark: bool = True, use_clip: bool = True):
        """
        Initialize the landmark processing manager.

        Args:
            enable_landmark: Whether landmark detection is enabled.
            use_clip: Whether CLIP analysis is enabled.
        """
        self.logger = logging.getLogger(__name__)
        self.enable_landmark = enable_landmark
        self.use_clip = use_clip
        # Detection stays off until update_use_landmark_detection_status()
        # turns it on. Defining the flag here keeps that behaviour while
        # removing the need to probe for the attribute.
        self.use_landmark_detection = False

        # Landmark lookup tables
        self.landmark_activities = {}
        self.all_landmarks = {}
        self._load_landmark_data()

        # The landmark classifier is created on demand
        self.landmark_classifier = None

    def _load_landmark_data(self):
        """Load the landmark tables, falling back to empty dicts."""
        if LANDMARK_ACTIVITIES is not None:
            self.landmark_activities = LANDMARK_ACTIVITIES
            self.logger.info("Loaded LANDMARK_ACTIVITIES successfully")
        else:
            self.logger.warning(
                f"Failed to load LANDMARK_ACTIVITIES: "
                f"{_IMPORT_ERRORS.get('LANDMARK_ACTIVITIES')}")
            self.landmark_activities = {}

        if ALL_LANDMARKS is not None:
            self.all_landmarks = ALL_LANDMARKS
            self.logger.info("Loaded ALL_LANDMARKS successfully")
        else:
            self.logger.warning(
                f"Failed to load ALL_LANDMARKS: "
                f"{_IMPORT_ERRORS.get('ALL_LANDMARKS')}")
            self.all_landmarks = {}

    def set_landmark_classifier(self, landmark_classifier):
        """
        Set the landmark classifier instance.

        Args:
            landmark_classifier: A CLIPZeroShotClassifier instance.
        """
        self.landmark_classifier = landmark_classifier

    def process_unknown_objects(self, detection_result, detected_objects,
                                clip_analyzer=None):
        """
        Run landmark detection on regions YOLO missed or scored low.

        Args:
            detection_result: YOLO detection result; ``orig_img`` and
                ``boxes`` (``xyxy``/``conf``/``cls``) are read when present.
            detected_objects: List of already recognised object dicts.
                Accepted landmarks are appended to this list in place.
            clip_analyzer: CLIP analyzer used to create the landmark
                classifier on demand via ``get_clip_instance()``.

        Returns:
            tuple: (updated object list, list of landmark objects). When
            landmark detection is disabled the first element is a new list
            with every ``is_landmark`` object removed.
        """
        enabled = (self.enable_landmark and self.use_clip
                   and getattr(self, "use_landmark_detection", False))
        if not enabled:
            # Make sure no landmark objects leak through while disabled.
            cleaned_objects = [obj for obj in detected_objects
                               if not obj.get("is_landmark", False)]
            return cleaned_objects, []

        try:
            original_image = getattr(detection_result, "orig_img", None)
            if original_image is None:
                self.logger.warning(
                    "Original image not available for landmark detection")
                return detected_objects, []

            if Image is None:
                self.logger.warning(
                    "Pillow is not installed; landmark detection is unavailable")
                return detected_objects, []

            pil_image = self._to_pil_image(original_image)
            if pil_image is None:
                return detected_objects, []
            w, h = pil_image.size

            search_boxes = self._collect_search_boxes(
                detection_result, detected_objects, w, h)

            if not self._ensure_landmark_classifier(clip_analyzer):
                return detected_objects, []

            try:
                if not search_boxes:
                    # Nothing qualified: search the whole image instead.
                    search_boxes.append([0, 0, w, h])
                landmark_results = (
                    self.landmark_classifier.intelligent_landmark_search(
                        pil_image,
                        yolo_boxes=search_boxes,
                        base_threshold=_BASE_LANDMARK_THRESHOLD,
                    ))
            except Exception as e:
                self.logger.exception(
                    f"Error in intelligent_landmark_search: {e}")
                return detected_objects, []

            if not (landmark_results
                    and landmark_results.get("is_landmark_scene", False)):
                return detected_objects, []

            landmark_objects = []
            for landmark_info in landmark_results.get("detected_landmarks", []):
                try:
                    landmark_type, type_threshold = (
                        self._resolve_landmark_type(landmark_info))
                    # Scale the base threshold by the per-type threshold
                    # relative to the default of 0.5.
                    effective_threshold = (
                        _BASE_LANDMARK_THRESHOLD * (type_threshold / 0.5))
                    if landmark_info.get("confidence", 0) <= effective_threshold:
                        continue

                    landmark_obj = self._build_landmark_object(
                        landmark_info, landmark_type, w, h)
                    detected_objects.append(landmark_obj)
                    landmark_objects.append(landmark_obj)
                    self.logger.info(
                        f"Detected landmark: {landmark_info.get('landmark_name', 'Unknown')} "
                        f"with confidence {landmark_info.get('confidence', 0.0):.2f}")
                except Exception as e:
                    # One bad entry must not discard the other landmarks.
                    self.logger.error(f"Error processing landmark: {e}")
                    continue

            return detected_objects, landmark_objects

        except Exception as e:
            self.logger.exception(f"Error in landmark detection: {e}")
            return detected_objects, []

    def _to_pil_image(self, image):
        """Return ``image`` as a PIL image, or None if it cannot be converted."""
        if isinstance(image, Image.Image):
            return image
        if not isinstance(image, np.ndarray):
            self.logger.warning(
                f"Cannot process image of type {type(image)}")
            return None

        try:
            if image.ndim == 3 and image.shape[2] == 4:
                # Drop the fourth (alpha) channel
                image = image[:, :, :3]
            if image.ndim == 2:
                # Grayscale
                return Image.fromarray(image).convert("RGB")
            converted = Image.fromarray(image)
            # NOTE(review): only the PIL mode is inspected here; the array's
            # channel order is never changed. If the detector hands over
            # OpenCV-style BGR arrays the channels would still be swapped -
            # confirm against the caller.
            if converted.mode == "BGR":
                converted = converted.convert("RGB")
            return converted
        except Exception as e:
            self.logger.warning(
                f"Error converting image for landmark detection: {e}")
            return None

    @staticmethod
    def _as_numpy(values):
        """Move a tensor-like value to a NumPy array when it supports ``cpu()``."""
        return values.cpu().numpy() if hasattr(values, "cpu") else values

    def _collect_search_boxes(self, detection_result, detected_objects, w, h):
        """Collect the image regions that should be searched for landmarks."""
        if not detected_objects:
            # No detections at all: cover the full image, a 2x2 grid and a
            # centre crop spanning the middle 70% to improve the hit rate.
            grid_size = 2
            boxes = [[0, 0, w, h]]
            for i in range(grid_size):
                for j in range(grid_size):
                    boxes.append([
                        j * w / grid_size,
                        i * h / grid_size,
                        (j + 1) * w / grid_size,
                        (i + 1) * h / grid_size,
                    ])
            boxes.append([w * 0.15, h * 0.15, w * 0.85, h * 0.85])
            self.logger.info(
                "No YOLO detections, attempting detailed landmark analysis with multiple regions")
            return boxes

        boxes = []
        try:
            yolo_boxes = getattr(detection_result, "boxes", None)
            if all(hasattr(yolo_boxes, attr)
                   for attr in ("xyxy", "conf", "cls")):
                all_boxes = self._as_numpy(yolo_boxes.xyxy)
                all_confs = self._as_numpy(yolo_boxes.conf)
                all_cls = self._as_numpy(yolo_boxes.cls)

                for box, conf, cls in zip(all_boxes, all_confs, all_cls):
                    is_low_conf = 0.1 < conf < 0.4
                    is_building = int(cls) in _BUILDING_CLASS_IDS
                    if not (is_low_conf or is_building):
                        continue
                    # Skip anything that is not a usable 4-value box.
                    if (isinstance(box, (list, tuple, np.ndarray))
                            and len(box) >= 4):
                        boxes.append(box)
        except Exception as e:
            self.logger.exception(f"Error processing YOLO detections: {e}")
        return boxes

    def _ensure_landmark_classifier(self, clip_analyzer):
        """Create the landmark classifier on demand; return True if usable."""
        if self.landmark_classifier:
            return True

        if not (clip_analyzer and hasattr(clip_analyzer, "get_clip_instance")):
            self.logger.warning(
                "landmark_classifier not available and cannot be initialized")
            return False

        if CLIPZeroShotClassifier is None:
            self.logger.error(
                f"Error initializing landmark classifier: "
                f"{_IMPORT_ERRORS.get('CLIPZeroShotClassifier')}")
            return False

        try:
            self.logger.info(
                "Initializing landmark classifier for process_unknown_objects")
            _model, _preprocess, device = clip_analyzer.get_clip_instance()
            self.landmark_classifier = CLIPZeroShotClassifier(device=device)
        except Exception as e:
            self.logger.error(f"Error initializing landmark classifier: {e}")
            return False
        return True

    def _resolve_landmark_type(self, landmark_info) -> Tuple[str, float]:
        """Return ``(landmark_type, type_threshold)`` for one landmark result."""
        landmark_id = landmark_info.get("landmark_id")
        classifier = self.landmark_classifier

        # Prefer the classifier's own type logic and thresholds.
        if hasattr(classifier, "_determine_landmark_type") and landmark_id:
            landmark_type = classifier._determine_landmark_type(landmark_id)
            thresholds = getattr(classifier, "landmark_type_thresholds", {})
            return landmark_type, thresholds.get(landmark_type, 0.5)

        # Next, a type method defined on this object (e.g. by a subclass).
        if hasattr(self, "_determine_landmark_type"):
            landmark_type = self._determine_landmark_type(
                landmark_info.get("landmark_id", ""))
            return landmark_type, {"skyscraper": 0.4,
                                   "natural": 0.6}.get(landmark_type, 0.5)

        # Otherwise infer the type from keywords in the landmark ID.
        lowered_id = str(landmark_id or "").lower()
        for landmark_type, threshold, keywords in _LANDMARK_TYPE_KEYWORDS:
            if any(term in lowered_id for term in keywords):
                return landmark_type, threshold
        return "architectural", 0.5

    @staticmethod
    def _build_landmark_object(landmark_info, landmark_type, w, h):
        """Build the detected-object dict for an accepted landmark."""
        if "box" in landmark_info:
            box = landmark_info["box"]
        else:
            # No box supplied: use the central 90% of the image.
            margin_x, margin_y = w * 0.05, h * 0.05
            box = [margin_x, margin_y, w - margin_x, h - margin_y]

        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        center_x = (box[0] + box[2]) / 2
        center_y = (box[1] + box[3]) / 2
        landmark_id = landmark_info.get("landmark_id", "")

        landmark_obj = {
            # Overly long IDs are truncated to 15 characters.
            "class_id": (landmark_id[:15]
                         if isinstance(landmark_id, str) else "-100"),
            "class_name": landmark_info.get("landmark_name",
                                            "Unknown Landmark"),
            "confidence": landmark_info.get("confidence", 0.0),
            "box": box,
            "center": (center_x, center_y),
            "normalized_center": (center_x / w if w > 0 else 0.5,
                                  center_y / h if h > 0 else 0.5),
            "size": (box_w, box_h),
            "normalized_size": (box_w / w if w > 0 else 0,
                                box_h / h if h > 0 else 0),
            "area": box_w * box_h,
            "normalized_area": (box_w * box_h / (w * h)
                                if w * h > 0 else 0),
            # Fixed default region; no spatial analysis is done here.
            "region": "center",
            "is_landmark": True,
            "landmark_id": landmark_id,
            "location": landmark_info.get("location", "Unknown Location"),
        }

        for key in ("year_built", "architectural_style", "significance"):
            if key in landmark_info:
                landmark_obj[key] = landmark_info[key]

        landmark_obj["landmark_type"] = landmark_type
        return landmark_obj

    def remove_landmark_references(self, text):
        """
        Remove landmark references from ``text``.

        Known landmark names and aliases become "tall structure", known
        locations become "the urban area", and a fixed set of generic
        landmark phrasings is neutralised afterwards.

        Args:
            text: Input text.

        Returns:
            str: The text with landmark references removed. Falsy input is
            returned unchanged.
        """
        if not text:
            return text

        try:
            names, locations = self._collect_landmark_terms()
            text = self._replace_terms(text, names, "tall structure")
            text = self._replace_terms(text, locations, "the urban area")
        except (AttributeError, TypeError, re.error) as e:
            self.logger.warning(
                f"Error in dynamic landmark reference removal, using generic patterns: {e}")

        for pattern, replacement in _GENERIC_LANDMARK_PATTERNS:
            text = pattern.sub(replacement, text)

        return text

    def _collect_landmark_terms(self) -> Tuple[List[Any], List[Any]]:
        """Gather landmark names/aliases and location strings from the table."""
        names: List[Any] = []
        locations: List[Any] = []
        for info in self.all_landmarks.values():
            if not isinstance(info, dict):
                continue
            names.append(info.get("name"))
            names.extend(info.get("aliases") or [])

            location = info.get("location")
            if isinstance(location, str):
                locations.append(location)
                # Also cover the first two comma-separated parts
                # (e.g. city and country) on their own.
                locations.extend(
                    part.strip() for part in location.split(",")[:2])
        return names, locations

    @staticmethod
    def _replace_terms(text, terms, replacement):
        """Replace whole-word occurrences of any term, ignoring case."""
        # Terms of two characters or fewer are skipped as too ambiguous.
        # Longest first, so "Eiffel Tower" wins over its alias "Eiffel".
        usable = sorted(
            {term for term in terms
             if isinstance(term, str) and len(term) > 2},
            key=lambda term: (-len(term), term))
        if not usable:
            # An empty alternation would match at every position.
            return text

        alternation = "|".join(re.escape(term) for term in usable)
        # Lookarounds rather than \b so terms that end in punctuation
        # (e.g. "D.C.") are still matched.
        pattern = re.compile(
            r"(?<!\w)(?:" + alternation + r")(?!\w)", re.IGNORECASE)
        return pattern.sub(replacement, text)

    def get_alternative_scene_type(self, landmark_scene_type,
                                   detected_objects, scene_scores):
        """
        Choose a non-landmark scene type to replace a landmark scene type.

        Args:
            landmark_scene_type: The original landmark scene type.
            detected_objects: List of detected object dicts.
            scene_scores: Mapping of scene type to score.

        Returns:
            str: The replacement scene type.
        """
        # 1. Best-scoring non-landmark scene above 0.2, if any.
        landmark_types = {"tourist_landmark", "natural_landmark",
                          "historical_monument"}
        alternative_scores = {
            scene: score for scene, score in (scene_scores or {}).items()
            if scene not in landmark_types and score > 0.2
        }
        if alternative_scores:
            return max(alternative_scores.items(), key=lambda x: x[1])[0]

        # 2. Infer the scene from the mix of detected objects.
        class_names = [obj.get("class_name", "")
                       for obj in (detected_objects or [])]
        present = set(class_names)
        person_count = class_names.count("person")

        if present & {"car", "truck", "bus"}:
            # Vehicles: a street, or an intersection if signals are present
            if present & {"traffic light", "stop sign"}:
                return "intersection"
            return "city_street"

        if "building" in present and person_count > 0:
            return "commercial_district"

        if person_count > 3:
            return "pedestrian_area"

        if present & {"bench", "potted plant"}:
            return "park_area"

        # 3. Fall back on the original landmark scene type.
        if landmark_scene_type == "natural_landmark":
            return "outdoor_natural_area"
        if landmark_scene_type == "historical_monument":
            return "urban_architecture"

        # Default fallback
        return "city_street"

    def extract_landmark_specific_activities(self, landmark_objects):
        """
        Collect the activities registered for the recognised landmarks.

        Args:
            landmark_objects: List of landmark object dicts.

        Returns:
            List[str]: Activities looked up by each object's ``landmark_id``.
        """
        landmark_objects = landmark_objects or []
        landmark_specific_activities = []

        for lm_obj in landmark_objects:
            lm_id = lm_obj.get("landmark_id")
            if lm_id and lm_id in self.landmark_activities:
                landmark_specific_activities.extend(
                    self.landmark_activities[lm_id])

        if landmark_specific_activities:
            # Landmark objects built by process_unknown_objects keep the
            # display name under "class_name".
            landmark_names = [
                lm.get("landmark_name") or lm.get("class_name", "unknown")
                for lm in landmark_objects if lm.get("is_landmark", False)
            ]
            self.logger.info(
                f"Added {len(landmark_specific_activities)} landmark-specific "
                f"activities for {', '.join(landmark_names)}")

        return landmark_specific_activities

    def update_enable_landmark_status(self, enable_landmark: bool):
        """
        Update whether landmark detection is enabled.

        Args:
            enable_landmark: Whether landmark detection is enabled.
        """
        self.enable_landmark = enable_landmark

    def update_use_landmark_detection_status(self, use_landmark_detection: bool):
        """
        Update whether landmark detection is in use.

        Args:
            use_landmark_detection: Whether landmark detection is used.
        """
        self.use_landmark_detection = use_landmark_detection