# Hugging Face Spaces page header ("Running on Zero") captured with the source; not part of the module.
import logging
import re
import traceback
from collections import Counter
from typing import Dict, List, Tuple, Optional, Any

import numpy as np
from PIL import Image

from clip_zero_shot_classifier import CLIPZeroShotClassifier
from landmark_activities import LANDMARK_ACTIVITIES
from landmark_data import ALL_LANDMARKS
class LandmarkProcessingManager:
    """Coordinate landmark detection and landmark-reference cleanup.

    The manager runs a CLIP-based landmark search over regions that the
    object detector left unidentified or scored with low confidence, turns
    accepted results into detection-style dictionaries, and offers helpers
    for scrubbing landmark mentions from text, choosing a non-landmark
    scene type, and looking up landmark-specific activities.
    """

    # Scene types that denote a landmark scene and must be skipped when an
    # alternative scene type is requested.
    _LANDMARK_SCENE_TYPES = frozenset(
        {"tourist_landmark", "natural_landmark", "historical_monument"}
    )

    # Detector class IDs treated as "building-like" regions.
    # NOTE(review): values are carried over unchanged; under the COCO label
    # map these IDs are not buildings (e.g. 11 = stop sign) - confirm against
    # the detector's actual label map.
    _BUILDING_CLASS_IDS = frozenset(
        {11, 12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65}
    )

    # Base confidence threshold handed to the classifier and used as the
    # reference point when scaling by landmark type.
    _BASE_THRESHOLD = 0.25
    _DEFAULT_TYPE_THRESHOLD = 0.5

    # Keyword fallback used when no type resolver is available:
    # (keywords searched in the landmark ID, landmark type, type threshold).
    _FALLBACK_TYPE_RULES = (
        (("mountain", "canyon", "waterfall", "lake", "river", "natural"), "natural", 0.6),
        (("skyscraper", "building", "tower", "tall"), "skyscraper", 0.4),
        (("monument", "memorial", "statue", "historical"), "monument", 0.5),
    )

    # Generic landmark phrasing scrubbed after the data-driven pass.
    # NOTE(review): every pattern is applied with IGNORECASE, so the `[A-Z]`
    # "proper noun" anchors also match lowercase text and can swallow ordinary
    # phrases ending in "tower"/"building". Patterns are kept as they were;
    # tightening them needs a representative text corpus.
    _GENERIC_LANDMARK_PATTERNS = tuple(
        (re.compile(pattern, re.IGNORECASE), replacement)
        for pattern, replacement in (
            # Landmark-in-location phrasing
            (r'an iconic structure in ([A-Z][a-zA-Z\s,]+)', r'an urban structure'),
            (r'a famous (monument|tower|landmark) in ([A-Z][a-zA-Z\s,]+)', r'an urban structure'),
            (r'(the [A-Z][a-zA-Z\s]+ Tower)', r'the tower'),
            (r'(the [A-Z][a-zA-Z\s]+ Building)', r'the building'),
            (r'(the CN Tower)', r'the tower'),
            (r'([A-Z][a-zA-Z\s]+) Tower', r'tall structure'),
            # Spatial relations to a landmark
            (r'(centered|built|located|positioned) around the ([A-Z][a-zA-Z\s]+? (Tower|Monument|Landmark))', r'located in this area'),
            # Landmark-related activities
            (r'(sightseeing|guided tours|cultural tourism) (at|around|near) (this landmark|the [A-Z][a-zA-Z\s]+)', r'\1 in this area'),
            # General landmark adjectives
            (r'this (famous|iconic|historic|well-known) (landmark|monument|tower|structure)', r'this urban structure'),
            (r'landmark scene', r'urban scene'),
            (r'tourist destination', r'urban area'),
            (r'tourist attraction', r'urban area'),
        )
    )

    def __init__(self, enable_landmark: bool = True, use_clip: bool = True):
        """Initialise the manager.

        Args:
            enable_landmark: Whether landmark detection is enabled.
            use_clip: Whether CLIP analysis is enabled.
        """
        self.logger = logging.getLogger(__name__)
        self.enable_landmark = enable_landmark
        self.use_clip = use_clip
        # Third gate checked by process_unknown_objects. It starts disabled,
        # which matches the behaviour before this attribute was declared
        # here: detection only runs after
        # update_use_landmark_detection_status(True) has been called.
        self.use_landmark_detection = False

        # Landmark reference data
        self.landmark_activities: Dict[str, Any] = {}
        self.all_landmarks: Dict[str, Any] = {}
        self._load_landmark_data()

        # The classifier is created lazily (or injected via
        # set_landmark_classifier).
        self.landmark_classifier = None

    def _load_landmark_data(self) -> None:
        """Bind the module-level landmark tables to this instance.

        A missing table is tolerated: the attribute falls back to an empty
        dict and a warning is logged. Looking up an unbound module-level
        name raises NameError (not ImportError), so that is what is caught.
        """
        try:
            self.landmark_activities = LANDMARK_ACTIVITIES
            self.logger.info("Loaded LANDMARK_ACTIVITIES successfully")
        except NameError as e:
            self.logger.warning(f"Failed to load LANDMARK_ACTIVITIES: {e}")
            self.landmark_activities = {}

        try:
            self.all_landmarks = ALL_LANDMARKS
            self.logger.info("Loaded ALL_LANDMARKS successfully")
        except NameError as e:
            self.logger.warning(f"Failed to load ALL_LANDMARKS: {e}")
            self.all_landmarks = {}

    def set_landmark_classifier(self, landmark_classifier: Any) -> None:
        """Inject the landmark classifier to use.

        Args:
            landmark_classifier: A CLIPZeroShotClassifier instance.
        """
        self.landmark_classifier = landmark_classifier

    def process_unknown_objects(
        self,
        detection_result: Any,
        detected_objects: Optional[List[Dict[str, Any]]],
        clip_analyzer: Any = None,
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Run landmark detection on regions the detector could not identify.

        When landmark detection is disabled, any objects already flagged
        ``is_landmark`` are filtered out of a *new* list. Otherwise accepted
        landmarks are appended to ``detected_objects`` in place.

        Args:
            detection_result: Detector result; ``orig_img`` and ``boxes``
                (``xyxy``/``conf``/``cls``) are read when present.
            detected_objects: Objects already identified. ``None`` is
                treated as an empty list.
            clip_analyzer: Optional CLIP analyzer exposing
                ``get_clip_instance()``, used to create the landmark
                classifier on demand.

        Returns:
            tuple: (updated object list, list of landmark objects added).
            On any failure the object list is returned with an empty
            landmark list.
        """
        if detected_objects is None:
            detected_objects = []

        # getattr keeps instances created without __init__ working.
        detection_enabled = (
            self.enable_landmark
            and self.use_clip
            and getattr(self, "use_landmark_detection", False)
        )
        if not detection_enabled:
            # Make sure no landmark objects leak through while disabled.
            cleaned_objects = [
                obj for obj in detected_objects if not obj.get("is_landmark", False)
            ]
            return cleaned_objects, []

        try:
            original_image = getattr(detection_result, "orig_img", None)
            if original_image is None:
                self.logger.warning("Original image not available for landmark detection")
                return detected_objects, []

            image = self._to_pil_image(original_image)
            if image is None:
                return detected_objects, []
            width, height = image.size

            search_boxes = self._collect_search_boxes(
                detection_result, detected_objects, width, height
            )

            if not self._ensure_landmark_classifier(clip_analyzer):
                return detected_objects, []

            # Always give the classifier at least the full frame to look at.
            if not search_boxes:
                search_boxes.append([0, 0, width, height])

            try:
                landmark_results = self.landmark_classifier.intelligent_landmark_search(
                    image,
                    yolo_boxes=search_boxes,
                    base_threshold=self._BASE_THRESHOLD,
                )
            except Exception as e:
                self.logger.exception(f"Error in intelligent_landmark_search: {e}")
                return detected_objects, []

            landmark_objects: List[Dict[str, Any]] = []
            if landmark_results and landmark_results.get("is_landmark_scene", False):
                for landmark_info in landmark_results.get("detected_landmarks") or []:
                    # One malformed entry must not discard the others.
                    try:
                        landmark_obj = self._build_landmark_object(
                            landmark_info, width, height
                        )
                        if landmark_obj is None:
                            continue
                        detected_objects.append(landmark_obj)
                        landmark_objects.append(landmark_obj)
                        self.logger.info(f"Detected landmark: {landmark_info.get('landmark_name', 'Unknown')} with confidence {landmark_info.get('confidence', 0.0):.2f}")
                    except Exception as e:
                        self.logger.error(f"Error processing landmark: {e}")
                        continue

            return detected_objects, landmark_objects

        except Exception as e:
            self.logger.exception(f"Error in landmark detection: {e}")
            return detected_objects, []

    def _to_pil_image(self, image: Any) -> Any:
        """Return ``image`` as a PIL image, or ``None`` if it cannot be used.

        PIL images are returned untouched. NumPy arrays are converted: a
        4-channel array is cut to its first three channels and a 2-D array
        is expanded to RGB.

        NOTE(review): arrays are converted without channel reordering. The
        earlier ``mode == 'BGR'`` check was dropped because it could not be
        true for an image built by ``Image.fromarray`` from these arrays; if
        the detector supplies BGR data the classifier sees swapped channels
        - confirm with the caller.
        """
        if isinstance(image, Image.Image):
            return image

        if not isinstance(image, np.ndarray):
            self.logger.warning(f"Cannot process image of type {type(image)}")
            return None

        try:
            if image.ndim == 3 and image.shape[2] == 4:
                image = image[:, :, :3]  # drop the fourth channel
            if image.ndim == 2:
                return Image.fromarray(image).convert("RGB")
            return Image.fromarray(image)
        except Exception as e:
            self.logger.warning(f"Error converting image for landmark detection: {e}")
            return None

    @staticmethod
    def _as_numpy(values: Any) -> Any:
        """Move tensor-like ``values`` to a NumPy array when they expose ``cpu()``."""
        return values.cpu().numpy() if hasattr(values, "cpu") else values

    def _collect_search_boxes(
        self,
        detection_result: Any,
        detected_objects: List[Dict[str, Any]],
        width: float,
        height: float,
    ) -> List[Any]:
        """Gather the image regions in which to search for landmarks.

        With no detected objects the result is the full frame, a 2x2 grid
        and a centred box covering the middle 70% of each dimension. With
        detections, it is every raw detector box that is low-confidence
        (0.1 < conf < 0.4) or whose class is in ``_BUILDING_CLASS_IDS``.
        """
        if not detected_objects:
            boxes: List[Any] = [[0, 0, width, height]]
            grid_size = 2
            for row in range(grid_size):
                for col in range(grid_size):
                    boxes.append([
                        col * width / grid_size,
                        row * height / grid_size,
                        (col + 1) * width / grid_size,
                        (row + 1) * height / grid_size,
                    ])
            boxes.append([width * 0.15, height * 0.15, width * 0.85, height * 0.85])
            self.logger.info("No YOLO detections, attempting detailed landmark analysis with multiple regions")
            return boxes

        boxes = []
        try:
            raw = getattr(detection_result, "boxes", None)
            if all(hasattr(raw, attr) for attr in ("xyxy", "conf", "cls")):
                all_boxes = self._as_numpy(raw.xyxy)
                all_confs = self._as_numpy(raw.conf)
                all_cls = self._as_numpy(raw.cls)

                for box, conf, cls in zip(all_boxes, all_confs, all_cls):
                    is_low_conf = 0.1 < conf < 0.4
                    is_building = int(cls) in self._BUILDING_CLASS_IDS
                    if not (is_low_conf or is_building):
                        continue
                    # Skip anything that is not a usable 4-coordinate box.
                    if isinstance(box, (list, tuple, np.ndarray)) and len(box) >= 4:
                        boxes.append(box)
        except Exception as e:
            # Best effort: keep whatever boxes were collected so far.
            self.logger.exception(f"Error processing YOLO detections: {e}")
        return boxes

    def _ensure_landmark_classifier(self, clip_analyzer: Any) -> bool:
        """Create the landmark classifier on demand; return whether one is available."""
        if self.landmark_classifier:
            return True

        if not (clip_analyzer and hasattr(clip_analyzer, "get_clip_instance")):
            self.logger.warning("landmark_classifier not available and cannot be initialized")
            return False

        try:
            self.logger.info("Initializing landmark classifier for process_unknown_objects")
            # Only the device is needed; model and preprocess are ignored.
            _model, _preprocess, device = clip_analyzer.get_clip_instance()
            self.landmark_classifier = CLIPZeroShotClassifier(device=device)
        except Exception as e:
            self.logger.error(f"Error initializing landmark classifier: {e}")
            return False
        return True

    def _resolve_landmark_type(self, landmark_info: Dict[str, Any]) -> Tuple[str, float]:
        """Return ``(landmark_type, type_threshold)`` for one search result.

        Resolution order: the classifier's ``_determine_landmark_type`` (with
        its ``landmark_type_thresholds`` table), then an optional
        ``_determine_landmark_type`` on this object (e.g. from a subclass),
        then keyword matching on the landmark ID.
        """
        landmark_type = "architectural"
        type_threshold = self._DEFAULT_TYPE_THRESHOLD
        landmark_id = landmark_info.get("landmark_id")
        classifier = self.landmark_classifier

        if hasattr(classifier, "_determine_landmark_type") and landmark_id:
            landmark_type = classifier._determine_landmark_type(landmark_id)
            thresholds = getattr(classifier, "landmark_type_thresholds", {})
            type_threshold = thresholds.get(landmark_type, self._DEFAULT_TYPE_THRESHOLD)
        elif hasattr(self, "_determine_landmark_type"):
            landmark_type = self._determine_landmark_type(
                landmark_info.get("landmark_id", "")
            )
            if landmark_type == "skyscraper":
                type_threshold = 0.4
            elif landmark_type == "natural":
                type_threshold = 0.6
        else:
            # Coerce to text so a missing or non-string ID cannot raise here.
            id_text = str(landmark_id or "").lower()
            for keywords, rule_type, rule_threshold in self._FALLBACK_TYPE_RULES:
                if any(keyword in id_text for keyword in keywords):
                    landmark_type, type_threshold = rule_type, rule_threshold
                    break

        return landmark_type, type_threshold

    def _build_landmark_object(
        self, landmark_info: Dict[str, Any], width: float, height: float
    ) -> Optional[Dict[str, Any]]:
        """Convert one search result into a detection-style dict.

        Returns ``None`` when the result's confidence does not exceed the
        type-adjusted threshold.
        """
        landmark_type, type_threshold = self._resolve_landmark_type(landmark_info)
        # Scale the base threshold relative to the default type threshold.
        effective_threshold = self._BASE_THRESHOLD * (
            type_threshold / self._DEFAULT_TYPE_THRESHOLD
        )

        confidence = landmark_info.get("confidence", 0)
        if not confidence > effective_threshold:
            return None

        box = landmark_info.get("box")
        if box is None:
            # No box reported: use the central 90% of the image.
            margin_x, margin_y = width * 0.05, height * 0.05
            box = [margin_x, margin_y, width - margin_x, height - margin_y]

        box_width = box[2] - box[0]
        box_height = box[3] - box[1]
        center_x = (box[0] + box[2]) / 2
        center_y = (box[1] + box[3]) / 2
        area = box_width * box_height
        image_area = width * height

        raw_id = landmark_info.get("landmark_id", "")
        landmark_obj = {
            # IDs are truncated to 15 characters; non-string IDs map to "-100".
            "class_id": raw_id[:15] if isinstance(raw_id, str) else "-100",
            "class_name": landmark_info.get("landmark_name", "Unknown Landmark"),
            "confidence": confidence,
            "box": box,
            "center": (center_x, center_y),
            "normalized_center": (
                center_x / width if width > 0 else 0.5,
                center_y / height if height > 0 else 0.5,
            ),
            "size": (box_width, box_height),
            "normalized_size": (
                box_width / width if width > 0 else 0,
                box_height / height if height > 0 else 0,
            ),
            "area": area,
            "normalized_area": area / image_area if image_area > 0 else 0,
            # No spatial analyzer is available here, so the region is fixed.
            "region": "center",
            "is_landmark": True,
            "landmark_id": raw_id,
            "location": landmark_info.get("location", "Unknown Location"),
        }

        # Optional descriptive fields are copied only when present.
        for key in ("year_built", "architectural_style", "significance"):
            if key in landmark_info:
                landmark_obj[key] = landmark_info[key]

        landmark_obj["landmark_type"] = landmark_type
        return landmark_obj

    def remove_landmark_references(self, text):
        """Remove landmark names and locations from ``text``.

        Known landmark names/aliases are replaced with "tall structure" and
        known locations with "the urban area"; a fixed set of generic
        landmark phrasings is rewritten afterwards.

        Args:
            text: Input text. Falsy values are returned unchanged.

        Returns:
            str: Text with landmark references removed.
        """
        if not text:
            return text

        try:
            names, locations = self._collect_landmark_terms()
            for terms, replacement in (
                (names, "tall structure"),
                (locations, "the urban area"),
            ):
                pattern = self._compile_term_pattern(terms)
                if pattern is not None:
                    text = pattern.sub(replacement, text)
        except Exception as e:
            self.logger.warning(f"Error in dynamic landmark reference removal, using generic patterns: {e}")

        for pattern, replacement in self._GENERIC_LANDMARK_PATTERNS:
            text = pattern.sub(replacement, text)

        return text

    def _collect_landmark_terms(self) -> Tuple[List[Any], List[Any]]:
        """Collect landmark names/aliases and location strings from ``all_landmarks``.

        For each location the full string and its first two comma-separated
        parts are collected. Entries lacking a field are skipped instead of
        aborting the whole collection.
        """
        names: List[Any] = []
        locations: List[Any] = []
        for info in (self.all_landmarks or {}).values():
            if not isinstance(info, dict):
                continue
            names.append(info.get("name"))
            names.extend(info.get("aliases") or [])

            location = info.get("location")
            if isinstance(location, str):
                locations.append(location)
                locations.extend(part.strip() for part in location.split(",")[:2])
        return names, locations

    @staticmethod
    def _compile_term_pattern(terms: List[Any]) -> Any:
        """Build one case-insensitive regex matching any usable term.

        Terms must be strings longer than two characters. Alternatives are
        ordered longest first so a short term cannot clobber part of a
        longer one, and lookarounds (rather than ``\\b``) delimit the match
        so terms ending in punctuation still match. Returns ``None`` when no
        term qualifies.
        """
        usable = {term for term in terms if isinstance(term, str) and len(term) > 2}
        if not usable:
            return None
        ordered = sorted(usable, key=lambda term: (-len(term), term))
        alternation = "|".join(re.escape(term) for term in ordered)
        return re.compile(r"(?<!\w)(?:" + alternation + r")(?!\w)", re.IGNORECASE)

    def get_alternative_scene_type(self, landmark_scene_type, detected_objects, scene_scores):
        """Pick a non-landmark scene type to stand in for a landmark scene.

        Args:
            landmark_scene_type: The original landmark scene type.
            detected_objects: Detected objects (dicts with ``class_name``).
            scene_scores: Mapping of scene type to score.

        Returns:
            str: The replacement scene type.
        """
        # 1. Best-scoring non-landmark scene, if any scores above 0.2.
        alternative_scores = {
            scene: score
            for scene, score in (scene_scores or {}).items()
            if scene not in self._LANDMARK_SCENE_TYPES and score > 0.2
        }
        if alternative_scores:
            return max(alternative_scores, key=alternative_scores.get)

        # 2. Infer the scene from the mix of detected objects.
        counts = Counter(obj.get("class_name", "") for obj in detected_objects or [])

        if any(vehicle in counts for vehicle in ("car", "truck", "bus")):
            # Vehicles: a street, or an intersection if traffic control is present.
            if "traffic light" in counts or "stop sign" in counts:
                return "intersection"
            return "city_street"

        if "building" in counts and counts["person"] > 0:
            return "commercial_district"

        if counts["person"] > 3:
            return "pedestrian_area"

        if "bench" in counts or "potted plant" in counts:
            return "park_area"

        # 3. Fall back on the kind of landmark scene being replaced.
        if landmark_scene_type == "natural_landmark":
            return "outdoor_natural_area"
        if landmark_scene_type == "historical_monument":
            return "urban_architecture"

        return "city_street"

    def extract_landmark_specific_activities(self, landmark_objects):
        """Look up activities associated with the recognised landmarks.

        Args:
            landmark_objects: Landmark objects (dicts with ``landmark_id``).
                ``None`` is treated as empty.

        Returns:
            List[str]: Activities for every landmark whose ID is present in
            ``landmark_activities``, in input order.
        """
        landmark_objects = landmark_objects or []
        landmark_specific_activities = []

        for lm_obj in landmark_objects:
            lm_id = lm_obj.get("landmark_id")
            if lm_id and lm_id in self.landmark_activities:
                landmark_specific_activities.extend(self.landmark_activities[lm_id])

        if landmark_specific_activities:
            # Landmark objects built by this class carry the display name
            # under "class_name"; "landmark_name" is honoured if present.
            landmark_names = [
                lm.get("landmark_name") or lm.get("class_name") or "unknown"
                for lm in landmark_objects
                if lm.get("is_landmark", False)
            ]
            self.logger.info(f"Added {len(landmark_specific_activities)} landmark-specific activities for {', '.join(landmark_names)}")

        return landmark_specific_activities

    def update_enable_landmark_status(self, enable_landmark: bool):
        """Set whether landmark detection is enabled.

        Args:
            enable_landmark: New value for the ``enable_landmark`` gate.
        """
        self.enable_landmark = enable_landmark

    def update_use_landmark_detection_status(self, use_landmark_detection: bool):
        """Set whether landmark detection is actually used.

        Args:
            use_landmark_detection: New value for the
                ``use_landmark_detection`` gate.
        """
        self.use_landmark_detection = use_landmark_detection