import hashlib
import random
import re
import sqlite3
import traceback
from contextlib import closing
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional, Any

import numpy as np
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity

from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info


@dataclass
class BreedDescriptionVector:
    """Embedding record for a single breed."""

    # Breed identifier exactly as it appears in the manager's breed list.
    breed_name: str
    # Natural-language description that was fed to the encoder.
    description_text: str
    # Value returned by ``SentenceTransformer.encode`` for ``description_text``.
    embedding: np.ndarray
    # Raw attribute snapshot: size, exercise_needs, grooming_needs,
    # good_with_children, temperament.
    characteristics: Dict[str, Any]


class SemanticVectorManager:
    """Manage the SBERT model and the per-breed description embeddings.

    Responsibilities:
      * lazily load a SentenceTransformer model (deferred until first use),
      * generate a natural-language description for every breed,
      * encode those descriptions into ``BreedDescriptionVector`` records.
    """

    # Breeds used when the breed catalog cannot be read from the database.
    _FALLBACK_BREEDS = (
        'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
        'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
    )

    # Models tried in order; the first one that loads is kept.
    _MODEL_CANDIDATES = ('all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2')

    def __init__(self):
        """Set up empty state; the SBERT model is NOT loaded here."""
        self.model_name = 'all-MiniLM-L6-v2'
        self.sbert_model = None
        self._sbert_loading_attempted = False
        self.breed_vectors = {}
        self.breed_list = self._get_breed_list()
        # Model loading is deferred until it is actually needed so that it
        # happens inside the GPU context.
        print("SemanticVectorManager initialized (SBERT loading deferred)")

    def _get_breed_list(self) -> List[str]:
        """Return the distinct breed names stored in ``animal_detector.db``.

        The 'Dhole' entry (a wild animal) and NULL/empty names are dropped.
        On any failure a small built-in list of common breeds is returned
        instead, so construction of the manager never fails because of the
        database.
        """
        try:
            # ``closing`` guarantees the cursor and the connection are released
            # even when the query raises; the original leaked both on error.
            with closing(sqlite3.connect('animal_detector.db')) as conn:
                with closing(conn.cursor()) as cursor:
                    cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
                    rows = cursor.fetchall()
        except Exception as e:
            print(f"Error getting breed list: {str(e)}")
            return list(self._FALLBACK_BREEDS)

        # A NULL breed would later crash ``breed.replace`` during description
        # building, so falsy names are filtered together with 'Dhole'.
        return [row[0] for row in rows if row[0] and row[0] != 'Dhole']

    def _initialize_model(self) -> Optional[SentenceTransformer]:
        """Load an SBERT model once, trying several candidates in order.

        Returns:
            The loaded model, or ``None`` when every candidate failed.

        Loading is attempted at most once per instance: after the first call
        ``_sbert_loading_attempted`` is set and later calls simply return the
        current ``self.sbert_model`` (possibly ``None``).
        """
        if self.sbert_model is not None or self._sbert_loading_attempted:
            return self.sbert_model

        try:
            print("Loading SBERT model in GPU context...")
            # Pick the device explicitly (ZeroGPU compatibility).
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

            for model_name in self._MODEL_CANDIDATES:
                try:
                    self.sbert_model = SentenceTransformer(model_name, device=device)
                except Exception as model_e:
                    print(f"Failed to load {model_name}: {str(model_e)}")
                    continue
                self.model_name = model_name
                print(f"SBERT model {model_name} loaded successfully on {device}")
                return self.sbert_model

            # Every candidate failed.
            print("All SBERT models failed to load. Using basic text matching fallback.")
            self.sbert_model = None
            return None
        except Exception as e:
            print(f"Failed to initialize any SBERT model: {str(e)}")
            print(traceback.format_exc())
            print("Will provide basic text-based recommendations without embeddings")
            self.sbert_model = None
            return None
        finally:
            self._sbert_loading_attempted = True

    @staticmethod
    def _text_field(info: Dict[str, Any], key: str, default: str) -> str:
        """Return ``info[key]`` when it is a string, otherwise ``default``.

        Guards the ``.lower()`` / ``.split()`` calls below against ``None`` or
        other non-string values stored under an existing key.
        """
        value = info.get(key, default)
        return value if isinstance(value, str) else default

    @staticmethod
    def _intelligence_keywords(description_lower: str) -> List[str]:
        """Derive trainability / purpose phrases from a lower-cased description."""
        if not description_lower:
            return []

        phrases: List[str] = []

        # Trainability indicators.
        if any(word in description_lower
               for word in ('intelligent', 'smart', 'clever', 'quick to learn')):
            phrases.extend(['highly intelligent', 'trainable', 'quick learner'])
        elif any(word in description_lower
                 for word in ('stubborn', 'independent', 'difficult to train')):
            phrases.extend(['independent minded', 'requires patience', 'challenging to train'])
        else:
            phrases.extend(['moderate intelligence', 'trainable with consistency'])

        # Working / companion purpose indicators.
        if any(word in description_lower
               for word in ('working', 'herding', 'guard', 'hunting')):
            phrases.extend(['working breed', 'purpose-driven', 'task-oriented'])
        elif any(word in description_lower
                 for word in ('companion', 'lap', 'toy', 'decorative')):
            phrases.extend(['companion breed', 'affectionate', 'people-focused'])

        return phrases

    @staticmethod
    def _lifespan_keywords(lifespan: Any) -> List[str]:
        """Map a lifespan string such as "10-12 years" to longevity keywords.

        The average of all integers found in the string decides the bucket.
        Returns an empty list when ``lifespan`` is not a non-empty string or
        contains no digits.
        """
        if not (lifespan and isinstance(lifespan, str)):
            return []
        try:
            years = re.findall(r'\d+', lifespan)
            if not years:
                return []
            avg_years = sum(int(y) for y in years) / len(years)
        except ValueError:
            # e.g. an absurdly long digit run rejected by int().
            return ['average-lifespan']

        if avg_years >= 14:
            return ['long-lived', 'longevity', 'durable', 'healthy-lifespan']
        if avg_years <= 8:
            return ['shorter-lifespan', 'health-considerations', 'special-care']
        return ['average-lifespan', 'moderate-longevity']

    def _create_breed_description(self, breed: str) -> str:
        """Build a comprehensive natural-language description of ``breed``.

        The text combines sentences about size, temperament, exercise, noise,
        living space, grooming, family suitability, trainability, notable
        traits, care level and lifespan, followed by an
        "Additional context:" list of keywords meant to improve semantic
        matching.

        Args:
            breed: Breed identifier; underscores are shown as spaces.

        Returns:
            The description string. If anything goes wrong a generic
            one-sentence description is returned instead of raising.
        """
        try:
            breed_info = get_dog_description(breed) or {}
            noise_info = (breed_noise_info.get(breed) if breed_noise_info else None) or {}

            breed_display_name = breed.replace('_', ' ')

            # Read every field once; the same values drive both the sentences
            # and the keyword list.
            size = self._text_field(breed_info, 'Size', 'medium').lower()
            temperament = self._text_field(breed_info, 'Temperament', '')
            exercise_needs = self._text_field(breed_info, 'Exercise Needs', 'moderate').lower()
            noise_level = self._text_field(noise_info, 'noise_level', 'moderate').lower()
            grooming_needs = self._text_field(breed_info, 'Grooming Needs', 'moderate').lower()
            good_with_children = breed_info.get('Good with Children', 'Yes')
            description_raw = self._text_field(breed_info, 'Description', '')
            care_level = self._text_field(breed_info, 'Care Level', 'moderate')
            lifespan = breed_info.get('Lifespan', '10-12 years')

            description_parts = []

            # 1. Basic size.
            description_parts.append(f"{breed_display_name} is a {size} sized dog breed")

            # 2. Temperament (key matching factor).
            if temperament:
                description_parts.append(f"with a {temperament.lower()} temperament")

            # 3. Exercise / activity level.
            if 'high' in exercise_needs:
                description_parts.append("requiring high daily exercise and mental stimulation")
            elif 'low' in exercise_needs or 'minimal' in exercise_needs:
                description_parts.append(
                    "with minimal exercise requirements, suitable for apartment living")
            else:
                description_parts.append("with moderate exercise needs")

            # 4. Noise characteristics.
            if 'low' in noise_level or 'quiet' in noise_level:
                description_parts.append("known for being quiet and rarely barking")
            elif 'high' in noise_level or 'loud' in noise_level:
                description_parts.append("tends to be vocal and bark frequently")
            else:
                description_parts.append("with moderate barking tendencies")

            # 5. Living-space compatibility.
            if size in ['small', 'tiny']:
                description_parts.append("excellent for small apartments and limited spaces")
            elif size in ['large', 'giant']:
                description_parts.append("requiring large living spaces and preferably a yard")
            else:
                description_parts.append("adaptable to various living situations")

            # 6. Grooming and maintenance.
            if 'high' in grooming_needs:
                description_parts.append("requiring regular professional grooming")
            elif 'low' in grooming_needs:
                description_parts.append("with minimal grooming requirements")
            else:
                description_parts.append("with moderate grooming needs")

            # 7. Family compatibility.
            if good_with_children == 'Yes':
                description_parts.append("excellent with children and families")
            else:
                description_parts.append("better suited for adult households")

            # 8. Intelligence / trainability, mined from the free-text description.
            intelligence_keywords = self._intelligence_keywords(description_raw.lower())
            if intelligence_keywords:
                description_parts.append(
                    f"characterized as {', '.join(intelligence_keywords[:2])}")

            # 9. Notable traits: first 150 characters of the description plus
            #    any behavioural trait words found in that excerpt.
            desc = description_raw[:150]
            if desc:
                desc_lower = desc.lower()
                key_traits = []
                if 'friendly' in desc_lower:
                    key_traits.append('friendly')
                if 'gentle' in desc_lower:
                    key_traits.append('gentle')
                if 'energetic' in desc_lower or 'active' in desc_lower:
                    key_traits.append('energetic')
                if 'calm' in desc_lower or 'peaceful' in desc_lower:
                    key_traits.append('calm')
                if 'protective' in desc_lower or 'guard' in desc_lower:
                    key_traits.append('protective')

                trait_text = f" and {', '.join(key_traits)}" if key_traits else ""
                description_parts.append(f"Known for: {desc_lower}{trait_text}")

            # 10. Care level.
            description_parts.append(f"requiring {care_level.lower()} overall care level")

            # 11. Lifespan.
            if lifespan and isinstance(lifespan, str) and lifespan.strip():
                description_parts.append(f"with a typical lifespan of {lifespan}")
            else:
                description_parts.append("with a typical lifespan of 10-12 years")

            full_description = '. '.join(description_parts) + '.'

            # ---- Keyword list appended for better semantic matching ----
            keywords = []

            # Breed name words.
            keywords.extend([word.lower() for word in breed_display_name.split()])

            # Temperament words.
            if temperament:
                keywords.extend([word.lower().strip(',') for word in temperament.split()])

            # Size.
            if 'small' in size or 'tiny' in size:
                keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap'])
            elif 'large' in size or 'giant' in size:
                keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor'])
            else:
                keywords.extend(['medium', 'moderate', 'average', 'balanced'])

            # Activity level.
            if 'high' in exercise_needs:
                keywords.extend(['active', 'energetic', 'exercise', 'outdoor',
                                 'hiking', 'running', 'athletic'])
            elif 'low' in exercise_needs:
                keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary'])
            else:
                keywords.extend(['moderate', 'balanced', 'walks', 'regular'])

            # Noise level.
            if 'quiet' in noise_level or 'low' in noise_level:
                keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise'])
            elif 'high' in noise_level or 'loud' in noise_level:
                keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog'])

            # Living situation.
            if size in ['small', 'tiny'] and 'low' in exercise_needs:
                keywords.extend(['apartment', 'city', 'urban', 'small-space'])
            if size in ['large', 'giant'] or 'high' in exercise_needs:
                keywords.extend(['house', 'yard', 'suburban', 'rural', 'space'])

            # Family.
            if good_with_children == 'Yes':
                keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle'])

            # Intelligence / trainability.
            if intelligence_keywords:
                keywords.extend([word.lower()
                                 for phrase in intelligence_keywords
                                 for word in phrase.split()])

            # Grooming.
            if 'high' in grooming_needs:
                keywords.extend(['high-maintenance', 'professional-grooming',
                                 'daily-brushing', 'coat-care'])
            elif 'low' in grooming_needs:
                keywords.extend(['low-maintenance', 'minimal-grooming',
                                 'easy-care', 'wash-and-go'])
            else:
                keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care'])

            # Lifespan.
            keywords.extend(self._lifespan_keywords(lifespan))

            # De-duplicate while keeping first-seen order. ``set`` iteration
            # order for strings varies between interpreter runs (hash
            # randomization), which made the description text, and therefore
            # the embedding, non-reproducible.
            unique_keywords = list(dict.fromkeys(keywords))
            keyword_text = ' '.join(unique_keywords)
            full_description += f" Additional context: {keyword_text}"

            return full_description

        except Exception as e:
            print(f"Error creating description for {breed}: {str(e)}")
            return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics."

    def _build_breed_vectors(self):
        """Encode every breed description and store the results.

        Called lazily. Does nothing (besides logging) when no SBERT model can
        be loaded.

        Raises:
            Exception: any error raised while describing or encoding a breed
                is logged with its traceback and re-raised. In that case
                ``self.breed_vectors`` is left untouched.
        """
        try:
            print("Building breed vector database...")

            # Load the model on first use.
            if self.sbert_model is None:
                self._initialize_model()

            if self.sbert_model is None:
                print("SBERT model not available, skipping vector building")
                return

            # Build into a local dict and publish at the end, so a failure
            # half-way through cannot leave a partial cache that
            # ``get_breed_vectors`` would then treat as complete.
            built = {}
            for breed in self.breed_list:
                description = self._create_breed_description(breed)
                embedding = self.sbert_model.encode(description, convert_to_tensor=False)

                breed_info = get_dog_description(breed) or {}
                characteristics = {
                    'size': breed_info.get('Size', 'Medium'),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'temperament': breed_info.get('Temperament', ''),
                }

                built[breed] = BreedDescriptionVector(
                    breed_name=breed,
                    description_text=description,
                    embedding=embedding,
                    characteristics=characteristics,
                )

            self.breed_vectors.update(built)
            print(f"Successfully built {len(self.breed_vectors)} breed vectors")

        except Exception as e:
            print(f"Error building breed vectors: {str(e)}")
            print(traceback.format_exc())
            raise

    def get_breed_vectors(self) -> Dict[str, BreedDescriptionVector]:
        """Return the breed-vector mapping, building it first if it is empty."""
        if not self.breed_vectors:
            self._build_breed_vectors()
        return self.breed_vectors

    def get_sbert_model(self) -> Optional[SentenceTransformer]:
        """Return the currently loaded SBERT model, or ``None``; never triggers loading."""
        return self.sbert_model

    def get_breed_list(self) -> List[str]:
        """Return the list of breed names known to this manager."""
        return self.breed_list

    def is_model_available(self) -> bool:
        """Return ``True`` when an SBERT model is currently loaded."""
        return self.sbert_model is not None

    def encode_text(self, text: str) -> np.ndarray:
        """Encode ``text`` with the SBERT model, loading the model if needed.

        Raises:
            RuntimeError: if no SBERT model could be loaded.
        """
        if self.sbert_model is None:
            self._initialize_model()

        if self.sbert_model is None:
            raise RuntimeError("SBERT model not available")

        return self.sbert_model.encode(text, convert_to_tensor=False)