import random
import hashlib
import sqlite3
import re
import traceback
from dataclasses import dataclass
from functools import lru_cache
from typing import List, Dict, Tuple, Optional, Any

import numpy as np
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity

from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from scoring_calculation_system import (
    UserPreferences,
    calculate_compatibility_score,
    UnifiedScoringSystem,
    calculate_unified_breed_scores,
)
from query_understanding import QueryUnderstandingEngine, analyze_user_query
from constraint_manager import ConstraintManager, apply_breed_constraints
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
from score_calibrator import ScoreCalibrator, calibrate_breed_scores
from config_manager import get_config_manager, get_standardized_breed_data


@lru_cache(maxsize=256)
def _term_pattern(terms: Tuple[str, ...]) -> 're.Pattern':
    """Compile a regex matching any of *terms* in lowercased text.

    ASCII terms must begin at the start of a word (not preceded by
    ``[a-z0-9_]``), so 'experienced' no longer fires inside 'inexperienced'
    and 'urban' no longer fires inside 'suburban'. Inflected forms such as
    'likes' or 'walking' still match because no trailing boundary is required.
    Non-ASCII (e.g. CJK) terms keep plain substring semantics, because such
    text is not whitespace-delimited.
    """
    alternatives = []
    for term in terms:
        escaped = re.escape(term)
        if term.isascii():
            escaped = r'(?<![a-z0-9_])' + escaped
        alternatives.append(escaped)
    return re.compile('|'.join(alternatives))


def _mentions_any(text: str, terms) -> bool:
    """Return True when lowercased *text* mentions at least one of *terms*."""
    terms = tuple(terms)
    if not terms:
        return False
    return _term_pattern(terms).search(text) is not None


# Result keys of extract_lifestyle_keywords, in the order callers receive them.
_LIFESTYLE_CATEGORIES = (
    'living_space', 'activity_level', 'family_situation', 'noise_preference',
    'size_preference', 'care_level', 'special_needs', 'intelligence_preference',
    'grooming_preference', 'lifespan_preference', 'temperament_preference',
    'experience_level',
)

# (category, label, trigger terms). Rows of the same category are listed in
# the order their labels must appear in the output list.
_LIFESTYLE_TERMS = (
    ('living_space', 'apartment',
     ('apartment', 'flat', 'condo', 'small space', 'city living', 'urban',
      'no yard', 'indoor')),
    ('living_space', 'house',
     ('house', 'yard', 'garden', 'backyard', 'large space', 'suburban',
      'rural', 'farm')),
    ('activity_level', 'high',
     ('active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor',
      'sports', 'jogging', 'athletic', 'adventure', 'vigorous', 'high energy',
      'workout')),
    ('activity_level', 'low',
     ('calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
      'peaceful', 'quiet lifestyle', 'minimal exercise')),
    ('activity_level', 'moderate',
     ('moderate', 'walk', 'daily walks', 'light exercise')),
    ('family_situation', 'children',
     ('children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage',
      'school age')),
    ('family_situation', 'elderly',
     ('elderly', 'senior', 'old', 'retirement', 'aged', 'mature')),
    ('family_situation', 'single',
     ('single', 'alone', 'individual', 'solo', 'myself')),
    ('noise_preference', 'low',
     ('quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking',
      'minimal noise', 'soft-spoken', 'calm', 'tranquil')),
    ('noise_preference', 'high',
     ('loud', 'barking ok', 'noise tolerant', 'vocal', "doesn't matter")),
    ('size_preference', 'small',
     ('small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog')),
    ('size_preference', 'large',
     ('large', 'big', 'giant', 'huge', 'massive', 'great')),
    ('size_preference', 'medium',
     ('medium', 'moderate size', 'average', 'mid-sized')),
    ('care_level', 'low',
     ('low maintenance', 'easy care', 'simple', 'minimal grooming',
      'wash and go')),
    ('care_level', 'high',
     ('high maintenance', 'grooming', 'care intensive',
      'professional grooming', 'daily brushing')),
    ('intelligence_preference', 'high',
     ('smart', 'intelligent', 'clever', 'bright', 'quick learner',
      'easy to train', 'trainable', 'genius', 'brilliant')),
    ('intelligence_preference', 'independent',
     ('independent', 'stubborn', 'strong-willed', 'less trainable',
      'thinks for themselves')),
    ('grooming_preference', 'low',
     ('low grooming', 'minimal grooming', 'easy care', 'wash and wear',
      'no grooming', 'simple coat')),
    ('grooming_preference', 'high',
     ('high grooming', 'professional grooming', 'lots of care',
      'high maintenance coat', 'daily brushing', 'regular grooming')),
    ('lifespan_preference', 'long',
     ('long lived', 'long lifespan', 'live long', 'many years', '15+ years',
      'longevity')),
    ('lifespan_preference', 'healthy',
     ('healthy breed', 'few health issues', 'robust', 'hardy',
      'strong constitution')),
    ('temperament_preference', 'gentle',
     ('gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile')),
    ('temperament_preference', 'playful',
     ('playful', 'energetic', 'fun', 'active personality', 'lively',
      'spirited', 'bouncy')),
    ('temperament_preference', 'protective',
     ('protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive')),
    ('temperament_preference', 'friendly',
     ('friendly', 'social', 'outgoing', 'loves people', 'sociable',
      'gregarious')),
    ('experience_level', 'beginner',
     ('first time', 'beginner', 'new to dogs', 'never had', 'novice',
      'inexperienced')),
    ('experience_level', 'advanced',
     ('experienced', 'advanced', 'dog expert', 'many dogs before',
      'professional', 'seasoned')),
    ('special_needs', 'guard',
     ('guard', 'protection', 'security', 'watchdog', 'protective',
      'defender')),
    ('special_needs', 'companion',
     ('therapy', 'emotional support', 'companion', 'comfort', 'lap dog',
      'cuddly')),
    ('special_needs', 'hypoallergenic',
     ('hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly',
      'no shed')),
    ('special_needs', 'multi_pet',
     ('good with cats', 'cat friendly', 'multi-pet', 'other animals')),
)

# Phrase shapes worth keeping intact as search keywords.
_PHRASE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'good with \w+',
    r'apartment \w+',
    r'family \w+',
    r'exercise \w+',
    r'grooming \w+',
    r'noise \w+',
    r'training \w+',
    r'health \w+',
    r'\w+ friendly',
    r'\w+ tolerant',
    r'\w+ maintenance',
    r'\w+ energy',
    r'\w+ barking',
    r'\w+ shedding',
))

_POSITIVE_TERMS = (
    'love', 'like', 'prefer', 'enjoy', 'want', 'need', 'looking for',
    'good', 'great', 'excellent', 'perfect', 'wonderful', 'amazing',
)
_NEGATIVE_TERMS = (
    'hate', 'dislike', 'avoid', "don't want", 'no', 'not',
    'bad', 'terrible', 'awful', 'horrible', 'worst', 'never',
)
_SENTIMENT_POLARITY = dict.fromkeys(_POSITIVE_TERMS, 'positive')
_SENTIMENT_POLARITY.update(dict.fromkeys(_NEGATIVE_TERMS, 'negative'))
# Longest alternatives first so "don't want" is consumed as one negative
# phrase instead of leaving its "want" to be counted as positive.
_SENTIMENT_PATTERN = re.compile(
    r"(?<![\w'])(?:"
    + '|'.join(re.escape(term)
               for term in sorted(_SENTIMENT_POLARITY, key=len, reverse=True))
    + r")(?![\w'])"
)
_TOKEN_PATTERN = re.compile(r"[\w']+")

# "small dog", "bigger breed", "medium-sized puppy": a size adjective that
# describes the animal, not the home.
_DOG_SIZE_PHRASE = re.compile(
    r'\b(?:small(?:er|ish)?|big(?:ger)?|medium)(?:[- ]sized?)?\s+'
    r'(?:dogs?|breeds?|pupp(?:y|ies)|pets?)\b'
)
# "medium house", "medium-sized apartment": 'medium' describing the home.
_DWELLING_SIZE_PHRASE = re.compile(
    r'\bmedium(?:[- ]sized?)?\s+'
    r'(?:house|home|apartment|flat|condo|space|yard)s?\b'
)


class UserQueryAnalyzer:
    """
    User query analyzer.

    Handles free-text user input: comparative breed preferences, lifestyle
    keyword extraction, simple sentiment scoring and preference parsing.
    All matching is keyword based and expects English text (a few Chinese
    trigger words are recognised in analyze_user_description_enhanced).
    """

    def __init__(self, breed_list: List[str]):
        """Initialize the analyzer.

        Args:
            breed_list: Breed identifiers; underscores are treated as word
                separators when looking for mentions (e.g. 'Golden_Retriever').
        """
        self.breed_list = breed_list
        # Preference keyword -> score assigned to a breed mentioned nearby.
        self.comparative_keywords = {
            'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8,
            'then': 0.7, 'second': 0.7, 'followed': 0.6,
            'third': 0.5, 'least': 0.3, 'dislike': 0.2
        }
        # Words ignored by generate_search_keywords.
        self.stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
            'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into',
            'through', 'during', 'before', 'after', 'above', 'below',
            'is', 'are', 'was', 'were', 'be', 'been', 'being',
            'have', 'has', 'had', 'do', 'does', 'did',
            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can',
            'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
            'you', 'your', 'yours', 'yourself', 'yourselves',
            'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
            'it', 'its', 'itself',
            'they', 'them', 'their', 'theirs', 'themselves'
        }

    def parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
        """Parse comparative preference expressions.

        A breed counts as mentioned when any word of its name occurs in the
        input. Preference keywords found within 50 characters either side of
        that mention decide the score.

        Args:
            user_input: Raw user text.

        Returns:
            Mapping of mentioned breed -> score. The score is the highest
            score among the keywords found near the mention, or 0.5 when no
            keyword is nearby.
        """
        text = user_input.lower()
        breed_scores: Dict[str, float] = {}

        for breed in self.breed_list:
            breed_words = breed.replace('_', ' ').lower().split()

            # Anchor on the first word of the breed name that actually occurs
            # (not blindly on breed_words[0], which may be absent).
            mention_pos = next(
                (pos for pos in (text.find(word) for word in breed_words)
                 if pos != -1),
                -1,
            )
            if mention_pos == -1:
                continue

            window_start = max(0, mention_pos - 50)
            window_end = min(len(text), mention_pos + 50)

            # Match against the full text and filter by position so a window
            # edge cannot turn the tail of 'dislike' into a fake 'like'.
            matched_scores = [
                score
                for keyword, score in self.comparative_keywords.items()
                if any(
                    window_start <= match.start() and match.end() <= window_end
                    for match in _term_pattern((keyword,)).finditer(text)
                )
            ]
            # No floor at 0.5 here: otherwise 'least'/'dislike' could never
            # lower a score.
            breed_scores[breed] = max(matched_scores) if matched_scores else 0.5

        return breed_scores

    def extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
        """Extract lifestyle keywords grouped by category.

        Terms are matched at word starts, so 'inexperienced' is not also
        reported as 'advanced' and 'suburban' is not also reported as an
        apartment ('urban') setting.

        Args:
            user_input: Raw user text.

        Returns:
            Dict with one list per category (always present, possibly empty)
            holding the labels whose trigger terms were found.
        """
        text = user_input.lower()
        keywords: Dict[str, List[str]] = {
            category: [] for category in _LIFESTYLE_CATEGORIES
        }
        for category, label, terms in _LIFESTYLE_TERMS:
            if _mentions_any(text, terms):
                keywords[category].append(label)
        return keywords

    def preprocess_text(self, text: str) -> str:
        """Lowercase *text*, drop special characters and collapse whitespace.

        Word characters, whitespace, hyphens and apostrophes are kept; every
        other character becomes a space.
        """
        lowered = text.lower()
        cleaned = re.sub(r'[^\w\s\-\']', ' ', lowered)
        return ' '.join(cleaned.split())

    def generate_search_keywords(self, text: str) -> List[str]:
        """
        Generate keywords for semantic search.

        Args:
            text: Input text.

        Returns:
            De-duplicated keywords in first-seen order: single words (longer
            than two characters, not stop words) followed by extracted
            phrases. Returns an empty list if extraction fails.
        """
        text = self.preprocess_text(text)

        try:
            words = [
                word for word in text.split()
                if len(word) > 2 and word not in self.stop_words
            ]
            candidates = words + self._extract_phrases(text)
            # dict.fromkeys de-duplicates while keeping a stable order
            # (list(set(...)) changed order from run to run).
            return list(dict.fromkeys(candidates))
        except Exception as e:
            # Deliberate best-effort: keyword extraction must never break the
            # caller's search flow.
            print(f"Error generating search keywords: {str(e)}")
            return []

    def _extract_phrases(self, text: str) -> List[str]:
        """
        Extract important two-word phrases.

        Args:
            text: Input text (expected to be preprocessed/lowercased).

        Returns:
            All matches of the known phrase shapes, grouped by pattern.
        """
        phrases: List[str] = []
        for pattern in _PHRASE_PATTERNS:
            phrases.extend(pattern.findall(text))
        return phrases

    def analyze_sentiment(self, text: str) -> Dict[str, float]:
        """
        Score the sentiment of *text* with a small word/phrase lexicon.

        Punctuation is ignored and multi-word terms ('looking for',
        "don't want") are matched as phrases; a word consumed by a phrase is
        not counted again on its own.

        Args:
            text: Input text.

        Returns:
            Dict with 'positive', 'negative' and 'neutral' shares of the word
            count. Text without any words yields 0.5 / 0.5 / 0.0.
        """
        # Normalize the typographic apostrophe so "don’t" matches "don't".
        normalized = text.lower().replace('\u2019', "'")
        tokens = _TOKEN_PATTERN.findall(normalized)
        total_words = len(tokens)

        if total_words == 0:
            return {'positive': 0.5, 'negative': 0.5, 'neutral': 0.0}

        counts = {'positive': 0, 'negative': 0}
        for match in _SENTIMENT_PATTERN.finditer(' '.join(tokens)):
            counts[_SENTIMENT_POLARITY[match.group(0)]] += 1

        positive_score = counts['positive'] / total_words
        negative_score = counts['negative'] / total_words
        neutral_score = max(0.0, 1 - positive_score - negative_score)

        return {
            'positive': positive_score,
            'negative': negative_score,
            'neutral': neutral_score
        }

    def parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
        """Parse explicit requirements from the user's text.

        Size adjectives that describe the dog ('small dog', 'big breed') are
        not read as living-space hints, and 'medium house' is not read as a
        dog-size preference.

        Args:
            user_input: Raw user text.

        Returns:
            Dict with keys 'living_space', 'exercise_level', 'preferred_size'
            and 'noise_tolerance'; each value is None when nothing relevant
            was detected.
        """
        requirements: Dict[str, Any] = {
            'living_space': None,
            'exercise_level': None,
            'preferred_size': None,
            'noise_tolerance': None
        }

        input_lower = user_input.lower()

        # Living space: ignore size words that belong to the dog.
        space_text = _DOG_SIZE_PHRASE.sub(' ', input_lower)
        if 'apartment' in space_text or 'small' in space_text:
            requirements['living_space'] = 'apartment'
        elif 'large house' in space_text or 'big' in space_text:
            requirements['living_space'] = 'large_house'
        elif 'medium' in space_text:
            requirements['living_space'] = 'medium_house'

        # Exercise level (word-start match so 'inactive' is not 'active').
        if "don't exercise" in input_lower or 'low exercise' in input_lower:
            requirements['exercise_level'] = 'low'
        elif _mentions_any(input_lower, ('hiking', 'running', 'active')):
            requirements['exercise_level'] = 'high'
        elif '30 minutes' in input_lower or 'moderate' in input_lower:
            requirements['exercise_level'] = 'moderate'

        # Size preference: ignore 'medium' that describes the dwelling.
        size_text = _DWELLING_SIZE_PHRASE.sub(' ', input_lower)
        if any(term in input_lower for term in ('small dog', 'tiny', 'toy')):
            requirements['preferred_size'] = 'small'
        elif any(term in input_lower for term in ('large dog', 'big dog')):
            requirements['preferred_size'] = 'large'
        elif 'medium' in size_text:
            requirements['preferred_size'] = 'medium'

        # Noise tolerance: only set from explicit statements.
        if _mentions_any(input_lower, ('quiet', 'silent', 'no barking')):
            requirements['noise_tolerance'] = 'low'
        elif _mentions_any(input_lower, ('barking ok', 'noise tolerant')):
            requirements['noise_tolerance'] = 'high'

        return requirements

    def analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
        """Analyze a free-text user description.

        Args:
            user_description: Raw user text (English, with a few Chinese
                trigger words recognised).

        Returns:
            Dict with:
              'mentioned_breeds': breeds whose name, or any word of it,
                  occurs in the text;
              'lifestyle_keywords': result of extract_lifestyle_keywords;
              'preference_strength': breed -> 0.8 when the text contains a
                  preference word, else 0.5;
              'constraint_requirements': subset of 'low_noise',
                  'apartment_suitable', 'child_friendly';
              'user_context': inferred has_children, living_space,
                  activity_level, noise_sensitive and experience_level.
        """
        text = user_description.lower()

        analysis: Dict[str, Any] = {
            'mentioned_breeds': [],
            'lifestyle_keywords': self.extract_lifestyle_keywords(user_description),
            'preference_strength': {},
            'constraint_requirements': [],
            'user_context': {}
        }

        # The preference check looks at the whole text, so evaluate it once.
        # Word-start matching keeps 'dislike' from counting as 'like'.
        shows_preference = _mentions_any(
            text, ('love', 'prefer', 'like', '喜歡', '最愛'))
        strength = 0.8 if shows_preference else 0.5

        # Collect mentioned breeds.
        for breed in self.breed_list:
            breed_display = breed.replace('_', ' ').lower()
            if breed_display in text or any(
                    word in text for word in breed_display.split()):
                analysis['mentioned_breeds'].append(breed)
                analysis['preference_strength'][breed] = strength

        # Collect constraint requirements.
        wants_quiet = _mentions_any(text, ('quiet', 'silent', 'no barking', '安靜'))
        if wants_quiet:
            analysis['constraint_requirements'].append('low_noise')
        if _mentions_any(text, ('apartment', 'small space', '公寓')):
            analysis['constraint_requirements'].append('apartment_suitable')
        if _mentions_any(text, ('children', 'kids', 'family', '小孩')):
            analysis['constraint_requirements'].append('child_friendly')

        # Derive user context.
        in_apartment = _mentions_any(text, ('apartment', '公寓'))
        is_active = _mentions_any(text, ('active', 'energetic', '活躍'))
        is_beginner = _mentions_any(text, ('first time', 'beginner', '新手'))
        analysis['user_context'] = {
            'has_children': _mentions_any(text, ('children', 'kids', '小孩')),
            'living_space': 'apartment' if in_apartment else 'house',
            'activity_level': 'high' if is_active else 'moderate',
            'noise_sensitive': _mentions_any(text, ('quiet', 'silent', '安靜')),
            'experience_level': 'beginner' if is_beginner else 'intermediate'
        }

        return analysis

    def create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> 'UserPreferences':
        """Build a UserPreferences object from an analysis result.

        Args:
            analysis: Dict containing a 'user_context' mapping, as produced
                by analyze_user_description_enhanced.

        Returns:
            UserPreferences populated from the context; fields the context
            does not cover get fixed defaults.

        Raises:
            KeyError: If *analysis* has no 'user_context' entry.
        """
        context = analysis['user_context']

        # Living space and the yard access that follows from it.
        in_apartment = context.get('living_space') == 'apartment'
        living_space = 'apartment' if in_apartment else 'house_small'
        yard_access = 'no_yard' if in_apartment else 'shared_yard'

        # Exercise time and type follow the activity level.
        activity_level = context.get('activity_level', 'moderate')
        exercise_time = {'high': 120, 'moderate': 60, 'low': 30}.get(activity_level, 60)
        exercise_type = {
            'high': 'active_training',
            'moderate': 'moderate_activity',
            'low': 'light_walks'
        }.get(activity_level, 'moderate_activity')

        noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium'
        has_children = context.get('has_children', False)

        return UserPreferences(
            living_space=living_space,
            yard_access=yard_access,
            exercise_time=exercise_time,
            exercise_type=exercise_type,
            grooming_commitment='medium',
            experience_level=context.get('experience_level', 'intermediate'),
            time_availability='moderate',
            has_children=has_children,
            children_age='school_age' if has_children else None,
            noise_tolerance=noise_tolerance,
            space_for_play=(living_space != 'apartment'),
            other_pets=False,
            climate='moderate',
            health_sensitivity='medium',
            barking_acceptance=noise_tolerance,
            size_preference='no_preference'
        )

    def get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
        """Build the candidate breed list for scoring.

        Candidates are collected in priority order: explicitly mentioned
        breeds, then breeds matching the constraint requirements, then
        (when fewer than 20 were found) a general-purpose list. The result
        keeps that order, so truncation drops the lowest-priority entries.

        Args:
            analysis: Dict with 'mentioned_breeds' and
                'constraint_requirements' entries.

        Returns:
            At most 30 unique breed names.
        """
        known_breeds = set(self.breed_list)
        # A dict is used as an insertion-ordered set.
        candidates: Dict[str, None] = dict.fromkeys(analysis['mentioned_breeds'] or ())

        def add_known(breeds):
            candidates.update(
                dict.fromkeys(breed for breed in breeds if breed in known_breeds))

        constraints = analysis['constraint_requirements']

        if 'apartment_suitable' in constraints:
            add_known((
                'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
                'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier',
                'Shih_Tzu'
            ))

        if 'child_friendly' in constraints:
            add_known((
                'Labrador_Retriever', 'Golden_Retriever', 'Beagle',
                'Cavalier_King_Charles_Spaniel', 'Bichon_Frise', 'Poodle',
                'Cocker_Spaniel'
            ))

        # Too few candidates: pad with general-purpose breeds.
        if len(candidates) < 20:
            add_known((
                'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
                'French_Bulldog', 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler',
                'Yorkshire_Terrier', 'Boston_Terrier', 'Border_Collie',
                'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer',
                'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua'
            ))

        # Cap the candidate count to keep scoring cheap.
        return list(candidates)[:30]