import random
import hashlib
import numpy as np
import sqlite3
import re
import traceback
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity
from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores
from query_understanding import QueryUnderstandingEngine, analyze_user_query
from constraint_manager import ConstraintManager, apply_breed_constraints
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
from score_calibrator import ScoreCalibrator, calibrate_breed_scores
from config_manager import get_config_manager, get_standardized_breed_data

class UserQueryAnalyzer:
    """
    用戶查詢分析器
    專門處理用戶輸入分析、生活方式關鍵字提取和偏好解析
    """

    def __init__(self, breed_list: List[str]):
        """初始化用戶查詢分析器"""
        self.breed_list = breed_list
        self.comparative_keywords = {
            'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8,
            'then': 0.7, 'second': 0.7, 'followed': 0.6,
            'third': 0.5, 'least': 0.3, 'dislike': 0.2
        }
        self.stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
            'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'i', 'me', 'my', 'myself',
            'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he',
            'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they',
            'them', 'their', 'theirs', 'themselves'
        }

    def parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
        """解析比較性偏好表達"""
        breed_scores = {}

        # 標準化輸入
        text = user_input.lower()

        # 找到品種名稱和偏好關鍵字
        for breed in self.breed_list:
            breed_display = breed.replace('_', ' ').lower()
            breed_words = breed_display.split()

            # 檢查是否提到此品種
            breed_mentioned = False
            for word in breed_words:
                if word in text:
                    breed_mentioned = True
                    break

            if breed_mentioned:
                # 在附近找到偏好關鍵字
                breed_score = 0.5  # 預設分數

                # 在品種名稱 50 字符內尋找關鍵字
                breed_pos = text.find(breed_words[0])
                if breed_pos != -1:
                    # 檢查背景中的關鍵字
                    context_start = max(0, breed_pos - 50)
                    context_end = min(len(text), breed_pos + 50)
                    context = text[context_start:context_end]

                    for keyword, score in self.comparative_keywords.items():
                        if keyword in context:
                            breed_score = max(breed_score, score)

                breed_scores[breed] = breed_score

        return breed_scores

    def extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
        """增強的生活方式關鍵字提取，具有更好的模式匹配"""
        keywords = {
            'living_space': [],
            'activity_level': [],
            'family_situation': [],
            'noise_preference': [],
            'size_preference': [],
            'care_level': [],
            'special_needs': [],
            'intelligence_preference': [],
            'grooming_preference': [],
            'lifespan_preference': [],
            'temperament_preference': [],
            'experience_level': []
        }

        text = user_input.lower()

        # 增強居住空間檢測
        apartment_terms = ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor']
        house_terms = ['house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm']

        if any(term in text for term in apartment_terms):
            keywords['living_space'].append('apartment')
        if any(term in text for term in house_terms):
            keywords['living_space'].append('house')

        # 增強活動水平檢測
        high_activity = ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging',
                        'athletic', 'adventure', 'vigorous', 'high energy', 'workout']
        low_activity = ['calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
                       'peaceful', 'quiet lifestyle', 'minimal exercise']
        moderate_activity = ['moderate', 'walk', 'daily walks', 'light exercise']

        if any(term in text for term in high_activity):
            keywords['activity_level'].append('high')
        if any(term in text for term in low_activity):
            keywords['activity_level'].append('low')
        if any(term in text for term in moderate_activity):
            keywords['activity_level'].append('moderate')

        # 增強家庭情況檢測
        children_terms = ['children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age']
        elderly_terms = ['elderly', 'senior', 'old', 'retirement', 'aged', 'mature']
        single_terms = ['single', 'alone', 'individual', 'solo', 'myself']

        if any(term in text for term in children_terms):
            keywords['family_situation'].append('children')
        if any(term in text for term in elderly_terms):
            keywords['family_situation'].append('elderly')
        if any(term in text for term in single_terms):
            keywords['family_situation'].append('single')

        # 增強噪音偏好檢測
        quiet_terms = ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise',
                      'soft-spoken', 'calm', 'tranquil']
        noise_ok_terms = ['loud', 'barking ok', 'noise tolerant', 'vocal', 'doesn\'t matter']

        if any(term in text for term in quiet_terms):
            keywords['noise_preference'].append('low')
        if any(term in text for term in noise_ok_terms):
            keywords['noise_preference'].append('high')

        # 增強體型偏好檢測
        small_terms = ['small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog']
        large_terms = ['large', 'big', 'giant', 'huge', 'massive', 'great']
        medium_terms = ['medium', 'moderate size', 'average', 'mid-sized']

        if any(term in text for term in small_terms):
            keywords['size_preference'].append('small')
        if any(term in text for term in large_terms):
            keywords['size_preference'].append('large')
        if any(term in text for term in medium_terms):
            keywords['size_preference'].append('medium')

        # 增強照護水平檢測
        low_care = ['low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go']
        high_care = ['high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing']

        if any(term in text for term in low_care):
            keywords['care_level'].append('low')
        if any(term in text for term in high_care):
            keywords['care_level'].append('high')

        # 智力偏好檢測（新增）
        smart_terms = ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train', 'trainable', 'genius', 'brilliant']
        independent_terms = ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves']

        if any(term in text for term in smart_terms):
            keywords['intelligence_preference'].append('high')
        if any(term in text for term in independent_terms):
            keywords['intelligence_preference'].append('independent')

        # 美容偏好檢測（新增）
        low_grooming_terms = ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat']
        high_grooming_terms = ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat', 'daily brushing', 'regular grooming']

        if any(term in text for term in low_grooming_terms):
            keywords['grooming_preference'].append('low')
        if any(term in text for term in high_grooming_terms):
            keywords['grooming_preference'].append('high')

        # 壽命偏好檢測（新增）
        long_lived_terms = ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity']
        healthy_terms = ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution']

        if any(term in text for term in long_lived_terms):
            keywords['lifespan_preference'].append('long')
        if any(term in text for term in healthy_terms):
            keywords['lifespan_preference'].append('healthy')

        # 氣質偏好檢測（新增）
        gentle_terms = ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile']
        playful_terms = ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy']
        protective_terms = ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive']
        friendly_terms = ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious']

        if any(term in text for term in gentle_terms):
            keywords['temperament_preference'].append('gentle')
        if any(term in text for term in playful_terms):
            keywords['temperament_preference'].append('playful')
        if any(term in text for term in protective_terms):
            keywords['temperament_preference'].append('protective')
        if any(term in text for term in friendly_terms):
            keywords['temperament_preference'].append('friendly')

        # 經驗水平檢測（新增）
        beginner_terms = ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced']
        advanced_terms = ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned']

        if any(term in text for term in beginner_terms):
            keywords['experience_level'].append('beginner')
        if any(term in text for term in advanced_terms):
            keywords['experience_level'].append('advanced')

        # 增強特殊需求檢測
        guard_terms = ['guard', 'protection', 'security', 'watchdog', 'protective', 'defender']
        companion_terms = ['therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly']
        hypoallergenic_terms = ['hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed']
        multi_pet_terms = ['good with cats', 'cat friendly', 'multi-pet', 'other animals']

        if any(term in text for term in guard_terms):
            keywords['special_needs'].append('guard')
        if any(term in text for term in companion_terms):
            keywords['special_needs'].append('companion')
        if any(term in text for term in hypoallergenic_terms):
            keywords['special_needs'].append('hypoallergenic')
        if any(term in text for term in multi_pet_terms):
            keywords['special_needs'].append('multi_pet')

        return keywords

    def preprocess_text(self, text: str) -> str:
        """預處理文本"""
        # 轉換為小寫
        text = text.lower()

        # 移除特殊字符，保留字母、數字和基本標點
        text = re.sub(r'[^\w\s\-\']', ' ', text)

        # 標準化空格
        text = ' '.join(text.split())

        return text

    def generate_search_keywords(self, text: str) -> List[str]:
        """
        為語義搜索生成關鍵字

        Args:
            text: 輸入文本

        Returns:
            關鍵字列表
        """
        text = self.preprocess_text(text)
        keywords = []

        try:
            # 分詞並過濾停用詞
            words = text.split()
            for word in words:
                if len(word) > 2 and word not in self.stop_words:
                    keywords.append(word)

            # 提取重要短語
            phrases = self._extract_phrases(text)
            keywords.extend(phrases)

            # 移除重複項
            keywords = list(set(keywords))

            return keywords

        except Exception as e:
            print(f"Error generating search keywords: {str(e)}")
            return []

    def _extract_phrases(self, text: str) -> List[str]:
        """
        提取重要短語

        Args:
            text: 輸入文本

        Returns:
            短語列表
        """
        phrases = []

        # 定義重要短語模式
        phrase_patterns = [
            r'good with \w+',
            r'apartment \w+',
            r'family \w+',
            r'exercise \w+',
            r'grooming \w+',
            r'noise \w+',
            r'training \w+',
            r'health \w+',
            r'\w+ friendly',
            r'\w+ tolerant',
            r'\w+ maintenance',
            r'\w+ energy',
            r'\w+ barking',
            r'\w+ shedding'
        ]

        for pattern in phrase_patterns:
            matches = re.findall(pattern, text)
            phrases.extend(matches)

        return phrases

    def analyze_sentiment(self, text: str) -> Dict[str, float]:
        """
        分析文本情感

        Args:
            text: 輸入文本

        Returns:
            情感分析結果
        """
        # 簡化的情感分析實現
        positive_words = [
            'love', 'like', 'prefer', 'enjoy', 'want', 'need', 'looking for',
            'good', 'great', 'excellent', 'perfect', 'wonderful', 'amazing'
        ]

        negative_words = [
            'hate', 'dislike', 'avoid', 'don\'t want', 'no', 'not',
            'bad', 'terrible', 'awful', 'horrible', 'worst', 'never'
        ]

        words = text.lower().split()
        positive_count = sum(1 for word in words if word in positive_words)
        negative_count = sum(1 for word in words if word in negative_words)
        total_words = len(words)

        if total_words == 0:
            return {'positive': 0.5, 'negative': 0.5, 'neutral': 0.0}

        positive_score = positive_count / total_words
        negative_score = negative_count / total_words
        neutral_score = max(0, 1 - positive_score - negative_score)

        return {
            'positive': positive_score,
            'negative': negative_score,
            'neutral': neutral_score
        }

    def parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
        """更準確地解析用戶需求"""
        requirements = {
            'living_space': None,
            'exercise_level': None,
            'preferred_size': None,
            'noise_tolerance': None
        }

        input_lower = user_input.lower()

        # 居住空間檢測
        if 'apartment' in input_lower or 'small' in input_lower:
            requirements['living_space'] = 'apartment'
        elif 'large house' in input_lower or 'big' in input_lower:
            requirements['living_space'] = 'large_house'
        elif 'medium' in input_lower:
            requirements['living_space'] = 'medium_house'

        # 運動水平檢測
        if "don't exercise" in input_lower or 'low exercise' in input_lower:
            requirements['exercise_level'] = 'low'
        elif any(term in input_lower for term in ['hiking', 'running', 'active']):
            requirements['exercise_level'] = 'high'
        elif '30 minutes' in input_lower or 'moderate' in input_lower:
            requirements['exercise_level'] = 'moderate'

        # 體型偏好檢測
        if any(term in input_lower for term in ['small dog', 'tiny', 'toy']):
            requirements['preferred_size'] = 'small'
        elif any(term in input_lower for term in ['large dog', 'big dog']):
            requirements['preferred_size'] = 'large'
        elif 'medium' in input_lower:
            requirements['preferred_size'] = 'medium'

        return requirements

    def analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
        """增強用戶描述分析"""
        text = user_description.lower()
        analysis = {
            'mentioned_breeds': [],
            'lifestyle_keywords': {},
            'preference_strength': {},
            'constraint_requirements': [],
            'user_context': {}
        }

        # 提取提及的品種
        for breed in self.breed_list:
            breed_display = breed.replace('_', ' ').lower()
            if breed_display in text or any(word in text for word in breed_display.split()):
                analysis['mentioned_breeds'].append(breed)
                # 簡單偏好強度分析
                if any(word in text for word in ['love', 'prefer', 'like', '喜歡', '最愛']):
                    analysis['preference_strength'][breed] = 0.8
                else:
                    analysis['preference_strength'][breed] = 0.5

        # 提取約束要求
        if any(word in text for word in ['quiet', 'silent', 'no barking', '安靜']):
            analysis['constraint_requirements'].append('low_noise')
        if any(word in text for word in ['apartment', 'small space', '公寓']):
            analysis['constraint_requirements'].append('apartment_suitable')
        if any(word in text for word in ['children', 'kids', 'family', '小孩']):
            analysis['constraint_requirements'].append('child_friendly')

        # 提取用戶背景
        analysis['user_context'] = {
            'has_children': any(word in text for word in ['children', 'kids', '小孩']),
            'living_space': 'apartment' if any(word in text for word in ['apartment', '公寓']) else 'house',
            'activity_level': 'high' if any(word in text for word in ['active', 'energetic', '活躍']) else 'moderate',
            'noise_sensitive': any(word in text for word in ['quiet', 'silent', '安靜']),
            'experience_level': 'beginner' if any(word in text for word in ['first time', 'beginner', '新手']) else 'intermediate'
        }

        return analysis

    def create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> 'UserPreferences':
        """從分析結果創建用戶偏好物件"""
        context = analysis['user_context']

        # 推斷居住空間類型
        living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small'

        # 推斷院子權限
        yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard'

        # 推斷運動時間
        activity_level = context.get('activity_level', 'moderate')
        exercise_time_map = {'high': 120, 'moderate': 60, 'low': 30}
        exercise_time = exercise_time_map.get(activity_level, 60)

        # 推斷運動類型
        exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'}
        exercise_type = exercise_type_map.get(activity_level, 'moderate_activity')

        # 推斷噪音容忍度
        noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium'

        return UserPreferences(
            living_space=living_space,
            yard_access=yard_access,
            exercise_time=exercise_time,
            exercise_type=exercise_type,
            grooming_commitment='medium',
            experience_level=context.get('experience_level', 'intermediate'),
            time_availability='moderate',
            has_children=context.get('has_children', False),
            children_age='school_age' if context.get('has_children', False) else None,
            noise_tolerance=noise_tolerance,
            space_for_play=(living_space != 'apartment'),
            other_pets=False,
            climate='moderate',
            health_sensitivity='medium',
            barking_acceptance=noise_tolerance,
            size_preference='no_preference'
        )

    def get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
        """獲取候選品種列表"""
        candidate_breeds = set()

        # 如果提及特定品種，優先包含
        if analysis['mentioned_breeds']:
            candidate_breeds.update(analysis['mentioned_breeds'])

        # 根據約束要求過濾品種
        if 'apartment_suitable' in analysis['constraint_requirements']:
            apartment_suitable = [
                'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
                'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu'
            ]
            candidate_breeds.update(breed for breed in apartment_suitable if breed in self.breed_list)

        if 'child_friendly' in analysis['constraint_requirements']:
            child_friendly = [
                'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel',
                'Bichon_Frise', 'Poodle', 'Cocker_Spaniel'
            ]
            candidate_breeds.update(breed for breed in child_friendly if breed in self.breed_list)

        # 如果候選品種不足，添加更多通用品種
        if len(candidate_breeds) < 20:
            general_breeds = [
                'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog',
                'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier',
                'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer',
                'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua'
            ]
            candidate_breeds.update(breed for breed in general_breeds if breed in self.breed_list)

        return list(candidate_breeds)[:30]  # 限制候選數量以提高效率