diff --git "a/semantic_breed_recommender.py" "b/semantic_breed_recommender.py" --- "a/semantic_breed_recommender.py" +++ "b/semantic_breed_recommender.py" @@ -18,722 +18,134 @@ from constraint_manager import ConstraintManager, apply_breed_constraints from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore from score_calibrator import ScoreCalibrator, calibrate_breed_scores from config_manager import get_config_manager, get_standardized_breed_data - -@dataclass -class BreedDescriptionVector: - """Data structure for breed description vectorization""" - breed_name: str - description_text: str - embedding: np.ndarray - characteristics: Dict[str, Any] +from semantic_vector_manager import SemanticVectorManager, BreedDescriptionVector +from user_query_analyzer import UserQueryAnalyzer +from matching_score_calculator import MatchingScoreCalculator class SemanticBreedRecommender: """ - Enhanced SBERT-based semantic breed recommendation system - Provides multi-dimensional natural language understanding for dog breed recommendations + 增強的基於 SBERT 的語義品種推薦系統 (Facade Pattern) + 為狗品種推薦提供多維度自然語言理解 """ def __init__(self): - """Initialize the semantic recommender""" - self.model_name = 'all-MiniLM-L6-v2' # Efficient SBERT model - self.sbert_model = None - self._sbert_loading_attempted = False - self.breed_vectors = {} - self.breed_list = self._get_breed_list() - self.comparative_keywords = { - 'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8, - 'then': 0.7, 'second': 0.7, 'followed': 0.6, - 'third': 0.5, 'least': 0.3, 'dislike': 0.2 - } - # Defer SBERT model loading until needed in GPU context - # This prevents CUDA initialization issues in ZeroGPU environment - print("SemanticBreedRecommender initialized (SBERT loading deferred)") - - # Initialize multi-head scorer with SBERT model if enhanced mode is enabled - # if self.sbert_model: - # self.multi_head_scorer = MultiHeadScorer(self.sbert_model) - # print("Multi-head scorer initialized with SBERT 
model") - - def _get_breed_list(self) -> List[str]: - """Get breed list from database""" - try: - conn = sqlite3.connect('animal_detector.db') - cursor = conn.cursor() - cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog") - breeds = [row[0] for row in cursor.fetchall()] - cursor.close() - conn.close() - return breeds - except Exception as e: - print(f"Error getting breed list: {str(e)}") - # Backup breed list for Google Colab environment - return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', - 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier'] - - def _initialize_model(self): - """Initialize SBERT model with fallback - designed for ZeroGPU compatibility""" - if self.sbert_model is not None or self._sbert_loading_attempted: - return self.sbert_model - - try: - print("Loading SBERT model in GPU context...") - # Try different model names if the primary one fails - model_options = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2'] - - for model_name in model_options: - try: - # Specify device explicitly to handle ZeroGPU environment - import torch - device = 'cuda' if torch.cuda.is_available() else 'cpu' - self.sbert_model = SentenceTransformer(model_name, device=device) - self.model_name = model_name - print(f"SBERT model {model_name} loaded successfully on {device}") - return self.sbert_model - except Exception as model_e: - print(f"Failed to load {model_name}: {str(model_e)}") - continue - - # If all models fail - print("All SBERT models failed to load. 
Using basic text matching fallback.") - self.sbert_model = None - return None - - except Exception as e: - print(f"Failed to initialize any SBERT model: {str(e)}") - print(traceback.format_exc()) - print("Will provide basic text-based recommendations without embeddings") - self.sbert_model = None - return None - finally: - self._sbert_loading_attempted = True - - def _create_breed_description(self, breed: str) -> str: - """Create comprehensive natural language description for breed with all key characteristics""" - try: - # Get all information sources - breed_info = get_dog_description(breed) or {} - health_info = breed_health_info.get(breed, {}) if breed_health_info else {} - noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {} - - breed_display_name = breed.replace('_', ' ') - description_parts = [] - - # 1. Basic size and physical characteristics - size = breed_info.get('Size', 'medium').lower() - description_parts.append(f"{breed_display_name} is a {size} sized dog breed") - - # 2. Temperament and personality (critical for matching) - temperament = breed_info.get('Temperament', '') - if temperament: - description_parts.append(f"with a {temperament.lower()} temperament") - - # 3. Exercise and activity level (critical for apartment living) - exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() - if 'high' in exercise_needs or 'very high' in exercise_needs: - description_parts.append("requiring high daily exercise and mental stimulation") - elif 'low' in exercise_needs or 'minimal' in exercise_needs: - description_parts.append("with minimal exercise requirements, suitable for apartment living") - else: - description_parts.append("with moderate exercise needs") - - # 4. 
Noise characteristics (critical for quiet requirements) - noise_level = noise_info.get('noise_level', 'moderate').lower() - if 'low' in noise_level or 'quiet' in noise_level: - description_parts.append("known for being quiet and rarely barking") - elif 'high' in noise_level or 'loud' in noise_level: - description_parts.append("tends to be vocal and bark frequently") - else: - description_parts.append("with moderate barking tendencies") - - # 5. Living space compatibility - if size in ['small', 'tiny']: - description_parts.append("excellent for small apartments and limited spaces") - elif size in ['large', 'giant']: - description_parts.append("requiring large living spaces and preferably a yard") - else: - description_parts.append("adaptable to various living situations") - - # 6. Grooming and maintenance - grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() - if 'high' in grooming_needs: - description_parts.append("requiring regular professional grooming") - elif 'low' in grooming_needs: - description_parts.append("with minimal grooming requirements") - else: - description_parts.append("with moderate grooming needs") + """初始化語義品種推薦器""" + # 初始化語義向量管理器 + self.vector_manager = SemanticVectorManager() - # 7. Family compatibility - good_with_children = breed_info.get('Good with Children', 'Yes') - if good_with_children == 'Yes': - description_parts.append("excellent with children and families") - else: - description_parts.append("better suited for adult households") - - # 8. 
Intelligence and trainability (from database description) - intelligence_keywords = [] - description_text = breed_info.get('Description', '').lower() - - if description_text: - # Extract intelligence indicators from description - if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']): - intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner']) - elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']): - intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train']) - else: - intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency']) - - # Extract working/purpose traits from description - if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']): - intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented']) - elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']): - intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused']) - - # Add intelligence context to description - if intelligence_keywords: - description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}") - - # 9. 
Special characteristics and purpose (enhanced with database mining) - if breed_info.get('Description'): - desc = breed_info.get('Description', '')[:150] # Increased to 150 chars for more context - if desc: - # Extract key traits from description for better semantic matching - desc_lower = desc.lower() - key_traits = [] - - # Extract key behavioral traits from description - if 'friendly' in desc_lower: - key_traits.append('friendly') - if 'gentle' in desc_lower: - key_traits.append('gentle') - if 'energetic' in desc_lower or 'active' in desc_lower: - key_traits.append('energetic') - if 'calm' in desc_lower or 'peaceful' in desc_lower: - key_traits.append('calm') - if 'protective' in desc_lower or 'guard' in desc_lower: - key_traits.append('protective') - - trait_text = f" and {', '.join(key_traits)}" if key_traits else "" - description_parts.append(f"Known for: {desc.lower()}{trait_text}") - - # 9. Care level requirements - try: - care_level = breed_info.get('Care Level', 'moderate') - if isinstance(care_level, str): - description_parts.append(f"requiring {care_level.lower()} overall care level") - else: - description_parts.append("requiring moderate overall care level") - except Exception as e: - print(f"Error processing care level for {breed}: {str(e)}") - description_parts.append("requiring moderate overall care level") - - # 10. Lifespan information - try: - lifespan = breed_info.get('Lifespan', '10-12 years') - if lifespan and isinstance(lifespan, str) and lifespan.strip(): - description_parts.append(f"with a typical lifespan of {lifespan}") - else: - description_parts.append("with a typical lifespan of 10-12 years") - except Exception as e: - print(f"Error processing lifespan for {breed}: {str(e)}") - description_parts.append("with a typical lifespan of 10-12 years") - - # Create comprehensive description - full_description = '. '.join(description_parts) + '.' 
- - # Add comprehensive keywords for better semantic matching - keywords = [] - - # Basic breed name keywords - keywords.extend([word.lower() for word in breed_display_name.split()]) - - # Temperament keywords - if temperament: - keywords.extend([word.lower().strip(',') for word in temperament.split()]) - - # Size-based keywords - if 'small' in size or 'tiny' in size: - keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap']) - elif 'large' in size or 'giant' in size: - keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor']) - else: - keywords.extend(['medium', 'moderate', 'average', 'balanced']) + # 初始化用戶查詢分析器 + self.query_analyzer = UserQueryAnalyzer(self.vector_manager.get_breed_list()) - # Activity level keywords - exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() - if 'high' in exercise_needs: - keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic']) - elif 'low' in exercise_needs: - keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary']) - else: - keywords.extend(['moderate', 'balanced', 'walks', 'regular']) + # 初始化匹配評分計算器 + self.score_calculator = MatchingScoreCalculator(self.vector_manager.get_breed_list()) - # Noise level keywords - noise_level = noise_info.get('noise_level', 'moderate').lower() - if 'quiet' in noise_level or 'low' in noise_level: - keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise']) - elif 'high' in noise_level or 'loud' in noise_level: - keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog']) - - # Living situation keywords - if size in ['small', 'tiny'] and 'low' in exercise_needs: - keywords.extend(['apartment', 'city', 'urban', 'small-space']) - if size in ['large', 'giant'] or 'high' in exercise_needs: - keywords.extend(['house', 'yard', 'suburban', 'rural', 'space']) - - # Family keywords - good_with_children = breed_info.get('Good with Children', 'Yes') - if 
good_with_children == 'Yes': - keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle']) - - # Intelligence and trainability keywords (from database description mining) - if intelligence_keywords: - keywords.extend([word.lower() for phrase in intelligence_keywords for word in phrase.split()]) - - # Grooming-based keywords (enhanced) - grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() - if 'high' in grooming_needs: - keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care']) - elif 'low' in grooming_needs: - keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go']) - else: - keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care']) - - # Lifespan-based keywords - lifespan = breed_info.get('Lifespan', '10-12 years') - if lifespan and isinstance(lifespan, str): - try: - # Extract years from lifespan string (e.g., "10-12 years" or "12-15 years") - import re - years = re.findall(r'\d+', lifespan) - if years: - avg_years = sum(int(y) for y in years) / len(years) - if avg_years >= 14: - keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan']) - elif avg_years <= 8: - keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care']) - else: - keywords.extend(['average-lifespan', 'moderate-longevity']) - except: - keywords.extend(['average-lifespan']) - - # Add keywords to description for better semantic matching - unique_keywords = list(set(keywords)) - keyword_text = ' '.join(unique_keywords) - full_description += f" Additional context: {keyword_text}" - - return full_description + # 保留原有屬性以維持向後兼容性 + self.model_name = self.vector_manager.model_name + self.sbert_model = self.vector_manager.get_sbert_model() + self.breed_vectors = self.vector_manager.get_breed_vectors() + self.breed_list = self.vector_manager.get_breed_list() + self.comparative_keywords = self.query_analyzer.comparative_keywords - except Exception as e: - 
print(f"Error creating description for {breed}: {str(e)}") - return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics." - - def _build_breed_vectors(self): - """Build vector representations for all breeds - called lazily when needed""" + # 初始化增強系統組件(如果可用) try: - print("Building breed vector database...") - - # Initialize model if not already done - if self.sbert_model is None: - self._initialize_model() - - # Skip if model is not available - if self.sbert_model is None: - print("SBERT model not available, skipping vector building") - return - - for breed in self.breed_list: - description = self._create_breed_description(breed) - - # Generate embedding vector - embedding = self.sbert_model.encode(description, convert_to_tensor=False) - - # Get breed characteristics - breed_info = get_dog_description(breed) - characteristics = { - 'size': breed_info.get('Size', 'Medium') if breed_info else 'Medium', - 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate') if breed_info else 'Moderate', - 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate') if breed_info else 'Moderate', - 'good_with_children': breed_info.get('Good with Children', 'Yes') if breed_info else 'Yes', - 'temperament': breed_info.get('Temperament', '') if breed_info else '' - } - - self.breed_vectors[breed] = BreedDescriptionVector( - breed_name=breed, - description_text=description, - embedding=embedding, - characteristics=characteristics - ) - - print(f"Successfully built {len(self.breed_vectors)} breed vectors") - - except Exception as e: - print(f"Error building breed vectors: {str(e)}") - print(traceback.format_exc()) - raise + self.query_engine = QueryUnderstandingEngine() + self.constraint_manager = ConstraintManager() + self.multi_head_scorer = None + self.score_calibrator = ScoreCalibrator() + self.config_manager = get_config_manager() + + # 如果 SBERT 模型可用,初始化多頭評分器 + if self.sbert_model: + self.multi_head_scorer = MultiHeadScorer(self.sbert_model) + 
print("Multi-head scorer initialized with SBERT model") + except ImportError: + print("Enhanced system components not available, using basic functionality") + self.query_engine = None + self.constraint_manager = None + self.multi_head_scorer = None + self.score_calibrator = None + self.config_manager = None def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]: - """Parse comparative preference expressions""" - breed_scores = {} - - # Normalize input - text = user_input.lower() - - # Find breed names and preference keywords - for breed in self.breed_list: - breed_display = breed.replace('_', ' ').lower() - breed_words = breed_display.split() - - # Check if this breed is mentioned - breed_mentioned = False - for word in breed_words: - if word in text: - breed_mentioned = True - break - - if breed_mentioned: - # Find nearby preference keywords - breed_score = 0.5 # Default score - - # Look for keywords within 50 characters of breed name - breed_pos = text.find(breed_words[0]) - if breed_pos != -1: - # Check for keywords in context - context_start = max(0, breed_pos - 50) - context_end = min(len(text), breed_pos + 50) - context = text[context_start:context_end] - - for keyword, score in self.comparative_keywords.items(): - if keyword in context: - breed_score = max(breed_score, score) - - breed_scores[breed] = breed_score - - return breed_scores + """解析比較性偏好表達""" + return self.query_analyzer.parse_comparative_preferences(user_input) def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]: - """Enhanced lifestyle keyword extraction with better pattern matching""" - keywords = { - 'living_space': [], - 'activity_level': [], - 'family_situation': [], - 'noise_preference': [], - 'size_preference': [], - 'care_level': [], - 'special_needs': [], - 'intelligence_preference': [], - 'grooming_preference': [], - 'lifespan_preference': [], - 'temperament_preference': [], - 'experience_level': [] - } - - text = user_input.lower() - - 
# Enhanced living space detection - apartment_terms = ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor'] - house_terms = ['house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm'] - - if any(term in text for term in apartment_terms): - keywords['living_space'].append('apartment') - if any(term in text for term in house_terms): - keywords['living_space'].append('house') - - # Enhanced activity level detection - high_activity = ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging', - 'athletic', 'adventure', 'vigorous', 'high energy', 'workout'] - low_activity = ['calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed', - 'peaceful', 'quiet lifestyle', 'minimal exercise'] - moderate_activity = ['moderate', 'walk', 'daily walks', 'light exercise'] - - if any(term in text for term in high_activity): - keywords['activity_level'].append('high') - if any(term in text for term in low_activity): - keywords['activity_level'].append('low') - if any(term in text for term in moderate_activity): - keywords['activity_level'].append('moderate') - - # Enhanced family situation detection - children_terms = ['children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age'] - elderly_terms = ['elderly', 'senior', 'old', 'retirement', 'aged', 'mature'] - single_terms = ['single', 'alone', 'individual', 'solo', 'myself'] - - if any(term in text for term in children_terms): - keywords['family_situation'].append('children') - if any(term in text for term in elderly_terms): - keywords['family_situation'].append('elderly') - if any(term in text for term in single_terms): - keywords['family_situation'].append('single') - - # Enhanced noise preference detection - quiet_terms = ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise', - 'soft-spoken', 'calm', 'tranquil'] - noise_ok_terms = ['loud', 'barking ok', 'noise tolerant', 'vocal', 
'doesn\'t matter'] - - if any(term in text for term in quiet_terms): - keywords['noise_preference'].append('low') - if any(term in text for term in noise_ok_terms): - keywords['noise_preference'].append('high') - - # Enhanced size preference detection - small_terms = ['small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog'] - large_terms = ['large', 'big', 'giant', 'huge', 'massive', 'great'] - medium_terms = ['medium', 'moderate size', 'average', 'mid-sized'] - - if any(term in text for term in small_terms): - keywords['size_preference'].append('small') - if any(term in text for term in large_terms): - keywords['size_preference'].append('large') - if any(term in text for term in medium_terms): - keywords['size_preference'].append('medium') - - # Enhanced care level detection - low_care = ['low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go'] - high_care = ['high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing'] - - if any(term in text for term in low_care): - keywords['care_level'].append('low') - if any(term in text for term in high_care): - keywords['care_level'].append('high') - - # Intelligence preference detection (NEW) - smart_terms = ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train', 'trainable', 'genius', 'brilliant'] - independent_terms = ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves'] - - if any(term in text for term in smart_terms): - keywords['intelligence_preference'].append('high') - if any(term in text for term in independent_terms): - keywords['intelligence_preference'].append('independent') - - # Grooming preference detection (NEW) - low_grooming_terms = ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat'] - high_grooming_terms = ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat', 'daily brushing', 'regular grooming'] - - if any(term 
in text for term in low_grooming_terms): - keywords['grooming_preference'].append('low') - if any(term in text for term in high_grooming_terms): - keywords['grooming_preference'].append('high') - - # Lifespan preference detection (NEW) - long_lived_terms = ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity'] - healthy_terms = ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution'] - - if any(term in text for term in long_lived_terms): - keywords['lifespan_preference'].append('long') - if any(term in text for term in healthy_terms): - keywords['lifespan_preference'].append('healthy') - - # Temperament preference detection (NEW) - gentle_terms = ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile'] - playful_terms = ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy'] - protective_terms = ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive'] - friendly_terms = ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious'] - - if any(term in text for term in gentle_terms): - keywords['temperament_preference'].append('gentle') - if any(term in text for term in playful_terms): - keywords['temperament_preference'].append('playful') - if any(term in text for term in protective_terms): - keywords['temperament_preference'].append('protective') - if any(term in text for term in friendly_terms): - keywords['temperament_preference'].append('friendly') - - # Experience level detection (NEW) - beginner_terms = ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced'] - advanced_terms = ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned'] - - if any(term in text for term in beginner_terms): - keywords['experience_level'].append('beginner') - if any(term in text for term in advanced_terms): - keywords['experience_level'].append('advanced') - - # Enhanced special needs detection - 
guard_terms = ['guard', 'protection', 'security', 'watchdog', 'protective', 'defender'] - companion_terms = ['therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly'] - hypoallergenic_terms = ['hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed'] - multi_pet_terms = ['good with cats', 'cat friendly', 'multi-pet', 'other animals'] - - if any(term in text for term in guard_terms): - keywords['special_needs'].append('guard') - if any(term in text for term in companion_terms): - keywords['special_needs'].append('companion') - if any(term in text for term in hypoallergenic_terms): - keywords['special_needs'].append('hypoallergenic') - if any(term in text for term in multi_pet_terms): - keywords['special_needs'].append('multi_pet') - - return keywords + """增強的生活方式關鍵字提取,具有更好的模式匹配""" + return self.query_analyzer.extract_lifestyle_keywords(user_input) def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]: - """Apply size distribution correction to prevent large breed bias""" - if len(recommendations) < 10: - return recommendations - - # Analyze size distribution - size_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0} - - for rec in recommendations: - breed_info = get_dog_description(rec['breed']) - if breed_info: - size = self._normalize_breed_size(breed_info.get('Size', 'Medium')) - size_counts[size] += 1 - - total_recs = len(recommendations) - large_giant_ratio = (size_counts['large'] + size_counts['giant']) / total_recs - - # If more than 70% are large/giant breeds, apply correction - if large_giant_ratio > 0.7: - corrected_recommendations = [] - size_quotas = {'toy': 2, 'small': 4, 'medium': 6, 'large': 2, 'giant': 1} - current_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0} - - # First pass: add breeds within quotas - for rec in recommendations: - breed_info = get_dog_description(rec['breed']) - if breed_info: - size = 
self._normalize_breed_size(breed_info.get('Size', 'Medium')) - if current_counts[size] < size_quotas[size]: - corrected_recommendations.append(rec) - current_counts[size] += 1 - - # Second pass: fill remaining slots with best remaining candidates - remaining_slots = 15 - len(corrected_recommendations) - remaining_breeds = [rec for rec in recommendations if rec not in corrected_recommendations] - - corrected_recommendations.extend(remaining_breeds[:remaining_slots]) - return corrected_recommendations - - return recommendations + """應用尺寸分佈修正以防止大型品種偏差""" + return self.score_calculator.apply_size_distribution_correction(recommendations) def _normalize_breed_size(self, size: str) -> str: - """Normalize breed size to standard categories""" - if not isinstance(size, str): - return 'medium' - - size_lower = size.lower() - if any(term in size_lower for term in ['toy', 'tiny']): - return 'toy' - elif 'small' in size_lower: - return 'small' - elif 'medium' in size_lower: - return 'medium' - elif 'large' in size_lower: - return 'large' - elif any(term in size_lower for term in ['giant', 'extra large']): - return 'giant' - else: - return 'medium' + """標準化品種尺寸到標準分類""" + return self.score_calculator._normalize_breed_size(size) def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]: - """Parse user requirements more accurately""" - requirements = { - 'living_space': None, - 'exercise_level': None, - 'preferred_size': None, - 'noise_tolerance': None - } - - input_lower = user_input.lower() - - # Living space detection - if 'apartment' in input_lower or 'small' in input_lower: - requirements['living_space'] = 'apartment' - elif 'large house' in input_lower or 'big' in input_lower: - requirements['living_space'] = 'large_house' - elif 'medium' in input_lower: - requirements['living_space'] = 'medium_house' - - # Exercise level detection - if "don't exercise" in input_lower or 'low exercise' in input_lower: - requirements['exercise_level'] = 'low' - elif any(term in 
input_lower for term in ['hiking', 'running', 'active']): - requirements['exercise_level'] = 'high' - elif '30 minutes' in input_lower or 'moderate' in input_lower: - requirements['exercise_level'] = 'moderate' - - # Size preference detection - if any(term in input_lower for term in ['small dog', 'tiny', 'toy']): - requirements['preferred_size'] = 'small' - elif any(term in input_lower for term in ['large dog', 'big dog']): - requirements['preferred_size'] = 'large' - elif 'medium' in input_lower: - requirements['preferred_size'] = 'medium' - - return requirements + """更準確地解析用戶需求""" + return self.query_analyzer.parse_user_requirements(user_input) def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float: - """Enhanced hard constraints with stricter penalties""" - penalty = 0.0 - user_text_lower = user_input.lower() + """增強硬約束,具有更嚴格的懲罰""" + return self.score_calculator.apply_hard_constraints(breed, user_input, breed_characteristics) - # Get breed information - breed_info = get_dog_description(breed) - if not breed_info: - return 0.0 - - breed_size = breed_info.get('Size', '').lower() - exercise_needs = breed_info.get('Exercise Needs', '').lower() - - # Apartment living constraints - MUCH STRICTER - if any(term in user_text_lower for term in ['apartment', 'flat', 'studio', 'small space']): - if 'giant' in breed_size: - return -2.0 # Complete elimination - elif 'large' in breed_size: - if any(term in exercise_needs for term in ['high', 'very high']): - return -2.0 # Complete elimination - else: - penalty -= 0.5 # Still significant penalty - elif 'medium' in breed_size and 'very high' in exercise_needs: - penalty -= 0.6 - - # Exercise mismatch constraints - if "don't exercise much" in user_text_lower or "low exercise" in user_text_lower: - if any(term in exercise_needs for term in ['very high', 'extreme', 'intense']): - return -2.0 # Complete elimination - elif 'high' in exercise_needs: - penalty -= 0.8 - - # 
Moderate lifestyle detection - if any(term in user_text_lower for term in ['moderate', 'balanced', '30 minutes', 'half hour']): - # Penalize extremes - if 'giant' in breed_size: - penalty -= 0.7 # Strong penalty for giants - elif 'very high' in exercise_needs: - penalty -= 0.5 - - # Children safety (existing logic remains but enhanced) - if any(term in user_text_lower for term in ['child', 'kids', 'family', 'baby']): - good_with_children = breed_info.get('Good with Children', '').lower() - if good_with_children == 'no': - return -2.0 # Complete elimination for safety - - return penalty + def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any], + lifestyle_keywords: Dict[str, List[str]]) -> float: + """增強生活方式匹配獎勵計算""" + return self.score_calculator.calculate_lifestyle_bonus(breed_characteristics, lifestyle_keywords) + + def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]: + """基於增強關鍵字提取和數據庫挖掘應用智能特徵匹配""" + return self.score_calculator.apply_intelligent_trait_matching(recommendations, user_input) + + def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]: + """將標準化品種信息轉換為字典格式""" + return self.score_calculator.get_breed_info_from_standardized(standardized_info) + + def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]: + """當增強系統失敗時獲取備用推薦""" + return self.score_calculator.get_fallback_recommendations(top_k) def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]: """ - Enhanced multi-dimensional semantic breed recommendation + 增強的多維度語義品種推薦 Args: - user_input: User's natural language description - top_k: Number of recommendations to return + user_input: 用戶的自然語言描述 + top_k: 返回的推薦數量 Returns: - List of recommended breeds with enhanced scoring + 增強評分的推薦品種列表 """ try: - # Stage 1: Query Understanding - dimensions = self.query_engine.analyze_query(user_input) - print(f"Query dimensions detected: 
{len(dimensions.spatial_constraints + dimensions.activity_level + dimensions.noise_preferences + dimensions.size_preferences + dimensions.family_context + dimensions.maintenance_level + dimensions.special_requirements)} total dimensions") - - # Stage 2: Apply Constraints - filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k)) - print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates") + # 階段 1: 查詢理解 + if self.query_engine: + dimensions = self.query_engine.analyze_query(user_input) + print(f"Query dimensions detected: {len(dimensions.spatial_constraints + dimensions.activity_level + dimensions.noise_preferences + dimensions.size_preferences + dimensions.family_context + dimensions.maintenance_level + dimensions.special_requirements)} total dimensions") + else: + print("Query engine not available, using basic analysis") + return self.get_semantic_recommendations(user_input, top_k) - if not filter_result.passed_breeds: - error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements." - print(f"ERROR: {error_msg}") - raise ValueError(error_msg) + # 階段 2: 應用約束 + if self.constraint_manager: + filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k)) + print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates") - # Stage 3: Multi-head Scoring + if not filter_result.passed_breeds: + error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements." 
+ print(f"ERROR: {error_msg}") + raise ValueError(error_msg) + else: + print("Constraint manager not available, using all breeds") + filter_result = type('FilterResult', (), { + 'passed_breeds': self.breed_list, + 'applied_constraints': [], + 'relaxed_constraints': [], + 'warnings': [] + })() + + # 階段 3: 多頭評分 if self.multi_head_scorer: breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions) print(f"Multi-head scoring completed for {len(breed_scores)} breeds") @@ -741,24 +153,34 @@ class SemanticBreedRecommender: print("Multi-head scorer not available, using fallback scoring") return self.get_semantic_recommendations(user_input, top_k) - # Stage 4: Score Calibration - breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores] - calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples) - print(f"Score calibration: method={calibration_result.calibration_method}") + # 階段 4: 分數校準 + if self.score_calibrator: + breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores] + calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples) + print(f"Score calibration: method={calibration_result.calibration_method}") + else: + print("Score calibrator not available, using raw scores") + calibration_result = type('CalibrationResult', (), { + 'score_mapping': {score.breed_name: score.final_score for score in breed_scores}, + 'calibration_method': 'none' + })() - # Stage 5: Generate Final Recommendations + # 階段 5: 生成最終推薦 final_recommendations = [] for i, breed_score in enumerate(breed_scores[:top_k]): breed_name = breed_score.breed_name - # Get calibrated score + # 獲取校準後的分數 calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score) - # Get standardized breed info - standardized_info = get_standardized_breed_data(breed_name.replace(' ', '_')) - if standardized_info: - breed_info = 
self._get_breed_info_from_standardized(standardized_info) + # 獲取標準化品種信息 + if self.config_manager: + standardized_info = get_standardized_breed_data(breed_name.replace(' ', '_')) + if standardized_info: + breed_info = self._get_breed_info_from_standardized(standardized_info) + else: + breed_info = get_dog_description(breed_name.replace(' ', '_')) or {} else: breed_info = get_dog_description(breed_name.replace(' ', '_')) or {} @@ -789,10 +211,10 @@ class SemanticBreedRecommender: final_recommendations.append(recommendation) - # Apply size distribution correction before returning + # 應用尺寸分佈修正 corrected_recommendations = self._apply_size_distribution_correction(final_recommendations) - # Stage 6: Apply Intelligent Trait Matching Enhancement + # 階段 6: 應用智能特徵匹配增強 intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input) print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching") @@ -801,232 +223,72 @@ class SemanticBreedRecommender: except Exception as e: print(f"Error in enhanced semantic recommendations: {str(e)}") print(traceback.format_exc()) - # Fallback to original method + # 回退到原始方法 return self.get_semantic_recommendations(user_input, top_k) - def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]: - """Apply intelligent trait matching based on enhanced keyword extraction and database mining""" - try: - # Extract enhanced keywords from user input - extracted_keywords = self._extract_lifestyle_keywords(user_input) - - # Apply intelligent trait matching to each recommendation - enhanced_recommendations = [] - - for rec in recommendations: - breed_name = rec['breed'].replace(' ', '_') - - # Get breed database information - breed_info = get_dog_description(breed_name) or {} - - # Calculate intelligent trait bonuses - intelligence_bonus = 0.0 - trait_match_details = {} - - # 1. 
Intelligence Matching - if extracted_keywords.get('intelligence_preference'): - intelligence_pref = extracted_keywords['intelligence_preference'][0] - breed_desc = breed_info.get('Description', '').lower() - - if intelligence_pref == 'high': - if any(word in breed_desc for word in ['intelligent', 'smart', 'clever', 'quick to learn', 'trainable']): - intelligence_bonus += 0.05 - trait_match_details['intelligence_match'] = 'High intelligence match detected' - elif any(word in breed_desc for word in ['stubborn', 'independent', 'difficult']): - intelligence_bonus -= 0.02 - trait_match_details['intelligence_warning'] = 'May be challenging to train' - - elif intelligence_pref == 'independent': - if any(word in breed_desc for word in ['independent', 'stubborn', 'strong-willed']): - intelligence_bonus += 0.03 - trait_match_details['independence_match'] = 'Independent nature match' - - # 2. Grooming Preference Matching - if extracted_keywords.get('grooming_preference'): - grooming_pref = extracted_keywords['grooming_preference'][0] - breed_grooming = breed_info.get('Grooming Needs', '').lower() - - if grooming_pref == 'low' and 'low' in breed_grooming: - intelligence_bonus += 0.03 - trait_match_details['grooming_match'] = 'Low maintenance grooming match' - elif grooming_pref == 'high' and 'high' in breed_grooming: - intelligence_bonus += 0.03 - trait_match_details['grooming_match'] = 'High maintenance grooming match' - elif grooming_pref == 'low' and 'high' in breed_grooming: - intelligence_bonus -= 0.04 - trait_match_details['grooming_mismatch'] = 'High grooming needs may not suit preferences' - - # 3. 
Temperament Preference Matching - if extracted_keywords.get('temperament_preference'): - temp_prefs = extracted_keywords['temperament_preference'] - breed_temperament = breed_info.get('Temperament', '').lower() - breed_desc = breed_info.get('Description', '').lower() - - temp_text = (breed_temperament + ' ' + breed_desc).lower() - - for temp_pref in temp_prefs: - if temp_pref == 'gentle' and any(word in temp_text for word in ['gentle', 'calm', 'peaceful', 'mild']): - intelligence_bonus += 0.04 - trait_match_details['temperament_match'] = f'Gentle temperament match: {temp_pref}' - elif temp_pref == 'playful' and any(word in temp_text for word in ['playful', 'energetic', 'lively', 'fun']): - intelligence_bonus += 0.04 - trait_match_details['temperament_match'] = f'Playful temperament match: {temp_pref}' - elif temp_pref == 'protective' and any(word in temp_text for word in ['protective', 'guard', 'alert', 'watchful']): - intelligence_bonus += 0.04 - trait_match_details['temperament_match'] = f'Protective temperament match: {temp_pref}' - elif temp_pref == 'friendly' and any(word in temp_text for word in ['friendly', 'social', 'outgoing', 'people']): - intelligence_bonus += 0.04 - trait_match_details['temperament_match'] = f'Friendly temperament match: {temp_pref}' - - # 4. 
Experience Level Matching - if extracted_keywords.get('experience_level'): - exp_level = extracted_keywords['experience_level'][0] - breed_desc = breed_info.get('Description', '').lower() - - if exp_level == 'beginner': - # Favor easy-to-handle breeds for beginners - if any(word in breed_desc for word in ['easy', 'gentle', 'good for beginners', 'family', 'calm']): - intelligence_bonus += 0.06 - trait_match_details['beginner_friendly'] = 'Good choice for first-time owners' - elif any(word in breed_desc for word in ['challenging', 'dominant', 'requires experience', 'strong-willed']): - intelligence_bonus -= 0.08 - trait_match_details['experience_warning'] = 'May be challenging for first-time owners' - - elif exp_level == 'advanced': - # Advanced users can handle more challenging breeds - if any(word in breed_desc for word in ['working', 'requires experience', 'intelligent', 'strong']): - intelligence_bonus += 0.03 - trait_match_details['advanced_suitable'] = 'Good match for experienced owners' - - # 5. 
Lifespan Preference Matching - if extracted_keywords.get('lifespan_preference'): - lifespan_pref = extracted_keywords['lifespan_preference'][0] - breed_lifespan = breed_info.get('Lifespan', '10-12 years') - - try: - import re - years = re.findall(r'\d+', breed_lifespan) - if years: - avg_years = sum(int(y) for y in years) / len(years) - if lifespan_pref == 'long' and avg_years >= 13: - intelligence_bonus += 0.02 - trait_match_details['longevity_match'] = f'Long lifespan match: {breed_lifespan}' - elif lifespan_pref == 'healthy' and avg_years >= 12: - intelligence_bonus += 0.02 - trait_match_details['health_match'] = f'Healthy lifespan: {breed_lifespan}' - except: - pass - - # Apply the intelligence bonus to the overall score - original_score = rec['overall_score'] - enhanced_score = min(1.0, original_score + intelligence_bonus) - - # Create enhanced recommendation with trait matching details - enhanced_rec = rec.copy() - enhanced_rec['overall_score'] = enhanced_score - enhanced_rec['intelligence_bonus'] = intelligence_bonus - enhanced_rec['trait_match_details'] = trait_match_details - - # Add detailed explanation if significant enhancement occurred - if abs(intelligence_bonus) > 0.02: - enhancement_explanation = [] - for detail_key, detail_value in trait_match_details.items(): - enhancement_explanation.append(detail_value) - - if enhancement_explanation: - current_explanation = enhanced_rec.get('explanation', '') - enhanced_explanation = current_explanation + f" Enhanced matching: {'; '.join(enhancement_explanation)}" - enhanced_rec['explanation'] = enhanced_explanation - - enhanced_recommendations.append(enhanced_rec) - - # Re-sort by enhanced overall score - enhanced_recommendations.sort(key=lambda x: x['overall_score'], reverse=True) - - # Update ranks - for i, rec in enumerate(enhanced_recommendations): - rec['rank'] = i + 1 - - print(f"Applied intelligent trait matching with average bonus: {sum(r['intelligence_bonus'] for r in enhanced_recommendations) / 
len(enhanced_recommendations):.3f}") - - return enhanced_recommendations - - except Exception as e: - print(f"Error in intelligent trait matching: {str(e)}") - # Return original recommendations if trait matching fails - return recommendations - def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]: """ - Get breed recommendations based on natural language description + 基於自然語言描述獲取品種推薦 Args: - user_input: User's natural language description - top_k: Number of recommendations to return + user_input: 用戶的自然語言描述 + top_k: 返回的推薦數量 Returns: - List of recommended breeds + 推薦品種列表 """ try: print(f"Processing user input: {user_input}") - # 嘗試載入SBERT模型(如果尚未載入) - if self.sbert_model is None: - self._initialize_model() - - # Check if model is available - if not, raise error + # 檢查模型是否可用 - 如果不可用,則報錯 if self.sbert_model is None: error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again." 
print(f"ERROR: {error_msg}") raise RuntimeError(error_msg) - # 確保breed vectors已建構 - if not self.breed_vectors: - self._build_breed_vectors() - - # Generate user input embedding - user_embedding = self.sbert_model.encode(user_input, convert_to_tensor=False) + # 生成用戶輸入嵌入 + user_embedding = self.vector_manager.encode_text(user_input) - # Parse comparative preferences + # 解析比較性偏好 comparative_prefs = self._parse_comparative_preferences(user_input) - # Extract lifestyle keywords + # 提取生活方式關鍵字 lifestyle_keywords = self._extract_lifestyle_keywords(user_input) - # Calculate similarity with all breeds and apply constraints + # 計算與所有品種的相似度並應用約束 similarities = [] for breed, breed_vector in self.breed_vectors.items(): - # Apply hard constraints first + # 首先應用硬約束 constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics) - # Skip breeds that violate critical constraints - if constraint_penalty <= -1.0: # Complete disqualification + # 跳過違反關鍵約束的品種 + if constraint_penalty <= -1.0: # 完全取消資格 continue - # Basic semantic similarity + # 基本語義相似度 semantic_score = cosine_similarity( [user_embedding], [breed_vector.embedding] )[0][0] - # Comparative preference weighting + # 比較性偏好加權 comparative_bonus = comparative_prefs.get(breed, 0.0) - # Lifestyle matching bonus + # 生活方式匹配獎勵 lifestyle_bonus = self._calculate_lifestyle_bonus( breed_vector.characteristics, lifestyle_keywords ) - # Apply constraint penalties + # 應用約束懲罰 lifestyle_bonus += constraint_penalty - # Enhanced combined score with better distribution - # Apply exponential scaling to create more natural score spread - base_semantic = semantic_score ** 0.8 # Slightly compress high scores - enhanced_lifestyle = lifestyle_bonus * 2.0 # Amplify lifestyle matching - enhanced_comparative = comparative_bonus * 1.5 # Amplify breed preferences + # 更好分佈的增強組合分數 + # 應用指數縮放以創建更自然的分數分佈 + base_semantic = semantic_score ** 0.8 # 輕微壓縮高分 + enhanced_lifestyle = lifestyle_bonus * 2.0 # 放大生活方式匹配 + 
enhanced_comparative = comparative_bonus * 1.5 # 放大品種偏好 final_score = ( base_semantic * 0.55 + @@ -1034,11 +296,11 @@ class SemanticBreedRecommender: enhanced_lifestyle * 0.15 ) - # Add small random variation to break ties naturally - random.seed(hash(breed)) # Consistent for same breed + # 添加小的隨機變化以自然地打破平局 + random.seed(hash(breed)) # 對相同品種保持一致 final_score += random.uniform(-0.03, 0.03) - # Ensure final score doesn't exceed 1.0 + # 確保最終分數不超過 1.0 final_score = min(1.0, final_score) similarities.append({ @@ -1049,10 +311,10 @@ class SemanticBreedRecommender: 'lifestyle_bonus': lifestyle_bonus }) - # Calculate standardized display scores with balanced distribution + # 計算平衡分佈的標準化顯示分數 breed_display_scores = [] - # First, collect all semantic scores for normalization + # 首先,收集所有語義分數以進行標準化 all_semantic_scores = [breed_data['semantic_score'] for breed_data in similarities] semantic_mean = np.mean(all_semantic_scores) semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0 @@ -1061,24 +323,24 @@ class SemanticBreedRecommender: breed = breed_data['breed'] base_semantic = breed_data['semantic_score'] - # Normalize semantic score to prevent extreme outliers + # 標準化語義分數以防止極端異常值 if semantic_std > 0: normalized_semantic = (base_semantic - semantic_mean) / semantic_std - normalized_semantic = max(-2.0, min(2.0, normalized_semantic)) # Cap at 2 standard deviations - scaled_semantic = 0.5 + (normalized_semantic * 0.1) # Map to 0.3-0.7 range + normalized_semantic = max(-2.0, min(2.0, normalized_semantic)) # 限制在 2 個標準差 + scaled_semantic = 0.5 + (normalized_semantic * 0.1) # 映射到 0.3-0.7 範圍 else: scaled_semantic = 0.5 - # Get breed characteristics + # 獲取品種特徵 breed_info = get_dog_description(breed) if breed != 'Unknown' else {} breed_size = breed_info.get('Size', '').lower() if breed_info else '' exercise_needs = breed_info.get('Exercise Needs', '').lower() if breed_info else '' - # Calculate feature matching score (more important than pure semantic 
similarity) + # 計算特徵匹配分數(比純語義相似度更重要) feature_score = 0.0 user_text = user_input.lower() - # Size and space requirements (high weight) + # 尺寸和空間需求(高權重) if any(term in user_text for term in ['apartment', 'small', 'limited space']): if 'small' in breed_size: feature_score += 0.25 @@ -1087,7 +349,7 @@ class SemanticBreedRecommender: elif 'large' in breed_size or 'giant' in breed_size: feature_score -= 0.30 - # Exercise requirements (high weight) + # 運動需求(高權重) if any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much']): if 'low' in exercise_needs or 'minimal' in exercise_needs: feature_score += 0.20 @@ -1099,7 +361,7 @@ class SemanticBreedRecommender: elif 'low' in exercise_needs: feature_score -= 0.15 - # Family compatibility + # 家庭相容性 if any(term in user_text for term in ['children', 'kids', 'family']): good_with_children = breed_info.get('Good with Children', '') if breed_info else '' if good_with_children == 'Yes': @@ -1107,42 +369,42 @@ class SemanticBreedRecommender: elif good_with_children == 'No': feature_score -= 0.20 - # Combine scores with balanced weights + # 平衡權重組合分數 final_score = ( - scaled_semantic * 0.35 + # Reduced semantic weight - feature_score * 0.45 + # Increased feature matching weight + scaled_semantic * 0.35 + # 降低語義權重 + feature_score * 0.45 + # 增加特徵匹配權重 breed_data['lifestyle_bonus'] * 0.15 + breed_data['comparative_bonus'] * 0.05 ) - # Calculate base compatibility score + # 計算基本相容性分數 base_compatibility = final_score - # Apply dynamic scoring with natural distribution - if base_compatibility >= 0.9: # Exceptional matches + # 應用自然分佈的動態評分 + if base_compatibility >= 0.9: # 例外匹配 score_range = (0.92, 0.98) position = (base_compatibility - 0.9) / 0.1 - elif base_compatibility >= 0.75: # Excellent matches + elif base_compatibility >= 0.75: # 優秀匹配 score_range = (0.85, 0.91) position = (base_compatibility - 0.75) / 0.15 - elif base_compatibility >= 0.6: # Good matches + elif base_compatibility >= 0.6: # 良好匹配 
score_range = (0.75, 0.84) position = (base_compatibility - 0.6) / 0.15 - elif base_compatibility >= 0.45: # Fair matches + elif base_compatibility >= 0.45: # 公平匹配 score_range = (0.65, 0.74) position = (base_compatibility - 0.45) / 0.15 - elif base_compatibility >= 0.3: # Poor matches + elif base_compatibility >= 0.3: # 較差匹配 score_range = (0.55, 0.64) position = (base_compatibility - 0.3) / 0.15 - else: # Very poor matches + else: # 非常差的匹配 score_range = (0.45, 0.54) position = max(0, base_compatibility / 0.3) - # Calculate final score with natural variation + # 計算帶自然變化的最終分數 score_span = score_range[1] - score_range[0] base_score = score_range[0] + (position * score_span) - # Add controlled random variation for natural ranking + # 添加控制的隨機變化以進行自然排名 random.seed(hash(breed + user_input[:15])) variation = random.uniform(-0.015, 0.015) display_score = round(max(0.45, min(0.98, base_score + variation)), 3) @@ -1155,24 +417,24 @@ class SemanticBreedRecommender: 'lifestyle_bonus': breed_data['lifestyle_bonus'] }) - # Sort by display score to ensure ranking consistency + # 按顯示分數排序以確保排名一致性 breed_display_scores.sort(key=lambda x: x['display_score'], reverse=True) top_breeds = breed_display_scores[:top_k] - # Convert to standard recommendation format + # 轉換為標準推薦格式 recommendations = [] for i, breed_data in enumerate(top_breeds): breed = breed_data['breed'] display_score = breed_data['display_score'] - # Get detailed information + # 獲取詳細信息 breed_info = get_dog_description(breed) recommendation = { 'breed': breed.replace('_', ' '), 'rank': i + 1, - 'overall_score': display_score, # Use display score for consistency - 'final_score': display_score, # Ensure final_score matches overall_score + 'overall_score': display_score, # 使用顯示分數以保持一致性 + 'final_score': display_score, # 確保 final_score 與 overall_score 匹配 'semantic_score': breed_data['semantic_score'], 'comparative_bonus': breed_data['comparative_bonus'], 'lifestyle_bonus': breed_data['lifestyle_bonus'], @@ -1196,158 +458,6 @@ class 
SemanticBreedRecommender: print(traceback.format_exc()) return [] - def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any], - lifestyle_keywords: Dict[str, List[str]]) -> float: - """Enhanced lifestyle matching bonus calculation""" - bonus = 0.0 - penalties = 0.0 - - # Enhanced size matching - breed_size = breed_characteristics.get('size', '').lower() - size_prefs = lifestyle_keywords.get('size_preference', []) - for pref in size_prefs: - if pref in breed_size: - bonus += 0.25 # Strong reward for size match - elif (pref == 'small' and 'large' in breed_size) or \ - (pref == 'large' and 'small' in breed_size): - penalties += 0.15 # Penalty for size mismatch - - # Enhanced activity level matching - breed_exercise = breed_characteristics.get('exercise_needs', '').lower() - activity_prefs = lifestyle_keywords.get('activity_level', []) - - if 'high' in activity_prefs: - if 'high' in breed_exercise or 'very high' in breed_exercise: - bonus += 0.2 - elif 'low' in breed_exercise: - penalties += 0.2 - elif 'low' in activity_prefs: - if 'low' in breed_exercise: - bonus += 0.2 - elif 'high' in breed_exercise or 'very high' in breed_exercise: - penalties += 0.25 - elif 'moderate' in activity_prefs: - if 'moderate' in breed_exercise: - bonus += 0.15 - - # Enhanced family situation matching - good_with_children = breed_characteristics.get('good_with_children', 'Yes') - family_prefs = lifestyle_keywords.get('family_situation', []) - - if 'children' in family_prefs: - if good_with_children == 'Yes': - bonus += 0.15 - else: - penalties += 0.3 # Strong penalty for non-child-friendly breeds - - # Enhanced living space matching - living_prefs = lifestyle_keywords.get('living_space', []) - if 'apartment' in living_prefs: - if 'small' in breed_size: - bonus += 0.2 - elif 'medium' in breed_size and 'low' in breed_exercise: - bonus += 0.1 - elif 'large' in breed_size or 'giant' in breed_size: - penalties += 0.2 # Penalty for large dogs in apartments - - # Noise 
preference matching - noise_prefs = lifestyle_keywords.get('noise_preference', []) - temperament = breed_characteristics.get('temperament', '').lower() - - if 'low' in noise_prefs: - # Reward quiet breeds - if any(term in temperament for term in ['gentle', 'calm', 'quiet']): - bonus += 0.1 - - # Care level matching - grooming_needs = breed_characteristics.get('grooming_needs', '').lower() - care_prefs = lifestyle_keywords.get('care_level', []) - - if 'low' in care_prefs and 'low' in grooming_needs: - bonus += 0.1 - elif 'high' in care_prefs and 'high' in grooming_needs: - bonus += 0.1 - elif 'low' in care_prefs and 'high' in grooming_needs: - penalties += 0.15 - - # Special needs matching - special_needs = lifestyle_keywords.get('special_needs', []) - - if 'guard' in special_needs: - if any(term in temperament for term in ['protective', 'alert', 'watchful']): - bonus += 0.1 - elif 'companion' in special_needs: - if any(term in temperament for term in ['affectionate', 'gentle', 'loyal']): - bonus += 0.1 - - # Calculate final bonus with penalties - final_bonus = bonus - penalties - return max(-0.3, min(0.5, final_bonus)) # Allow negative bonus but limit range - - def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]: - """Convert standardized breed info to dictionary format""" - try: - size_map = {1: 'Tiny', 2: 'Small', 3: 'Medium', 4: 'Large', 5: 'Giant'} - exercise_map = {1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Very High'} - care_map = {1: 'Low', 2: 'Moderate', 3: 'High'} - - return { - 'Size': size_map.get(standardized_info.size_category, 'Medium'), - 'Exercise Needs': exercise_map.get(standardized_info.exercise_level, 'Moderate'), - 'Grooming Needs': care_map.get(standardized_info.care_complexity, 'Moderate'), - 'Good with Children': 'Yes' if standardized_info.child_compatibility >= 0.8 else - 'No' if standardized_info.child_compatibility <= 0.2 else 'Unknown', - 'Temperament': 'Varies by individual', - 'Lifespan': '10-12 years', - 
'Description': f'A {size_map.get(standardized_info.size_category, "medium")} sized breed' - } - except Exception as e: - print(f"Error converting standardized info: {str(e)}") - return {} - - def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]: - """Get fallback recommendations when enhanced system fails""" - try: - safe_breeds = [ - ('Labrador Retriever', 0.85), - ('Golden Retriever', 0.82), - ('Cavalier King Charles Spaniel', 0.80), - ('French Bulldog', 0.78), - ('Boston Terrier', 0.76), - ('Bichon Frise', 0.74), - ('Pug', 0.72), - ('Cocker Spaniel', 0.70) - ] - - recommendations = [] - for i, (breed, score) in enumerate(safe_breeds[:top_k]): - breed_info = get_dog_description(breed.replace(' ', '_')) or {} - - recommendation = { - 'breed': breed, - 'rank': i + 1, - 'overall_score': score, - 'final_score': score, - 'semantic_score': score * 0.8, - 'comparative_bonus': 0.0, - 'lifestyle_bonus': 0.0, - 'size': breed_info.get('Size', 'Unknown'), - 'temperament': breed_info.get('Temperament', ''), - 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'), - 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'), - 'good_with_children': breed_info.get('Good with Children', 'Yes'), - 'lifespan': breed_info.get('Lifespan', '10-12 years'), - 'description': breed_info.get('Description', ''), - 'search_type': 'fallback' - } - recommendations.append(recommendation) - - return recommendations - - except Exception as e: - print(f"Error generating fallback recommendations: {str(e)}") - return [] - def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]: """簡化的增強推薦方法""" try: @@ -1364,137 +474,34 @@ class SemanticBreedRecommender: def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]: """增強用戶描述分析""" - text = user_description.lower() - analysis = { - 'mentioned_breeds': [], - 'lifestyle_keywords': {}, - 'preference_strength': {}, - 
'constraint_requirements': [], - 'user_context': {} - } - - # 提取提及的品種 - for breed in self.breed_list: - breed_display = breed.replace('_', ' ').lower() - if breed_display in text or any(word in text for word in breed_display.split()): - analysis['mentioned_breeds'].append(breed) - # 簡單偏好強度分析 - if any(word in text for word in ['love', 'prefer', 'like', '喜歡', '最愛']): - analysis['preference_strength'][breed] = 0.8 - else: - analysis['preference_strength'][breed] = 0.5 - - # 提取約束要求 - if any(word in text for word in ['quiet', 'silent', 'no barking', '安靜']): - analysis['constraint_requirements'].append('low_noise') - if any(word in text for word in ['apartment', 'small space', '公寓']): - analysis['constraint_requirements'].append('apartment_suitable') - if any(word in text for word in ['children', 'kids', 'family', '小孩']): - analysis['constraint_requirements'].append('child_friendly') - - # 提取用戶背景 - analysis['user_context'] = { - 'has_children': any(word in text for word in ['children', 'kids', '小孩']), - 'living_space': 'apartment' if any(word in text for word in ['apartment', '公寓']) else 'house', - 'activity_level': 'high' if any(word in text for word in ['active', 'energetic', '活躍']) else 'moderate', - 'noise_sensitive': any(word in text for word in ['quiet', 'silent', '安靜']), - 'experience_level': 'beginner' if any(word in text for word in ['first time', 'beginner', '新手']) else 'intermediate' - } - - return analysis + return self.query_analyzer.analyze_user_description_enhanced(user_description) def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> UserPreferences: """從分析結果創建用戶偏好物件""" - context = analysis['user_context'] - - # 推斷居住空間類型 - living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small' - - # 推斷院子權限 - yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard' - - # 推斷運動時間 - activity_level = context.get('activity_level', 'moderate') - exercise_time_map = {'high': 120, 'moderate': 
60, 'low': 30} - exercise_time = exercise_time_map.get(activity_level, 60) - - # 推斷運動類型 - exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'} - exercise_type = exercise_type_map.get(activity_level, 'moderate_activity') - - # 推斷噪音容忍度 - noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium' - - return UserPreferences( - living_space=living_space, - yard_access=yard_access, - exercise_time=exercise_time, - exercise_type=exercise_type, - grooming_commitment='medium', - experience_level=context.get('experience_level', 'intermediate'), - time_availability='moderate', - has_children=context.get('has_children', False), - children_age='school_age' if context.get('has_children', False) else None, - noise_tolerance=noise_tolerance, - space_for_play=(living_space != 'apartment'), - other_pets=False, - climate='moderate', - health_sensitivity='medium', - barking_acceptance=noise_tolerance, - size_preference='no_preference' - ) + return self.query_analyzer.create_user_preferences_from_analysis_enhanced(analysis) def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]: """獲取候選品種列表""" - candidate_breeds = set() - - # 如果提及特定品種,優先包含 - if analysis['mentioned_breeds']: - candidate_breeds.update(analysis['mentioned_breeds']) - - # 根據約束要求過濾品種 - if 'apartment_suitable' in analysis['constraint_requirements']: - apartment_suitable = [ - 'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier', - 'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu' - ] - candidate_breeds.update(breed for breed in apartment_suitable if breed in self.breed_list) - - if 'child_friendly' in analysis['constraint_requirements']: - child_friendly = [ - 'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel', - 'Bichon_Frise', 'Poodle', 'Cocker_Spaniel' - ] - candidate_breeds.update(breed for breed in child_friendly if breed in self.breed_list) - - # 
如果候選品種不足,添加更多通用品種 - if len(candidate_breeds) < 20: - general_breeds = [ - 'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog', - 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier', - 'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer', - 'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua' - ] - candidate_breeds.update(breed for breed in general_breeds if breed in self.breed_list) - - return list(candidate_breeds)[:30] # 限制候選數量以提高效率 + return self.query_analyzer.get_candidate_breeds_enhanced(analysis) def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float: """應用約束過濾,返回調整分數""" - penalty = 0.0 + # 這個方法需要從 score_calculator 調用適當的方法 + # 但原始實現中沒有這個具體方法,所以我們提供基本實現 + constraint_penalty = 0.0 breed_info = get_dog_description(breed) if not breed_info: - return penalty + return constraint_penalty # 低噪音要求 if 'low_noise' in analysis['constraint_requirements']: noise_info = breed_noise_info.get(breed, {}) noise_level = noise_info.get('noise_level', 'moderate').lower() if 'high' in noise_level: - penalty -= 0.3 # 嚴重扣分 + constraint_penalty -= 0.3 # 嚴重扣分 elif 'low' in noise_level: - penalty += 0.1 # 輕微加分 + constraint_penalty += 0.1 # 輕微加分 # 公寓適合性 if 'apartment_suitable' in analysis['constraint_requirements']: @@ -1502,76 +509,58 @@ class SemanticBreedRecommender: exercise_needs = breed_info.get('Exercise Needs', '').lower() if size in ['large', 'giant']: - penalty -= 0.2 + constraint_penalty -= 0.2 elif size in ['small', 'tiny']: - penalty += 0.1 + constraint_penalty += 0.1 if 'high' in exercise_needs: - penalty -= 0.15 + constraint_penalty -= 0.15 # 兒童友善性 if 'child_friendly' in analysis['constraint_requirements']: good_with_children = breed_info.get('Good with Children', 'Unknown') if good_with_children == 'Yes': - penalty += 0.15 + constraint_penalty += 0.15 elif good_with_children == 'No': - penalty -= 0.4 # 嚴重扣分 + constraint_penalty -= 0.4 # 嚴重扣分 - 
return penalty + return constraint_penalty def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]: """獲取品種特徵""" - breed_info = get_dog_description(breed) - if not breed_info: - return {} - - characteristics = { - 'size': breed_info.get('Size', 'Unknown'), - 'temperament': breed_info.get('Temperament', ''), - 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'), - 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'), - 'good_with_children': breed_info.get('Good with Children', 'Unknown'), - 'lifespan': breed_info.get('Lifespan', '10-12 years'), - 'description': breed_info.get('Description', '') - } - - # 添加噪音資訊 - noise_info = breed_noise_info.get(breed, {}) - characteristics['noise_level'] = noise_info.get('noise_level', 'moderate') - - return characteristics + return self.score_calculator.get_breed_characteristics_enhanced(breed) def get_hybrid_recommendations(self, user_description: str, user_preferences: Optional[Any] = None, top_k: int = 15) -> List[Dict[str, Any]]: """ - Hybrid recommendations: Combine semantic matching with traditional scoring + 混合推薦:結合語義匹配與傳統評分 Args: - user_description: User's natural language description - user_preferences: Optional structured preference settings - top_k: Number of recommendations to return + user_description: 用戶的自然語言描述 + user_preferences: 可選的結構化偏好設置 + top_k: 返回的推薦數量 Returns: - Hybrid recommendation results + 混合推薦結果 """ try: - # Get semantic recommendations + # 獲取語義推薦 semantic_recommendations = self.get_semantic_recommendations(user_description, top_k * 2) if not user_preferences: return semantic_recommendations[:top_k] - # Combine with traditional scoring + # 與傳統評分結合 hybrid_results = [] for semantic_rec in semantic_recommendations: breed_name = semantic_rec['breed'].replace(' ', '_') - # Calculate traditional compatibility score + # 計算傳統相容性分數 traditional_score = calculate_compatibility_score(user_preferences, breed_name) - # Hybrid score (semantic 40% + traditional 60%) + # 混合分數(語義 
40% + 傳統 60%) hybrid_score = ( semantic_rec['overall_score'] * 0.4 + traditional_score * 0.6 @@ -1581,10 +570,10 @@ class SemanticBreedRecommender: semantic_rec['traditional_score'] = traditional_score hybrid_results.append(semantic_rec) - # Re-sort by hybrid score + # 按混合分數重新排序 hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True) - # Update rankings + # 更新排名 for i, result in enumerate(hybrid_results[:top_k]): result['rank'] = i + 1 result['overall_score'] = result['hybrid_score'] @@ -1596,17 +585,14 @@ class SemanticBreedRecommender: print(traceback.format_exc()) return self.get_semantic_recommendations(user_description, top_k) + def get_breed_recommendations_by_description(user_description: str, user_preferences: Optional[Any] = None, top_k: int = 15) -> List[Dict[str, Any]]: - """Main interface function for getting breed recommendations by description""" + """基於描述獲取品種推薦的主要介面函數""" try: print("Initializing Enhanced SemanticBreedRecommender...") recommender = SemanticBreedRecommender() - - # 嘗試載入SBERT模型(如果尚未載入) - if not recommender.sbert_model: - recommender._initialize_model() # 優先使用整合統一評分系統的增強推薦 print("Using enhanced recommendation system with unified scoring") @@ -1651,22 +637,14 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top # 創建基本推薦器實例 recommender = SemanticBreedRecommender() - # 嘗試載入SBERT模型(如果尚未載入) - if not recommender.sbert_model: - recommender._initialize_model() - - if not recommender.sbert_model: + if not recommender.vector_manager.is_model_available(): print("SBERT model not available, using basic text matching...") # 使用基本文字匹配邏輯 - return _get_basic_text_matching_recommendations(user_description, top_k) - - # 確保breed vectors已建構 - if not recommender.breed_vectors: - recommender._build_breed_vectors() + return _get_basic_text_matching_recommendations(user_description, top_k, recommender) # 使用語意相似度推薦 recommendations = [] - user_embedding = recommender.sbert_model.encode(user_description) + user_embedding = 
recommender.vector_manager.encode_text(user_description) # 計算所有品種的增強分數 all_breed_scores = [] @@ -1678,7 +656,7 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top breed_info = get_dog_description(breed_name) or {} # 計算增強的匹配分數 - enhanced_score = _calculate_enhanced_matching_score( + enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score( breed_name, breed_info, user_description, similarity ) @@ -1718,461 +696,20 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top print(traceback.format_exc()) raise RuntimeError(error_msg) from e -def _calculate_enhanced_matching_score(breed: str, breed_info: dict, user_description: str, base_similarity: float) -> dict: - """計算增強的匹配分數,基於用戶描述和品種特性""" - try: - user_desc = user_description.lower() - - # 分析用戶需求 - space_requirements = _analyze_space_requirements(user_desc) - exercise_requirements = _analyze_exercise_requirements(user_desc) - noise_requirements = _analyze_noise_requirements(user_desc) - size_requirements = _analyze_size_requirements(user_desc) - family_requirements = _analyze_family_requirements(user_desc) - - # 獲取品種特性 - breed_size = breed_info.get('Size', '').lower() - breed_exercise = breed_info.get('Exercise Needs', '').lower() - breed_noise = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower() - breed_temperament = breed_info.get('Temperament', '').lower() - breed_good_with_children = breed_info.get('Good with Children', '').lower() - - # 計算各維度匹配分數 - dimension_scores = {} - - # 空間匹配 (30% 權重) - space_score = _calculate_space_compatibility(space_requirements, breed_size, breed_exercise) - dimension_scores['space'] = space_score - - # 運動需求匹配 (25% 權重) - exercise_score = _calculate_exercise_compatibility(exercise_requirements, breed_exercise) - dimension_scores['exercise'] = exercise_score - - # 噪音匹配 (20% 權重) - noise_score = _calculate_noise_compatibility(noise_requirements, breed_noise) - dimension_scores['noise'] = 
noise_score - - # 體型匹配 (15% 權重) - size_score = _calculate_size_compatibility(size_requirements, breed_size) - dimension_scores['grooming'] = min(0.9, base_similarity + 0.1) # 美容需求基於語意相似度 - - # 家庭相容性 (10% 權重) - family_score = _calculate_family_compatibility(family_requirements, breed_good_with_children, breed_temperament) - dimension_scores['family'] = family_score - dimension_scores['experience'] = min(0.9, base_similarity + 0.05) # 經驗需求基於語意相似度 - - # 應用硬約束過濾 - constraint_penalty = _apply_hard_constraints_enhanced(user_desc, breed_info) - - # 計算加權總分 - 精確化維度權重配置 - # 根據指導建議重新平衡維度權重 - weighted_score = ( - space_score * 0.30 + # 空間相容性(降低5%) - exercise_score * 0.28 + # 運動需求匹配(降低2%) - noise_score * 0.18 + # 噪音控制(提升3%) - family_score * 0.12 + # 家庭相容性(提升2%) - size_score * 0.08 + # 體型匹配(降低2%) - min(0.9, base_similarity + 0.1) * 0.04 # 護理需求(新增獨立權重) - ) - - # 優化完美匹配獎勵機制 - 降低觸發門檻並增加層次 - perfect_match_bonus = 0.0 - if space_score >= 0.88 and exercise_score >= 0.88 and noise_score >= 0.85: - perfect_match_bonus = 0.08 # 卓越匹配獎勵 - elif space_score >= 0.82 and exercise_score >= 0.82 and noise_score >= 0.75: - perfect_match_bonus = 0.04 # 優秀匹配獎勵 - elif space_score >= 0.75 and exercise_score >= 0.75: - perfect_match_bonus = 0.02 # 良好匹配獎勵 - - # 結合語意相似度與維度匹配 - 調整為75%維度匹配 25%語義相似度 - base_combined_score = (weighted_score * 0.75 + base_similarity * 0.25) + perfect_match_bonus - - # 應用漸進式約束懲罰,但確保基礎分數保障 - raw_final_score = base_combined_score + constraint_penalty - - # 實施動態分數保障機制 - 提升至40-42%基礎分數 - # 根據品種特性動態調整基礎分數 - base_guaranteed_score = 0.42 # 提升基礎保障分數 - - # 特殊品種基礎分數調整 - high_adaptability_breeds = ['French_Bulldog', 'Pug', 'Golden_Retriever', 'Labrador_Retriever'] - if any(breed in breed for breed in high_adaptability_breeds): - base_guaranteed_score = 0.45 # 高適應性品種更高基礎分數 - - # 動態分數分佈優化 - if raw_final_score >= base_guaranteed_score: - # 對於高分品種,實施適度壓縮避免過度集中 - if raw_final_score > 0.85: - compression_factor = 0.92 # 輕度壓縮高分 - final_score = 0.85 + (raw_final_score - 0.85) * compression_factor 
- else: - final_score = raw_final_score - final_score = min(0.93, final_score) # 降低最高分數限制 - else: - # 對於低分品種,使用改進的保障機制 - normalized_raw_score = max(0.15, raw_final_score) - # 基礎保障75% + 實際計算25%,保持一定區分度 - final_score = base_guaranteed_score * 0.75 + normalized_raw_score * 0.25 - final_score = max(base_guaranteed_score, min(0.93, final_score)) - - lifestyle_bonus = max(0.0, weighted_score - base_similarity) - - return { - 'final_score': final_score, - 'weighted_score': weighted_score, - 'lifestyle_bonus': lifestyle_bonus, - 'dimension_scores': dimension_scores, - 'constraint_penalty': constraint_penalty - } - - except Exception as e: - print(f"Error in enhanced matching calculation for {breed}: {str(e)}") - return { - 'final_score': base_similarity, - 'weighted_score': base_similarity, - 'lifestyle_bonus': 0.0, - 'dimension_scores': { - 'space': base_similarity * 0.9, - 'exercise': base_similarity * 0.85, - 'grooming': base_similarity * 0.8, - 'experience': base_similarity * 0.75, - 'noise': base_similarity * 0.7, - 'family': base_similarity * 0.65 - }, - 'constraint_penalty': 0.0 - } - -def _analyze_space_requirements(user_desc: str) -> dict: - """分析空間需求 - 增強中等活動量識別""" - requirements = {'type': 'unknown', 'size': 'medium', 'importance': 0.5} - - if any(word in user_desc for word in ['apartment', 'small apartment', 'small space', 'condo', 'flat']): - requirements['type'] = 'apartment' - requirements['size'] = 'small' - requirements['importance'] = 0.95 # 提高重要性 - elif any(word in user_desc for word in ['medium-sized house', 'medium house', 'townhouse']): - requirements['type'] = 'medium_house' - requirements['size'] = 'medium' - requirements['importance'] = 0.8 # 中等活動量用戶的特殊標記 - elif any(word in user_desc for word in ['large house', 'big house', 'yard', 'garden', 'large space', 'backyard']): - requirements['type'] = 'house' - requirements['size'] = 'large' - requirements['importance'] = 0.7 - - return requirements - -def _analyze_exercise_requirements(user_desc: str) -> 
dict: - """分析運動需求 - 增強中等活動量識別""" - requirements = {'level': 'moderate', 'importance': 0.5} - - # 低運動量識別 - if any(word in user_desc for word in ["don't exercise", "don't exercise much", "low exercise", "minimal", "lazy", "not active"]): - requirements['level'] = 'low' - requirements['importance'] = 0.95 - # 中等運動量的精確識別 - elif any(phrase in user_desc for phrase in ['30 minutes', 'half hour', 'moderate', 'balanced', 'walk about']): - if 'walk' in user_desc or 'daily' in user_desc: - requirements['level'] = 'moderate' - requirements['importance'] = 0.85 # 中等活動量的特殊標記 - # 高運動量識別 - elif any(word in user_desc for word in ['active', 'hiking', 'outdoor activities', 'running', 'outdoors', 'love hiking']): - requirements['level'] = 'high' - requirements['importance'] = 0.9 - - return requirements - -def _analyze_noise_requirements(user_desc: str) -> dict: - """分析噪音需求""" - requirements = {'tolerance': 'medium', 'importance': 0.5} - - if any(word in user_desc for word in ['quiet', 'no bark', "won't bark", "doesn't bark", 'silent', 'peaceful']): - requirements['tolerance'] = 'low' - requirements['importance'] = 0.9 - elif any(word in user_desc for word in ['loud', 'barking ok', 'noise ok']): - requirements['tolerance'] = 'high' - requirements['importance'] = 0.7 - - return requirements - -def _analyze_size_requirements(user_desc: str) -> dict: - """分析體型需求""" - requirements = {'preferred': 'any', 'importance': 0.5} - - if any(word in user_desc for word in ['small', 'tiny', 'little', 'lap dog', 'compact']): - requirements['preferred'] = 'small' - requirements['importance'] = 0.8 - elif any(word in user_desc for word in ['large', 'big', 'giant']): - requirements['preferred'] = 'large' - requirements['importance'] = 0.8 - - return requirements - -def _analyze_family_requirements(user_desc: str) -> dict: - """分析家庭需求""" - requirements = {'children': False, 'importance': 0.3} - - if any(word in user_desc for word in ['children', 'kids', 'family', 'child']): - requirements['children'] = 
True - requirements['importance'] = 0.8 - - return requirements - -def _calculate_space_compatibility(space_req: dict, breed_size: str, breed_exercise: str) -> float: - """計算空間相容性分數 - 增強中等活動量處理""" - if space_req['type'] == 'apartment': - if 'small' in breed_size or 'toy' in breed_size: - base_score = 0.95 - elif 'medium' in breed_size: - if 'low' in breed_exercise: - base_score = 0.75 - else: - base_score = 0.45 # 降低中型犬在公寓的分數 - elif 'large' in breed_size: - base_score = 0.05 # 大型犬極度不適合公寓 - elif 'giant' in breed_size: - base_score = 0.01 # 超大型犬完全不適合公寓 - else: - base_score = 0.7 - elif space_req['type'] == 'medium_house': - # 中型房屋的特殊處理 - 適合中等活動量用戶 - if 'small' in breed_size or 'toy' in breed_size: - base_score = 0.9 - elif 'medium' in breed_size: - base_score = 0.95 # 中型犬在中型房屋很適合 - elif 'large' in breed_size: - if 'moderate' in breed_exercise or 'low' in breed_exercise: - base_score = 0.8 # 低運動量大型犬還可以 - else: - base_score = 0.6 # 高運動量大型犬不太適合 - elif 'giant' in breed_size: - base_score = 0.3 # 超大型犬在中型房屋不太適合 - else: - base_score = 0.85 - else: - # 大型房屋的情況 - if 'small' in breed_size or 'toy' in breed_size: - base_score = 0.85 - elif 'medium' in breed_size: - base_score = 0.9 - elif 'large' in breed_size or 'giant' in breed_size: - base_score = 0.95 - else: - base_score = 0.8 - - return min(0.95, base_score) - -def _calculate_exercise_compatibility(exercise_req: dict, breed_exercise: str) -> float: - """計算運動需求相容性分數 - 增強中等活動量處理""" - if exercise_req['level'] == 'low': - if 'low' in breed_exercise or 'minimal' in breed_exercise: - return 0.95 - elif 'moderate' in breed_exercise: - return 0.5 # 降低不匹配分數 - elif 'high' in breed_exercise: - return 0.1 # 進一步降低高運動需求的匹配 - else: - return 0.7 - elif exercise_req['level'] == 'high': - if 'high' in breed_exercise: - return 0.95 - elif 'moderate' in breed_exercise: - return 0.8 - elif 'low' in breed_exercise: - return 0.6 - else: - return 0.7 - else: # moderate - 中等活動量的精確處理 - if 'moderate' in breed_exercise: - return 0.95 # 完美匹配 - elif 
'low' in breed_exercise: - return 0.85 # 低運動需求的品種對中等活動量用戶也不錯 - elif 'high' in breed_exercise: - return 0.5 # 中等活動量用戶不太適合高運動需求品種 - else: - return 0.75 - - return 0.6 - -def _calculate_noise_compatibility(noise_req: dict, breed_noise: str) -> float: - """計算噪音相容性分數,更好處理複合等級""" - breed_noise_lower = breed_noise.lower() - - if noise_req['tolerance'] == 'low': - if 'low' in breed_noise_lower and 'moderate' not in breed_noise_lower: - return 0.95 # 純低噪音 - elif 'low-moderate' in breed_noise_lower or 'low to moderate' in breed_noise_lower: - return 0.8 # 低到中等噪音,還可接受 - elif breed_noise_lower in ['moderate']: - return 0.4 # 中等噪音有些問題 - elif 'high' in breed_noise_lower: - return 0.1 # 高噪音不適合 - else: - return 0.6 # 未知噪音水平,保守估計 - elif noise_req['tolerance'] == 'high': - if 'high' in breed_noise_lower: - return 0.9 - elif 'moderate' in breed_noise_lower: - return 0.85 - elif 'low' in breed_noise_lower: - return 0.8 # 安靜犬對高容忍度的人也很好 - else: - return 0.8 - else: # moderate tolerance - if 'moderate' in breed_noise_lower: - return 0.9 - elif 'low' in breed_noise_lower: - return 0.85 - elif 'high' in breed_noise_lower: - return 0.6 - else: - return 0.75 - - return 0.7 - -def _calculate_size_compatibility(size_req: dict, breed_size: str) -> float: - """計算體型相容性分數""" - if size_req['preferred'] == 'small': - if any(word in breed_size for word in ['small', 'toy', 'tiny']): - return 0.9 - elif 'medium' in breed_size: - return 0.6 - else: - return 0.3 - elif size_req['preferred'] == 'large': - if any(word in breed_size for word in ['large', 'giant']): - return 0.9 - elif 'medium' in breed_size: - return 0.7 - else: - return 0.4 - - return 0.7 # 無特別偏好 - -def _calculate_family_compatibility(family_req: dict, good_with_children: str, temperament: str) -> float: - """計算家庭相容性分數""" - if family_req['children']: - if 'yes' in good_with_children.lower(): - return 0.9 - elif any(word in temperament for word in ['gentle', 'patient', 'friendly']): - return 0.8 - elif 'no' in good_with_children.lower(): - 
return 0.2 - else: - return 0.6 - - return 0.7 - -def _apply_hard_constraints_enhanced(user_desc: str, breed_info: dict) -> float: - """應用品種特性感知的動態懲罰機制""" - penalty = 0.0 - - # 建立懲罰衰減係數和補償機制 - penalty_decay_factor = 0.7 - breed_adaptability_bonus = 0.0 - breed_size = breed_info.get('Size', '').lower() - breed_exercise = breed_info.get('Exercise Needs', '').lower() - breed_name = breed_info.get('Breed', '').replace(' ', '_') - - # 公寓空間約束 - 品種特性感知懲罰機制 - if 'apartment' in user_desc or 'small apartment' in user_desc: - if 'giant' in breed_size: - base_penalty = -0.35 # 減少基礎懲罰 - # 特定品種適應性補償 - adaptable_giants = ['Mastiff', 'Great Dane'] # 相對安靜的巨型犬 - if any(adapt_breed in breed_name for adapt_breed in adaptable_giants): - breed_adaptability_bonus += 0.08 - penalty += base_penalty * penalty_decay_factor - elif 'large' in breed_size: - base_penalty = -0.25 # 減少大型犬懲罰 - # 適合公寓的大型犬補償 - apartment_friendly_large = ['Greyhound', 'Great_Dane'] - if any(apt_breed in breed_name for apt_breed in apartment_friendly_large): - breed_adaptability_bonus += 0.06 - penalty += base_penalty * penalty_decay_factor - elif 'medium' in breed_size and 'high' in breed_exercise: - penalty += -0.15 * penalty_decay_factor # 進一步減少懲罰 - - # 運動需求不匹配 - 品種特性感知懲罰機制 - if any(phrase in user_desc for phrase in ["don't exercise", "not active", "low exercise", "don't exercise much"]): - if 'high' in breed_exercise: - base_penalty = -0.28 # 減少基礎懲罰 - # 低維護高運動犬種補償 - adaptable_high_energy = ['Greyhound', 'Whippet'] # 運動爆發型,平時安靜 - if any(adapt_breed in breed_name for adapt_breed in adaptable_high_energy): - breed_adaptability_bonus += 0.10 - penalty += base_penalty * penalty_decay_factor - elif 'moderate' in breed_exercise: - penalty += -0.08 * penalty_decay_factor # 進一步減少懲罰 - - # 噪音控制需求不匹配 - 品種特性感知懲罰機制 - if any(phrase in user_desc for phrase in ['quiet', "won't bark", "doesn't bark", "silent"]): - breed_noise = breed_noise_info.get(breed_name, {}).get('noise_level', 'moderate').lower() - if 'high' in breed_noise:
- base_penalty = -0.18 # 減少基礎懲罰 - # 訓練性良好的高噪音品種補償 - trainable_vocal_breeds = ['German_Shepherd', 'Golden_Retriever'] - if any(train_breed in breed_name for train_breed in trainable_vocal_breeds): - breed_adaptability_bonus += 0.05 - penalty += base_penalty * penalty_decay_factor - elif 'moderate' in breed_noise and 'low' not in breed_noise: - penalty += -0.05 * penalty_decay_factor - - # 體型偏好不匹配 - 漸進式懲罰 - if any(phrase in user_desc for phrase in ['small', 'tiny', 'little']): - if 'giant' in breed_size: - penalty -= 0.35 # 超大型犬懲罰 - elif 'large' in breed_size: - penalty -= 0.20 # 大型犬懲罰 - - # 中等活動量用戶的特殊約束處理 - 漸進式懲罰 - moderate_activity_terms = ['30 minutes', 'half hour', 'moderate', 'balanced', 'medium-sized house'] - if any(term in user_desc for term in moderate_activity_terms): - # 超大型犬對中等活動量用戶的適度懲罰 - giant_breeds = ['Saint Bernard', 'Tibetan Mastiff', 'Great Dane', 'Mastiff', 'Newfoundland'] - if any(giant in breed_name for giant in giant_breeds) or 'giant' in breed_size: - penalty -= 0.35 # 適度懲罰,不完全排除 - - # 中型房屋 + 超大型犬的額外考量 - if 'medium-sized house' in user_desc and any(giant in breed_name for giant in giant_breeds): - if not any(high_activity in user_desc for high_activity in ['hiking', 'running', 'active', 'outdoor activities']): - penalty -= 0.15 # 輕度額外懲罰 - - # 30分鐘散步對極高運動需求品種的懲罰 - if any(term in user_desc for term in ['30 minutes', 'half hour']) and 'walk' in user_desc: - high_energy_breeds = ['Siberian Husky', 'Border Collie', 'Jack Russell Terrier', 'Weimaraner'] - if any(he_breed in breed_name for he_breed in high_energy_breeds) and 'high' in breed_exercise: - penalty -= 0.25 # 適度懲罰極高運動需求品種 - - # 添加特殊品種適應性補償機制 - # 對於邊界適配品種,給予適度補償 - boundary_adaptable_breeds = { - 'Italian_Greyhound': 0.08, # 安靜、低維護的小型犬 - 'Boston_Bull': 0.06, # 適應性強的小型犬 - 'Havanese': 0.05, # 友好適應的小型犬 - 'Silky_terrier': 0.04, # 安靜的玩具犬 - 'Basset': 0.07 # 低能量但友好的中型犬 - } - - if breed_name in boundary_adaptable_breeds: - breed_adaptability_bonus += boundary_adaptable_breeds[breed_name] - - # 
應用品種適應性補償並設置懲罰上限 - final_penalty = penalty + breed_adaptability_bonus - # 限制最大懲罰,避免單一約束主導評分 - final_penalty = max(-0.4, final_penalty) - - return final_penalty - -def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]: +def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15, recommender=None) -> List[Dict[str, Any]]: """基本文字匹配推薦(SBERT 不可用時的後備方案)""" try: print("Using basic text matching as fallback...") + # 如果沒有提供 recommender,創建一個新的 + if recommender is None: + recommender = SemanticBreedRecommender() + # 基本關鍵字匹配 keywords = user_description.lower().split() breed_scores = [] - # 從數據庫獲取品種清單 + # 從數據庫獲取品種清單或使用預設清單 try: conn = sqlite3.connect('animal_detector.db') cursor = conn.cursor() @@ -2180,13 +717,15 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int = basic_breeds = [row[0] for row in cursor.fetchall()] cursor.close() conn.close() + # 過濾掉野生動物品種 + basic_breeds = [breed for breed in basic_breeds if breed != 'Dhole'] except Exception as e: print(f"Could not load breed list from database: {str(e)}") # 後備品種清單 basic_breeds = [ 'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog', 'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', - 'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih-Tzu', + 'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih_Tzu', 'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier', 'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa' ] @@ -2200,7 +739,7 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int = base_score = min(0.95, 0.3 + (matches / len(keywords)) * 0.6) # 應用增強匹配邏輯 - enhanced_score = _calculate_enhanced_matching_score( + enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score( breed, breed_info, user_description, base_score ) @@ -2243,4 
+782,4 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int = except Exception as e: error_msg = f"Error in basic text matching: {str(e)}" print(f"ERROR: {error_msg}") - raise RuntimeError(error_msg) from e \ No newline at end of file + raise RuntimeError(error_msg) from e