# File size: 18,417 Bytes
# 595e0a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
import random
import hashlib
import numpy as np
import sqlite3
import re
import traceback
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity
from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info

@dataclass
class BreedDescriptionVector:
    """Record pairing one breed's generated description with its embedding.

    Instances are created by SemanticVectorManager._build_breed_vectors, one
    per entry of the manager's breed list.
    """
    # Breed identifier exactly as it appears in the breed list.
    breed_name: str
    # Text produced by _create_breed_description; this is what was encoded.
    description_text: str
    # Result of the SBERT model's encode() call on description_text.
    embedding: np.ndarray
    # Raw attribute values kept next to the vector; the builder fills the keys
    # 'size', 'exercise_needs', 'grooming_needs', 'good_with_children' and
    # 'temperament'.
    characteristics: Dict[str, Any]

class SemanticVectorManager:
    """
    Semantic vector manager.
    Handles SBERT model initialization, breed vector construction, and breed description generation.
    """

    def __init__(self):
        """初始化語義向量管理器"""
        self.model_name = 'all-MiniLM-L6-v2'  
        self.sbert_model = None
        self._sbert_loading_attempted = False
        self.breed_vectors = {}
        self.breed_list = self._get_breed_list()
        # 延遲SBERT模型載入直到需要時才在GPU環境中進行
        print("SemanticVectorManager initialized (SBERT loading deferred)")

    def _get_breed_list(self) -> List[str]:
        """從資料庫獲取品種清單"""
        try:
            conn = sqlite3.connect('animal_detector.db')
            cursor = conn.cursor()
            cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
            breeds = [row[0] for row in cursor.fetchall()]
            cursor.close()
            conn.close()
            # 過濾掉野生動物品種
            breeds = [breed for breed in breeds if breed != 'Dhole']
            return breeds
        except Exception as e:
            print(f"Error getting breed list: {str(e)}")
            return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
                   'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier']

    def _initialize_model(self):
        """初始化 SBERT 模型,包含容錯機制 - 設計用於ZeroGPU相容性"""
        if self.sbert_model is not None or self._sbert_loading_attempted:
            return self.sbert_model
            
        try:
            print("Loading SBERT model in GPU context...")
            # 如果主要模型失敗,嘗試不同的模型名稱
            model_options = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2']

            for model_name in model_options:
                try:
                    # 明確指定設備以處理ZeroGPU環境
                    import torch
                    device = 'cuda' if torch.cuda.is_available() else 'cpu'
                    self.sbert_model = SentenceTransformer(model_name, device=device)
                    self.model_name = model_name
                    print(f"SBERT model {model_name} loaded successfully on {device}")
                    return self.sbert_model
                except Exception as model_e:
                    print(f"Failed to load {model_name}: {str(model_e)}")
                    continue

            # 如果所有模型都失敗
            print("All SBERT models failed to load. Using basic text matching fallback.")
            self.sbert_model = None
            return None

        except Exception as e:
            print(f"Failed to initialize any SBERT model: {str(e)}")
            print(traceback.format_exc())
            print("Will provide basic text-based recommendations without embeddings")
            self.sbert_model = None
            return None
        finally:
            self._sbert_loading_attempted = True

    def _create_breed_description(self, breed: str) -> str:
        """為品種創建包含所有關鍵特徵的全面自然語言描述"""
        try:
            # 獲取所有信息來源
            breed_info = get_dog_description(breed) or {}
            health_info = breed_health_info.get(breed, {}) if breed_health_info else {}
            noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {}

            breed_display_name = breed.replace('_', ' ')
            description_parts = []

            # 1. 基本尺寸和身體特徵
            size = breed_info.get('Size', 'medium').lower()
            description_parts.append(f"{breed_display_name} is a {size} sized dog breed")

            # 2. 氣質和個性(匹配的關鍵因素)
            temperament = breed_info.get('Temperament', '')
            if temperament:
                description_parts.append(f"with a {temperament.lower()} temperament")

            # 3. 運動和活動水平(公寓居住的關鍵因素)
            exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower()
            if 'high' in exercise_needs or 'very high' in exercise_needs:
                description_parts.append("requiring high daily exercise and mental stimulation")
            elif 'low' in exercise_needs or 'minimal' in exercise_needs:
                description_parts.append("with minimal exercise requirements, suitable for apartment living")
            else:
                description_parts.append("with moderate exercise needs")

            # 4. 噪音特徵(安靜需求的關鍵因素)
            noise_level = noise_info.get('noise_level', 'moderate').lower()
            if 'low' in noise_level or 'quiet' in noise_level:
                description_parts.append("known for being quiet and rarely barking")
            elif 'high' in noise_level or 'loud' in noise_level:
                description_parts.append("tends to be vocal and bark frequently")
            else:
                description_parts.append("with moderate barking tendencies")

            # 5. 居住空間相容性
            if size in ['small', 'tiny']:
                description_parts.append("excellent for small apartments and limited spaces")
            elif size in ['large', 'giant']:
                description_parts.append("requiring large living spaces and preferably a yard")
            else:
                description_parts.append("adaptable to various living situations")

            # 6. 美容和維護
            grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower()
            if 'high' in grooming_needs:
                description_parts.append("requiring regular professional grooming")
            elif 'low' in grooming_needs:
                description_parts.append("with minimal grooming requirements")
            else:
                description_parts.append("with moderate grooming needs")

            # 7. 家庭相容性
            good_with_children = breed_info.get('Good with Children', 'Yes')
            if good_with_children == 'Yes':
                description_parts.append("excellent with children and families")
            else:
                description_parts.append("better suited for adult households")

            # 8. 智力和可訓練性(從資料庫描述中提取)
            intelligence_keywords = []
            description_text = breed_info.get('Description', '').lower()

            if description_text:
                # 從描述中提取智力指標
                if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']):
                    intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner'])
                elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']):
                    intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train'])
                else:
                    intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency'])

                # 從描述中提取工作/用途特徵
                if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']):
                    intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented'])
                elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']):
                    intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused'])

                # 添加智力背景到描述中
                if intelligence_keywords:
                    description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}")

            # 9. 特殊特徵和用途(使用資料庫挖掘進行增強)
            if breed_info.get('Description'):
                desc = breed_info.get('Description', '')[:150]  # 增加到 150 字元以提供更多背景
                if desc:
                    # 從描述中提取關鍵特徵以便更好的語義匹配
                    desc_lower = desc.lower()
                    key_traits = []

                    # 從描述中提取關鍵行為特徵
                    if 'friendly' in desc_lower:
                        key_traits.append('friendly')
                    if 'gentle' in desc_lower:
                        key_traits.append('gentle')
                    if 'energetic' in desc_lower or 'active' in desc_lower:
                        key_traits.append('energetic')
                    if 'calm' in desc_lower or 'peaceful' in desc_lower:
                        key_traits.append('calm')
                    if 'protective' in desc_lower or 'guard' in desc_lower:
                        key_traits.append('protective')

                    trait_text = f" and {', '.join(key_traits)}" if key_traits else ""
                    description_parts.append(f"Known for: {desc.lower()}{trait_text}")

            # 10. 照護水平需求
            try:
                care_level = breed_info.get('Care Level', 'moderate')
                if isinstance(care_level, str):
                    description_parts.append(f"requiring {care_level.lower()} overall care level")
                else:
                    description_parts.append("requiring moderate overall care level")
            except Exception as e:
                print(f"Error processing care level for {breed}: {str(e)}")
                description_parts.append("requiring moderate overall care level")

            # 11. 壽命資訊
            try:
                lifespan = breed_info.get('Lifespan', '10-12 years')
                if lifespan and isinstance(lifespan, str) and lifespan.strip():
                    description_parts.append(f"with a typical lifespan of {lifespan}")
                else:
                    description_parts.append("with a typical lifespan of 10-12 years")
            except Exception as e:
                print(f"Error processing lifespan for {breed}: {str(e)}")
                description_parts.append("with a typical lifespan of 10-12 years")

            # 創建全面的描述
            full_description = '. '.join(description_parts) + '.'

            # 添加全面的關鍵字以便更好的語義匹配
            keywords = []

            # 基本品種名稱關鍵字
            keywords.extend([word.lower() for word in breed_display_name.split()])

            # 氣質關鍵字
            if temperament:
                keywords.extend([word.lower().strip(',') for word in temperament.split()])

            # 基於尺寸的關鍵字
            if 'small' in size or 'tiny' in size:
                keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap'])
            elif 'large' in size or 'giant' in size:
                keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor'])
            else:
                keywords.extend(['medium', 'moderate', 'average', 'balanced'])

            # 活動水平關鍵字
            exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower()
            if 'high' in exercise_needs:
                keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic'])
            elif 'low' in exercise_needs:
                keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary'])
            else:
                keywords.extend(['moderate', 'balanced', 'walks', 'regular'])

            # 噪音水平關鍵字
            noise_level = noise_info.get('noise_level', 'moderate').lower()
            if 'quiet' in noise_level or 'low' in noise_level:
                keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise'])
            elif 'high' in noise_level or 'loud' in noise_level:
                keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog'])

            # 居住情況關鍵字
            if size in ['small', 'tiny'] and 'low' in exercise_needs:
                keywords.extend(['apartment', 'city', 'urban', 'small-space'])
            if size in ['large', 'giant'] or 'high' in exercise_needs:
                keywords.extend(['house', 'yard', 'suburban', 'rural', 'space'])

            # 家庭關鍵字
            good_with_children = breed_info.get('Good with Children', 'Yes')
            if good_with_children == 'Yes':
                keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle'])

            # 智力和可訓練性關鍵字(從資料庫描述挖掘)
            if intelligence_keywords:
                keywords.extend([word.lower() for phrase in intelligence_keywords for word in phrase.split()])

            # 美容相關關鍵字(增強)
            grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower()
            if 'high' in grooming_needs:
                keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care'])
            elif 'low' in grooming_needs:
                keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go'])
            else:
                keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care'])

            # 基於壽命的關鍵字
            lifespan = breed_info.get('Lifespan', '10-12 years')
            if lifespan and isinstance(lifespan, str):
                try:
                    # 從壽命字符串中提取年數(例如 "10-12 years" 或 "12-15 years")
                    import re
                    years = re.findall(r'\d+', lifespan)
                    if years:
                        avg_years = sum(int(y) for y in years) / len(years)
                        if avg_years >= 14:
                            keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan'])
                        elif avg_years <= 8:
                            keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care'])
                        else:
                            keywords.extend(['average-lifespan', 'moderate-longevity'])
                except:
                    keywords.extend(['average-lifespan'])

            # 將關鍵字添加到描述中以便更好的語義匹配
            unique_keywords = list(set(keywords))
            keyword_text = ' '.join(unique_keywords)
            full_description += f" Additional context: {keyword_text}"

            return full_description

        except Exception as e:
            print(f"Error creating description for {breed}: {str(e)}")
            return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics."

    def _build_breed_vectors(self):
        """為所有品種建立向量表示 - 延遲調用當需要時"""
        try:
            print("Building breed vector database...")

            # 初始化模型如果尚未完成
            if self.sbert_model is None:
                self._initialize_model()
                
            # 如果模型不可用則跳過
            if self.sbert_model is None:
                print("SBERT model not available, skipping vector building")
                return

            for breed in self.breed_list:
                description = self._create_breed_description(breed)

                # 生成嵌入向量
                embedding = self.sbert_model.encode(description, convert_to_tensor=False)

                # 獲取品種特徵
                breed_info = get_dog_description(breed)
                characteristics = {
                    'size': breed_info.get('Size', 'Medium') if breed_info else 'Medium',
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate') if breed_info else 'Moderate',
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate') if breed_info else 'Moderate',
                    'good_with_children': breed_info.get('Good with Children', 'Yes') if breed_info else 'Yes',
                    'temperament': breed_info.get('Temperament', '') if breed_info else ''
                }

                self.breed_vectors[breed] = BreedDescriptionVector(
                    breed_name=breed,
                    description_text=description,
                    embedding=embedding,
                    characteristics=characteristics
                )

            print(f"Successfully built {len(self.breed_vectors)} breed vectors")

        except Exception as e:
            print(f"Error building breed vectors: {str(e)}")
            print(traceback.format_exc())
            raise

    def get_breed_vectors(self) -> Dict[str, BreedDescriptionVector]:
        """獲取所有品種向量"""
        # 確保向量已建構
        if not self.breed_vectors:
            self._build_breed_vectors()
        return self.breed_vectors

    def get_sbert_model(self) -> Optional[SentenceTransformer]:
        """獲取 SBERT 模型"""
        return self.sbert_model

    def get_breed_list(self) -> List[str]:
        """獲取品種清單"""
        return self.breed_list

    def is_model_available(self) -> bool:
        """檢查 SBERT 模型是否可用"""
        return self.sbert_model is not None

    def encode_text(self, text: str) -> np.ndarray:
        """使用 SBERT 模型編碼文本"""
        # 初始化模型如果尚未完成
        if self.sbert_model is None:
            self._initialize_model()
            
        if self.sbert_model is None:
            raise RuntimeError("SBERT model not available")
        return self.sbert_model.encode(text, convert_to_tensor=False)