import re
import json
import requests
import traceback
import time
import os
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta

# Updated imports for pydantic
from pydantic import BaseModel, Field

# Updated imports for LangChain
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_ollama import OllamaLLM
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Enhanced HuggingFace imports for improved functionality
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

# Import endpoints documentation
from endpoints_documentation import endpoints_documentation

# Set environment variables for HuggingFace
# os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

class EndpointRequest(BaseModel):
    """Data model for API endpoint requests"""
    endpoint: str = Field(..., description="The API endpoint path to call")
    method: str = Field(..., description="The HTTP method to use (GET or POST)")
    params: Dict[str, Any] = Field(default_factory=dict, description="Parameters for the API call")
    missing_required: List[str] = Field(default_factory=list, description="Any required parameters that are missing")

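# Illustrative example of the structured routing output that gets parsed into this model.
# The endpoint path and parameter values below are placeholders for demonstration only; the
# real paths and parameters come from endpoints_documentation:
#
#   EndpointRequest(
#       endpoint="/appointments",
#       method="POST",
#       params={"patient_id": "<patient uuid>", "date_time": "2025-05-16T20:00:00"},
#       missing_required=[],
#   )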

class AIAgent:
    def __init__(self):
        self.endpoints_documentation = endpoints_documentation
        self.ollama_base_url = "http://localhost:11434"  # Default Ollama URL
        self.model_name = "mistral"  # Using mistral model for better multilingual support
        # self.model_name = 'llama3'
        self.BASE_URL = 'https://agent.serveo.net'
        self.headers = {
            'Content-type': 'application/json'
        }
        self.user_id = '3b0b698d-ae49-4ba3-b83b-2b51fce7331d'
        self.max_retries = 3
        self.retry_delay = 2  # seconds
        
        # Enhanced language detection using HuggingFace models
        self._initialize_language_tools()
        
        # Initialize LangChain components
        self._initialize_llm()
        self._initialize_parsers_and_chains()
        
        # Add date parsing capabilities
        self._initialize_date_parser()

    def _initialize_language_tools(self):
        """Initialize more sophisticated language processing tools"""
        # Use multilingual embeddings for semantic understanding
        self.embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
        
        # Initialize language identification model
        try:
            self.language_classifier = pipeline(
                "text-classification", 
                model="papluca/xlm-roberta-base-language-detection",
                top_k=1
            )
            print("Language classification model loaded successfully")
        except Exception as e:
            print(f"Failed to load language classification model: {e}")
            # Fallback to basic regex detection if model fails to load
            self.language_classifier = None
            
        # Add sentiment analysis for enhanced response generation
        try:
            self.sentiment_analyzer = pipeline(
                "sentiment-analysis",
                model="cardiffnlp/twitter-xlm-roberta-base-sentiment"
            )
            print("Sentiment analysis model loaded successfully")
        except Exception as e:
            print(f"Failed to load sentiment analysis model: {e}")
            self.sentiment_analyzer = None

    def _initialize_date_parser(self):
        """Initialize date parsing model for handling relative date expressions"""
        try:
            self.date_parser = pipeline(
                "token-classification",
                model="Jean-Baptiste/roberta-large-ner-english",
                aggregation_strategy="simple"
            )
            print("Date parsing model loaded successfully")
        except Exception as e:
            print(f"Failed to load date parsing model: {e}")
            self.date_parser = None

    def detect_language(self, text):
        """
        Enhanced language detection using HuggingFace models
        """
        # First try using the HuggingFace language classification model if available
        if self.language_classifier and len(text.strip()) > 3:
            try:
                result = self.language_classifier(text)
                detected_lang = result[0][0]['label']
                confidence = result[0][0]['score']
                
                print(f"Language detected: {detected_lang} with confidence {confidence:.4f}")
                
                # Map the detected language to our simplified language set
                if detected_lang in ['ar', 'arabic']:
                    return "arabic"
                elif detected_lang in ['en', 'english']:
                    return "english"
                elif confidence > 0.8:  # If confident but not English/Arabic
                    # We currently only support English/Arabic, but log other languages
                    print(f"Detected unsupported language: {detected_lang}")
                    # Default to English for other languages for now
                    return "english"
            except Exception as e:
                print(f"Error in language detection model: {e}")
                # Continue to fallback methods
        
        # Fallback: Basic detection of Arabic text using regex
        arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]+')
        if arabic_pattern.search(text):
            return "arabic"
        
        # Default to English
        return "english"
    
    def analyze_sentiment(self, text):
        """Analyze the sentiment of the input text"""
        if self.sentiment_analyzer and len(text.strip()) > 3:
            try:
                result = self.sentiment_analyzer(text)
                sentiment = result[0]['label']
                score = result[0]['score']
                return {
                    "sentiment": sentiment,
                    "score": score
                }
            except Exception as e:
                print(f"Error in sentiment analysis: {e}")
        
        # Default neutral sentiment if analysis fails
        return {"sentiment": "NEUTRAL", "score": 0.5}

    def extract_semantic_keywords(self, text, top_n=5):
        """Extract semantic keywords from text using embeddings"""
        try:
            # Simple keyword extraction using embeddings comparison
            # This is a basic implementation - could be enhanced further
            words = re.findall(r'\b\w+\b', text.lower())
            unique_words = list(set([w for w in words if len(w) > 3]))
            
            if not unique_words:
                return []
                
            # Get embeddings for all words
            embeddings_list = []
            for word in unique_words:
                try:
                    emb = self.embeddings.embed_query(word)
                    embeddings_list.append((word, emb))
                except Exception as e:
                    print(f"Error embedding word {word}: {e}")
            
            # Get embedding for full text
            text_embedding = self.embeddings.embed_query(text)
            
            # Calculate similarity to full text
            similarities = []
            for word, emb in embeddings_list:
                similarity = np.dot(emb, text_embedding) / (np.linalg.norm(emb) * np.linalg.norm(text_embedding))
                similarities.append((word, similarity))
            
            # Sort by similarity
            similarities.sort(key=lambda x: x[1], reverse=True)
            
            # Return top N keywords
            return [word for word, _ in similarities[:top_n]]
            
        except Exception as e:
            print(f"Error extracting keywords: {e}")
            return []

    def _initialize_llm(self):
        """Initialize the LLM with appropriate configuration"""
        # Set up the callback manager for streaming (optional)
        callbacks = [StreamingStdOutCallbackHandler()]
        
        # Initialize the Ollama LLM with updated parameters
        self.llm = OllamaLLM(
            model=self.model_name,
            base_url=self.ollama_base_url,
            callbacks=callbacks,
            temperature=0.7,
            num_ctx=8192,  # Increased context window
            top_p=0.9,
            request_timeout=60,  # Timeout in seconds
        )

    def _initialize_parsers_and_chains(self):
        """Initialize output parsers and LLM chains"""
        # Setup JSON parser for structured output
        self.json_parser = JsonOutputParser(pydantic_object=EndpointRequest)
        
        # Create multilingual router prompt template with enhanced context
        self.router_prompt_template = PromptTemplate(
            template="""
                You are a precise API routing assistant. Your job is to analyze user queries and select the correct API endpoint with proper parameters.

                === ENDPOINT DOCUMENTATION ===
                {endpoints_documentation}

                === USER REQUEST ANALYSIS ===
                User Query: {user_query}
                Language: {detected_language}
                Keywords: {extracted_keywords}
                Sentiment: {sentiment_analysis}

                === ROUTING PROCESS ===
                Follow these steps in order:

                STEP 1: INTENT ANALYSIS
                - What is the user trying to accomplish?
                - What type of operation are they requesting? (create, read, update, delete, search, etc.)
                - What entity/resource are they working with?

                STEP 2: ENDPOINT MATCHING
                - Review each endpoint in the documentation
                - Match the user's intent to the endpoint's PURPOSE/DESCRIPTION
                - Consider the HTTP method (GET for retrieval, POST for creation, etc.)
                - Verify the endpoint can handle the user's specific request

                STEP 3: PARAMETER EXTRACTION
                - Identify ALL required parameters from the endpoint documentation
                - Extract parameter values from the user query
                - Convert data types as needed (dates to ISO 8601, numbers to integers, etc.)
                - Set appropriate defaults for optional parameters if beneficial

                STEP 4: VALIDATION
                - Ensure ALL required parameters are provided or identified as missing
                - Verify parameter formats match documentation requirements
                - Check that the selected endpoint actually solves the user's problem

                === RESPONSE FORMAT ===
                Provide your analysis and decision in this exact JSON structure:

                {{
                    "reasoning": {{
                        "user_intent": "Brief description of what the user wants to accomplish",
                        "selected_endpoint": "Why this endpoint was chosen over others",
                        "parameter_mapping": "How user query maps to endpoint parameters"
                    }},
                    "endpoint": "/exact_endpoint_path_from_documentation",
                    "method": "HTTP_METHOD",
                    "params": {{
                        "required_param_1": "extracted_or_converted_value",
                        "required_param_2": "extracted_or_converted_value",
                        "optional_param": "value_if_applicable"
                    }},
                    "missing_required": ["list", "of", "missing", "required", "parameters"],
                    "confidence": 0.95
                }}

                === CRITICAL RULES ===
                1. ONLY select endpoints that exist in the provided documentation
                2. NEVER fabricate or assume endpoint parameters not in documentation
                3. ALL required parameters MUST be included or listed as missing
                4. Convert dates/times to ISO 8601 format (YYYY-MM-DDTHH:MM:SS)
                5. If patient_id is required and not provided, add it to missing_required
                6. Match endpoints by PURPOSE, not just keywords in the path
                7. If multiple endpoints could work, choose the most specific one
                8. If no endpoint matches, set endpoint to null and explain in reasoning

                === EXAMPLES OF GOOD MATCHING ===
                - User wants "patient records" → Use patient retrieval endpoint, not general search
                - User wants to "schedule appointment" → Use appointment creation endpoint
                - User asks "what appointments today" → Use appointment listing with date filter
                - User wants to "update medication" → Use medication update endpoint with patient_id

                Think step by step and be precise with your endpoint selection and parameter extraction.
                """,
                input_variables=["endpoints_documentation", "user_query", "detected_language", 
                                "extracted_keywords", "sentiment_analysis"],
                partial_variables={"format_instructions": self.json_parser.get_format_instructions()}
            )
        
        # # Create user-friendly response template with enhanced context awareness
        # self.user_response_template = PromptTemplate(
        #     template="""
        #     You are a professional and friendly virtual assistant for a healthcare system.
        #     Your task is to generate clear, concise, and professional responses to user queries.

        #     IMPORTANT RULES:
        #     - Respond ONLY in {detected_language}
        #     - For Arabic, use Modern Standard Arabic (فصحى)
        #     - Keep responses SHORT and DIRECT
        #     - Include ONLY essential information
        #     - NEVER mix languages
        #     - ALWAYS use the EXACT data from the system response
        #     - NEVER make up or modify hospital information
        #     - Use professional and polite tone

        #     Original query: {user_query}
        #     System result: {api_response}
        #     User sentiment: {sentiment_analysis}

        #     ARABIC RESPONSE RULES:
        #     - Use Arabic numbers (١، ٢، ٣)
        #     - Use proper date format (١٥ مايو ٢٠٢٥)
        #     - Use proper time format (الساعة ٨ صباحاً)
        #     - Use formal medical terms
        #     - Keep sentences short and clear
        #     - Use exact hospital names and addresses from the data
        #     - Use exact working hours from the data
        #     - Use professional healthcare terminology

        #     ENGLISH RESPONSE RULES:
        #     - Use clear, direct language
        #     - Include only essential details
        #     - Use proper medical terms
        #     - Keep responses concise
        #     - Use exact hospital names and addresses from the data
        #     - Use exact working hours from the data
        #     - Use professional healthcare terminology

        #     Remember: 
        #     - Keep responses SHORT and FOCUSED
        #     - Use ONLY data from the system response
        #     - NEVER modify or make up hospital information
        #     - Include only what's necessary to answer the query
        #     - Maintain professional and polite tone
        #     - Use proper healthcare terminology
        #     """,
        #     input_variables=["user_query", "api_response", "detected_language", 
        #                    "sentiment_analysis", "extracted_keywords"]
        # )
        # Create user-friendly response template with enhanced context awareness
        self.user_response_template = PromptTemplate(
            template="""
            You are a professional healthcare assistant. Generate clear, accurate responses using EXACT data from the system.

            === STRICT REQUIREMENTS ===
            - Respond ONLY in {detected_language}
            - Use EXACT information from api_response - NO modifications
            - Keep responses SHORT, SIMPLE, and DIRECT
            - Use professional healthcare tone
            - NEVER mix languages or make up information

            === ORIGINAL REQUEST ===
            User Query: {user_query}
            User Sentiment: {sentiment_analysis}

            === SYSTEM DATA ===
            {api_response}

            === LANGUAGE-SPECIFIC FORMATTING ===

            FOR ARABIC RESPONSES:
            - Use Modern Standard Arabic (الفصحى)
            - Use Arabic numerals: ١، ٢، ٣، ٤، ٥، ٦، ٧، ٨، ٩، ١٠
            - Time format: "من الساعة ٨:٠٠ صباحاً إلى ٥:٠٠ مساءً"
            - Date format: "١٥ مايو ٢٠٢٥"
            - Use proper Arabic medical terminology
            - Keep sentences short and grammatically correct
            - Example format for hospitals:
            "مستشفى [الاسم] - العنوان: [العنوان الكامل] - أوقات العمل: من [الوقت] إلى [الوقت]"

            FOR ENGLISH RESPONSES:
            - Use clear, professional language
            - Time format: "8:00 AM to 5:00 PM"
            - Date format: "May 15, 2025"
            - Keep sentences concise and direct
            - Example format for hospitals:
            "[Hospital Name] - Address: [Full Address] - Hours: [Opening Time] to [Closing Time]"

            === RESPONSE STRUCTURE ===
            1. Direct answer to the user's question
            2. Essential details only (names, addresses, hours, contact info)
            3. Brief helpful note if needed
            4. No unnecessary introductions or conclusions

            === CRITICAL RULES ===
            - Extract information EXACTLY as provided in api_response
            - Do NOT include technical URLs, IDs, or system codes in the response
            - Do NOT show raw links or booking URLs to users
            - Present information in natural, conversational language
            - Do NOT use bullet points or technical formatting
            - Write as if you're speaking to the patient directly
            - If data is missing, state "المعلومات غير متوفرة" (Arabic) or "Information not available" (English)
            - Convert technical data into human-readable format
            - NEVER add translations or explanations in other languages
            - NEVER include "Translated response" or similar phrases
            - END your response immediately after providing the requested information
            - Do NOT add any English translation when responding in Arabic
            - Do NOT add any Arabic translation when responding in English

            === HUMAN-LIKE FORMATTING RULES ===
            FOR ARABIC:
            - Instead of "رابط الحجز: [URL]" → say "تم حجز موعدك بنجاح"
            - Instead of "الأزمة: غير متوفرة" → omit or say "بدون أعراض محددة"
            - Use natural sentences like "موعدك مع الدكتور [Name] يوم [Date] في تمام الساعة [Time]"
            - Avoid technical terms and system language

            FOR ENGLISH:
            - Instead of "Booking URL: [link]" → say "Your appointment has been scheduled"
            - Use natural sentences like "You have an appointment with Dr. [Name] on [Date] at [Time]"
            - Avoid showing raw URLs, IDs, or technical data

            === QUALITY CHECKS ===
            Before responding, verify:
            ✓ Response sounds natural and conversational
            ✓ No technical URLs, IDs, or system codes are shown
            ✓ Information is presented in human-friendly language
            ✓ Grammar is correct in the target language
            ✓ Response directly answers the user's question
            ✓ No bullet points or technical formatting
            ✓ Sounds like a helpful human assistant, not a system

            Generate a response that is accurate, helpful, and professionally formatted.

            === FINAL INSTRUCTION ===
            Respond ONLY in the requested language. Do NOT provide translations, explanations, or additional text in any other language. Stop immediately after answering the user's question.
            """,
            input_variables=["user_query", "api_response", "detected_language", 
                            "sentiment_analysis", "extracted_keywords"]
        )
        
        # Create LLM chains
        self.router_chain = LLMChain(
            llm=self.llm,
            prompt=self.router_prompt_template,
            output_key="route_result"
        )
        
        self.user_response_chain = LLMChain(
            llm=self.llm,
            prompt=self.user_response_template,
            output_key="user_friendly_response"
        )

    def parse_relative_date(self, text, detected_language):
        """
        Parse relative dates from text using a combination of methods
        """
        today = datetime.now()
        
        # Handle common relative date patterns in English and Arabic
        tomorrow_patterns = {
            'english': [r'\btomorrow\b', r'\bnext day\b'],
            'arabic': [r'\bغدا\b', r'\bبكرة\b', r'\bغدًا\b', r'\bالغد\b']
        }
        
        next_week_patterns = {
            'english': [r'\bnext week\b'],
            'arabic': [r'\bالأسبوع القادم\b', r'\bالأسبوع المقبل\b', r'\bالاسبوع الجاي\b']
        }
        
        # Check for "tomorrow" patterns
        for pattern in tomorrow_patterns.get(detected_language, []) + tomorrow_patterns.get('english', []):
            if re.search(pattern, text, re.IGNORECASE):
                return (today + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S')
        
        # Check for "next week" patterns
        for pattern in next_week_patterns.get(detected_language, []) + next_week_patterns.get('english', []):
            if re.search(pattern, text, re.IGNORECASE):
                return (today + timedelta(days=7)).strftime('%Y-%m-%dT%H:%M:%S')
        
        # If NER model is available, use it to extract date entities
        if self.date_parser and detected_language == 'english':
            try:
                date_entities = self.date_parser(text)
                for entity in date_entities:
                    if entity['entity_group'] == 'DATE':
                        # Here you would need more complex date parsing logic
                        # This is just a placeholder
                        print(f"Found date entity: {entity['word']}")
                        # For now, just default to tomorrow if we detect any date
                        return (today + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S')
            except Exception as e:
                print(f"Error in date parsing: {e}")
        
        # Default return None if no date pattern is recognized
        return None
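        # Illustrative behavior (assumed example, not a fixed return value): a query like
        # "book me an appointment tomorrow" with detected_language="english" yields
        # tomorrow's date as an ISO 8601 string (e.g. "2025-05-16T14:30:00"); text with no
        # recognized date expression yields None.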

    def process_user_query(self, user_query: str) -> Dict[str, Any]:
        """
        Process the user query through the LangChain pipeline and return a response
        """
        try:
            start_time = time.time()
            
            # Detect language of the query
            detected_language = self.detect_language(user_query)
            print(f"Detected language: {detected_language}")
            
            # Enhanced context using Hugging Face models
            sentiment_result = self.analyze_sentiment(user_query)
            print(f"Sentiment analysis: {sentiment_result}")
            
            extracted_keywords = self.extract_semantic_keywords(user_query)
            print(f"Extracted keywords: {extracted_keywords}")
            
            # Try to extract dates from query
            parsed_date = self.parse_relative_date(user_query, detected_language)
            if parsed_date:
                print(f"Parsed relative date: {parsed_date}")
            
            # 1. Route the query to determine which API endpoint to call
            router_result = self.router_chain.invoke({
                "endpoints_documentation": json.dumps(self.endpoints_documentation, indent=2),
                "user_query": user_query,
                "detected_language": detected_language,
                "extracted_keywords": ", ".join(extracted_keywords),
                "sentiment_analysis": json.dumps(sentiment_result)
            })
            
            # 2. Parse the router response
            route_result = router_result["route_result"]
            parsed_route = None
            
            # Clean the response first
            cleaned_response = route_result
            
            # Remove any comments (both single-line and multi-line)
            cleaned_response = re.sub(r'//.*?$', '', cleaned_response, flags=re.MULTILINE)
            cleaned_response = re.sub(r'/\*.*?\*/', '', cleaned_response, flags=re.DOTALL)
            
            # Remove any trailing commas
            cleaned_response = re.sub(r',(\s*[}\]])', r'\1', cleaned_response)
            
            # Try several methods to parse the JSON response, falling back in order
            # 1) Direct JSON parsing of the cleaned response
            try:
                parsed_route = json.loads(cleaned_response)
            except json.JSONDecodeError:
                parsed_route = None
            
            # 2) Extract JSON from a markdown code block
            if parsed_route is None:
                json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', cleaned_response, re.DOTALL)
                if json_match:
                    try:
                        parsed_route = json.loads(json_match.group(1))
                    except json.JSONDecodeError:
                        parsed_route = None
            
            # 3) Find JSON-like content anywhere in the response using regex
            if parsed_route is None:
                json_match = re.search(r'\{\s*"endpoint"\s*:.*?\}', cleaned_response, re.DOTALL)
                if json_match:
                    json_str = json_match.group(0)
                    # Additional cleaning for the extracted JSON
                    json_str = re.sub(r'//.*?$', '', json_str, flags=re.MULTILINE)
                    json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
                    try:
                        parsed_route = json.loads(json_str)
                    except json.JSONDecodeError:
                        parsed_route = None
            
            if parsed_route is None:
                print(f"Failed to parse JSON. Raw response: {route_result}")
                print(f"Cleaned response: {cleaned_response}")
                raise ValueError("Could not extract valid JSON from LLM response")
            
            # Replace any placeholder values and inject parsed dates if available
            if 'params' in parsed_route:
                if 'patient_id' in parsed_route['params']:
                    parsed_route['params']['patient_id'] = self.user_id
                
                # Inject parsed date if available and a date parameter exists
                date_params = ['appointment_date', 'date', 'schedule_date', 'date_time', 'new_date_time']
                if parsed_date:
                    for param in date_params:
                        if param in parsed_route['params']:
                            parsed_route['params'][param] = parsed_date
            
            print('Parsed route: ', parsed_route)
            print(f"Routing completed in {time.time() - start_time:.2f} seconds")
            
            # 3. Make the backend API call
            backend_response = self.backend_call(parsed_route)
            
            # 4. Generate user-friendly response
            user_friendly_result = self.user_response_chain.invoke({
                "user_query": user_query,
                "api_response": json.dumps(backend_response, indent=2),
                "detected_language": detected_language,
                "sentiment_analysis": json.dumps(sentiment_result),
                "extracted_keywords": ", ".join(extracted_keywords)
            })
            print('user response: ', user_friendly_result["user_friendly_response"])
            
            print(f"Total processing time: {time.time() - start_time:.2f} seconds")
            
            return {
                "routing_info": parsed_route,
                "api_response": backend_response,
                "user_friendly_response": user_friendly_result["user_friendly_response"],
                "detected_language": detected_language,
                "sentiment": sentiment_result,
                "keywords": extracted_keywords
            }
            
        except Exception as e:
            error_detail = {
                "error": f"Error processing query: {str(e)}",
                "type": type(e).__name__,
                "traceback": traceback.format_exc()
            }
            print(f"Error: {error_detail['error']}")
            print(f"Traceback: {error_detail['traceback']}")
            return error_detail

    def backend_call(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make the actual API call to the backend with retry logic
        """
        endpoint_url = data.get('endpoint')
        endpoint_method = data.get('method')
        endpoint_params = data.get('params', {}).copy()  # Copy so the caller's dict is not modified
        
        # The router may return a null endpoint when no documented endpoint matches the query
        if not endpoint_url or not endpoint_method:
            return {"error": "No matching endpoint was selected for this query"}
        
        print('Endpoint url: ' + endpoint_url)
        print('Method: ', endpoint_method)
        print('Params: ', endpoint_params)
        
        # Add retry logic for more robust API calls
        retries = 0
        while retries < self.max_retries:
            try:
                if endpoint_method.upper() == 'GET':
                    response = requests.get(
                        self.BASE_URL + endpoint_url,
                        params=endpoint_params,
                        headers=self.headers,
                        timeout=10  # Add timeout for backend calls
                    )
                elif endpoint_method.upper() == 'POST':
                    response = requests.post(
                        self.BASE_URL + endpoint_url,
                        json=endpoint_params,
                        headers=self.headers,
                        timeout=10
                    )
                elif endpoint_method.upper() == 'PUT':
                    response = requests.put(
                        self.BASE_URL + endpoint_url,
                        json=endpoint_params,
                        headers=self.headers,
                        timeout=10
                    )
                else:
                    # Guard against unsupported methods so `response` is never referenced unbound
                    return {"error": f"Unsupported HTTP method: {endpoint_method}"}
                
                # Check if response status is success
                response.raise_for_status()
                return response.json()
                
            except requests.exceptions.RequestException as e:
                retries += 1
                if retries >= self.max_retries:
                    return {
                        "error": "Backend API call failed after multiple retries",
                        "details": str(e),
                        "status_code": getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
                    }
                
                print(f"API call attempt {retries} failed, retrying in {self.retry_delay} seconds...")
                time.sleep(self.retry_delay)


# Initialize the AI agent singleton
# ai_agent = AIAgent()


# Test the agent directly
# if __name__ == "__main__":
#     agent = AIAgent()
    
#     # Test with English query
#     # print("\n---Testing English Query---")
#     # english_response = agent.process_user_query("I need to book an appointment with Dr. Smith tomorrow at 8 PM")
#     # print("\nEnglish response:")
#     # print(english_response["user_friendly_response"])
    
#     # Test with Arabic query
#     print("\n---Testing Arabic Query---")
#     # arabic_response = agent.process_user_query(" اريد الغاء الحجز مع الدكتور Smith")  # "I want to cancel the appointment with Dr. Smith"
#     arabic_response = agent.process_user_query("اريد حجز ميعاد غدا في الساعه الثامنه مساء مع الدكتور Smith")  # "I want to book an appointment tomorrow at 8 PM with Dr. Smith"
#     # arabic_response = agent.process_user_query("متى يفتح المستشفى؟")  # "When does the hospital open?"
#     # arabic_response = agent.process_user_query("اريد معرفه كل الحجوزات الخاصه بي")  # "I want to see all my appointments"
#     print("\nArabic response:")
#     print(arabic_response["user_friendly_response"])


# Fast api section 
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, Any, Optional


app = FastAPI(
    title="Healthcare AI Assistant",
    description="An AI-powered healthcare assistant that handles appointment booking and queries",
    version="1.0.0"
)

# Initialize the AI agent
agent = AIAgent()

class QueryRequest(BaseModel):
    query: str
    language: Optional[str] = None

class QueryResponse(BaseModel):
    routing_info: Dict[str, Any]
    api_response: Dict[str, Any]
    user_friendly_response: str
    detected_language: str
    sentiment: Dict[str, Any]

@app.post("/query", response_model=QueryResponse)
async def process_query(request: QueryRequest):
    """
    Process a user query and return a response
    """
    try:
        response = agent.process_user_query(request.query)
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health_check():
    """
    Health check endpoint
    """
    return {"status": "healthy", "service": "healthcare-ai-assistant"}

@app.get("/")
async def root():
    return {"message": "Hello World"}

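# Example client call (a minimal sketch, assuming the API is served locally on port 8000;
# the request body follows the QueryRequest model above):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/query",
#       json={"query": "I need to book an appointment with Dr. Smith tomorrow at 8 PM"},
#       timeout=120,
#   )
#   print(resp.json()["user_friendly_response"])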
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=8000)