import re
import json
import requests
import traceback
import time
import os
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
# Updated imports for pydantic
from pydantic import BaseModel, Field
# Updated imports for LangChain
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_ollama import OllamaLLM
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
# Enhanced HuggingFace imports for improved functionality
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
# Import endpoints documentation
from endpoints_documentation import endpoints_documentation
# Set environment variables for HuggingFace
# if os.name == 'posix' and os.uname().sysname == 'Darwin':  # Check if running on macOS
#     os.environ["HF_HOME"] = os.path.expanduser("~/Library/Caches/huggingface")
#     os.environ["TRANSFORMERS_CACHE"] = os.path.expanduser("~/Library/Caches/huggingface/transformers")
# else:
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
class ChatMessage(BaseModel): | |
"""Data model for chat messages""" | |
message_id: str = Field(..., description="Unique identifier for the message") | |
user_id: str = Field(..., description="User identifier") | |
message: str = Field(..., description="The user's message") | |
timestamp: datetime = Field(default_factory=datetime.now, description="When the message was sent") | |
language: str = Field(default="english", description="Detected language of the message") | |
class ChatResponse(BaseModel): | |
"""Data model for chatbot responses""" | |
response_id: str = Field(..., description="Unique identifier for the response") | |
response_type: str = Field(..., description="Type of response: 'conversation' or 'api_action'") | |
message: str = Field(..., description="The chatbot's response message") | |
api_call_made: bool = Field(default=False, description="Whether an API call was made") | |
api_data: Optional[Dict[str, Any]] = Field(default=None, description="API response data if applicable") | |
language: str = Field(default="english", description="Language of the response") | |
timestamp: datetime = Field(default_factory=datetime.now, description="When the response was generated") | |
class EndpointRequest(BaseModel): | |
"""Data model for API endpoint requests""" | |
endpoint: str = Field(..., description="The API endpoint path to call") | |
method: str = Field(..., description="The HTTP method to use (GET or POST)") | |
params: Dict[str, Any] = Field(default_factory=dict, description="Parameters for the API call") | |
missing_required: List[str] = Field(default_factory=list, description="Any required parameters that are missing") | |
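# Illustrative only: a hypothetical routing payload of the shape that EndpointRequest models.
# The endpoint path and parameter values below are examples, not taken from the real API docs.
# example_route = EndpointRequest(
#     endpoint="/appointments",
#     method="POST",
#     params={"patient_id": "<patient-uuid>", "date_time": "2025-05-30T10:28:10"},
#     missing_required=[],
# )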
class HealthcareChatbot: | |
def __init__(self): | |
self.endpoints_documentation = endpoints_documentation | |
self.ollama_base_url = "http://localhost:11434" | |
self.model_name = "gemma3" | |
self.BASE_URL = 'https://f376-197-54-54-66.ngrok-free.app' | |
self.headers = {'Content-type': 'application/json'} | |
self.user_id = '86639f4c-5dfc-441d-b229-084f0fcdd748' | |
self.max_retries = 3 | |
self.retry_delay = 2 | |
# Store conversation history | |
self.conversation_history = [] | |
self.max_history_length = 10 # Keep last 10 exchanges | |
# Initialize components | |
self._initialize_language_tools() | |
self._initialize_llm() | |
self._initialize_parsers_and_chains() | |
self._initialize_date_parser() | |
print("Healthcare Chatbot initialized successfully!") | |
self._print_welcome_message() | |
def _print_welcome_message(self): | |
"""Print welcome message in both languages""" | |
print("\n" + "="*60) | |
print("🏥 HEALTHCARE CHATBOT READY") | |
print("="*60) | |
print("English: Hello! I'm your healthcare assistant. I can help you with:") | |
print("• Booking and managing appointments") | |
print("• Finding hospital information") | |
print("• Viewing your medical records") | |
print("• General healthcare questions") | |
print() | |
print("Arabic: مرحباً! أنا مساعدك الطبي. يمكنني مساعدتك في:") | |
print("• حجز وإدارة المواعيد") | |
print("• العثور على معلومات المستشفى") | |
print("• عرض سجلاتك الطبية") | |
print("• الأسئلة الطبية العامة") | |
print("="*60) | |
print("Type 'quit' or 'خروج' to exit\n") | |
def _initialize_language_tools(self): | |
"""Initialize language processing tools""" | |
try: | |
self.embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large") | |
self.language_classifier = pipeline( | |
"text-classification", | |
model="papluca/xlm-roberta-base-language-detection", | |
top_k=1 | |
) | |
self.sentiment_analyzer = pipeline( | |
"sentiment-analysis", | |
model="cardiffnlp/twitter-xlm-roberta-base-sentiment" | |
) | |
print("✓ Language processing models loaded successfully") | |
except Exception as e: | |
print(f"⚠ Warning: Some language models failed to load: {e}") | |
self.language_classifier = None | |
self.sentiment_analyzer = None | |
def _initialize_date_parser(self): | |
"""Initialize date parsing model""" | |
try: | |
self.date_parser = pipeline( | |
"token-classification", | |
model="Jean-Baptiste/roberta-large-ner-english", | |
aggregation_strategy="simple" | |
) | |
except Exception as e: | |
print(f"⚠ Warning: Date parsing model failed to load: {e}") | |
self.date_parser = None | |
def _initialize_llm(self): | |
"""Initialize the LLM""" | |
callbacks = [StreamingStdOutCallbackHandler()] | |
self.llm = OllamaLLM( | |
model=self.model_name, | |
base_url=self.ollama_base_url, | |
callbacks=callbacks, | |
temperature=0.7, | |
num_ctx=8192, | |
top_p=0.9, | |
request_timeout=60, | |
) | |
def _initialize_parsers_and_chains(self): | |
"""Initialize all prompt templates and chains""" | |
self.json_parser = JsonOutputParser(pydantic_object=EndpointRequest) | |
# Intent classification prompt | |
# self.intent_classifier_template = PromptTemplate( | |
# template=""" | |
# You are an intent classifier. Your job is simple: understand what the user wants and check if any API endpoint can do that. | |
# User Message: {user_query} | |
# Language: {detected_language} | |
# API Endpoints: {endpoints_documentation} | |
# Think step by step: | |
# 1. What does the user want from this message? | |
# Read the user's message carefully. What is the user trying to say or accomplish? What would a human understand from this message? | |
# 2. Can any API endpoint fulfill what the user wants? | |
# Look at each API endpoint. Does any endpoint do what the user is asking for? Be very precise - only say yes if there's a clear match. | |
# Important rules: | |
# - Focus ONLY on the current message, ignore conversation history for classification | |
# - If the user is just talking, being social, or saying something casual, that's CONVERSATION | |
# - Only choose API_ACTION if the user is clearly asking for something an API endpoint can do | |
# - When you're not sure, choose CONVERSATION | |
# Answer in this format: | |
# {{ | |
# "intent": "API_ACTION" or "CONVERSATION", | |
# "confidence": [0.0 to 1.0], | |
# "reasoning": "What does the user want? Can any API do this?", | |
# "requires_backend": true or false | |
# }} | |
# """, | |
# input_variables=["user_query", "detected_language", "conversation_history", "endpoints_documentation"] | |
# ) | |
self.intent_classifier_template = PromptTemplate( | |
template=""" | |
You are a strict intent classification system. Your only task is to determine if the user message requires an API action or is general conversation. | |
=== ABSOLUTE RULES === | |
1. OUTPUT FORMAT MUST BE EXACTLY: | |
{{ | |
"intent": "API_ACTION" or "CONVERSATION", | |
"confidence": 0.0-1.0, | |
"reasoning": "clear justification", | |
"requires_backend": true or false | |
}} | |
2. Never invent custom intent types | |
3. Never output endpoint names in the intent field | |
4. "requires_backend" must match the intent (true for API_ACTION) | |
=== CLASSIFICATION CRITERIA === | |
API_ACTION must meet ALL of: | |
- The message contains a clear, actionable request | |
- The request matches a documented API endpoint's purpose | |
- The request requires specific backend functionality | |
CONVERSATION applies when: | |
- The message is social/greeting/smalltalk | |
- The request is too vague for API action | |
- No API endpoint matches the request | |
=== INPUT DATA === | |
User Message: {user_query} | |
Detected Language: {detected_language} | |
API Endpoints: {endpoints_documentation} | |
=== DECISION PROCESS === | |
1. Analyze the message literally - what is the explicit request? | |
2. Check endpoints documentation - is there an exact functional match? | |
3. If uncertain, default to CONVERSATION | |
4. Validate against rules before responding | |
=== OUTPUT VALIDATION === | |
Before responding, verify: | |
- Intent is ONLY "API_ACTION" or "CONVERSATION" | |
- Confidence reflects certainty (1.0 = perfect match) | |
- Reasoning explains the endpoint match (for API_ACTION) | |
- requires_backend aligns with intent | |
Respond ONLY in the exact specified format. | |
""", | |
input_variables=["user_query", "detected_language", "conversation_history", "endpoints_documentation"] | |
) | |
# API routing prompt (reuse existing router_prompt_template) | |
self.router_prompt_template = PromptTemplate( | |
template=""" | |
You are a precise API routing assistant. Your job is to analyze user queries and select the correct API endpoint with proper parameters. | |
=== ENDPOINT DOCUMENTATION === | |
{endpoints_documentation} | |
=== USER REQUEST ANALYSIS === | |
User Query: {user_query} | |
Language: {detected_language} | |
Keywords: {extracted_keywords} | |
Sentiment: {sentiment_analysis} | |
Current Context: | |
- DateTime: {current_datetime} | |
- Timezone: {timezone} | |
- User Locale: {user_locale} | |
=== ROUTING PROCESS === | |
Follow these steps in order: | |
STEP 1: INTENT ANALYSIS | |
- What is the user trying to accomplish? | |
- What type of operation are they requesting? (create, read, update, delete, search, etc.) | |
- What entity/resource are they working with? | |
STEP 2: DATE/TIME PROCESSING | |
- Identify any temporal expressions in the user query | |
- Convert relative dates/times using the current context: | |
* "اليوم" (today) = current date | |
* "غدا" (tomorrow) = current date + 1 day | |
* "أمس" (yesterday) = current date - 1 day | |
* "الأسبوع القادم" (next week) = current date + 7 days | |
* "بعد ساعتين" (in 2 hours) = current time + 2 hours | |
* "صباحًا" (morning/AM), "مساءً" (evening/PM) | |
- Handle different date formats and languages | |
- Account for timezone differences | |
- Convert to ISO 8601 format: YYYY-MM-DDTHH:MM:SS | |
STEP 3: ENDPOINT MATCHING | |
- Review each endpoint in the documentation | |
- Match the user's intent to the endpoint's PURPOSE/DESCRIPTION | |
- Consider the HTTP method (GET for retrieval, POST for creation, etc.) | |
- Verify the endpoint can handle the user's specific request | |
STEP 4: PARAMETER EXTRACTION | |
- Identify ALL required parameters from the endpoint documentation | |
- Extract parameter values from the user query | |
- Convert data types as needed: | |
- Dates/times to ISO 8601 format (YYYY-MM-DDTHH:mm:ss) | |
- Numbers to integers | |
- Set appropriate defaults for optional parameters if beneficial | |
STEP 5: VALIDATION | |
- Ensure ALL required parameters are provided or identified as missing | |
- Verify parameter formats match documentation requirements | |
- Check that the selected endpoint actually solves the user's problem | |
=== RESPONSE FORMAT === | |
Provide your analysis and decision in this exact JSON structure: | |
{{ | |
"reasoning": {{ | |
"user_intent": "Brief description of what the user wants to accomplish", | |
"selected_endpoint": "Why this endpoint was chosen over others", | |
"parameter_mapping": "How user query maps to endpoint parameters" | |
}}, | |
"endpoint": "/exact_endpoint_path_from_documentation", | |
"method": "HTTP_METHOD", | |
"params": {{ | |
"required_param_1": "extracted_or_converted_value", | |
"required_param_2": "extracted_or_converted_value", | |
"optional_param": "value_if_applicable" | |
}}, | |
"missing_required": ["list", "of", "missing", "required", "parameters"], | |
"confidence": 0.95 | |
}} | |
=== CRITICAL RULES === | |
1. ONLY select endpoints that exist in the provided documentation | |
2. NEVER fabricate or assume endpoint parameters not in documentation | |
3. ALL required parameters MUST be included or listed as missing | |
4. Convert dates/times to ISO 8601 format (YYYY-MM-DDTHH:mm:ss) | |
5. If patient_id is required and not provided, add it to missing_required | |
6. Match endpoints by PURPOSE, not just keywords in the path | |
7. If multiple endpoints could work, choose the most specific one | |
8. If no endpoint matches, set endpoint to null and explain in reasoning | |
=== EXAMPLES OF GOOD MATCHING === | |
- User wants "patient records" → Use patient retrieval endpoint, not general search | |
- User wants to "schedule appointment" → Use appointment creation endpoint | |
- User asks "what appointments today" → Use appointment listing with date filter | |
- User wants to "update medication" → Use medication update endpoint with patient_id | |
Think step by step and be precise with your endpoint selection and parameter extraction.:""", | |
input_variables=["endpoints_documentation", "user_query", "detected_language", | |
"extracted_keywords", "sentiment_analysis", "conversation_history", | |
"current_datetime", "timezone", "user_locale"] | |
) | |
# old one | |
# self.router_prompt_template = PromptTemplate( | |
# template=""" | |
# You are a precise API routing assistant. Your job is to analyze user queries and select the correct API endpoint with proper parameters. | |
# === ENDPOINT DOCUMENTATION === | |
# {endpoints_documentation} | |
# === USER REQUEST ANALYSIS === | |
# User Query: {user_query} | |
# Language: {detected_language} | |
# Keywords: {extracted_keywords} | |
# Sentiment: {sentiment_analysis} | |
# === ROUTING PROCESS === | |
# Follow these steps in order: | |
# STEP 1: INTENT ANALYSIS | |
# - What is the user trying to accomplish? | |
# - What type of operation are they requesting? (create, read, update, delete, search, etc.) | |
# - What entity/resource are they working with? | |
# STEP 2: ENDPOINT MATCHING | |
# - Review each endpoint in the documentation | |
# - Match the user's intent to the endpoint's PURPOSE/DESCRIPTION | |
# - Consider the HTTP method (GET for retrieval, POST for creation, etc.) | |
# - Verify the endpoint can handle the user's specific request | |
# STEP 3: PARAMETER EXTRACTION | |
# - Identify ALL required parameters from the endpoint documentation | |
# - Extract parameter values from the user query | |
# - Convert data types as needed (dates to ISO 8601, numbers to integers, etc.) | |
# - Set appropriate defaults for optional parameters if beneficial | |
# STEP 4: VALIDATION | |
# - Ensure ALL required parameters are provided or identified as missing | |
# - Verify parameter formats match documentation requirements | |
# - Check that the selected endpoint actually solves the user's problem | |
# === RESPONSE FORMAT === | |
# Provide your analysis and decision in this exact JSON structure: | |
# {{ | |
# "reasoning": {{ | |
# "user_intent": "Brief description of what the user wants to accomplish", | |
# "selected_endpoint": "Why this endpoint was chosen over others", | |
# "parameter_mapping": "How user query maps to endpoint parameters" | |
# }}, | |
# "endpoint": "/exact_endpoint_path_from_documentation", | |
# "method": "HTTP_METHOD", | |
# "params": {{ | |
# "required_param_1": "extracted_or_converted_value", | |
# "required_param_2": "extracted_or_converted_value", | |
# "optional_param": "value_if_applicable" | |
# }}, | |
# "missing_required": ["list", "of", "missing", "required", "parameters"], | |
# "confidence": 0.95 | |
# }} | |
# === CRITICAL RULES === | |
# 1. ONLY select endpoints that exist in the provided documentation | |
# 2. NEVER fabricate or assume endpoint parameters not in documentation | |
# 3. ALL required parameters MUST be included or listed as missing | |
# 4. Convert dates/times to ISO 8601 format (YYYY-MM-DDTHH:MM:SS) | |
# 5. If patient_id is required and not provided, add it to missing_required | |
# 6. Match endpoints by PURPOSE, not just keywords in the path | |
# 7. If multiple endpoints could work, choose the most specific one | |
# 8. If no endpoint matches, set endpoint to null and explain in reasoning | |
# === EXAMPLES OF GOOD MATCHING === | |
# - User wants "patient records" → Use patient retrieval endpoint, not general search | |
# - User wants to "schedule appointment" → Use appointment creation endpoint | |
# - User asks "what appointments today" → Use appointment listing with date filter | |
# - User wants to "update medication" → Use medication update endpoint with patient_id | |
# Think step by step and be precise with your endpoint selection and parameter extraction.:""", | |
# input_variables=["endpoints_documentation", "user_query", "detected_language", | |
# "extracted_keywords", "sentiment_analysis", "conversation_history"] | |
# ) | |
# Conversational response prompt | |
self.conversation_template = PromptTemplate( | |
template=""" | |
You are a friendly and professional healthcare chatbot assistant. | |
=== RESPONSE GUIDELINES === | |
- Respond ONLY in {detected_language} | |
- Be helpful, empathetic, and professional | |
- Keep responses concise but informative | |
- Use appropriate medical terminology when needed | |
- Maintain a caring and supportive tone | |
=== CONTEXT === | |
User Message: {user_query} | |
Language: {detected_language} | |
Sentiment: {sentiment_analysis} | |
Conversation History: {conversation_history} | |
=== LANGUAGE-SPECIFIC INSTRUCTIONS === | |
FOR ARABIC RESPONSES: | |
- Use Modern Standard Arabic (الفصحى) | |
- Be respectful and formal as appropriate in Arabic culture | |
- Use proper Arabic medical terminology | |
- Keep sentences clear and grammatically correct | |
FOR ENGLISH RESPONSES: | |
- Use clear, professional English | |
- Be warm and approachable | |
- Use appropriate medical terminology | |
=== RESPONSE RULES === | |
1. Address the user's question or comment directly | |
2. Provide helpful information when possible | |
3. If you cannot help with something specific, explain what you CAN help with | |
4. Never provide specific medical advice - always recommend consulting healthcare professionals | |
5. Be encouraging and supportive | |
6. Do NOT mix languages in your response | |
7. End responses naturally without asking multiple questions | |
Generate a helpful conversational response:""", | |
input_variables=["user_query", "detected_language", "sentiment_analysis", "conversation_history"] | |
) | |
# API response formatting prompt (reuse existing user_response_template) | |
self.user_response_template = PromptTemplate( | |
template=""" | |
You are a professional healthcare assistant. Answer the user's question using the provided API data. | |
User Query: {user_query} | |
User Sentiment: {sentiment_analysis} | |
Response Language: {detected_language} | |
API Response Data: | |
{api_response} | |
=== INSTRUCTIONS === | |
1. Read and understand the API response data above | |
2. Use ONLY the actual data from the API response - never make up information | |
3. Respond in {detected_language} language only | |
4. Write like you're talking to a friend or family member - warm, friendly, and caring | |
5. Make it sound natural and conversational, not like a system message | |
6. Convert technical data to simple, everyday language | |
=== DATE AND TIME FORMATTING === | |
When you see date_time fields like '2025-05-30T10:28:10': | |
- For English: Convert to "May 30, 2025 at 10:28 AM" | |
- For Arabic: Convert to "٣٠ مايو ٢٠٢٥ في الساعة ١٠:٢٨ صباحاً" | |
=== RESPONSE EXAMPLES === | |
For appointment confirmations: | |
- English: "Great! I've got your appointment set up for May 30, 2025 at 10:28 AM. Everything looks good!" | |
- Arabic: "ممتاز! موعدك محجوز يوم ٣٠ مايو ٢٠٢٥ الساعة ١٠:٢٨ صباحاً. كل شيء جاهز!" | |
For appointment info: | |
- English: "Your next appointment is on May 30, 2025 at 10:28 AM. See you then!" | |
- Arabic: "موعدك القادم يوم ٣٠ مايو ٢٠٢٥ الساعة ١٠:٢٨ صباحاً. نراك قريباً!" | |
=== TONE GUIDELINES === | |
- Use friendly words like: "Great!", "Perfect!", "All set!", "ممتاز!", "رائع!", "تمام!" | |
- Add reassuring phrases: "Everything looks good", "You're all set", "كل شيء جاهز", "تم بنجاح" | |
- Sound helpful and caring, not robotic or formal | |
=== LANGUAGE FORMATTING === | |
For Arabic responses: | |
- Use Arabic numerals: ٠١٢٣٤٥٦٧٨٩ | |
- Use Arabic month names: يناير، فبراير، مارس، أبريل، مايو، يونيو، يوليو، أغسطس، سبتمبر، أكتوبر، نوفمبر، ديسمبر | |
- Friendly, warm Arabic tone | |
For English responses: | |
- Use standard English numerals | |
- 12-hour time format with AM/PM | |
- Friendly, conversational English tone | |
=== CRITICAL RULES === | |
- Extract dates and times exactly as they appear in the API response | |
- Never use example dates or placeholder information | |
- Respond only in the specified language | |
- Make your response sound like a helpful friend, not a computer | |
- Focus on answering the user's specific question with warmth and care | |
Generate a friendly, helpful response using the API data provided above. | |
""", | |
input_variables=["user_query", "api_response", "detected_language", "sentiment_analysis"] | |
) | |
# self.user_response_template = PromptTemplate( | |
# template=""" | |
# You are a professional healthcare assistant. Your task is to carefully analyze the API data and respond to the user's question accurately. | |
# User Query: {user_query} | |
# User Sentiment: {sentiment_analysis} | |
# Response Language: {detected_language} | |
# API Response Data: | |
# {api_response} | |
# === CRITICAL INSTRUCTIONS === | |
# 1. FIRST: Carefully read and analyze the API response data above | |
# 2. SECOND: Identify all date_time fields in the format 'YYYY-MM-DDTHH:MM:SS' | |
# 3. THIRD: Extract the EXACT dates and times from the API response - DO NOT use any example dates | |
# 4. FOURTH: Convert these extracted dates to the user-friendly format specified below | |
# 5. FIFTH: Respond ONLY in {detected_language} language | |
# 6. Use a warm, friendly, conversational tone like talking to a friend | |
# === DATE EXTRACTION AND CONVERSION === | |
# Step 1: Find date_time fields in the API response (format: 'YYYY-MM-DDTHH:MM:SS') | |
# Step 2: Convert ONLY the actual extracted dates using these rules: | |
# For English: | |
# - Convert 'YYYY-MM-DDTHH:MM:SS' to readable format | |
# - Example: '2025-06-01T08:00:00' becomes "June 1, 2025 at 8:00 AM" | |
# - Use 12-hour format with AM/PM | |
# For Arabic: | |
# - Convert to Arabic numerals and month names | |
# - Example: '2025-06-01T08:00:00' becomes "١ يونيو ٢٠٢٥ في الساعة ٨:٠٠ صباحاً" | |
# - Arabic months: يناير، فبراير، مارس، أبريل، مايو، يونيو، يوليو، أغسطس، سبتمبر، أكتوبر، نوفمبر، ديسمبر | |
# - Arabic numerals: ٠١٢٣٤٥٦٧٨٩ | |
# === RESPONSE APPROACH === | |
# 1. Analyze what the user is asking for | |
# 2. Find the relevant information in the API response | |
# 3. Extract actual dates/times from the API data | |
# 4. Convert technical information to simple language | |
# 5. Respond warmly and helpfully | |
# === TONE AND LANGUAGE === | |
# English responses: | |
# - Use phrases like: "Great!", "Perfect!", "All set!", "Here's what I found:" | |
# - Be conversational and reassuring | |
# Arabic responses: | |
# - Use phrases like: "ممتاز!", "رائع!", "تمام!", "إليك ما وجدته:" | |
# - Be warm and helpful in Arabic style | |
# === IMPORTANT REMINDERS === | |
# - NEVER use example dates from this prompt | |
# - ALWAYS extract dates from the actual API response data | |
# - If no dates exist in API response, don't mention any dates | |
# - Stay focused on answering the user's specific question | |
# - Use only information that exists in the API response | |
# Now, carefully analyze the API response above and generate a helpful response to the user's query using ONLY the actual data provided. | |
# """, | |
# input_variables=["user_query", "api_response", "detected_language", "sentiment_analysis"] | |
# ) | |
# Create chains | |
self.intent_chain = LLMChain(llm=self.llm, prompt=self.intent_classifier_template) | |
self.router_chain = LLMChain(llm=self.llm, prompt=self.router_prompt_template) | |
self.conversation_chain = LLMChain(llm=self.llm, prompt=self.conversation_template) | |
self.api_response_chain = LLMChain(llm=self.llm, prompt=self.user_response_template) | |
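# Note (assumption about library versions): LLMChain is deprecated in recent LangChain
# releases. On a newer LangChain, an equivalent sketch would use the runnable pipe syntax:
#     self.intent_chain = self.intent_classifier_template | self.llm
#     self.router_chain = self.router_prompt_template | self.llm
#     self.conversation_chain = self.conversation_template | self.llm
#     self.api_response_chain = self.user_response_template | self.llm
# With that style, invoke() returns the raw string instead of a {"text": ...} dict, so the
# result["text"] accesses further down would need to be adjusted accordingly.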
def detect_language(self, text): | |
"""Detect language of the input text""" | |
if self.language_classifier and len(text.strip()) > 3: | |
try: | |
result = self.language_classifier(text) | |
detected_lang = result[0][0]['label'] | |
confidence = result[0][0]['score'] | |
if detected_lang in ['ar', 'arabic']: | |
return "arabic" | |
elif detected_lang in ['en', 'english']: | |
return "english" | |
elif confidence > 0.8: | |
return "english" # Default to English for unsupported languages | |
except Exception:
    pass
# Fallback: Basic Arabic detection | |
arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]+') | |
if arabic_pattern.search(text): | |
return "arabic" | |
return "english" | |
def analyze_sentiment(self, text): | |
"""Analyze sentiment of the text""" | |
if self.sentiment_analyzer and len(text.strip()) > 3: | |
try: | |
result = self.sentiment_analyzer(text) | |
return { | |
"sentiment": result[0]['label'], | |
"score": result[0]['score'] | |
} | |
except Exception:
    pass
return {"sentiment": "NEUTRAL", "score": 0.5} | |
def extract_keywords(self, text): | |
"""Extract keywords from text""" | |
# Simple keyword extraction | |
words = re.findall(r'\b\w+\b', text.lower()) | |
# Filter out common words and keep meaningful ones | |
stopwords = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were'} | |
keywords = [w for w in words if len(w) > 3 and w not in stopwords] | |
return list(set(keywords))[:5] # Return top 5 unique keywords | |
def get_conversation_context(self): | |
"""Get recent conversation history as context""" | |
if not self.conversation_history: | |
return "No previous conversation" | |
context = [] | |
for item in self.conversation_history[-3:]: # Last 3 exchanges | |
context.append(f"User: {item['user_message']}") | |
context.append(f"Bot: {item['bot_response'][:100]}...") # Truncate long responses | |
return " | ".join(context) | |
def add_to_history(self, user_message, bot_response, response_type): | |
"""Add exchange to conversation history""" | |
self.conversation_history.append({ | |
'timestamp': datetime.now(), | |
'user_message': user_message, | |
'bot_response': bot_response, | |
'response_type': response_type | |
}) | |
# Keep only recent history | |
if len(self.conversation_history) > self.max_history_length: | |
self.conversation_history = self.conversation_history[-self.max_history_length:] | |
def classify_intent(self, user_query, detected_language): | |
"""Classify if the user query requires API action or is conversational""" | |
try: | |
result = self.intent_chain.invoke({ | |
"user_query": user_query, | |
"detected_language": detected_language, | |
"conversation_history": self.get_conversation_context(), | |
"endpoints_documentation": json.dumps(self.endpoints_documentation, indent=2) | |
}) | |
# Parse the JSON response | |
intent_text = result["text"] | |
# Clean and parse JSON | |
cleaned_response = re.sub(r'//.*?$', '', intent_text, flags=re.MULTILINE) | |
cleaned_response = re.sub(r'/\*.*?\*/', '', cleaned_response, flags=re.DOTALL) | |
cleaned_response = re.sub(r',(\s*[}\]])', r'\1', cleaned_response) | |
try: | |
intent_data = json.loads(cleaned_response) | |
return intent_data | |
except json.JSONDecodeError: | |
# Try to extract JSON from the response | |
json_match = re.search(r'\{.*?\}', cleaned_response, re.DOTALL) | |
if json_match: | |
intent_data = json.loads(json_match.group(0)) | |
return intent_data | |
else: | |
# Default classification if parsing fails | |
return { | |
"intent": "CONVERSATION", | |
"confidence": 0.5, | |
"reasoning": "Failed to parse LLM response", | |
"requires_backend": False | |
} | |
except Exception as e: | |
print(f"Error in intent classification: {e}") | |
return { | |
"intent": "CONVERSATION", | |
"confidence": 0.5, | |
"reasoning": f"Error in classification: {str(e)}", | |
"requires_backend": False | |
} | |
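# Illustrative only: the dict shape classify_intent() is expected to return, whether parsed
# from the LLM output or produced by the fallback branches above.
# example_intent = {
#     "intent": "CONVERSATION",
#     "confidence": 0.85,
#     "reasoning": "The user is greeting the assistant; no endpoint applies.",
#     "requires_backend": False,
# }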
def handle_conversation(self, user_query, detected_language, sentiment_result): | |
"""Handle conversational responses""" | |
try: | |
result = self.conversation_chain.invoke({ | |
"user_query": user_query, | |
"detected_language": detected_language, | |
"sentiment_analysis": json.dumps(sentiment_result), | |
"conversation_history": self.get_conversation_context() | |
}) | |
return result["text"].strip() | |
except Exception as e: | |
# Fallback response | |
if detected_language == "arabic": | |
return "أعتذر، واجهت مشكلة في المعالجة. كيف يمكنني مساعدتك؟" | |
else: | |
return "I apologize, I encountered a processing issue. How can I help you?" | |
def backend_call(self, data: Dict[str, Any]) -> Dict[str, Any]: | |
"""Make API call to backend with retry logic""" | |
endpoint_url = data.get('endpoint') | |
endpoint_method = data.get('method') | |
endpoint_params = data.get('params', {}).copy() | |
print('Sending the api request') | |
print(f"🔗 Making API call to {endpoint_method} {self.BASE_URL + endpoint_url} with params: {endpoint_params}") | |
# Inject patient_id if needed | |
if 'patient_id' in endpoint_params: | |
endpoint_params['patient_id'] = self.user_id | |
retries = 0 | |
response = None | |
while retries < self.max_retries: | |
try: | |
if endpoint_method.upper() == 'GET': | |
response = requests.get( | |
self.BASE_URL + endpoint_url, | |
params=endpoint_params, | |
headers=self.headers, | |
timeout=10 | |
) | |
elif endpoint_method.upper() in ['POST', 'PUT', 'DELETE']: | |
response = requests.request( | |
endpoint_method.upper(), | |
self.BASE_URL + endpoint_url, | |
json=endpoint_params, | |
headers=self.headers, | |
timeout=10 | |
) | |
else:
    return {"error": f"Unsupported HTTP method: {endpoint_method}"}
response.raise_for_status()
print('Backend Response : ', response.json()) | |
return response.json() | |
except requests.exceptions.RequestException as e: | |
retries += 1 | |
if retries >= self.max_retries: | |
return { | |
"error": "Backend API call failed after multiple retries", | |
"details": str(e), | |
"status_code": getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None | |
} | |
time.sleep(self.retry_delay) | |
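# Illustrative only: the routing dict shape backend_call() expects (normally produced by the
# router chain). The endpoint path and parameter values here are hypothetical.
# example_call = {
#     "endpoint": "/appointments",
#     "method": "GET",
#     "params": {"patient_id": "<patient-uuid>"},
# }
# result = self.backend_call(example_call)  # returns the JSON body, or an {"error": ...} dict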
def parse_relative_date(self, text, detected_language): | |
""" | |
Parse relative dates from text using a combination of methods | |
""" | |
today = datetime.now() | |
# Handle common relative date patterns in English and Arabic | |
tomorrow_patterns = { | |
'english': [r'\btomorrow\b', r'\bnext day\b'], | |
'arabic': [r'\bغدا\b', r'\bبكرة\b', r'\bغدًا\b', r'\bالغد\b'] | |
} | |
next_week_patterns = { | |
'english': [r'\bnext week\b'], | |
'arabic': [r'\bالأسبوع القادم\b', r'\bالأسبوع المقبل\b', r'\bالاسبوع الجاي\b'] | |
} | |
# Check for "tomorrow" patterns | |
for pattern in tomorrow_patterns.get(detected_language, []) + tomorrow_patterns.get('english', []): | |
if re.search(pattern, text, re.IGNORECASE): | |
return (today + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S') | |
# Check for "next week" patterns | |
for pattern in next_week_patterns.get(detected_language, []) + next_week_patterns.get('english', []): | |
if re.search(pattern, text, re.IGNORECASE): | |
return (today + timedelta(days=7)).strftime('%Y-%m-%dT%H:%M:%S') | |
# If NER model is available, use it to extract date entities | |
if self.date_parser and detected_language == 'english': | |
try: | |
date_entities = self.date_parser(text) | |
for entity in date_entities: | |
if entity['entity_group'] == 'DATE': | |
# Here you would need more complex date parsing logic | |
# This is just a placeholder | |
print(f"Found date entity: {entity['word']}") | |
# For now, just default to tomorrow if we detect any date | |
return (today + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S') | |
except Exception as e: | |
print(f"Error in date parsing: {e}") | |
# Default return None if no date pattern is recognized | |
return None | |
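# Sketch (assumption): the placeholder NER branch above could be replaced with a real date
# parser such as python-dateutil, which this class does not currently import or depend on:
#     from dateutil import parser as dateutil_parser
#     try:
#         parsed = dateutil_parser.parse(text, fuzzy=True)
#         return parsed.strftime('%Y-%m-%dT%H:%M:%S')
#     except (ValueError, OverflowError):
#         pass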
def handle_api_action(self, user_query, detected_language, sentiment_result, keywords): | |
"""Handle API-based actions""" | |
try: | |
# parsed_date = self.parse_relative_date(user_query, detected_language) | |
# if parsed_date: | |
# print(f"Parsed relative date: {parsed_date}") | |
# Route the query to determine API endpoint | |
router_result = self.router_chain.invoke({ | |
"endpoints_documentation": json.dumps(self.endpoints_documentation, indent=2), | |
"user_query": user_query, | |
"detected_language": detected_language, | |
"extracted_keywords": ", ".join(keywords), | |
"sentiment_analysis": json.dumps(sentiment_result), | |
"conversation_history": self.get_conversation_context(), | |
"current_datetime": datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), | |
"timezone": "UTC", | |
"user_locale": "en-US" | |
}) | |
# Parse router response | |
route_text = router_result["text"] | |
# cleaned_response = re.sub(r'//.*?$', '', route_text, flags=re.MULTILINE) | |
# cleaned_response = re.sub(r'/\*.*?\*/', '', cleaned_response, flags=re.DOTALL) | |
# cleaned_response = re.sub(r',(\s*[}\]])', r'\1', cleaned_response) | |
# try: | |
# parsed_route = json.loads(cleaned_response) | |
# except json.JSONDecodeError: | |
# json_match = re.search(r'\{.*?\}', cleaned_response, re.DOTALL) | |
# if json_match: | |
# parsed_route = json.loads(json_match.group(0)) | |
# else: | |
# raise ValueError("Could not parse routing response") | |
# print(f"🔍 Parsed route: {parsed_route}") | |
cleaned_response = route_text | |
# Remove any comments (both single-line and multi-line) | |
cleaned_response = re.sub(r'//.*?$', '', cleaned_response, flags=re.MULTILINE) | |
cleaned_response = re.sub(r'/\*.*?\*/', '', cleaned_response, flags=re.DOTALL) | |
# Remove any trailing commas | |
cleaned_response = re.sub(r',(\s*[}\]])', r'\1', cleaned_response) | |
# Try different methods to parse the JSON response
parsed_route = None
try:
    # First attempt: direct JSON parsing of the cleaned response
    parsed_route = json.loads(cleaned_response)
except json.JSONDecodeError:
    pass
if parsed_route is None:
    # Second attempt: extract JSON from a markdown code block
    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', cleaned_response, re.DOTALL)
    if json_match:
        try:
            parsed_route = json.loads(json_match.group(1))
        except json.JSONDecodeError:
            parsed_route = None
if parsed_route is None:
    # Third attempt: find JSON-like content containing the endpoint key
    json_pattern = r'\{\s*"endpoint"\s*:.*?\}'
    json_match = re.search(json_pattern, cleaned_response, re.DOTALL)
    if json_match:
        json_str = json_match.group(0)
        # Additional cleaning for the extracted JSON
        json_str = re.sub(r'//.*?$', '', json_str, flags=re.MULTILINE)
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
        try:
            parsed_route = json.loads(json_str)
        except json.JSONDecodeError:
            parsed_route = None
if parsed_route is None:
    print(f"Failed to parse JSON. Raw response: {route_text}")
    print(f"Cleaned response: {cleaned_response}")
    raise ValueError("Could not extract valid JSON from LLM response")
# Replace any placeholder values and inject parsed dates if available | |
if 'params' in parsed_route: | |
if 'patient_id' in parsed_route['params']: | |
parsed_route['params']['patient_id'] = self.user_id | |
else: | |
parsed_route['params']['patient_id'] = self.user_id | |
# Inject parsed date if available and a date parameter exists | |
# date_params = ['appointment_date', 'date', 'schedule_date', 'date_time', 'new_date_time'] | |
# if parsed_date: | |
# for param in date_params: | |
# if param in parsed_route['params']: | |
# parsed_route['params'][param] = parsed_date | |
print('Parsed route: ', parsed_route) | |
# Make backend API call | |
api_response = self.backend_call(parsed_route) | |
print("🔗 API response received:", api_response) | |
# Generate user-friendly response | |
user_response_result = self.api_response_chain.invoke({ | |
"user_query": user_query, | |
"api_response": json.dumps(api_response, indent=2), | |
"detected_language": detected_language, | |
"sentiment_analysis": json.dumps(sentiment_result), | |
}) | |
print("🔗 API response:", user_response_result["text"].strip()) | |
return { | |
"response": user_response_result["text"].strip(), | |
"api_data": api_response, | |
"routing_info": parsed_route | |
} | |
except Exception as e: | |
# Fallback error response | |
if detected_language == "arabic": | |
error_msg = "أعتذر، لم أتمكن من معالجة طلبك. يرجى المحاولة مرة أخرى أو صياغة السؤال بطريقة مختلفة." | |
else: | |
error_msg = "I apologize, I couldn't process your request. Please try again or rephrase your question." | |
return { | |
"response": error_msg, | |
"api_data": {"error": str(e)}, | |
"routing_info": None | |
} | |
def chat(self, user_message: str) -> ChatResponse: | |
"""Main chat method that handles user messages""" | |
start_time = time.time() | |
# Check for exit commands | |
exit_commands = ['quit', 'exit', 'bye', 'خروج', 'وداعا', 'مع السلامة'] | |
if user_message.lower().strip() in exit_commands: | |
return ChatResponse( | |
response_id=f"resp_{int(time.time())}", | |
response_type="conversation", | |
message="Goodbye! Take care of your health! / وداعاً! اعتن بصحتك!", | |
language="bilingual" | |
) | |
try: | |
# Language detection and analysis | |
detected_language = self.detect_language(user_message) | |
sentiment_result = self.analyze_sentiment(user_message) | |
keywords = self.extract_keywords(user_message) | |
print(f"🔍 Language: {detected_language} | Sentiment: {sentiment_result['sentiment']} | Keywords: {keywords}") | |
# Classify intent | |
intent_data = self.classify_intent(user_message, detected_language) | |
print(f"🎯 Intent: {intent_data['intent']} (confidence: {intent_data.get('confidence', 'N/A')})") | |
# Handle based on intent | |
if intent_data["intent"] == "API_ACTION" and intent_data.get("requires_backend", False): | |
# Handle API-based actions | |
print("🔗 Processing API action...") | |
action_result = self.handle_api_action(user_message, detected_language, sentiment_result, keywords) | |
# print(action_result) | |
response = ChatResponse( | |
response_id=f"resp_{int(time.time())}", | |
response_type="api_action", | |
message=action_result["response"], | |
api_call_made=True, | |
api_data=action_result.get("api_data"),  # pass the dict through; the field is Optional[Dict]
language=detected_language | |
) | |
else: | |
# Handle conversational responses | |
print("💬 Processing conversational response...") | |
conv_response = self.handle_conversation(user_message, detected_language, sentiment_result) | |
response = ChatResponse( | |
response_id=f"resp_{int(time.time())}", | |
response_type="conversation", | |
message=conv_response, | |
api_call_made=False, | |
language=detected_language | |
) | |
# Add to conversation history | |
self.add_to_history(user_message, response.message, response.response_type) | |
print(f"⏱️ Processing time: {time.time() - start_time:.2f}s") | |
return response | |
except Exception as e: | |
print(f"❌ Error in chat processing: {e}") | |
error_msg = "I apologize for the technical issue. Please try again. / أعتذر عن المشكلة التقنية. يرجى المحاولة مرة أخرى." | |
return ChatResponse( | |
response_id=f"resp_{int(time.time())}", | |
response_type="conversation", | |
message=error_msg, | |
api_call_made=False, | |
language="bilingual" | |
) | |
def start_interactive_chat(self): | |
"""Start an interactive chat session""" | |
print("🚀 Starting interactive chat session...") | |
while True: | |
try: | |
# Get user input | |
user_input = input("\n👤 You: ").strip() | |
if not user_input: | |
continue | |
# Process the message | |
print("🤖 Processing...") | |
response = self.chat(user_input) | |
# Display response | |
print(f"\n🏥 Healthcare Bot: {response.message}") | |
# Show additional info if API call was made | |
if response.api_call_made and response.api_data: | |
if "error" not in response.api_data: | |
print("✅ Successfully retrieved information from healthcare system") | |
else: | |
print("⚠️ There was an issue accessing the healthcare system") | |
# Check for exit | |
if "Goodbye" in response.message or "وداعاً" in response.message: | |
break | |
except KeyboardInterrupt: | |
print("\n\n👋 Chat session ended. Goodbye!") | |
break | |
except Exception as e: | |
print(f"\n❌ Unexpected error: {e}") | |
print("The chat session will continue...") | |
# Create a simple function to start the chatbot | |
# def start_healthcare_chatbot(): | |
# """Initialize and start the healthcare chatbot""" | |
# try: | |
# chatbot = HealthcareChatbot() | |
# chatbot.start_interactive_chat() | |
# except Exception as e: | |
# print(f"Failed to start chatbot: {e}") | |
# print("Please check your Ollama installation and endpoint documentation.") | |
# Test the chatbot | |
# if __name__ == "__main__": | |
# You can test individual messages like this: | |
# chatbot = HealthcareChatbot() | |
# Test conversational message | |
# print("\n=== TESTING CONVERSATIONAL MESSAGE ===") | |
# conv_response = chatbot.chat("Hello, how are you today?") | |
# print(f"Response: {conv_response.message}") | |
# print(f"Type: {conv_response.response_type}") | |
# Test API action message | |
# print("\n=== TESTING API ACTION MESSAGE ===") | |
# api_response = chatbot.chat("I want to book an appointment tomorrow at 2 PM") | |
# print(f"Response: {api_response.message}") | |
# print(f"Type: {api_response.response_type}") | |
# print(f"API Called: {api_response.api_call_made}") | |
# Start interactive session (uncomment to run) | |
# start_healthcare_chatbot() | |
# FastAPI section
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, Any, Optional

app = FastAPI(
    title="Healthcare AI Assistant",
    description="An AI-powered healthcare assistant that handles appointment booking and queries",
    version="1.0.0"
)

# Initialize the AI agent
agent = HealthcareChatbot()


class QueryRequest(BaseModel):
    query: str


class QueryResponse(BaseModel):
    # Currently unused: process_query returns the agent's ChatResponse directly.
    routing_info: Dict[str, Any]
    api_response: Dict[str, Any]
    user_friendly_response: str
    detected_language: str
    sentiment: Dict[str, Any]


# Route decorators were missing; the paths below are assumed and may need to be
# adjusted to match the original deployment.
@app.post("/query")
async def process_query(request: QueryRequest):
    """
    Process a user query and return a response
    """
    try:
        response = agent.chat(request.query)
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/health")
async def health_check():
    """
    Health check endpoint
    """
    return {"status": "healthy", "service": "healthcare-ai-assistant"}


@app.get("/")
async def root():
    return {"message": "Hello World"}


# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=8000)
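# Example usage (assumption): with the route paths assumed above, the running service can be
# exercised with a minimal client sketch like this.
# import requests
# resp = requests.post("http://localhost:8000/query", json={"query": "Hello, how are you?"})
# print(resp.status_code, resp.json())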