Spaces:

Rabbit-Innotech
/

GBVR_Chatbot

Sleeping

App Files Files Community

Rabbit-Innotech committed on Apr 25

Commit

bd67e9b

verified ·

1 Parent(s): 5f75e1c

Update app.py

Browse files

Files changed (1) hide show

app.py +327 -663

app.py CHANGED Viewed

@@ -1,284 +1,93 @@
 import os
-import requests
 import time
-from io import BytesIO
-from typing import Iterator, List, Dict, Any, Optional
-from urllib.parse import urljoin, urlparse
-# Data processing imports
 import pandas as pd
-import PyPDF2
-from PyPDF2 import PdfReader
-from bs4 import BeautifulSoup
-# AI and NLP imports
-from openai import OpenAI
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_chroma import Chroma
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.schema import Document
-# UI import
 import gradio as gr
-class DataProcessor:
-    """Handles processing of various data sources including CSV, Excel, PDF, and web content."""
-    def __init__(self, folder_path: str = "./"):
-        self.folder_path = folder_path
-        self.files = os.listdir(folder_path)
-    def process_tabular_data(self) -> List[str]:
-        """Process CSV and Excel files to extract data."""
-        context_data = []
-        data_files = [f for f in self.files if f.endswith(('.csv', '.xlsx', '.xls'))]
-        for f, file in enumerate(data_files, 1):
-            print(f"\nProcessing file {f}: {file}")
-            file_path = os.path.join(self.folder_path, file)
-            try:
-                # Read file based on extension
-                if file.endswith('.csv'):
-                    df = pd.read_csv(file_path)
-                else:
-                    df = pd.read_excel(file_path)
-                # Extract non-empty values from column 2
-                context_data.extend(df.iloc[:, 2].dropna().astype(str).tolist())
-            except Exception as e:
-                print(f"Error processing file {file}: {str(e)}")
-        return context_data
-    def extract_text_from_pdf(self, pdf_path: str) -> str:
-        """Extract text content from a PDF file."""
-        try:
-            with open(pdf_path, "rb") as file:
-                reader = PyPDF2.PdfReader(file)
-                return "".join(page.extract_text() or "" for page in reader.pages)
-        except Exception as e:
-            print(f"Error with {pdf_path}: {e}")
-            return ""
-    def process_pdf_files(self) -> List[Document]:
-        """Process all PDF files and return documents."""
-        pdf_files = [f for f in self.files if f.lower().endswith(".pdf")]
-        documents = []
-        for file in pdf_files:
-            print(f"Processing: {file}")
-            pdf_path = os.path.join(self.folder_path, file)
-            text = self.extract_text_from_pdf(pdf_path)
-            if text:
-                documents.append(Document(page_content=text, metadata={"source": file}))
-        return documents
-    def split_documents(self, documents: List[Document], chunk_size: int = 500) -> List[str]:
-        """Split documents into manageable chunks."""
-        text_splitter = RecursiveCharacterTextSplitter(
-            separators=['\n\n', '\n', '.', ','],
-            chunk_size=chunk_size,
-            chunk_overlap=50
-        )
-        chunks = text_splitter.split_documents(documents)
-        return [chunk.page_content for chunk in chunks]
-    def extract_pdf_text_from_url(self, pdf_url: str) -> Optional[str]:
-        """Extract text from a PDF URL."""
-        try:
-            response = requests.get(pdf_url)
-            response.raise_for_status()
-            with BytesIO(response.content) as file:
-                reader = PdfReader(file)
-                pdf_text = ""
-                for page in reader.pages:
-                    pdf_text += page.extract_text()
-            return pdf_text if pdf_text else None
-        except requests.exceptions.RequestException as e:
-            print(f"Error fetching PDF {pdf_url}: {e}")
-            return None
-        except Exception as e:
-            print(f"Error reading PDF {pdf_url}: {e}")
-            return None
-class WebScraper:
-    """Web scraping functionality for collecting data from websites."""
-    def scrape_websites(self, base_urls: List[str]) -> Dict[str, str]:
-        """Scrape content from a list of base URLs and their internal links."""
-        try:
-            visited_links = set()
-            content_by_url = {}
-            for base_url in base_urls:
-                if not base_url.strip():
-                    continue
-                print(f"Scraping base URL: {base_url}")
-                html_content = self.fetch_page_content(base_url)
-                if html_content:
-                    cleaned_content = self.clean_body_content(html_content)
-                    content_by_url[base_url] = cleaned_content
-                    visited_links.add(base_url)
-                    # Extract and process internal links
-                    soup = BeautifulSoup(html_content, "html.parser")
-                    links = self.extract_internal_links(base_url, soup)
-                    for link in links:
-                        if link not in visited_links:
-                            print(f"Scraping link: {link}")
-                            page_content = self.fetch_page_content(link)
-                            if page_content:
-                                cleaned_content = self.clean_body_content(page_content)
-                                content_by_url[link] = cleaned_content
-                                visited_links.add(link)
-                            # Extract PDF content if link is a PDF
-                            if link.lower().endswith('.pdf'):
-                                print(f"Extracting PDF content from: {link}")
-                                pdf_processor = DataProcessor()
-                                pdf_content = pdf_processor.extract_pdf_text_from_url(link)
-                                if pdf_content:
-                                    content_by_url[link] = pdf_content
-            return content_by_url
-        except Exception as e:
-            print(f"Error during scraping: {e}")
-            return {}
-    def fetch_page_content(self, url: str) -> Optional[str]:
-        """Fetch HTML content from a URL."""
-        try:
-            response = requests.get(url, timeout=10)
-            response.raise_for_status()
-            return response.text
-        except requests.exceptions.RequestException as e:
-            print(f"Error fetching {url}: {e}")
-            return None
-    def extract_internal_links(self, base_url: str, soup: BeautifulSoup) -> set:
-        """Extract internal links from a BeautifulSoup object."""
-        links = set()
-        for anchor in soup.find_all("a", href=True):
-            href = anchor["href"]
-            full_url = urljoin(base_url, href)
-            if self.is_internal_link(base_url, full_url):
-                links.add(full_url)
-        return links
-    def is_internal_link(self, base_url: str, link_url: str) -> bool:
-        """Check if a link is internal to the base URL."""
-        base_netloc = urlparse(base_url).netloc
-        link_netloc = urlparse(link_url).netloc
-        return base_netloc == link_netloc
-    def clean_body_content(self, html_content: str) -> str:
-        """Clean HTML content to extract useful text."""
-        soup = BeautifulSoup(html_content, "html.parser")
-        # Remove scripts and styles
-        for script_or_style in soup(["script", "style"]):
-            script_or_style.extract()
-        # Get text and clean up
-        cleaned_content = soup.get_text(separator="\n")
-        cleaned_content = "\n".join(
-            line.strip() for line in cleaned_content.splitlines() if line.strip()
-        )
-        return cleaned_content
-class TranslationService:
-    """Translation service using Hugging Face API."""
-    def __init__(self, api_token: str, model_name: str = "facebook/nllb-200-distilled-600M"):
-        self.model_name = model_name
-        self.url = f"https://api-inference.huggingface.co/models/{model_name}"
-        self.headers = {"Authorization": f"Bearer {api_token}"}
-    def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> str:
-        """Translate text using Hugging Face API."""
         try:
-            response = requests.post(
-                self.url,
-                headers=self.headers,
-                json={
-                    "inputs": text,
-                    "parameters": {
-                        "src_lang": src_lang,
-                        "tgt_lang": tgt_lang
-                    }
-                }
-            )
-            if response.status_code == 200:
-                result = response.json()
-                if isinstance(result, list) and len(result) > 0:
-                    return result[0]['translation_text']
-                return result['translation_text']
             else:
-                print(f"Translation error: {response.status_code}, {response.text}")
-                return text  # Return original text if translation fails
         except Exception as e:
-            print(f"Translation error: {e}")
-            return text  # Return original text if translation fails
-class OpenRouterLLM:
-    """LLM service using OpenRouter API."""
-    def __init__(self, key: str):
-        try:
-            self.client = OpenAI(
-                base_url="https://openrouter.ai/api/v1",
-                api_key=key
-            )
-            self.headers = {
-                "HTTP-Referer": "http://localhost:3000",
-                "X-Title": "Local Development"
-            }
-        except Exception as e:
-            print(f"Initialization error: {e}")
-            raise
-    def stream(self, prompt: str) -> Iterator[str]:
-        """Stream response from LLM."""
-        try:
-            completion = self.client.chat.completions.create(
-               # model="meta-llama/llama-3.3-70b-instruct:free",
-                model="meta-llama/llama-4-maverick:free",
-                messages=[{"role": "user", "content": prompt}],
-                stream=True
-            )
-            for chunk in completion:
-                delta = chunk.choices[0].delta
-                if hasattr(delta, "content") and delta.content:
-                    yield delta.content
-        except Exception as e:
-            yield f"Streaming error: {str(e)}"
 class UserSession:
-    """Manage user session information and conversation history."""
-    def __init__(self, llm: OpenRouterLLM):
         self.current_user = None
         self.welcome_message = None
         self.conversation_history = []
         self.llm = llm
-    def set_user(self, user_info: Dict[str, Any]) -> None:
-        """Set current user and initialize welcome message."""
         self.current_user = user_info
         self.set_welcome_message(user_info.get("Nickname", "Guest"))
         # Initialize conversation history with welcome message
@@ -286,454 +95,309 @@ class UserSession:
         self.conversation_history = [
             {"role": "assistant", "content": welcome},
         ]
-    def get_user(self) -> Dict[str, Any]:
-        """Get current user information."""
         return self.current_user
-    def set_welcome_message(self, nickname: str, src_lang: str = "eng_Latn", tgt_lang: str = "kin_Latn") -> None:
         """Set a dynamic welcome message using the LLM."""
         prompt = (
-            f"Create a very brief welcome message for {nickname}. "
             f"The message should: "
             f"1. Welcome {nickname} warmly and professionally. "
             f"2. Emphasize that this is a safe and trusted space. "
             f"3. Highlight specialized support for gender-based violence (GBV) and legal assistance. "
             f"4. Use a tone that is warm, reassuring, and professional. "
-            f"5. Keep the message concise and impactful."
         )
-        try:
-            # Use the LLM to generate the message
-            welcome = "".join(list(self.llm.stream(prompt)))
-            # Get translation service and translate welcome message
-            api_token = os.environ.get('Token')
-            if not api_token:
-                self.welcome_message = f"Welcome {nickname}! This is a safe space where you can find support and resources."
-                return
-            translator = TranslationService(api_token)
-            welcome_text = translator.translate_text(welcome, src_lang, tgt_lang)
-            # Format the message with HTML styling
-            self.welcome_message = welcome_text
-        except Exception as e:
-            print(f"Error generating welcome message: {e}")
-            self.welcome_message = f"Welcome {nickname}! This is a safe space where you can find support and resources."
-    def get_welcome_message(self) -> str:
-        """Get the welcome message."""
-        return self.welcome_message or "Welcome! This is a safe space where you can find support."
-    def add_to_history(self, role: str, message: str) -> None:
-        """Add a message to the conversation history."""
         self.conversation_history.append({"role": role, "content": message})
-    def get_conversation_history(self) -> List[Dict[str, str]]:
-        """Get the full conversation history."""
         return self.conversation_history
-    def get_formatted_history(self) -> str:
-        """Get conversation history formatted as a string for the LLM."""
         formatted_history = ""
         for entry in self.conversation_history:
             role = "User" if entry["role"] == "user" else "Assistant"
             formatted_history += f"{role}: {entry['content']}\n\n"
         return formatted_history
-class GBVSupportChatbot:
-    """Main chatbot application class."""
-    def __init__(self):
-        self.api_key = os.environ.get('A1')
-        self.api_token = os.environ.get('Token')
-        # Add fallback for missing environment variables
-        if not self.api_key:
-            print("Warning: V2 API key not found in environment variables.")
-            self.api_key = "demo_key"  # Use a placeholder value
-        if not self.api_token:
-            print("Warning: Token not found in environment variables.")
-            self.api_token = "demo_token"  # Use a placeholder value
-        self.llm_instance = OpenRouterLLM(key=self.api_key)
-        self.user_session = UserSession(self.llm_instance)
-        self.translator = TranslationService(self.api_token)
-        # Initialize embedding model
-        try:
-            self.embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
-            # Initialize vector store
-            self.vectorstore = Chroma(
-                collection_name="Dataset",
-                embedding_function=self.embed_model,
-            )
-        except Exception as e:
-            print(f"Error initializing embeddings: {e}")
-            # Create a simple placeholder for vectorstore if initialization fails
-            self.vectorstore = None
-        # Template for GBV support chatbot
-        self.template = """
-        You are a compassionate and supportive AI assistant specializing in helping individuals affected by Gender-Based Violence (GBV). Your responses must be based EXCLUSIVELY on the information provided in the context. Your primary goal is to provide emotionally intelligent support while maintaining appropriate boundaries.
-        **Previous conversation:** {conversation_history}
-        **Context information:** {context}
-        **User's Question:** {question}
-        When responding follow these guidelines:
-        1. **Strict Context Adherence**
-           - Only use information that appears in the provided {context}
-           - If the answer is not found in the context, state "I don't have that information in my available resources" rather than generating a response
-        2. **Personalized Communication**
-           - Avoid contractions (e.g., use I am instead of I'm)
-           - Incorporate thoughtful pauses or reflective questions when the conversation involves difficult topics
-           - Use selective emojis (😊, 🤗, ❤️) only when tone-appropriate and not during crisis discussions
-           - Balance warmth with professionalism
-        3. **Emotional Intelligence**
-           - Validate feelings without judgment
-           - Offer reassurance when appropriate, always centered on empowerment
-           - Adjust your tone based on the emotional state conveyed
-        4. **Conversation Management**
-           - Refer to {conversation_history} to maintain continuity and avoid repetition
-           - Use clear paragraph breaks for readability
-        5. **Information Delivery**
-           - Extract only relevant information from {context} that directly addresses the question
-           - Present information in accessible, non-technical language
-           - When information is unavailable, respond with: "I don't have that specific information right now, {first_name}. Would it be helpful if I focus on [alternative support option]?"
-        6. **Safety and Ethics**
-           - Do not generate any speculative content or advice not supported by the context
-           - If the context contains safety information, prioritize sharing that information
-        Your response must come entirely from the provided context, maintaining the supportive tone while never introducing information from outside the provided materials.
-        **Context:** {context}
-        **User's Question:** {question}
-        **Your Response:**
-        """
-    def load_data(self) -> None:
-        """Load and process all data sources."""
-        if not self.vectorstore:
-            print("Warning: Vector store not initialized. Skipping data loading.")
-            return
-        try:
-            # Process all data sources
-            data_processor = DataProcessor()
-            context_data = data_processor.process_tabular_data()
-            # Process PDFs
-            pdf_documents = data_processor.process_pdf_files()
-            text_chunks = data_processor.split_documents(pdf_documents)
-            # Combine all data
-            all_data = []
-            all_data.extend(context_data)
-            all_data.extend([item for item in text_chunks if item not in all_data])
-            if all_data:
-                # Add data to vector store
-                self.vectorstore.add_texts(all_data)
-            else:
-                print("Warning: No data found to load into vector store.")
-        except Exception as e:
-            print(f"Error loading data: {e}")
-    def create_rag_chain(self):
-        """Create RAG chain with user context and conversation history."""
-        try:
-            if self.vectorstore:
-                retriever = self.vectorstore.as_retriever()
-            else:
-                # Create a simple fallback if vectorstore is not available
-                retriever = FallbackRetriever()
-            rag_prompt = PromptTemplate.from_template(self.template)
-            def stream_func(input_dict):
-                try:
-                    # Get context using the retriever's invoke method
-                    if self.vectorstore:
-                        context = retriever.invoke(input_dict["question"])
-                        context_str = "\n".join([doc.page_content for doc in context])
-                    else:
-                        context_str = "No specific information available on this topic."
-                    # Get user info from the session
-                    user_info = self.user_session.get_user() or {}
-                    first_name = user_info.get("Nickname", "User")
-                    # Get conversation history
-                    conversation_history = self.user_session.get_formatted_history()
-                    # Format prompt with user context and conversation history
-                    prompt = rag_prompt.format(
-                        context=context_str,
-                        question=input_dict["question"],
-                        first_name=first_name,
-                        conversation_history=conversation_history
-                    )
-                    # Stream response
-                    return self.llm_instance.stream(prompt)
-                except Exception as e:
-                    print(f"Error in RAG chain: {e}")
-                    yield f"I apologize, but I'm having trouble processing your request. Please try again or rephrase your question."
-            return stream_func
-        except Exception as e:
-            print(f"Error creating RAG chain: {e}")
-            # Return a simple fallback function
-            def fallback_func(input_dict):
-                yield "I apologize, but I'm having technical difficulties. Please try again later."
-            return fallback_func
-    def collect_user_info(self, nickname: str):
-        """Collect and process user information."""
-        if not nickname:
-            return "Nickname is required to proceed.", gr.update(visible=False), gr.update(visible=True), []
-        # Store user info for chat session
-        user_info = {
-            "Nickname": nickname,
-            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
         }
-        # Set user in session
-        self.user_session.set_user(user_info)
-        # Generate welcome message
-        welcome_message = self.user_session.get_welcome_message()
-        # Create welcome message in the new messages format for Gradio chatbot
-        chat_history = [{"role": "assistant", "content": welcome_message}]
-        # Return welcome message and update UI
-        return welcome_message, gr.update(visible=True), gr.update(visible=False), chat_history
-    def rag_memory_stream(self, message: str, history):
-        """Process user message, translate, and generate response."""
-        try:
-            # First, yield the current history to show user message
-            history_copy = history.copy()
-            history_copy.append({"role": "user", "content": message})
-            yield history_copy, ""
-            # Translate user message to English (from Kinyarwanda by default)
-            try:
-                english_message = self.translator.translate_text(message, "kin_Latn", "eng_Latn")
-            except Exception as e:
-                print(f"Translation error: {e}")
-                english_message = message  # Fallback to original message if translation fails
-            # Add translated message to history
-            self.user_session.add_to_history("user", english_message)
-            # Generate response using RAG chain
-            full_response = ""
-            rag_chain = self.create_rag_chain()
-            # Generate chunks of response and update as they come
-            for new_text in rag_chain({"question": english_message}):
-                full_response += new_text
-                # Translate response back to user language (Kinyarwanda by default)
-                try:
-                    translated_response = self.translator.translate_text(full_response, "eng_Latn", "kin_Latn")
-                except Exception as e:
-                    print(f"Translation error: {e}")
-                    translated_response = full_response  # Fallback to original message if translation fails
-                # Update history with current response
-                current_history = history_copy.copy()
-                current_history.append({"role": "assistant", "content": translated_response})
-                yield current_history, ""
-            # Add final response to session history
-            self.user_session.add_to_history("assistant", full_response)
-        except Exception as e:
-            print(f"Error in chat processing: {e}")
-            # Provide a fallback response if something goes wrong
-            error_history = history.copy()
-            error_history.append({"role": "user", "content": message})
-            error_history.append({
-                "role": "assistant",
-                "content": "I apologize, but I'm having trouble processing your request. Please try again."
-            })
-            yield error_history, ""
-    def create_chatbot_interface(self):
-        """Create and configure the chatbot UI."""
-        with gr.Blocks() as demo:
-            # User registration section
-            with gr.Column(visible=True, elem_id="registration_container") as registration_container:
-                gr.Markdown("### Your privacy matters to us! Just share a nickname you feel comfy with to start chatting..")
-                with gr.Row():
-                    first_name = gr.Textbox(
-                        label="Nickname",
-                        placeholder="Enter a nickname you feel comfortable with",
-                        scale=1,
-                        elem_id="input_nickname"
-                    )
-                with gr.Row():
-                    submit_btn = gr.Button("Start Chatting", variant="primary", scale=2)
-                response_message = gr.Markdown()
-            # Chatbot section (initially hidden)
-            with gr.Column(visible=False, elem_id="chatbot_container") as chatbot_container:
-                # Use the new messages format for the chatbot
-                chatbot = gr.Chatbot(
-                    label="Chat with GBVR",
-                    height=500,
-                    show_label=True,
-                    elem_id="chat_interface",
-                    type="messages"  # Use messages format instead of tuples
-                )
-                with gr.Row():
-                    msg = gr.Textbox(
-                        placeholder="Type your message here...",
-                        label="Your message",
-                        show_label=False,
-                        container=False,
-                        scale=7
-                    )
-                    send_btn = gr.Button("Send", variant="primary", scale=1)
-                # Configure event handlers
-                msg_event = msg.submit(
-                    self.rag_memory_stream,
-                    inputs=[msg, chatbot],
-                    outputs=[chatbot, msg]
-                )
-                send_event = send_btn.click(
-                    self.rag_memory_stream,
-                    inputs=[msg, chatbot],
-                    outputs=[chatbot, msg]
                 )
-                # Clear textbox after sending
-                msg_event.then(lambda: "", None, msg)
-                send_event.then(lambda: "", None, msg)
-                # Footer with version info
-                gr.Markdown("Ijwi ry'Ubufasha Chatbot v1.0.0 © 2025")
-            # Handle user registration
-            submit_btn.click(
-                self.collect_user_info,
-                inputs=[first_name],
-                outputs=[response_message, chatbot_container, registration_container, chatbot]
             )
-        # Add CSS styles
-        demo.css = """
-        :root {
-            --background: #f0f0f0;
-            --text: #000000;
-        }
-        body, .gradio-container {
-            margin: 0;
-            padding: 0;
-            width: 100%;
-            height: 100vh;
-            display: flex;
-            flex-direction: column;
-            justify-content: center;
-            align-items: center;
-            background: var(--background);
-            color: var(--text);
-        }
-        .gradio-container {
-            max-width: 100%;
-            max-height: 100%;
-        }
-        .gr-box {
-            background: var(--background);
-            color: var(--text);
-            border-radius: 12px;
-            padding: 2rem;
-            border: 1px solid rgba(0, 0, 0, 0.1);
-            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
-        }
-        .gr-button-primary {
-            background: var(--background);
-            color: var(--text);
-            padding: 12px 24px;
-            border-radius: 8px;
-            transition: all 0.3s ease;
-            border: 1px solid rgba(0, 0, 0, 0.1);
-        }
-        .gr-button-primary:hover {
-            transform: translateY(-1px);
-            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
-        }
-        footer {
-            text-align: center;
-            color: var(--text);
-            opacity: 0.7;
-            padding: 1rem;
-            font-size: 0.9em;
-        }
-        .gr-markdown h3 {
-            color: var(--text);
-            margin-bottom: 1rem;
-        }
-        .registration-markdown, .chat-title h1 {
-            color: var(--text);
-        }
-        """
-        return demo
-# Fallback retriever class for when vectorstore is not available
-class FallbackRetriever:
-    def invoke(self, query):
-        # Return a list of document-like objects with empty content
-        return [Document(page_content="No specific information available on this topic.", metadata={})]
-# Main execution function
-def main():
-    # Initialize the chatbot
-    chatbot = GBVSupportChatbot()
-    try:
-        # Load data
-        chatbot.load_data()
-        # Create and launch the interface
-        demo = chatbot.create_chatbot_interface()
-        demo.launch(share=True)
-    except Exception as e:
-        print(f"Error in main execution: {e}")
 if __name__ == "__main__":
-    main()

 import os
 import time
 import pandas as pd
+import gradio as gr
+from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import Chroma
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
+import os
+from langchain_groq import ChatGroq
+from langchain.prompts import ChatPromptTemplate, PromptTemplate
+from langchain.output_parsers import ResponseSchema, StructuredOutputParser
+from urllib.parse import urljoin, urlparse
+import requests
+from io import BytesIO
+from langchain_chroma import Chroma
+import requests
+from bs4 import BeautifulSoup
+from langchain_core.prompts import ChatPromptTemplate
 import gradio as gr
+from PyPDF2 import PdfReader
# Groq API key read from the environment; None when the variable is unset.
# NOTE(review): the variable name is spelled 'grop_API_KEY' (not 'groq') -
# confirm it matches the secret actually configured for this Space.
groq_api_key = os.getenv('grop_API_KEY')

# Module-level embedding model; create_vectorstore() passes it to Chroma as
# the embedding function.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
def process_data_files(folder_path="./"):
    """Collect text chunks from the third column of tabular files in a folder.

    Every ``.csv``, ``.xlsx`` and ``.xls`` file (extension matched
    case-insensitively) directly inside ``folder_path`` is loaded with pandas.
    Each non-empty cell of the third column becomes one chunk.

    Args:
        folder_path: Directory to scan. Defaults to the current working
            directory, which is what the original hard-coded.

    Returns:
        A list of dicts shaped like
        ``{"page_content": str, "metadata": {"source": file_name, "row": n}}``.
        ``row`` is the 1-based position of the value among the non-empty
        cells of that file's third column; it is not the spreadsheet row
        number, because empty cells are dropped before counting.

    Files that cannot be read, or that have fewer than three columns, are
    reported on stdout and skipped so one bad file does not abort the load.
    """
    context_data = []
    tabular_suffixes = ('.csv', '.xlsx', '.xls')

    # os.listdir() returns entries in arbitrary order; sort so the chunk
    # order is reproducible from run to run.
    data_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(tabular_suffixes)
    )

    for file_name in data_files:
        file_path = os.path.join(folder_path, file_name)
        try:
            if file_name.lower().endswith('.csv'):
                df = pd.read_csv(file_path)
            else:
                df = pd.read_excel(file_path)

            # The text of interest lives in the third column (index 2).
            if df.shape[1] <= 2:
                print(f"Warning: File {file_name} has fewer than 3 columns.")
                continue

            column_data = df.iloc[:, 2].dropna().astype(str).tolist()
            context_data.extend(
                {"page_content": text, "metadata": {"source": file_name, "row": position}}
                for position, text in enumerate(column_data, 1)
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next file.
            print(f"Error processing file {file_name}: {e}")
    return context_data
def create_vectorstore(data):
    """Build a Chroma collection named "GBVRS" from prepared text chunks.

    Args:
        data: List of dicts, each with a ``"page_content"`` string and a
            ``"metadata"`` dict (the shape produced by
            ``process_data_files``). These are plain dicts, not LangChain
            ``Document`` objects.

    Returns:
        The Chroma vector store, using the module-level ``embed_model`` as
        its embedding function. When ``data`` is empty the store is returned
        without inserting anything.

    Raises:
        KeyError: If an entry lacks ``"page_content"`` or ``"metadata"``.
    """
    vectorstore = Chroma(
        collection_name="GBVRS",
        embedding_function=embed_model,
    )

    # Skip the insert for an empty batch: there is nothing to embed, and the
    # underlying Chroma client may reject an empty add.
    if data:
        texts = [entry["page_content"] for entry in data]
        metadatas = [entry["metadata"] for entry in data]
        vectorstore.add_texts(texts, metadatas=metadatas)
    return vectorstore
+# User session management
 class UserSession:
+    def __init__(self, llm):
         self.current_user = None
         self.welcome_message = None
         self.conversation_history = []
         self.llm = llm
+    def set_user(self, user_info):
         self.current_user = user_info
         self.set_welcome_message(user_info.get("Nickname", "Guest"))
         # Initialize conversation history with welcome message
         self.conversation_history = [
             {"role": "assistant", "content": welcome},
         ]
+    def get_user(self):
         return self.current_user
+    def set_welcome_message(self, nickname):
         """Set a dynamic welcome message using the LLM."""
+        # Define a prompt for the LLM to generate a welcome message
         prompt = (
+            f"Create a very brief welcome message for {nickname} that fits in 3 lines. "
             f"The message should: "
             f"1. Welcome {nickname} warmly and professionally. "
             f"2. Emphasize that this is a safe and trusted space. "
             f"3. Highlight specialized support for gender-based violence (GBV) and legal assistance. "
             f"4. Use a tone that is warm, reassuring, and professional. "
+            f"5. Keep the message concise and impactful, ensuring it fits within the character limit."
         )
+        # Use the LLM to generate the message
+        response = self.llm.invoke(prompt)
+        welcome = response.content
+        # Format the message with HTML styling
+        self.welcome_message = (
+            f"<div style='font-size: 24px; font-weight: bold; color: #2E86C1;'>"
+            f"<div style='font-size: 20px;'>"
+            f"{welcome}"
+            f"</div>"
+        )
+    def get_welcome_message(self):
+        return self.welcome_message
+    def add_to_history(self, role, message):
+        """Add a message to the conversation history"""
         self.conversation_history.append({"role": role, "content": message})
+    def get_conversation_history(self):
+        """Get the full conversation history"""
         return self.conversation_history
+    def get_formatted_history(self):
+        """Get conversation history formatted as a string for the LLM"""
         formatted_history = ""
         for entry in self.conversation_history:
             role = "User" if entry["role"] == "user" else "Assistant"
             formatted_history += f"{role}: {entry['content']}\n\n"
         return formatted_history
# Format context from documents
def format_context(retrieved_docs):
    """Return the ``page_content`` of each retrieved document, one per line."""
    page_texts = (document.page_content for document in retrieved_docs)
    return "\n".join(page_texts)
# RAG Chain creation with updated approach
def create_rag_chain(retriever, template, api_key):
    """Assemble the retrieval-augmented chain that answers one user query.

    Args:
        retriever: Object whose ``invoke(query)`` returns the documents to
            use as context.
        template: Prompt text with the ``context``, ``question``,
            ``first_name`` and ``conversation_history`` placeholders.
        api_key: Groq API key passed to the chat model.

    Returns:
        A runnable that takes the query string and returns the model's reply
        as a string.
    """
    llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=api_key)
    rag_prompt = PromptTemplate.from_template(template)

    def get_context_and_question(query):
        # Session state comes from the module-level `user_session`.
        session_user = user_session.get_user() or {}
        transcript = user_session.get_formatted_history()
        documents = retriever.invoke(query)
        # One value per placeholder in the prompt template.
        return {
            "context": format_context(documents),
            "question": query,
            "first_name": session_user.get("Nickname", "User"),
            "conversation_history": transcript,
        }

    # query -> prompt inputs -> prompt -> model -> plain string
    return (
        RunnablePassthrough()
        | get_context_and_question
        | rag_prompt
        | llm
        | StrOutputParser()
    )
# RAG memory function for user interaction (without translation)
def rag_memory_stream(message, history):
    """Answer ``message`` through the RAG chain and record both turns.

    Args:
        message: The user's latest message.
        history: Supplied by the chat UI; not used here, because the
            transcript is kept in the module-level ``user_session``.

    Yields:
        The complete reply, as a single item.
    """
    # Record the user's turn before querying the chain.
    user_session.add_to_history("user", message)
    answer = rag_chain.invoke(message)
    user_session.add_to_history("assistant", answer)
    yield answer
# Add initial message to start the conversation
def add_initial_message(chatbot):
    """Return the given chat history unchanged (the very same object)."""
    return chatbot
# Store user details and handle session
def collect_user_info(nickname):
    """Register the nickname and switch the UI from registration to chat.

    Args:
        nickname: Text entered in the nickname box. Surrounding whitespace
            is ignored, so a blank or whitespace-only value counts as missing.

    Returns:
        A 4-tuple for the click handler's outputs: the status/welcome text,
        the visibility update for the chat panel, the visibility update for
        the registration panel, and the initial chat history.
    """
    nickname = (nickname or "").strip()
    if not nickname:
        # Keep the registration panel visible and the chat panel hidden.
        return "Nickname is required to proceed.", gr.update(visible=False), gr.update(visible=True), []
    # Store user info for chat session
    user_info = {
        "Nickname": nickname,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    # Setting the user also generates the welcome message.
    user_session.set_user(user_info)
    welcome_message = user_session.get_welcome_message()
    # Seed the chat with the welcome as an assistant-only turn.
    chat_history = add_initial_message([(None, welcome_message)])
    # Show the chat panel and hide the registration panel.
    return welcome_message, gr.update(visible=True), gr.update(visible=False), chat_history
# Prompt template for the RAG chain. It lives at module level so it is already
# bound when the chain is created, whether or not the interface has been built.
template = """
    **Role**: Compassionate Legal Assistance and GBV Support Specialist with Emotional Awareness.
    You are a friendly and empathetic chatbot designed to assist users in a conversational and human-like manner. Your goal is to provide accurate, helpful, and emotionally supportive responses based on the provided context: {context}. Follow these guidelines:
    1. **Emotional Awareness**
      - Acknowledge the user's emotions and respond with empathy.
      - Use phrases like "I understand how you feel," "That sounds challenging," or "I'm here to support you."
      - If the user expresses negative emotions, offer comfort and reassurance.
    2. **Contextual Interaction**
      - Begin with a warm and empathetic welcome message.
      - Extract precise details from the provided context: {context}.
      - Respond directly to the user's question: {question}.
      - Only provide detailed information if user requests it.
      - Remember the user's name is {first_name}.
    3. **Communication Guidelines**
      - Maintain a warm, conversational tone (avoid over-familiarity).
      - Use occasional emojis for engagement (e.g., 😊, 🤗, ❤️).
      - Provide clear, concise, and emotionally supportive information.
    4. **Response Strategies**
      - Greet users naturally and ask about their wellbeing (e.g., "Welcome, {first_name}! 😊 How are you feeling today?", "Hello {first_name}! 🤗 What's on your mind?").
      - Always start with a check-in about the user's wellbeing or current situation.
      - Provide a concise summary with only relevant information.
      - Avoid generating content beyond the context.
      - Handle missing information transparently.
    5. **No Extra Content**
      - If no information in {context} matches the user's request {question} :
        * Respond politely: "I don't have that information at the moment, {first_name}. 😊"
        * Offer alternative assistance options.
      - Strictly avoid generating unsupported content.
      - Prevent information padding or speculation.
    6. **Extracting Relevant Links**
       - If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.
       - Example response:
         - "Here is the link you requested, [URL]"
    7. **Real-Time Awareness**
      - Acknowledge the current context when appropriate.
      - Stay focused on the user's immediate needs.
    8. **Previous Conversation Context**
      - Consider the conversation history: {conversation_history}
      - Maintain continuity with previous exchanges.
    **Context:** {context}
    **User's Question:** {question}
    **Your Response:**
    """


# Gradio Interface Setup with improved UX
def chatbot_interface():
    """Build the Gradio app: a nickname form followed by the chat panel.

    The registration column is visible first. Clicking "Start Chatting" runs
    ``collect_user_info``, whose outputs update the status text, the
    visibility of both columns, and the chatbot's history.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks() as demo:
        # User registration section
        with gr.Column(visible=True, elem_id="registration_container") as registration_container:
            gr.Markdown("### Your privacy is our concern, please provide your nickname.")
            with gr.Row():
                first_name = gr.Textbox(
                    label="Nickname",
                    placeholder="Enter your Nickname",
                    scale=1,
                    elem_id="input_nickname"
                )
            with gr.Row():
                submit_btn = gr.Button("Start Chatting", variant="primary", scale=2)
            response_message = gr.Markdown()
        # Chatbot section (initially hidden)
        with gr.Column(visible=False, elem_id="chatbot_container") as chatbot_container:
            chat_interface = gr.ChatInterface(
                fn=rag_memory_stream,
                title="Chat with GBVR",
                fill_height=True
            )
            # Footer with version info
            gr.Markdown("Ijwi ry'Ubufasha v1.0.0 © 2025")
        # Handle user registration
        submit_btn.click(
            collect_user_info,
            inputs=[first_name],
            outputs=[response_message, chatbot_container, registration_container, chat_interface.chatbot]
        )
    # Custom styling, attached to the app after it has been assembled.
    demo.css = """
    :root {
        --background: #f0f0f0;
        --text: #000000;
    }
    body, .gradio-container {
        margin: 0;
        padding: 0;
        width: 100vw;
        height: 100vh;
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        background: var(--background);
        color: var(--text);
    }
    .gradio-container {
        max-width: 100%;
        max-height: 100%;
    }
    .gr-box {
        background: var(--background);
        color: var(--text);
        border-radius: 12px;
        padding: 2rem;
        border: 1px solid rgba(0, 0, 0, 0.1);
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
    }
    .gr-button-primary {
        background: var(--background);
        color: var(--text);
        padding: 12px 24px;
        border-radius: 8px;
        transition: all 0.3s ease;
        border: 1px solid rgba(0, 0, 0, 0.1);
    }
    .gr-button-primary:hover {
        transform: translateY(-1px);
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
    }
    footer {
        text-align: center;
        color: var(--text);
        opacity: 0.7;
        padding: 1rem;
        font-size: 0.9em;
    }
    .gr-markdown h3 {
        color: var(--text);
        margin-bottom: 1rem;
    }
    .registration-markdown, .chat-title h1 {
        color: var(--text);
    }
    """
    return demo
# Main execution
if __name__ == "__main__":
    # Build the UI before the chain: the global `template` may only be bound
    # once chatbot_interface() has run, and create_rag_chain() below reads it.
    demo = chatbot_interface()
    # Process data and create vectorstore
    data = process_data_files()
    vectorstore = create_vectorstore(data)
    retriever = vectorstore.as_retriever()
    # Initialize LLM for the user session (it writes the welcome message)
    llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
    # NOTE(review): this one UserSession is a module global used by the chat
    # callbacks, so every visitor appears to share the same nickname and
    # transcript; consider per-visitor state. Confirm before deploying.
    user_session = UserSession(llm)
    # The chat callback looks up `rag_chain` as a global when a message arrives.
    rag_chain = create_rag_chain(retriever, template, groq_api_key)
    # Launch the interface
    demo.launch(share=True)