Spaces:

Macdensten91
/

google-flan-t5-small

Sleeping

App Files Files Community

Macdensten91 committed on Apr 9

Commit

0980116

verified ·

1 Parent(s): f0915ac

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -71

app.py CHANGED Viewed

@@ -1,93 +1,177 @@
-import random
 import re
 from huggingface_hub import InferenceClient
# Create the Inference API client.
# The access token is read from the HF_TOKEN environment variable instead of
# being written into the source, so no credential ends up in version control.
# When the variable is unset, None is passed; huggingface_hub then presumably
# falls back to its own stored-credential lookup (see the InferenceClient docs).
import os

client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",
    token=os.environ.get("HF_TOKEN"),
)

# Greeting phrases keyed by language code ("en", "fr", "am").
# Entries for Latin-script languages are lowercase because is_greeting()
# lowercases the query before comparing.
greetings = {
    "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
    "fr": ["bonjour", "salut", "coucou", "bonsoir"],
    "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
}
 def is_greeting(query: str, lang: str) -> bool:
-    """
-    Check if the user's query is a greeting in the specified language.
-    """
     greet_list = greetings.get(lang, greetings["en"])
-    # Convert to lowercase for non-Amharic languages
     if lang != "am":
         query = query.lower()
     return any(query.startswith(greet) for greet in greet_list)
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Ask the chat model for an out-of-scope reply in the given language.

    Args:
        language: Language code selecting the system prompt ("en", "fr",
            "am"). Unknown codes fall back to the English prompt.

    Returns:
        The generated message with surrounding whitespace removed. If the
        response does not have the expected ``choices[0].message.content``
        shape, or the content is None, the string form of the whole response
        is returned instead.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user has asked a question unrelated to these topics. "
            "Generate a friendly and intelligent out-of-scope response in English, encouraging the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur a posé une question sans rapport avec ces sujets. "
            "Générez une réponse amicale et intelligente en français, encourageant l'utilisateur à poser des questions sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ የሚሰራ እገዛ የሚሰጥ ቻትቦት ነው። "
            "ተጠቃሚው ከእነዚህ ጉዳዮች ውጪ ጥያቄ አቀርቧል። "
            "በአማርኛ የተሰጠ የውጭ ክፍል ምላሽ ይፍጠሩ፣ ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ለመጠየቅ ያበረታታ።"
        ),
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=80,
        temperature=0.7,
        top_p=0.95,
    )
    # The response may lack attributes, have an empty or missing `choices`
    # list, or carry a None content; every such case falls back to str().
    try:
        out_message = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        out_message = None
    if out_message is None:
        out_message = str(response)
    return out_message.strip()
 def is_domain_query(query: str) -> bool:
-    """
-    Determine if the query is related to agriculture or agro-investment.
-    """
     domain_keywords = [
         "agriculture", "farming", "crop", "agro", "investment", "soil",
         "irrigation", "harvest", "organic", "sustainable", "agribusiness",
-        "livestock", "agroalimentaire", "agriculture durable"
     ]
     return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
def handle_user_query(query: str, lang: str = "en") -> str:
    """Route a user query to the matching kind of reply.

    Greetings get a randomly chosen greeting back, domain questions get the
    placeholder domain answer, and everything else gets a generated
    out-of-scope message.

    Args:
        query: The user's text.
        lang: Language code; defaults to English.

    Returns:
        The reply text.
    """
    if is_greeting(query, lang):
        candidates = greetings.get(lang, greetings["en"])
        chosen = random.choice(candidates)
        return f"{chosen.capitalize()}!"
    if not is_domain_query(query):
        return generate_dynamic_out_of_scope_message(lang)
    # Placeholder until domain-specific answer generation is integrated.
    return "This is a domain-specific question. Processing accordingly..."
# Example usage. Guarded so that importing this module does not trigger a
# model call and a print; the demo only runs when the file is executed
# directly.
if __name__ == "__main__":
    user_query = "Tell me about space travel."
    response = handle_user_query(user_query, lang="en")
    print(response)

 import re
+import time
+import random
+import gradio as gr
 from huggingface_hub import InferenceClient
# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"
# Scraped site text used as retrieval context. It stays empty when scraping
# is disabled or fails, so an error message can never be mistaken for content.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Selenium is imported lazily so the app still starts when it is not
        # installed and scraping is switched off.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        def scrape_site(url, timeout=10):
            """Return the visible text of the page's ``#content`` element.

            Args:
                url: Page to load in a headless Chrome session.
                timeout: Seconds to wait for the element to appear.

            Returns:
                The element's text.

            Raises:
                Exception: Any Selenium error (driver start-up, navigation,
                    or the element not appearing in time) propagates to the
                    caller. The browser is always closed first.
            """
            options = Options()
            # The `headless` attribute is no longer honoured by current
            # Selenium releases; the command-line flag is the supported way.
            options.add_argument("--headless=new")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Customize the locator as per your site's HTML structure.
                content_element = WebDriverWait(driver, timeout).until(
                    EC.presence_of_element_located((By.ID, "content"))
                )
                return content_element.text
            finally:
                # Release the browser even when navigation or lookup fails.
                driver.quit()

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        # Top-level boundary: scraping is best-effort, so report the problem
        # and continue without reference content.
        knowledge_base = ""
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
# --- Multilingual Helpers ---
# Language-specific greeting detection
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    The greeting must be a whole leading word or phrase: it has to be followed
    by the end of the text or by a non-alphanumeric character. A plain prefix
    test would classify "history of farming" as the greeting "hi".

    Args:
        query: Raw user text; surrounding whitespace is ignored.
        lang: Language code ("en", "fr" or "am"). Unknown codes fall back to
            the English greeting list.

    Returns:
        True if the query starts with a known greeting, otherwise False.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    greet_list = greetings.get(lang, greetings["en"])
    text = query.strip()
    # Amharic has no letter case; Latin-script input is lowercased to match
    # the lowercase greeting lists.
    if lang != "am":
        text = text.lower()
    for greet in greet_list:
        if text.startswith(greet):
            remainder = text[len(greet):]
            if not remainder or not remainder[0].isalnum():
                return True
    return False
# Canned out-of-scope replies, one pool per language
def get_out_of_scope_message(lang: str) -> str:
    """Pick a random out-of-scope reply for the requested language.

    Args:
        lang: Language code ("en", "fr" or "am"). Any other value selects
            the English pool.

    Returns:
        One message drawn at random from the pool for that language.
    """
    english = (
        "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
        "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
        "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
        "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
    )
    french = (
        "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
        "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
        "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
        "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
    )
    amharic = (
        "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
        "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
        "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።",
    )
    pools = {"en": english, "fr": french, "am": amharic}
    # Unsupported language codes are answered in English.
    pool = pools[lang] if lang in pools else english
    return random.choice(pool)
# Helper to determine if a query is relevant to our domain
# (English plus a few French terms; can be expanded).
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock",  # additional English keyword
    "agroalimentaire", "agriculture durable",  # French terms
)

# One case-insensitive pattern, compiled once at import time. Keywords are
# escaped so they are always matched literally, and an optional trailing "s"
# lets plain plurals ("crops", "investments") count as domain terms.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions an agriculture or agro-investment term.

    Matching is case-insensitive and on whole words, so "soil" matches
    "Soil health" but not "microsoil".

    Args:
        query: Text to inspect.

    Returns:
        True if any domain keyword (or its plain plural) occurs in the text.
    """
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the domain sentence from *text* that best matches *query*.

    The text is split into sentences on ".", "?" and "!". Only sentences that
    is_domain_query() accepts are considered. Each is scored by how many query
    terms it contains, and the first sentence with the highest non-zero score
    wins.

    Query terms are runs of word characters at least three characters long.
    Tokenising this way keeps trailing punctuation ("farming?") and short
    filler words ("is", "a") from preventing a match.

    Args:
        query: The user's question.
        text: Reference text to search.
        max_length: Maximum snippet length in characters before truncation.

    Returns:
        The stripped sentence, cut to *max_length* characters with "..."
        appended when it is longer. An empty string is returned when the
        query has no usable terms or no domain sentence shares a term.
    """
    terms = {term for term in re.findall(r"\w+", query.lower()) if len(term) >= 3}
    if not terms:
        return ""

    best_sentence = ""
    best_score = 0
    for sentence in re.split(r"[.?!]", text):
        if not is_domain_query(sentence):
            continue
        lowered = sentence.lower()
        score = sum(term in lowered for term in terms)
        # Strict comparison keeps the earliest sentence on ties.
        if score > best_score:
            best_sentence, best_score = sentence, score

    snippet = best_sentence.strip()
    if len(snippet) > max_length:
        return snippet[:max_length] + "..."
    return snippet
# --- Chat Assistant Response Function ---
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
    """Generate the assistant's reply for the Gradio chat interface.

    Greetings and off-topic messages are answered with canned text and never
    reach the model. Domain questions are sent to the chat model together
    with the system message, the prior conversation and, when available, a
    snippet retrieved from the scraped knowledge base.

    Args:
        message: The new user message.
        history: Earlier (user, assistant) message pairs.
        system_message: System prompt for the model.
        max_tokens: Generation length limit passed to the model.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling value passed to the model.
        language: Language code, expected to be "en", "fr" or "am".

    Yields:
        For canned replies, the single reply string. For model replies, the
        accumulated response text after each streamed chunk that adds text.
    """
    # Check for a greeting in the appropriate language.
    if is_greeting(message, language):
        greetings = {
            "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
            "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
            "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?",
        }
        yield greetings.get(language, greetings["en"])
        return

    # If the query is not recognized as domain related, return an out-of-scope message.
    if not is_domain_query(message):
        yield get_out_of_scope_message(language)
        return

    # Build conversation context starting with the system message.
    messages_context = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages_context.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_context.append({"role": "assistant", "content": assistant_msg})

    # Optional: put retrieved site content in front of the conversation as
    # an extra system message.
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
            messages_context.insert(0, {"role": "system", "content": retrieval_context})

    # Append the new user message.
    messages_context.append({"role": "user", "content": message})

    # Stream the model's reply chunk by chunk.
    response = ""
    for chunk in client.chat_completion(
        messages_context,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A chunk may carry no choices, or a delta whose content is None or
        # empty (for example the closing chunk). Concatenating None would
        # raise TypeError, so such chunks are skipped.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response
# --- Gradio Chat Interface ---
# Extra controls shown next to the chat box. Their values are passed to
# respond() after the message and history, in the order listed here. The
# language dropdown uses codes: "en" (English), "fr" (French), "am" (Amharic).
_system_message_input = gr.Textbox(
    value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
    label="System Message",
)
_max_tokens_input = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
_temperature_input = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
_language_input = gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language (en, fr, am)")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_input,
        _max_tokens_input,
        _temperature_input,
        _top_p_input,
        _language_input,
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()