Spaces:

Macdensten91
/

google-flan-t5-small

Sleeping

App Files Files Community

Macdensten91 committed on Apr 9

Commit

6ac4ea2

verified ·

1 Parent(s): c010699

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -25

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import re
 import time
 import gradio as gr
 from huggingface_hub import InferenceClient
 # Optional: Enable scraping if your site is deployed.
-# Set this flag to False until your site is available.
 ENABLE_SCRAPING = False
 SITE_URL = "https://your-agri-future-site.com"
@@ -23,10 +23,10 @@ if ENABLE_SCRAPING:
             options.headless = True  # Run browser in headless mode.
             driver = webdriver.Chrome(options=options)
             driver.get(url)
-            # Use explicit wait in production code; here we use a simple sleep.
             time.sleep(5)
             try:
-                # Customize the selector based on your site’s HTML
                 content_element = driver.find_element(By.ID, "content")
                 page_text = content_element.text
             except Exception as e:
@@ -41,63 +41,112 @@ if ENABLE_SCRAPING:
 else:
     print("Scraping is disabled; proceeding without scraped site content.")
-# --- Domain-Related Helpers ---
 def is_domain_query(query: str) -> bool:
-    """Check if the query is relevant to agriculture and agro-investment."""
     domain_keywords = [
         "agriculture", "farming", "crop", "agro", "investment", "soil",
         "irrigation", "harvest", "organic", "sustainable", "agribusiness",
-        "livestock"
     ]
     return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
 def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
     """
-    A simple retrieval function that searches for any sentence in the text
-    that contains domain keywords present in the query.
     Returns a snippet limited to max_length characters.
     """
     sentences = re.split(r'[.?!]', text)
     for sentence in sentences:
         if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
             snippet = sentence.strip()
-            if len(snippet) > max_length:
-                snippet = snippet[:max_length] + "..."
-            return snippet
     return ""
 # --- Chat Assistant Response Function ---
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-    # Check domain relevance
     if not is_domain_query(message):
-        yield "I'm sorry, but please ask a question related to agriculture or agro-investment topics."
         return
-    # Build the conversation context starting with the system message.
-    messages = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    # Optional: Append a retrieval-based context derived from the scraped content.
     if knowledge_base:
         snippet = retrieve_relevant_snippet(message, knowledge_base)
         if snippet:
-            # Prepend additional context for the model to take into account.
-            retrieval_context = f"Reference information from Agri Future Investment platform: {snippet}"
-            messages.insert(0, {"role": "system", "content": retrieval_context})
-    # Append the new user query.
-    messages.append({"role": "user", "content": message})
     # Stream the model's reply token-by-token.
     response = ""
     for message_resp in client.chat_completion(
-        messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
@@ -108,6 +157,8 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
         yield response
 # --- Gradio Chat Interface ---
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -118,6 +169,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
     ],
 )

 import re
 import time
+import random
 import gradio as gr
 from huggingface_hub import InferenceClient
 # Optional: Enable scraping if your site is deployed.
 ENABLE_SCRAPING = False
 SITE_URL = "https://your-agri-future-site.com"
             options.headless = True  # Run browser in headless mode.
             driver = webdriver.Chrome(options=options)
             driver.get(url)
+            # Use explicit waits in production code; here we use a simple sleep.
             time.sleep(5)
             try:
+                # Customize the selector as per your site's HTML structure.
                 content_element = driver.find_element(By.ID, "content")
                 page_text = content_element.text
             except Exception as e:
 else:
     print("Scraping is disabled; proceeding without scraped site content.")
+# --- Multilingual Helpers ---
+# Language-specific greeting detection
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Args:
        query: Raw user message.
        lang: Language code ("en", "fr" or "am"). Any other code falls back
            to the English greeting list.

    Returns:
        True if the message, ignoring surrounding whitespace, starts with a
        known greeting that ends on a token boundary; False otherwise.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    # Retrieve greetings for the provided language; default to English if unavailable.
    greet_list = greetings.get(lang, greetings["en"])

    text = query.strip()
    # For Amharic, no case transformation; for Latin scripts, compare in lower case.
    if lang != "am":
        text = text.lower()

    for greet in greet_list:
        if not text.startswith(greet):
            continue
        remainder = text[len(greet):]
        # A bare prefix test would treat "history ..." or "hire ..." as the
        # greeting "hi"; require the greeting to end at a token boundary.
        if not remainder or not remainder[0].isalnum():
            return True
    return False
+# Language-specific out-of-scope messages
def get_out_of_scope_message(lang: str) -> str:
    """Pick a random "please stay on topic" reply for language *lang*.

    Replies exist for "en", "fr" and "am"; any other language code gets one
    of the English replies.
    """
    english_replies = [
        "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
        "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
        "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
        "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
    ]
    french_replies = [
        "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
        "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
        "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
        "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
    ]
    amharic_replies = [
        "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
        "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
        "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።",
    ]
    replies_by_lang = {
        "en": english_replies,
        "fr": french_replies,
        "am": amharic_replies,
    }

    # Unknown language codes fall back to the English pool.
    if lang in replies_by_lang:
        pool = replies_by_lang[lang]
    else:
        pool = english_replies
    return random.choice(pool)
# Domain vocabulary used to decide whether a piece of text is in scope.
# Mostly English, plus the French term "agroalimentaire"; "agriculture" is
# spelled the same in both languages, so phrases such as
# "agriculture durable" are already covered by the single keyword.
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock", "agroalimentaire",
)

# One case-insensitive alternation, compiled once instead of rebuilding a
# pattern per keyword on every call. The optional trailing "s" lets plural
# forms ("crops", "investments") count as whole-word matches.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True if *query* contains at least one domain keyword.

    Matching is case-insensitive and on whole words (with an optional
    plural "s"), so "Crops" matches but "cropped" does not.
    """
    return _DOMAIN_PATTERN.search(query) is not None


def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first domain-related sentence of *text* that covers *query*.

    The text is split into sentences on ".", "?" and "!". A sentence is
    selected when it contains a domain keyword and every word of the query
    occurs in it (case-insensitive substring test).

    Args:
        query: The user's question.
        text: Source text to search.
        max_length: Maximum number of sentence characters to keep.

    Returns:
        The stripped sentence, cut to ``max_length`` characters and followed
        by "..." when it was longer; an empty string if nothing matches.
    """
    # Tokenise on word characters so that trailing punctuation ("irrigation?")
    # is dropped; sentences never contain ".", "?" or "!" after splitting, so
    # whitespace-split tokens carrying them could never match.
    query_terms = re.findall(r"\w+", query.lower())

    for sentence in re.split(r"[.?!]", text):
        if not is_domain_query(sentence):
            continue
        lowered = sentence.lower()
        if all(term in lowered for term in query_terms):
            snippet = sentence.strip()
            if len(snippet) > max_length:
                return snippet[:max_length] + "..."
            return snippet
    return ""
 # --- Chat Assistant Response Function ---
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
+    # language is expected as a string code: "en", "fr", or "am"
+    # Check for a greeting in the appropriate language.
+    if is_greeting(message, language):
+        greetings = {
+            "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
+            "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
+            "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?"
+        }
+        yield greetings.get(language, greetings["en"])
+        return
+    # If the query is not recognized as domain related, return an out-of-scope message.
     if not is_domain_query(message):
+        yield get_out_of_scope_message(language)
         return
+    # Build conversation context starting with the system message.
+    messages_context = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
+            messages_context.append({"role": "user", "content": user_msg})
         if assistant_msg:
+            messages_context.append({"role": "assistant", "content": assistant_msg})
+    # Optional: Append retrieved context from scraped site content.
     if knowledge_base:
         snippet = retrieve_relevant_snippet(message, knowledge_base)
         if snippet:
+            retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
+            messages_context.insert(0, {"role": "system", "content": retrieval_context})
+    # Append the new user message.
+    messages_context.append({"role": "user", "content": message})
     # Stream the model's reply token-by-token.
     response = ""
     for message_resp in client.chat_completion(
+        messages_context,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         yield response
 # --- Gradio Chat Interface ---
+# The language selection dropdown uses language codes: "en" for English, "fr" for French, "am" for Amharic.
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
+        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language (en, fr, am)")
     ],
 )