Spaces:

Macdensten91
/

google-flan-t5-small

Running

App Files Files Community

Macdensten91 committed on Apr 9

Commit

c010699

verified ·

1 Parent(s): 553ae67

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -35

app.py CHANGED Viewed

@@ -1,64 +1,125 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
     messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
         response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="you are AgriFutureBot, Design to help visitors and users from the Agri Future Invest platform understand more content about the site as well as know more about Agriculture and it's related investments only", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
         ),
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

+import re
+import time
 import gradio as gr
 from huggingface_hub import InferenceClient
+# Optional: Enable scraping if your site is deployed.
+# Set this flag to False until your site is available.
+ENABLE_SCRAPING = False
+SITE_URL = "https://your-agri-future-site.com"
+# Global variable to hold scraped content
+knowledge_base = ""
+# --- Optional: Scraping Functionality ---
+if ENABLE_SCRAPING:
+    try:
+        from selenium import webdriver
+        from selenium.webdriver.chrome.options import Options
+        from selenium.webdriver.common.by import By
+        def scrape_site(url):
+            options = Options()
+            options.headless = True  # Run browser in headless mode.
+            driver = webdriver.Chrome(options=options)
+            driver.get(url)
+            # Use explicit wait in production code; here we use a simple sleep.
+            time.sleep(5)
+            try:
+                # Customize the selector based on your site’s HTML
+                content_element = driver.find_element(By.ID, "content")
+                page_text = content_element.text
+            except Exception as e:
+                page_text = "Error encountered during scraping: " + str(e)
+            driver.quit()
+            return page_text
+        knowledge_base = scrape_site(SITE_URL)
+        print("Scraped knowledge base successfully.")
+    except Exception as e:
+        print("Scraping failed or Selenium is not configured:", e)
+else:
+    print("Scraping is disabled; proceeding without scraped site content.")
+# --- Domain-Related Helpers ---
+def is_domain_query(query: str) -> bool:
+    """Check if the query is relevant to agriculture and agro-investment."""
+    domain_keywords = [
+        "agriculture", "farming", "crop", "agro", "investment", "soil",
+        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
+        "livestock"
+    ]
+    return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
+def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
+    """
+    A simple retrieval function that searches for any sentence in the text
+    that contains domain keywords present in the query.
+    Returns a snippet limited to max_length characters.
+    """
+    sentences = re.split(r'[.?!]', text)
+    for sentence in sentences:
+        if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
+            snippet = sentence.strip()
+            if len(snippet) > max_length:
+                snippet = snippet[:max_length] + "..."
+            return snippet
+    return ""
+# --- Chat Assistant Response Function ---
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+    # Check domain relevance
+    if not is_domain_query(message):
+        yield "I'm sorry, but please ask a question related to agriculture or agro-investment topics."
+        return
+    # Build the conversation context starting with the system message.
     messages = [{"role": "system", "content": system_message}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+    # Optional: Append a retrieval-based context derived from the scraped content.
+    if knowledge_base:
+        snippet = retrieve_relevant_snippet(message, knowledge_base)
+        if snippet:
+            # Prepend additional context for the model to take into account.
+            retrieval_context = f"Reference information from Agri Future Investment platform: {snippet}"
+            messages.insert(0, {"role": "system", "content": retrieval_context})
+    # Append the new user query.
     messages.append({"role": "user", "content": message})
+    # Stream the model's reply token-by-token.
     response = ""
+    for message_resp in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
+        token = message_resp.choices[0].delta.content
         response += token
         yield response
+# --- Gradio Chat Interface ---
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(
+            value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
+            label="System Message"
         ),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
     ],
 )
 if __name__ == "__main__":
+    demo.launch()