Macdensten91's picture
Update app.py
f035190 verified
raw
history blame
5.07 kB
import re
import time
import random
import gradio as gr
from huggingface_hub import InferenceClient
# Optional: enable scraping once your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable holding the scraped site text. It stays empty when scraping
# is disabled or when the scraping attempt fails before producing any text.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Imported lazily so Selenium is only required when scraping is on.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Return the visible text of the element with id "content" at *url*.

            If the element cannot be located or read, a string starting with
            "Error encountered during scraping: " is returned instead of
            raising. Failures while starting the browser or loading the page
            propagate to the caller.
            """
            options = Options()
            # The `Options.headless` property was deprecated and later removed
            # in Selenium 4.x; passing the browser flag works across versions.
            options.add_argument("--headless")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    content_element = driver.find_element(By.ID, "content")
                    page_text = content_element.text
                except Exception as e:
                    page_text = "Error encountered during scraping: " + str(e)
            finally:
                # Always release the browser process, even if loading failed.
                driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        # Best effort: the app keeps running with an empty knowledge base.
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
# --- Multilingual Helpers ---
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a known greeting for *lang*.

    Args:
        query: Raw user input; surrounding whitespace is ignored.
        lang: Language code ("en", "fr" or "am"). Unknown codes fall back to
            the English greeting list.

    Returns:
        True if the query starts with one of the greetings as a whole word
        or phrase, False otherwise (including for empty input).
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    greet_list = greetings.get(lang, greetings["en"])

    text = query.strip()
    # For languages using Latin script, convert to lower case.
    if lang != "am":
        text = text.lower()

    for greet in greet_list:
        if not text.startswith(greet):
            continue
        remainder = text[len(greet):]
        # Require the greeting to end at a word boundary so that words such as
        # "history" or "high" are not mistaken for the greeting "hi".
        if not remainder or not remainder[0].isalnum():
            return True
    return False
# Rather than using fixed out-of-scope messages, use the model via Hugging Face to generate them.
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Ask the model for a fresh out-of-scope reply in the requested language.

    Args:
        language: Language code selecting the system prompt ("en", "fr" or
            "am"). Any other value falls back to the English prompt.

    Returns:
        The generated message with surrounding whitespace stripped. When the
        text cannot be extracted from the response (unexpected structure,
        empty ``choices`` or missing content), the string form of the whole
        response is returned instead.
    """
    # Define language-specific system prompts for generating a dynamic out-of-scope message.
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        ),
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]

    # Call the model without streaming to generate the complete message.
    # NOTE(review): `client` is not defined in this block; it is expected to be
    # a module-level inference client created elsewhere in the file.
    response = client.chat_completion(
        messages,
        max_tokens=80,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )

    # Depending on the client structure, adjust the extraction of the generated text.
    try:
        out_message = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        # Unexpected response shape or no choices returned.
        out_message = None

    # Content may be missing even when the structure is valid; fall back to the
    # raw response text rather than failing on `.strip()`.
    if not isinstance(out_message, str):
        out_message = str(response)
    return out_message.strip()
# A helper to determine domain relevance (basic implementation; can be expanded).
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock", "agroalimentaire", "agriculture durable",
)

# One case-insensitive pattern for all keywords, compiled once at import time.
# Keywords are escaped so they are always matched literally, and an optional
# trailing "s" lets simple plurals ("crops", "soils") count as matches.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and on whole words: a keyword embedded in a
    longer word (e.g. "crop" inside "microprocessor") does not count, but a
    keyword followed by a plural "s" does.

    Args:
        query: The user's message.

    Returns:
        True if at least one domain keyword is found, False otherwise.
    """
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
sentences = re.split(r'[.?!]', text)
for sentence in sentences