# Spaces: Sleeping (Hugging Face Spaces status banner captured with the source; not part of the program)
import random
import re
import time

import gradio as gr
from huggingface_hub import InferenceClient
# Optional: enable scraping once your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable holding the scraped site content; stays empty unless the
# optional scraping step assigns to it.
knowledge_base = ""
# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Selenium is imported lazily so the app still starts when it is not installed.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Return the text of the element with id ``content`` on *url*.

            A headless Chrome session loads the page, waits a fixed five
            seconds, and reads the element's text. If the element lookup
            fails, a string starting with "Error encountered during
            scraping: " is returned instead of raising. Failures while
            starting the browser or loading the page propagate to the caller.
            """
            options = Options()
            # The `Options.headless` setter was deprecated and later removed in
            # Selenium 4; passing the Chrome flag works across versions.
            options.add_argument("--headless")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    content_element = driver.find_element(By.ID, "content")
                    page_text = content_element.text
                except Exception as e:
                    page_text = "Error encountered during scraping: " + str(e)
            finally:
                # Always release the browser, even when loading the page fails.
                driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        # Best-effort boundary: any import or browser failure leaves
        # knowledge_base at its previous value.
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
# --- Multilingual Helpers ---
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a known greeting for *lang*.

    Args:
        query: Raw user input; leading and trailing whitespace is ignored.
        lang: Language code ("en", "fr" or "am"). Any other code falls back
            to the English greeting list.

    Returns:
        True if the query starts with one of the greetings. For Latin-script
        languages the greeting must be a whole word, so "history" is not
        mistaken for "hi".
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
    }
    greet_list = greetings.get(lang, greetings["en"])
    text = query.strip()

    if lang == "am":
        # No lower-casing is applied to Amharic, and the plain prefix match is
        # kept here.
        # NOTE(review): confirm whether a word-boundary check suits Amharic too.
        return text.startswith(tuple(greet_list))

    # For languages using Latin script, convert to lower case.
    text = text.lower()
    for greet in greet_list:
        if not text.startswith(greet):
            continue
        remainder = text[len(greet):]
        # Require the greeting to end at a word boundary (end of input,
        # whitespace or punctuation) rather than inside a longer word.
        if not remainder or not remainder[0].isalnum():
            return True
    return False
# Rather than using fixed out-of-scope messages, use the model via Hugging Face to generate them.
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Ask the chat model for an out-of-scope reply in the given language.

    Args:
        language: Language code ("en", "fr" or "am"). Any other code falls
            back to the English system prompt.

    Returns:
        The generated message with surrounding whitespace stripped. If the
        response does not expose ``choices[0].message.content`` as a string,
        the string form of the whole response is returned instead.

    Errors raised by the chat-completion call itself are not caught here.
    """
    # Define language-specific system prompts for generating a dynamic out-of-scope message.
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        )
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]

    # Call the model without streaming to generate the complete message.
    # `client` is a module-level name defined outside this function
    # (presumably the InferenceClient imported at the top of the file — verify).
    response = client.chat_completion(
        messages,
        max_tokens=80,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )

    # Depending on the client structure, adjust the extraction of the generated text.
    try:
        out_message = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        # Missing attributes, an empty `choices` list, or a non-subscriptable
        # `choices` all mean the expected structure is absent.
        out_message = None

    if not isinstance(out_message, str):
        # Covers a `None` content as well, which would otherwise fail on
        # `.strip()`: fall back to the string form of the whole response.
        out_message = str(response)
    return out_message.strip()
# A helper to determine domain relevance (basic implementation; can be expanded).
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock", "agroalimentaire", "agriculture durable",
)

# One case-insensitive alternation, compiled once. Keywords are escaped so a
# future entry with regex metacharacters is matched literally, and an optional
# trailing "s" lets plural forms ("crops", "investments") match as whole words.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and whole-word: "soil" and "soils" match,
    while "cropped" does not.
    """
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str: | |
sentences = re.split(r'[.?!]', text) | |
for sentence in sentences |