Spaces:
Sleeping
Sleeping
import re | |
import time | |
import random | |
import gradio as gr | |
from huggingface_hub import InferenceClient | |
# Optional: enable scraping once the site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Text scraped from SITE_URL; stays empty when scraping is disabled or fails.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Selenium is imported lazily so the app still starts without it installed.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        def scrape_site(url, timeout=10):
            """Return the visible text of the element with id ``content`` at *url*.

            Args:
                url: Page to load in a headless Chrome session.
                timeout: Maximum number of seconds to wait for the element
                    to appear in the DOM.

            Returns:
                The ``.text`` of the located element.

            Raises:
                Any Selenium exception (for example a timeout while waiting
                for the element). The caller below treats that as a failed
                scrape, so an error message is never stored as knowledge.
            """
            options = Options()
            # The ``options.headless`` setter was removed from Selenium 4;
            # passing the Chrome argument works across versions.
            options.add_argument("--headless=new")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Customize the locator as per your site's HTML structure.
                content_element = WebDriverWait(driver, timeout).until(
                    EC.presence_of_element_located((By.ID, "content"))
                )
                return content_element.text
            finally:
                # Always release the browser, even when loading or waiting fails.
                driver.quit()

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        # Top-level boundary: scraping is best-effort, the app runs without it.
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
# --- Multilingual Helpers ---
# Language-specific greeting detection.
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Args:
        query: Raw user message.
        lang: Language code ("en", "fr" or "am"); any other code falls
            back to the English greeting list.

    Returns:
        True if the message, ignoring surrounding whitespace, starts with
        one of the known greetings as a whole word or phrase.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    # Retrieve greetings for the provided language; default to English if unavailable.
    greet_list = greetings.get(lang, greetings["en"])

    text = query.strip()
    # For Amharic, no transformation; for Latin scripts, convert to lower case.
    if lang != "am":
        text = text.lower()

    for greet in greet_list:
        if not text.startswith(greet):
            continue
        # Require a word boundary after the greeting so that words which merely
        # begin with one ("history", "high-yield", "salutations") are not
        # mistaken for a greeting. An empty remainder also counts as a boundary.
        following = text[len(greet):len(greet) + 1]
        if not following.isalnum():
            return True
    return False
# Language-specific out-of-scope messages.
def get_out_of_scope_message(lang: str) -> str:
    """Pick a random polite refusal for questions outside the bot's domain.

    Args:
        lang: Language code ("en", "fr" or "am"); unknown codes receive an
            English message.

    Returns:
        One of the canned out-of-scope replies for the selected language.
    """
    english_replies = [
        "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
        "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
        "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
        "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
    ]
    french_replies = [
        "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
        "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
        "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
        "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
    ]
    amharic_replies = [
        "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
        "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
        "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።",
    ]
    replies_by_lang = {
        "en": english_replies,
        "fr": french_replies,
        "am": amharic_replies,
    }

    # Unknown language codes fall back to the English pool.
    if lang in replies_by_lang:
        pool = replies_by_lang[lang]
    else:
        pool = english_replies
    return random.choice(pool)
# Helper to determine whether a query is relevant to our domain (keyword check covering English and a few French terms; can be expanded).
# Keywords that mark a message as being about agriculture / agro-investment.
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock",  # additional English keywords
    "agroalimentaire", "agriculture durable",  # French terms can also be included
)

# Compiled once at import time. The optional trailing "s" lets plural forms
# ("crops", "investments", "agroalimentaires") count as matches; with a strict
# \bkeyword\b those everyday questions were rejected as out of scope.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions at least one domain keyword.

    Matching is case-insensitive and on whole words, accepting either the
    keyword itself or its simple plural (keyword + "s").

    Args:
        query: Text to inspect (a user message or a sentence of site content).

    Returns:
        True if any keyword occurs in *query*, otherwise False.
    """
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first domain-related sentence of *text* containing every query word.

    The text is split into sentences on ".", "?" and "!". A sentence qualifies
    when ``is_domain_query`` accepts it and each word of the query occurs in it
    (case-insensitive substring match).

    Args:
        query: User question whose words must all appear in the sentence.
        text: Knowledge-base text to search.
        max_length: Maximum snippet length before it is cut and "..." appended.

    Returns:
        The stripped matching sentence, truncated to *max_length* characters
        plus "..." when longer, or "" when nothing matches or the query
        contains no words.
    """
    # Tokenize on word characters so punctuation attached to a word ("soil?")
    # is dropped. Sentences are split on [.?!] and can never contain those
    # characters, so a whitespace split made any question ending in "?"
    # impossible to match.
    query_words = re.findall(r"\w+", query.lower())
    if not query_words:
        # all() over no words is vacuously true and would return an arbitrary sentence.
        return ""

    for sentence in re.split(r"[.?!]", text):
        lowered = sentence.lower()
        if is_domain_query(sentence) and all(word in lowered for word in query_words):
            snippet = sentence.strip()
            if len(snippet) > max_length:
                return snippet[:max_length] + "..."
            return snippet
    return ""
# --- Chat Assistant Response Function ---
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
    """Generate the assistant reply for one chat turn, streaming partial text.

    Args:
        message: The new user message.
        history: Earlier turns as (user_message, assistant_message) pairs.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Forwarded to the model as ``max_tokens``.
        temperature: Forwarded to the model as ``temperature``.
        top_p: Forwarded to the model as ``top_p``.
        language: Language code, expected to be "en", "fr" or "am".

    Yields:
        A canned greeting or out-of-scope message (yielded once), or the
        progressively growing model response as tokens arrive.
    """
    # Check for a greeting in the appropriate language.
    if is_greeting(message, language):
        greetings = {
            "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
            "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
            "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?",
        }
        yield greetings.get(language, greetings["en"])
        return

    # If the query is not recognized as domain related, return an out-of-scope message.
    if not is_domain_query(message):
        yield get_out_of_scope_message(language)
        return

    # Build conversation context starting with the system message.
    messages_context = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages_context.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_context.append({"role": "assistant", "content": assistant_msg})

    # Optional: prepend retrieved context from scraped site content.
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
            messages_context.insert(0, {"role": "system", "content": retrieval_context})

    # Append the new user message.
    messages_context.append({"role": "user", "content": message})

    # Stream the model's reply token-by-token.
    response = ""
    for chunk in client.chat_completion(
        messages_context,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Stream chunks may carry no choices or a delta without content;
        # concatenating None would raise TypeError in the middle of the reply.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
# --- Gradio Chat Interface ---
# The language selection dropdown uses language codes: "en" for English, "fr" for French, "am" for Amharic.
# Extra controls shown next to the chat box. Their order must match the
# trailing parameters of respond(): system_message, max_tokens, temperature,
# top_p, language.
_system_message_box = gr.Textbox(
    value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
    label="System Message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max New Tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (Nucleus Sampling)",
)
_language_dropdown = gr.Dropdown(
    choices=["en", "fr", "am"],
    value="en",
    label="Language (en, fr, am)",
)

# Chat UI wired to respond(); the widgets above are passed as additional inputs.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
        _language_dropdown,
    ],
)

# Launch the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()