Spaces:

Macdensten91
/

google-flan-t5-small

Sleeping

App Files Files Community

google-flan-t5-small / app.py

Macdensten91

Update app.py

f035190 verified 5 months ago

raw

history blame

5.07 kB

	import re
	import time
	import random
	import gradio as gr
	from huggingface_hub import InferenceClient

	# Optional: enable scraping once the site is deployed and reachable.
	ENABLE_SCRAPING = False
	SITE_URL = "https://your-agri-future-site.com"

	# Text scraped from SITE_URL; stays empty when scraping is disabled or fails.
	knowledge_base = ""

	# --- Optional: Scraping Functionality ---
	if ENABLE_SCRAPING:
	    try:
	        # Selenium is imported only when scraping is enabled, so a missing
	        # installation is reported by the handler below instead of breaking startup.
	        from selenium import webdriver
	        from selenium.webdriver.chrome.options import Options
	        from selenium.webdriver.common.by import By

	        def scrape_site(url):
	            """Return the text of the element with id ``content`` at *url*.

	            A headless Chrome session loads the page, waits a fixed five
	            seconds, and reads the element text. If the element lookup fails,
	            the returned string is an error description rather than page text.
	            The browser is always shut down, even when loading the page raises.
	            """
	            options = Options()
	            # The `Options.headless` setter was deprecated and later removed in
	            # Selenium 4; passing the Chrome flag directly works across versions.
	            options.add_argument("--headless=new")
	            driver = webdriver.Chrome(options=options)
	            try:
	                driver.get(url)
	                # Use explicit waits in production; here we use a basic sleep.
	                time.sleep(5)
	                try:
	                    # Customize the selector based on your site's HTML structure.
	                    content_element = driver.find_element(By.ID, "content")
	                    page_text = content_element.text
	                except Exception as e:
	                    # NOTE(review): this error text becomes the knowledge base and
	                    # the success message below is still printed — confirm intended.
	                    page_text = "Error encountered during scraping: " + str(e)
	            finally:
	                # Quit in `finally` so a failed page load does not leak a browser.
	                driver.quit()
	            return page_text

	        knowledge_base = scrape_site(SITE_URL)
	        print("Scraped knowledge base successfully.")
	    except Exception as e:
	        print("Scraping failed or Selenium is not configured:", e)
	else:
	    print("Scraping is disabled; proceeding without scraped site content.")

	# --- Multilingual Helpers ---

	def is_greeting(query: str, lang: str) -> bool:
	    """Return True when *query* opens with a known greeting for *lang*.

	    Args:
	        query: Raw user input.
	        lang: Language code ("en", "fr" or "am"); any other value falls
	            back to the English greeting list.

	    Returns:
	        True if the query, ignoring leading whitespace and letter case,
	        starts with one of the greetings as a whole word or phrase.
	    """
	    greetings = {
	        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
	        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
	        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
	    }
	    greet_list = greetings.get(lang, greetings["en"])
	    # Lower-casing is a no-op for the Ethiopic script, so it is safe for every language.
	    normalized = query.strip().lower()
	    # The greeting must not be followed by another word character; a bare
	    # prefix test would treat "history" or "high" as the greeting "hi".
	    return any(
	        re.match(re.escape(greet) + r"(?!\w)", normalized) is not None
	        for greet in greet_list
	    )

	# Rather than using fixed out-of-scope messages, use the model via Hugging Face to generate them.
	def generate_dynamic_out_of_scope_message(language: str) -> str:
	    """Ask the chat model for an out-of-scope reply in the given language.

	    Args:
	        language: Language code ("en", "fr" or "am"); any other value uses
	            the English system prompt.

	    Returns:
	        The generated message with surrounding whitespace removed. If the
	        response does not expose text at ``choices[0].message.content``,
	        the string form of the whole response is returned instead.
	    """
	    # Language-specific system prompts for generating a dynamic out-of-scope message.
	    system_prompts = {
	        "en": (
	            "You are a helpful chatbot specializing in agriculture and agro-investment. "
	            "A user just asked a question that is not related to these topics. "
	            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
	        ),
	        "fr": (
	            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
	            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
	            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
	        ),
	        "am": (
	            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
	            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
	            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
	        )
	    }
	    prompt = system_prompts.get(language, system_prompts["en"])
	    messages = [{"role": "system", "content": prompt}]

	    # Call the model without streaming to generate the complete message.
	    response = client.chat_completion(
	        messages,
	        max_tokens=80,
	        stream=False,
	        temperature=0.7,
	        top_p=0.95,
	    )
	    # An empty `choices` list or a differently shaped response raises more
	    # than AttributeError, so all structural lookup errors take the fallback.
	    try:
	        out_message = response.choices[0].message.content
	    except (AttributeError, IndexError, KeyError, TypeError):
	        out_message = None
	    # Missing or non-text content would crash on .strip(); use the same
	    # string-conversion fallback in that case.
	    if not isinstance(out_message, str):
	        out_message = str(response)
	    return out_message.strip()

	# A helper to determine domain relevance (basic implementation; can be expanded).
	_DOMAIN_KEYWORDS = (
	    "agriculture", "farming", "crop", "agro", "investment", "soil",
	    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
	    "livestock", "agroalimentaire", "agriculture durable",
	)
	# Compiled once at import time. Keywords are escaped so they match literally,
	# and an optional "s"/"es" suffix lets simple plurals ("crops", "soils") match.
	_DOMAIN_PATTERN = re.compile(
	    r"\b(?:" + "|".join(re.escape(k) for k in _DOMAIN_KEYWORDS) + r")(?:e?s)?\b",
	    re.IGNORECASE,
	)


	def is_domain_query(query: str) -> bool:
	    """Return True when *query* mentions an agriculture/agro-investment keyword.

	    Matching is case-insensitive and on whole words; a keyword embedded in
	    a longer word (for example "crop" inside "acropolis") does not count,
	    but a plain plural of a keyword does.
	    """
	    return _DOMAIN_PATTERN.search(query) is not None

	def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
	sentences = re.split(r'[.?!]', text)
	for sentence in sentences