Macdensten91 committed on
Commit
4002882
·
verified ·
1 Parent(s): 0980116

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -72
app.py CHANGED
@@ -4,11 +4,17 @@ import random
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
 
 
 
 
 
 
 
7
  # Optional: Enable scraping if your site is deployed.
8
  ENABLE_SCRAPING = False
9
  SITE_URL = "https://your-agri-future-site.com"
10
 
11
- # Global variable to hold scraped content
12
  knowledge_base = ""
13
 
14
  # --- Optional: Scraping Functionality ---
@@ -23,10 +29,10 @@ if ENABLE_SCRAPING:
23
  options.headless = True # Run browser in headless mode.
24
  driver = webdriver.Chrome(options=options)
25
  driver.get(url)
26
- # Use explicit waits in production code; here we use a simple sleep.
27
  time.sleep(5)
28
  try:
29
- # Customize the selector as per your site's HTML structure.
30
  content_element = driver.find_element(By.ID, "content")
31
  page_text = content_element.text
32
  except Exception as e:
@@ -43,59 +49,106 @@ else:
43
 
44
  # --- Multilingual Helpers ---
45
 
46
- # Language-specific greeting detection
47
  def is_greeting(query: str, lang: str) -> bool:
48
  greetings = {
49
  "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
50
  "fr": ["bonjour", "salut", "coucou", "bonsoir"],
51
  "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
52
  }
53
- # Retrieve greetings for the provided language; default to English if unavailable.
54
  greet_list = greetings.get(lang, greetings["en"])
55
- # For Amharic, no transformation; for Latin scripts, convert to lower case.
56
  if lang != "am":
57
  query = query.lower()
58
  return any(query.startswith(greet) for greet in greet_list)
59
 
60
- # Language-specific out-of-scope messages
61
- def get_out_of_scope_message(lang: str) -> str:
62
- messages = {
63
- "en": [
64
- "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
65
- "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
66
- "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
67
- "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively."
68
- ],
69
- "fr": [
70
- "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
71
- "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
72
- "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
73
- "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement."
74
- ],
75
- "am": [
76
- "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
77
- "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
78
- "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።"
79
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }
81
- # Return a random message for the given language; default to English if not available.
82
- return random.choice(messages.get(lang, messages["en"]))
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- # Helper to determine if a query is relevant to our domain (English check only; can be expanded).
85
  def is_domain_query(query: str) -> bool:
 
 
 
86
  domain_keywords = [
87
  "agriculture", "farming", "crop", "agro", "investment", "soil",
88
  "irrigation", "harvest", "organic", "sustainable", "agribusiness",
89
- "livestock", # additional English keywords
90
- "agriculture", "agroalimentaire", "agriculture durable" # French terms can also be included
 
 
 
91
  ]
92
  return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
93
 
94
  def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
95
  """
96
- A simple retrieval function that searches for sentences in the text
97
- containing domain keywords from the query.
98
- Returns a snippet limited to max_length characters.
99
  """
100
  sentences = re.split(r'[.?!]', text)
101
  for sentence in sentences:
@@ -105,71 +158,59 @@ def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> s
105
  return ""
106
 
107
  # --- Chat Assistant Response Function ---
108
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
109
-
110
- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
111
- # language is expected as a string code: "en", "fr", or "am"
112
-
113
- # Check for a greeting in the appropriate language.
114
  if is_greeting(message, language):
115
- greetings = {
116
- "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
117
- "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
118
- "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?"
119
- }
120
- yield greetings.get(language, greetings["en"])
121
  return
122
 
123
- # If the query is not recognized as domain related, return an out-of-scope message.
124
  if not is_domain_query(message):
125
- yield get_out_of_scope_message(language)
126
  return
127
 
128
- # Build conversation context starting with the system message.
129
- messages_context = [{"role": "system", "content": system_message}]
130
  for user_msg, assistant_msg in history:
131
  if user_msg:
132
- messages_context.append({"role": "user", "content": user_msg})
133
  if assistant_msg:
134
- messages_context.append({"role": "assistant", "content": assistant_msg})
135
-
136
- # Optional: Append retrieved context from scraped site content.
137
  if knowledge_base:
138
  snippet = retrieve_relevant_snippet(message, knowledge_base)
139
  if snippet:
140
- retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
141
- messages_context.insert(0, {"role": "system", "content": retrieval_context})
142
-
143
- # Append the new user message.
144
- messages_context.append({"role": "user", "content": message})
145
-
146
- # Stream the model's reply token-by-token.
147
- response = ""
148
- for message_resp in client.chat_completion(
149
- messages_context,
150
  max_tokens=max_tokens,
151
  stream=True,
152
  temperature=temperature,
153
  top_p=top_p,
154
  ):
155
- token = message_resp.choices[0].delta.content
156
- response += token
157
- yield response
158
 
159
  # --- Gradio Chat Interface ---
160
-
161
- # The language selection dropdown uses language codes: "en" for English, "fr" for French, "am" for Amharic.
162
  demo = gr.ChatInterface(
163
- respond,
164
  additional_inputs=[
165
  gr.Textbox(
166
- value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
167
  label="System Message"
168
  ),
169
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
170
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
171
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
172
- gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language (en, fr, am)")
173
  ],
174
  )
175
 
 
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
 
7
# Initialize the InferenceClient.
# The access token is read from the HF_TOKEN environment variable instead of
# being hard-coded: a placeholder string fails authentication, and a real token
# written here would end up committed with the source. When the variable is
# unset, None is passed and the client decides how to authenticate.
import os

client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",  # Change to your model if needed
    token=os.environ.get("HF_TOKEN"),
)

# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content; an empty string means "no reference
# text available" and disables snippet retrieval in respond().
knowledge_base = ""
19
 
20
  # --- Optional: Scraping Functionality ---
 
29
  options.headless = True # Run browser in headless mode.
30
  driver = webdriver.Chrome(options=options)
31
  driver.get(url)
32
+ # Use explicit waits in production; here we use a basic sleep.
33
  time.sleep(5)
34
  try:
35
+ # Customize the selector based on your site's HTML structure.
36
  content_element = driver.find_element(By.ID, "content")
37
  page_text = content_element.text
38
  except Exception as e:
 
49
 
50
  # --- Multilingual Helpers ---
51
 
 
52
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Args:
        query: Raw user message. Empty or None is treated as "not a greeting".
        lang: Language code ("en", "fr" or "am"). Unknown codes fall back to
            the English greeting list.

    Returns:
        True if the message, ignoring leading whitespace, starts with one of
        the known greetings as a whole word or phrase.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
    }
    if not query:
        return False

    greet_list = greetings.get(lang, greetings["en"])
    query = query.strip()
    # For languages using Latin script, compare case-insensitively.
    if lang != "am":
        query = query.lower()

    for greet in greet_list:
        if not query.startswith(greet):
            continue
        rest = query[len(greet):]
        # Require the greeting to end at a word boundary so that messages such
        # as "history of farming" or "highest yield" are not mistaken for "hi".
        if not rest or not (rest[0].isalnum() or rest[0] == "_"):
            return True
    return False
63
 
64
def generate_dynamic_greeting(language: str) -> str:
    """Generate a greeting in *language* using the module-level inference client.

    Args:
        language: Language code ("en", "fr" or "am"). Unknown codes use the
            English prompt and the English fallback.

    Returns:
        The stripped text produced by ``client.chat_completion``. If the call
        fails, or yields no text, a fixed greeting for the language is returned
        instead so that a plain "hello" never crashes the chat.
    """
    import logging

    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ ባለሙያ ቻትቦት ናቸው። "
            "ተጠቃሚው በአማርኛ ሰላም መልእክት አስቀድመዋል። "
            "በአማርኛ ተዛማጅ እና ትክክለኛ የሆነ ሰላም መልእክት ፍጥረት ያድርጉ።"
        )
    }
    # Static greetings used when the model cannot be reached or returns nothing.
    fallback_greetings = {
        "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
        "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
        "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?"
    }

    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    try:
        response = client.chat_completion(
            messages,
            max_tokens=80,
            stream=False,
            temperature=0.7,
            top_p=0.95,
        )
        greeting_message = response.choices[0].message.content
    except Exception:
        # Best-effort boundary: any failure of the remote call (network, auth,
        # unexpected response shape) is logged and replaced by the fallback.
        logging.getLogger(__name__).exception("Dynamic greeting generation failed")
        greeting_message = None

    # content may be None or whitespace-only; treat both as "no text".
    greeting_message = (greeting_message or "").strip()
    return greeting_message or fallback_greetings.get(language, fallback_greetings["en"])
97
+
98
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Generate an out-of-scope reply in *language* using the inference client.

    Args:
        language: Language code ("en", "fr" or "am"). Unknown codes use the
            English prompt and the English fallback.

    Returns:
        The stripped text produced by ``client.chat_completion``. If the call
        fails, or yields no text, a fixed out-of-scope message for the language
        is returned instead so the user always receives a reply.
    """
    import logging

    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        )
    }
    # Static replies used when the model cannot be reached or returns nothing.
    fallback_messages = {
        "en": "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
        "fr": "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
        "am": "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።"
    }

    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    try:
        response = client.chat_completion(
            messages,
            max_tokens=80,
            stream=False,
            temperature=0.7,
            top_p=0.95,
        )
        out_message = response.choices[0].message.content
    except Exception:
        # Best-effort boundary: any failure of the remote call (network, auth,
        # unexpected response shape) is logged and replaced by the fallback.
        logging.getLogger(__name__).exception("Dynamic out-of-scope message generation failed")
        out_message = None

    # content may be None or whitespace-only; treat both as "no text".
    out_message = (out_message or "").strip()
    return out_message or fallback_messages.get(language, fallback_messages["en"])
133
 
 
134
def is_domain_query(query: str) -> bool:
    """Check whether *query* mentions an agriculture or agro-investment keyword.

    Matching is case-insensitive and on whole words. A trailing "s" or "es" is
    tolerated so that plurals such as "crops" or "investments" are recognized.

    Args:
        query: Raw user message. Empty or None is treated as out of domain.

    Returns:
        True if at least one domain keyword occurs in the query.
    """
    if not query:
        return False

    domain_keywords = [
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock", "agroalimentaire", "agriculture durable",
        "greenhouse", "horticulture", "pesticide", "fertilizer",
        "rural development", "food production", "crop yield", "farm equipment",
        "agronomy", "farming techniques", "organic farming", "agro-tech",
        "farm management", "agrifood"
    ]
    # One alternation instead of one search per keyword; keywords are escaped
    # so that any regex metacharacter in the list is matched literally.
    alternatives = "|".join(re.escape(keyword) for keyword in domain_keywords)
    pattern = r"\b(?:" + alternatives + r")(?:s|es)?\b"
    return re.search(pattern, query, re.IGNORECASE) is not None
148
 
149
  def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
150
  """
151
+ Retrieve a relevant snippet from the text based on the query.
 
 
152
  """
153
  sentences = re.split(r'[.?!]', text)
154
  for sentence in sentences:
 
158
  return ""
159
 
160
  # --- Chat Assistant Response Function ---
161
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
    """Stream the assistant's reply for one chat turn.

    This is a generator: each yielded value is the full reply text so far.

    Args:
        message: The new user message.
        history: Earlier turns, either as (user, assistant) pairs or as dicts
            with "role" and "content" keys. May be empty or None.
        system_message: System prompt placed at the start of the context.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.
        language: Language code used for greeting detection and canned replies.

    Yields:
        A greeting or an out-of-scope message (single yield), or the
        progressively accumulated model answer.
    """
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return

    # If query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return

    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-format history: forward plain-text user/assistant turns.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages_list.append({"role": role, "content": content})
            continue
        user_msg, assistant_msg = turn
        if user_msg:
            messages_list.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_list.append({"role": "assistant", "content": assistant_msg})

    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})

    messages_list.append({"role": "user", "content": message})

    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not partial_response.choices:
            continue
        token = partial_response.choices[0].delta.content
        # A chunk may carry no text (content is None); skip it rather than
        # failing on str + None.
        if not token:
            continue
        response_text += token
        yield response_text
201
 
202
  # --- Gradio Chat Interface ---
 
 
203
# Chat UI. The additional inputs are listed in the same order as the extra
# parameters of respond(): system_message, max_tokens, temperature, top_p,
# language.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            label="System Message",
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.",
        ),
        gr.Slider(label="Max New Tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Dropdown(label="Language", choices=["en", "fr", "am"], value="en"),
    ],
)
216