Macdensten91 committed on
Commit
6ac4ea2
·
verified ·
1 Parent(s): c010699

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -25
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import re
2
  import time
 
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
 
6
  # Optional: Enable scraping if your site is deployed.
7
- # Set this flag to False until your site is available.
8
  ENABLE_SCRAPING = False
9
  SITE_URL = "https://your-agri-future-site.com"
10
 
@@ -23,10 +23,10 @@ if ENABLE_SCRAPING:
23
  options.headless = True # Run browser in headless mode.
24
  driver = webdriver.Chrome(options=options)
25
  driver.get(url)
26
- # Use explicit wait in production code; here we use a simple sleep.
27
  time.sleep(5)
28
  try:
29
- # Customize the selector based on your sites HTML
30
  content_element = driver.find_element(By.ID, "content")
31
  page_text = content_element.text
32
  except Exception as e:
@@ -41,63 +41,112 @@ if ENABLE_SCRAPING:
41
  else:
42
  print("Scraping is disabled; proceeding without scraped site content.")
43
 
44
- # --- Domain-Related Helpers ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def is_domain_query(query: str) -> bool:
46
- """Check if the query is relevant to agriculture and agro-investment."""
47
  domain_keywords = [
48
  "agriculture", "farming", "crop", "agro", "investment", "soil",
49
  "irrigation", "harvest", "organic", "sustainable", "agribusiness",
50
- "livestock"
 
51
  ]
52
  return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
53
 
54
  def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
55
  """
56
- A simple retrieval function that searches for any sentence in the text
57
- that contains domain keywords present in the query.
58
  Returns a snippet limited to max_length characters.
59
  """
60
  sentences = re.split(r'[.?!]', text)
61
  for sentence in sentences:
62
  if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
63
  snippet = sentence.strip()
64
- if len(snippet) > max_length:
65
- snippet = snippet[:max_length] + "..."
66
- return snippet
67
  return ""
68
 
69
  # --- Chat Assistant Response Function ---
70
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
71
 
72
- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
73
- # Check domain relevance
 
 
 
 
 
 
 
 
 
 
 
 
74
  if not is_domain_query(message):
75
- yield "I'm sorry, but please ask a question related to agriculture or agro-investment topics."
76
  return
77
 
78
- # Build the conversation context starting with the system message.
79
- messages = [{"role": "system", "content": system_message}]
80
  for user_msg, assistant_msg in history:
81
  if user_msg:
82
- messages.append({"role": "user", "content": user_msg})
83
  if assistant_msg:
84
- messages.append({"role": "assistant", "content": assistant_msg})
85
 
86
- # Optional: Append a retrieval-based context derived from the scraped content.
87
  if knowledge_base:
88
  snippet = retrieve_relevant_snippet(message, knowledge_base)
89
  if snippet:
90
- # Prepend additional context for the model to take into account.
91
- retrieval_context = f"Reference information from Agri Future Investment platform: {snippet}"
92
- messages.insert(0, {"role": "system", "content": retrieval_context})
93
 
94
- # Append the new user query.
95
- messages.append({"role": "user", "content": message})
96
 
97
  # Stream the model's reply token-by-token.
98
  response = ""
99
  for message_resp in client.chat_completion(
100
- messages,
101
  max_tokens=max_tokens,
102
  stream=True,
103
  temperature=temperature,
@@ -108,6 +157,8 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
108
  yield response
109
 
110
  # --- Gradio Chat Interface ---
 
 
111
  demo = gr.ChatInterface(
112
  respond,
113
  additional_inputs=[
@@ -118,6 +169,7 @@ demo = gr.ChatInterface(
118
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
119
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
120
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
 
121
  ],
122
  )
123
 
 
1
  import re
2
  import time
3
+ import random
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
 
7
  # Optional: Enable scraping if your site is deployed.
 
8
  ENABLE_SCRAPING = False
9
  SITE_URL = "https://your-agri-future-site.com"
10
 
 
23
  options.headless = True # Run browser in headless mode.
24
  driver = webdriver.Chrome(options=options)
25
  driver.get(url)
26
+ # Use explicit waits in production code; here we use a simple sleep.
27
  time.sleep(5)
28
  try:
29
+ # Customize the selector as per your site's HTML structure.
30
  content_element = driver.find_element(By.ID, "content")
31
  page_text = content_element.text
32
  except Exception as e:
 
41
  else:
42
  print("Scraping is disabled; proceeding without scraped site content.")
43
 
44
+ # --- Multilingual Helpers ---
45
+
46
+ # Language-specific greeting detection
47
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Args:
        query: Raw user message.
        lang: Language code ("en", "fr" or "am"). Any other value falls
            back to the English greeting list.

    Returns:
        True if the message, ignoring leading whitespace, starts with a known
        greeting that ends on a word boundary; False otherwise (including for
        an empty message).
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    if not query:
        return False

    # Retrieve greetings for the provided language; default to English if unavailable.
    greet_list = greetings.get(lang, greetings["en"])

    text = query.strip()
    # For Amharic, no transformation; for Latin scripts, compare case-insensitively.
    if lang != "am":
        text = text.lower()

    for greet in greet_list:
        if not text.startswith(greet):
            continue
        # A bare prefix test would accept "history ..." or "high ..." as the
        # greeting "hi"; require the greeting to be followed by the end of the
        # message or by a non-alphanumeric character (space, punctuation).
        following = text[len(greet):len(greet) + 1]
        if not following.isalnum():
            return True
    return False
59
+
60
+ # Language-specific out-of-scope messages
61
def get_out_of_scope_message(lang: str) -> str:
    """Return a randomly chosen out-of-scope reply in the requested language.

    Args:
        lang: Language code such as "en", "fr" or "am". The code is matched
            case-insensitively and region suffixes are ignored, so "FR",
            "fr-FR" and "fr_FR" all select French. Unknown or empty codes
            fall back to English.

    Returns:
        One of the predefined polite refusal messages for that language.
    """
    messages = {
        "en": [
            "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
            "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
            "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
            "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
        ],
        "fr": [
            "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
            "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
            "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
            "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
        ],
        "am": [
            "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
            "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
            "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።",
        ],
    }
    # Reduce locale-style tags ("fr-FR", "FR", "fr_FR") to the primary
    # lowercase subtag so they do not silently fall back to English.
    code = (lang or "").strip().lower().replace("_", "-").split("-")[0]
    # Return a random message for the given language; default to English if not available.
    return random.choice(messages.get(code, messages["en"]))
83
+
84
def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions an agriculture / agro-investment term.

    Matching is case-insensitive and on whole words. A trailing plural "s" is
    accepted, so "crops" or "investments" count as domain terms, while
    unrelated words that merely contain a keyword ("soiled") do not. The
    keyword list is mostly English with a few French terms.

    Args:
        query: Free text to test (a user message or a sentence).

    Returns:
        True if at least one domain keyword occurs in the text; False
        otherwise, including for empty input.
    """
    domain_keywords = (
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock",  # additional English keywords
        "agroalimentaire", "agriculture durable",  # French terms
    )
    if not query:
        return False

    # One alternation instead of one regex search per keyword. Keywords are
    # escaped so they are always matched literally, and longer keywords come
    # first so multi-word terms are tried before their prefixes.
    ordered = sorted(domain_keywords, key=len, reverse=True)
    alternation = "|".join(re.escape(keyword) for keyword in ordered)
    pattern = r"\b(?:" + alternation + r")s?\b"
    return re.search(pattern, query, re.IGNORECASE) is not None
93
 
94
  def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
95
  """
96
+ A simple retrieval function that searches for sentences in the text
97
+ containing domain keywords from the query.
98
  Returns a snippet limited to max_length characters.
99
  """
100
  sentences = re.split(r'[.?!]', text)
101
  for sentence in sentences:
102
  if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
103
  snippet = sentence.strip()
104
+ return snippet[:max_length] + "..." if len(snippet) > max_length else snippet
 
 
105
  return ""
106
 
107
  # --- Chat Assistant Response Function ---
108
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
109
 
110
+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
111
+ # language is expected as a string code: "en", "fr", or "am"
112
+
113
+ # Check for a greeting in the appropriate language.
114
+ if is_greeting(message, language):
115
+ greetings = {
116
+ "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
117
+ "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
118
+ "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?"
119
+ }
120
+ yield greetings.get(language, greetings["en"])
121
+ return
122
+
123
+ # If the query is not recognized as domain related, return an out-of-scope message.
124
  if not is_domain_query(message):
125
+ yield get_out_of_scope_message(language)
126
  return
127
 
128
+ # Build conversation context starting with the system message.
129
+ messages_context = [{"role": "system", "content": system_message}]
130
  for user_msg, assistant_msg in history:
131
  if user_msg:
132
+ messages_context.append({"role": "user", "content": user_msg})
133
  if assistant_msg:
134
+ messages_context.append({"role": "assistant", "content": assistant_msg})
135
 
136
+ # Optional: Append retrieved context from scraped site content.
137
  if knowledge_base:
138
  snippet = retrieve_relevant_snippet(message, knowledge_base)
139
  if snippet:
140
+ retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
141
+ messages_context.insert(0, {"role": "system", "content": retrieval_context})
 
142
 
143
+ # Append the new user message.
144
+ messages_context.append({"role": "user", "content": message})
145
 
146
  # Stream the model's reply token-by-token.
147
  response = ""
148
  for message_resp in client.chat_completion(
149
+ messages_context,
150
  max_tokens=max_tokens,
151
  stream=True,
152
  temperature=temperature,
 
157
  yield response
158
 
159
  # --- Gradio Chat Interface ---
160
+
161
+ # The language selection dropdown uses language codes: "en" for English, "fr" for French, "am" for Amharic.
162
  demo = gr.ChatInterface(
163
  respond,
164
  additional_inputs=[
 
169
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
170
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
171
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
172
+ gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language (en, fr, am)")
173
  ],
174
  )
175