Macdensten91 committed on
Commit
0980116
·
verified ·
1 Parent(s): f0915ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -71
app.py CHANGED
@@ -1,93 +1,177 @@
1
- import random
2
  import re
 
 
 
3
  from huggingface_hub import InferenceClient
4
 
5
- # Initialize the InferenceClient with your Hugging Face API token
6
- client = InferenceClient(
7
- model="HuggingFaceH4/zephyr-7b-beta", # Specify your model here
8
- token="your_huggingface_api_token" # Replace with your actual token
9
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Multilingual greetings dictionary
12
- greetings = {
13
- "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
14
- "fr": ["bonjour", "salut", "coucou", "bonsoir"],
15
- "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
16
- }
17
 
 
18
  def is_greeting(query: str, lang: str) -> bool:
19
- """
20
- Check if the user's query is a greeting in the specified language.
21
- """
 
 
 
22
  greet_list = greetings.get(lang, greetings["en"])
23
- # Convert to lowercase for non-Amharic languages
24
  if lang != "am":
25
  query = query.lower()
26
  return any(query.startswith(greet) for greet in greet_list)
27
 
28
- def generate_dynamic_out_of_scope_message(language: str) -> str:
29
- """
30
- Generate a dynamic out-of-scope message using the Hugging Face Inference API.
31
- """
32
- # Define language-specific system prompts
33
- system_prompts = {
34
- "en": (
35
- "You are a helpful chatbot specializing in agriculture and agro-investment. "
36
- "A user has asked a question unrelated to these topics. "
37
- "Generate a friendly and intelligent out-of-scope response in English, encouraging the user to ask about agriculture or agro-investment."
38
- ),
39
- "fr": (
40
- "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
41
- "Un utilisateur a posé une question sans rapport avec ces sujets. "
42
- "Générez une réponse amicale et intelligente en français, encourageant l'utilisateur à poser des questions sur l'agriculture ou les investissements agroalimentaires."
43
- ),
44
- "am": (
45
- "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ የሚሰራ እገዛ የሚሰጥ ቻትቦት ነው። "
46
- "ተጠቃሚው ከእነዚህ ጉዳዮች ውጪ ጥያቄ አቀርቧል። "
47
- "በአማርኛ የተሰጠ የውጭ ክፍል ምላሽ ይፍጠሩ፣ ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ለመጠየቅ ያበረታታ።"
48
- )
49
  }
50
- prompt = system_prompts.get(language, system_prompts["en"])
51
- messages = [{"role": "system", "content": prompt}]
52
-
53
- # Call the model to generate the response
54
- response = client.chat_completion(
55
- messages,
56
- max_tokens=80,
57
- temperature=0.7,
58
- top_p=0.95,
59
- )
60
- # Extract the generated message content
61
- try:
62
- out_message = response.choices[0].message.content
63
- except AttributeError:
64
- out_message = str(response)
65
- return out_message.strip()
66
 
 
67
  def is_domain_query(query: str) -> bool:
68
- """
69
- Determine if the query is related to agriculture or agro-investment.
70
- """
71
  domain_keywords = [
72
  "agriculture", "farming", "crop", "agro", "investment", "soil",
73
  "irrigation", "harvest", "organic", "sustainable", "agribusiness",
74
- "livestock", "agroalimentaire", "agriculture durable"
 
75
  ]
76
  return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
77
 
78
- def handle_user_query(query: str, lang: str = "en") -> str:
79
  """
80
- Process the user's query and provide an appropriate response.
 
 
81
  """
82
- if is_greeting(query, lang):
83
- return random.choice(greetings.get(lang, greetings["en"])).capitalize() + "!"
84
- elif is_domain_query(query):
85
- # Here you would integrate your domain-specific response generation
86
- return "This is a domain-specific question. Processing accordingly..."
87
- else:
88
- return generate_dynamic_out_of_scope_message(lang)
89
-
90
- # Example usage
91
- user_query = "Tell me about space travel."
92
- response = handle_user_query(user_query, lang="en")
93
- print(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
+ import time
3
+ import random
4
+ import gradio as gr
5
  from huggingface_hub import InferenceClient
6
 
# Optional: enable scraping once the site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Text scraped from SITE_URL. Stays empty when scraping is disabled or fails,
# so downstream code can simply test its truthiness.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Selenium is imported lazily so the app still starts without it.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Return the visible text of the element with id "content" at *url*.

            Any Selenium error propagates to the caller instead of being
            returned as page text, so an error message can never end up in
            the knowledge base. The browser is always shut down.
            """
            options = Options()
            # `Options.headless` was removed from Selenium 4; pass the flag.
            options.add_argument("--headless=new")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production code; here we use a simple sleep.
                time.sleep(5)
                # Customize the selector as per your site's HTML structure.
                return driver.find_element(By.ID, "content").text
            finally:
                driver.quit()

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        # Top-level boundary: a missing Selenium install, a missing driver, or
        # a failed scrape must not stop the chat app from starting.
        knowledge_base = ""
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
43
 
44
+ # --- Multilingual Helpers ---
 
 
 
 
 
45
 
# Greeting openers recognised for each supported language code.
_GREETINGS = {
    "en": ("hello", "hi", "hey", "good morning", "good afternoon", "good evening"),
    "fr": ("bonjour", "salut", "coucou", "bonsoir"),
    "am": ("ሰላም", "ሰላም እንደምን", "እንዴት"),
}


def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Args:
        query: Raw user text. Leading/trailing whitespace is ignored.
        lang: Language code such as "en", "fr" or "am". Case, surrounding
            whitespace and a region suffix ("fr-FR") are ignored; unknown
            codes fall back to the English greeting list.

    Returns:
        True only if the text starts with a greeting that ends on a word
        boundary, so "hi there" matches but "history" and "higher" do not.
    """
    if not query:
        return False

    lang = (lang or "").strip().lower().replace("_", "-").partition("-")[0]
    greet_list = _GREETINGS.get(lang, _GREETINGS["en"])

    text = query.strip()
    # For Amharic, no transformation; for Latin scripts, convert to lower case.
    if lang != "am":
        text = text.lower()

    for greet in greet_list:
        if not text.startswith(greet):
            continue
        # The character right after the greeting (empty at end of text) must
        # not continue a word, otherwise "hi" would match "history".
        following = text[len(greet):len(greet) + 1]
        if not following.isalnum():
            return True
    return False
59
 
# Canned out-of-scope replies per language code, built once at import time.
_OUT_OF_SCOPE_MESSAGES = {
    "en": (
        "I appreciate your curiosity. However, my expertise lies exclusively in agricultural and agro-investment insights. Could you please frame your question accordingly?",
        "That’s an interesting thought, but I'm tailored specifically for topics concerning agriculture and agro-investment. Please ask a question within that realm.",
        "While I value your inquiry, I'm optimized to provide insights solely on agriculture and related investment matters. Could you rephrase your query to align with these topics?",
        "It appears your question may not be directly tied to agriculture or agro-investment. Please ask something along those lines so I can assist effectively.",
    ),
    "fr": (
        "J'apprécie votre curiosité. Cependant, mon expertise se limite exclusivement aux informations sur l'agriculture et les investissements agroalimentaires. Pourriez-vous reformuler votre question en ce sens ?",
        "C'est une pensée intéressante, mais je suis spécialisé dans les domaines de l'agriculture et des investissements agroalimentaires. Merci de poser une question dans ce domaine.",
        "Bien que votre question soit pertinente, je me concentre uniquement sur l'agriculture et les investissements associés. Pourriez-vous reformuler votre demande en conséquence ?",
        "Votre interrogation semble éloignée de l'agriculture ou des investissements agroalimentaires. Merci de poser une question dans ces domaines pour que je puisse vous aider efficacement.",
    ),
    "am": (
        "እባክዎ ልጠይቁት ጥያቄ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ዙሪያ መሆኑን አላስቀምጥም። እባኮትን ጥያቄዎን እንደዚህ በማቅረብ ደግሞ ይሞክሩ።",
        "ልዩ ጥያቄዎችን ማቅረብ ይፈልጋሉ እንጂ፣ እኔ በተለይ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች ላይ ብቻ እንደሚሰራ ተዘጋጅቻለሁ። እባክዎ ጥያቄዎን በእነዚህ ክስተቶች ውስጥ ያቅርቡ።",
        "እንደምታዩት ጥያቄዎ በግብርና ወይም በአገልግሎት ስርዓተ-ቢዝነስ ላይ የተመረጠ አይደለም። እባክዎ በዚህ አውድ የሆነ ጥያቄ ይጠይቁ።",
    ),
}


def get_out_of_scope_message(lang: str) -> str:
    """Return a randomly chosen out-of-scope reply in language *lang*.

    Args:
        lang: Language code such as "en", "fr" or "am". Case, surrounding
            whitespace and a region suffix ("fr-FR") are ignored so a known
            language is not silently answered in English; unknown or empty
            codes fall back to English.

    Returns:
        One of the canned messages for the resolved language.
    """
    lang = (lang or "").strip().lower().replace("_", "-").partition("-")[0]
    candidates = _OUT_OF_SCOPE_MESSAGES.get(lang, _OUT_OF_SCOPE_MESSAGES["en"])
    return random.choice(candidates)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
# Keywords that mark a query as agriculture / agro-investment related.
# Mostly English, with a few French terms; can be expanded.
_DOMAIN_KEYWORDS = (
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock",
    "agroalimentaire", "agriculture durable",
)

# One alternation compiled once: whole-word match, optional plural "s",
# case-insensitive. Keywords are escaped so they are always taken literally.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(kw) for kw in _DOMAIN_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """Return True when *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and on whole words, so "crop" and "crops"
    match but "outcrop" does not. Empty or missing input is never a domain
    query.
    """
    if not query:
        return False
    return _DOMAIN_PATTERN.search(query) is not None
93
 
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first sentence of *text* that covers the query's domain keywords.

    The query is split into words (punctuation dropped, lower-cased). Words
    that `is_domain_query` recognises are used as search terms; if none is
    recognised, every query word is used instead. A sentence qualifies when
    it is itself domain-related and contains all search terms as substrings.

    Args:
        query: The user's question.
        text: The corpus to search, split into sentences on ".", "?" and "!".
        max_length: Maximum snippet length in characters before truncation.

    Returns:
        The stripped sentence, cut to *max_length* characters with a trailing
        "..." when it was longer, or "" when nothing matches or either input
        is empty.
    """
    if not query or not text:
        return ""

    # Tokenise on word characters so "crops?" still yields the word "crops".
    words = re.findall(r"\w+", query.lower())
    terms = [word for word in words if is_domain_query(word)] or words
    if not terms:
        # Without this guard all() below would be vacuously true.
        return ""

    for sentence in re.split(r"[.?!]", text):
        lowered = sentence.lower()
        if is_domain_query(sentence) and all(term in lowered for term in terms):
            snippet = sentence.strip()
            if len(snippet) > max_length:
                return snippet[:max_length] + "..."
            return snippet
    return ""
106
+
# --- Chat Assistant Response Function ---
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language):
    """Generate the assistant's reply to *message*, yielding it incrementally.

    Greetings and out-of-scope questions are answered with canned text and
    never reach the model. Domain questions are sent to the inference client
    together with the system message, optional reference text retrieved from
    `knowledge_base`, and the previous conversation turns.

    Args:
        message: The new user message.
        history: Previous turns, either (user, assistant) pairs or
            {"role": ..., "content": ...} dicts.
        system_message: System prompt placed first in the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.
        language: Language code, expected to be "en", "fr" or "am"; case,
            whitespace and a region suffix are ignored.

    Yields:
        The reply text so far; each yielded value supersedes the previous one.
    """
    lang = (language or "").strip().lower().replace("_", "-").partition("-")[0]

    # Check for a greeting in the appropriate language.
    if is_greeting(message, lang):
        greetings = {
            "en": "Hello! How can I assist you today with your agriculture or agro-investment inquiries?",
            "fr": "Bonjour! Comment puis-je vous aider aujourd'hui en matière d'agriculture ou d'investissements agroalimentaires?",
            "am": "ሰላም! ዛሬ ስለ ግብርና ወይም ስለ አገልግሎት ስርዓተ-ቢዝነስ ጥያቄዎች እንዴት ልረዳዎት?",
        }
        yield greetings.get(lang, greetings["en"])
        return

    # If the query is not recognized as domain related, return an out-of-scope message.
    if not is_domain_query(message):
        yield get_out_of_scope_message(lang)
        return

    # Build conversation context starting with the system message.
    messages_context = [{"role": "system", "content": system_message}]

    # Optional: add retrieved context from scraped site content. It goes right
    # after the primary system message so that prompt keeps its leading spot.
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference info from Agri Future Investment platform: {snippet}"
            messages_context.append({"role": "system", "content": retrieval_context})

    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style history entry.
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and content:
                messages_context.append({"role": role, "content": content})
            continue
        # Tuple-style history entry: (user text, assistant text).
        user_msg, assistant_msg = turn
        if user_msg:
            messages_context.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_context.append({"role": "assistant", "content": assistant_msg})

    # Append the new user message.
    messages_context.append({"role": "user", "content": message})

    # Stream the model's reply token-by-token.
    response = ""
    for chunk in client.chat_completion(
        messages_context,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some chunks (e.g. the closing one) carry no text; concatenating
        # None would raise TypeError, so skip them.
        if token:
            response += token
            yield response
158
+
# --- Gradio Chat Interface ---

# Extra controls rendered with the chat box. They are passed to `respond`
# after (message, history) in exactly this order.
_system_message_box = gr.Textbox(
    value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
    label="System Message",
)
_max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
_temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
# Language codes: "en" for English, "fr" for French, "am" for Amharic.
_language_dropdown = gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language (en, fr, am)")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
        _language_dropdown,
    ],
)

if __name__ == "__main__":
    demo.launch()