Macdensten91 committed on
Commit c010699 · verified · 1 Parent(s): 553ae67

Update app.py

Files changed (1)
  1. app.py +96 -35
app.py CHANGED
@@ -1,64 +1,125 @@
  import gradio as gr
  from huggingface_hub import InferenceClient

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
      messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

      messages.append({"role": "user", "content": message})

      response = ""
-
-     for message in client.chat_completion(
          messages,
          max_tokens=max_tokens,
          stream=True,
          temperature=temperature,
          top_p=top_p,
      ):
-         token = message.choices[0].delta.content
-
          response += token
          yield response

-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
  demo = gr.ChatInterface(
      respond,
      additional_inputs=[
-         gr.Textbox(value="you are AgriFutureBot, Design to help visitors and users from the Agri Future Invest platform understand more content about the site as well as know more about Agriculture and it's related investments only", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
          ),
      ],
  )

-
  if __name__ == "__main__":
-     demo.launch()
+ import re
+ import time
  import gradio as gr
  from huggingface_hub import InferenceClient

+ # Optional: Enable scraping if your site is deployed.
+ # Set this flag to False until your site is available.
+ ENABLE_SCRAPING = False
+ SITE_URL = "https://your-agri-future-site.com"
+
+ # Global variable to hold scraped content
+ knowledge_base = ""
+
+ # --- Optional: Scraping Functionality ---
+ if ENABLE_SCRAPING:
+     try:
+         from selenium import webdriver
+         from selenium.webdriver.chrome.options import Options
+         from selenium.webdriver.common.by import By
+
+         def scrape_site(url):
+             options = Options()
+             options.headless = True  # Run browser in headless mode.
+             driver = webdriver.Chrome(options=options)
+             driver.get(url)
+             # Use explicit wait in production code; here we use a simple sleep.
+             time.sleep(5)
+             try:
+                 # Customize the selector based on your site’s HTML
+                 content_element = driver.find_element(By.ID, "content")
+                 page_text = content_element.text
+             except Exception as e:
+                 page_text = "Error encountered during scraping: " + str(e)
+             driver.quit()
+             return page_text
+
+         knowledge_base = scrape_site(SITE_URL)
+         print("Scraped knowledge base successfully.")
+     except Exception as e:
+         print("Scraping failed or Selenium is not configured:", e)
+ else:
+     print("Scraping is disabled; proceeding without scraped site content.")
+
+ # --- Domain-Related Helpers ---
+ def is_domain_query(query: str) -> bool:
+     """Check if the query is relevant to agriculture and agro-investment."""
+     domain_keywords = [
+         "agriculture", "farming", "crop", "agro", "investment", "soil",
+         "irrigation", "harvest", "organic", "sustainable", "agribusiness",
+         "livestock"
+     ]
+     return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
+
+ def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
+     """
+     A simple retrieval function that searches for any sentence in the text
+     that contains domain keywords present in the query.
+     Returns a snippet limited to max_length characters.
+     """
+     sentences = re.split(r'[.?!]', text)
+     for sentence in sentences:
+         if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
+             snippet = sentence.strip()
+             if len(snippet) > max_length:
+                 snippet = snippet[:max_length] + "..."
+             return snippet
+     return ""
+
+ # --- Chat Assistant Response Function ---
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+     # Check domain relevance
+     if not is_domain_query(message):
+         yield "I'm sorry, but please ask a question related to agriculture or agro-investment topics."
+         return

+     # Build the conversation context starting with the system message.
      messages = [{"role": "system", "content": system_message}]
+     for user_msg, assistant_msg in history:
+         if user_msg:
+             messages.append({"role": "user", "content": user_msg})
+         if assistant_msg:
+             messages.append({"role": "assistant", "content": assistant_msg})

+     # Optional: Append a retrieval-based context derived from the scraped content.
+     if knowledge_base:
+         snippet = retrieve_relevant_snippet(message, knowledge_base)
+         if snippet:
+             # Prepend additional context for the model to take into account.
+             retrieval_context = f"Reference information from Agri Future Investment platform: {snippet}"
+             messages.insert(0, {"role": "system", "content": retrieval_context})

+     # Append the new user query.
      messages.append({"role": "user", "content": message})

+     # Stream the model's reply token-by-token.
      response = ""
+     for message_resp in client.chat_completion(
          messages,
          max_tokens=max_tokens,
          stream=True,
          temperature=temperature,
          top_p=top_p,
      ):
+         token = message_resp.choices[0].delta.content
          response += token
          yield response

+ # --- Gradio Chat Interface ---
  demo = gr.ChatInterface(
      respond,
      additional_inputs=[
+         gr.Textbox(
+             value="You are AgriFutureBot, designed to help visitors of the Agri Future Investment platform understand content about the site and answer questions strictly related to agriculture and agro-investment topics.",
+             label="System Message"
          ),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
      ],
  )

  if __name__ == "__main__":
+     demo.launch()
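
For a quick sanity check of the keyword filter and snippet retrieval added in this commit, the two helpers can be exercised on their own. The sketch below copies is_domain_query and retrieve_relevant_snippet verbatim from the new app.py and runs them against a made-up sample text; the sample text and the expected outputs in the comments are illustrative, not part of the commit.

# Standalone check of the helpers introduced above (illustrative only;
# the sample text and expected outputs below are hypothetical).
import re

def is_domain_query(query: str) -> bool:
    """Check if the query is relevant to agriculture and agro-investment."""
    domain_keywords = [
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock"
    ]
    return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)

def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first domain-related sentence containing every word of the query."""
    sentences = re.split(r'[.?!]', text)
    for sentence in sentences:
        if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
            snippet = sentence.strip()
            if len(snippet) > max_length:
                snippet = snippet[:max_length] + "..."
            return snippet
    return ""

# Hypothetical stand-in for the scraped knowledge base.
sample_text = (
    "Agri Future Investment lists vetted agribusiness projects. "
    "Drip irrigation can cut water use on smallholder farms. "
    "The newsletter also covers unrelated platform news."
)

print(is_domain_query("How do I invest in organic farming?"))   # True (matches "organic", "farming")
print(is_domain_query("What is the weather like today?"))       # False, so respond() would refuse
print(retrieve_relevant_snippet("drip irrigation", sample_text))
# prints: Drip irrigation can cut water use on smallholder farms

One small usage note: when streaming, delta.content may be None on some chunks (for example the final chunk), so guarding with token or "" before response += token is a defensive option.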