Futuresony committed
Commit 6aee846 · verified · 1 Parent(s): 82b90a8

Update app.py

Files changed (1)
  1. app.py +54 -138
app.py CHANGED
@@ -12,8 +12,7 @@ import gspread
 from google.auth import default
 from tqdm import tqdm
 from duckduckgo_search import DDGS
-import spacy
-from pathlib import Path
+# Removed spacy and pathlib imports
 import base64
 
 # Suppress warnings
@@ -38,7 +37,7 @@ PRE_PASS1_BUSINESS_PART_LOOKUP_THRESHOLD = 0.5
 # --- Global variables to load once ---
 tokenizer = None
 model = None
-nlp = None # SpaCy model
+# Removed nlp = None
 embedder = None # Sentence Transformer
 data = [] # Google Sheet data
 descriptions = []
@@ -46,27 +45,7 @@ embeddings = torch.tensor([]) # Google Sheet embeddings
 
 # --- Loading Functions (Run once on startup) ---
 
-def load_spacy_model():
-    """Loads or downloads the spaCy model."""
-    model_name = "en_core_web_sm"
-    try:
-        print(f"Loading spaCy model '{model_name}'...")
-        nlp_model = spacy.load(model_name)
-        print(f"SpaCy model '{model_name}' loaded.")
-        return nlp_model
-    except OSError:
-        print(f"SpaCy model '{model_name}' not found locally. Attempting download...")
-        # For HF Spaces, ensuring it's in requirements.txt is key.
-        # We'll assume requirements.txt handles installation, and try loading again.
-        print("Assuming 'en_core_web_sm' is installed via requirements.txt. Attempting to load...")
-        try:
-            nlp_model = spacy.load(model_name)
-            print(f"SpaCy model '{model_name}' loaded after assumed installation.")
-            return nlp_model
-        except Exception as e:
-            print(f"Failed to load spaCy model '{model_name}' after assumed installation: {e}")
-            print("SpaCy will not be available.")
-            return None # Return None if loading fails
+# Removed load_spacy_model function
 
 def load_sentence_transformer():
     """Loads the Sentence Transformer model."""
@@ -92,7 +71,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
         key_dict = json.loads(key_bytes)
 
         # Authenticate using the service account key
-        # Use service_account.Credentials.from_service_account_info directly
         from google.oauth2 import service_account
         creds = service_account.Credentials.from_service_account_info(key_dict)
         client = gspread.authorize(creds)
@@ -118,7 +96,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
         descriptions = [row["Description"] for row in filtered_data]
         print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
 
-        # embeddings will be encoded after embedder is loaded
         return filtered_data, descriptions, None # Return descriptions, embeddings encoded later
 
     except gspread.exceptions.SpreadsheetNotFound:
@@ -131,7 +108,7 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
 
 
 def load_llm_model(model_id, hf_token):
-    """Loads the LLM in full precision (for CPU).""" # Modified description
+    """Loads the LLM in full precision (for CPU)."""
     print(f"Loading model {model_id} in full precision...")
     if not hf_token:
         print("Error: HF_TOKEN secret is not set. Cannot load Hugging Face model.")
@@ -142,12 +119,10 @@ def load_llm_model(model_id, hf_token):
         if llm_tokenizer.pad_token is None:
             llm_tokenizer.pad_token = llm_tokenizer.eos_token
 
-        # Load the model without quantization config
         llm_model = AutoModelForCausalLM.from_pretrained(
             model_id,
             token=hf_token,
             device_map="auto", # This will likely map to 'cpu'
-            # Removed quantization_config=bnb_config
         )
 
         print(f"Model {model_id} loaded in full precision.")
@@ -155,17 +130,15 @@ def load_llm_model(model_id, hf_token):
 
     except Exception as e:
         print(f"Error loading model {model_id}: {e}")
-        # Removed specific bitsandbytes message
         print("Please ensure transformers, trl, peft, and accelerate are installed.")
         print("Check your Hugging Face token.")
-        # Do not raise, return None to allow app to start without LLM
         return None, None
 
 # --- Load all assets on startup ---
 print("Loading assets...")
-nlp = load_spacy_model()
+# Removed nlp = load_spacy_model()
 embedder = load_sentence_transformer()
-data, descriptions, _ = load_google_sheet_data(SHEET_ID, GOOGLE_SERVICE_ACCOUNT_KEY_BASE64) # Load data and descriptions first
+data, descriptions, _ = load_google_sheet_data(SHEET_ID, GOOGLE_SERVICE_ACCOUNT_KEY_BASE64)
 
 if embedder and descriptions:
     print("Encoding Google Sheet descriptions...")
@@ -174,25 +147,24 @@ if embedder and descriptions:
         print("Encoding complete.")
     except Exception as e:
         print(f"Error during embedding: {e}")
-        embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor on error
+        embeddings = torch.tensor([])
 else:
     print("Skipping embedding due to missing embedder or descriptions.")
-    embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor if no descriptions
+    embeddings = torch.tensor([])
 
 model, tokenizer = load_llm_model(model_id, HF_TOKEN)
 
-# Check if essential components loaded
-if not model or not tokenizer or not embedder or not nlp:
+# Check if essential components loaded (Removed nlp from this check)
+if not model or not tokenizer or not embedder:
     print("\nERROR: Essential components failed to load. The application may not function correctly.")
     if not model: print("- LLM Model failed to load.")
     if not tokenizer: print("- LLM Tokenizer failed to load.")
     if not embedder: print("- Sentence Embedder failed to load.")
-    if not nlp: print("- spaCy Model failed to load.")
+    # Removed spaCy error message
     # Continue, but the main inference function will need checks
 
-# --- Helper Functions (from your script) ---
+# --- Helper Functions ---
 
-# Function to perform DuckDuckGo Search and return results with URLs
 def perform_duckduckgo_search(query, max_results=3):
     """
     Performs a search using DuckDuckGo and returns a list of dictionaries.
@@ -200,16 +172,15 @@ def perform_duckduckgo_search(query, max_results=3):
     """
     search_results_list = []
     try:
-        time.sleep(1) # Add a delay before each search
+        time.sleep(1)
         with DDGS() as ddgs:
             for r in ddgs.text(query, max_results=max_results):
-                search_results_list.append(r) # Append the dictionary directly
+                search_results_list.append(r)
     except Exception as e:
-        print(f"Error during DuckDuckgo search for '{query}': {e}")
+        print(f"Error during DuckDuckGo search for '{query}': {e}")
         return []
     return search_results_list
 
-# Function to retrieve relevant business info
 def retrieve_business_info(query, data, embeddings, embedder, threshold=0.50):
     """
     Retrieves relevant business information based on query similarity.
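For reference, each result yielded by `duckduckgo_search`'s `DDGS.text()` is a plain dict with `title`, `href`, and `body` keys; the `href` values are what `respond()` later collects for the Sources list. A small usage sketch:

```python
from duckduckgo_search import DDGS

# Same call pattern as perform_duckduckgo_search above.
with DDGS() as ddgs:
    results = list(ddgs.text("hugging face spaces", max_results=3))

for r in results:
    print(r["title"], r["href"])  # 'body' holds the text snippet
```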
@@ -236,26 +207,19 @@ def retrieve_business_info(query, data, embeddings, embedder, threshold=0.50):
         print(f"Error during business information retrieval: {e}")
         return None, 0.0
 
-# Function to split user query into potential sub-queries using spaCy
+# Alternative split_query function without spaCy
 def split_query(query):
-    """Splits a user query into potential sub-queries using spaCy."""
-    if nlp is None:
-        print("SpaCy model not loaded. Cannot split query.")
-        return [query] # Return original query if nlp is not available
 
-    try:
-        doc = nlp(query)
-        sentences = [sent.text.strip() for sent in doc.sents]
-        if len(sentences) == 1:
-            parts = re.split(r',| and (who|what|where|when|why|how|is|are|can|tell me about)|;', query, flags=re.IGNORECASE)
-            parts = [part.strip() for part in parts if part is not None and part.strip()]
-            if len(parts) <= 1:
-                return [query]
-            return parts
-        return sentences
-    except Exception as e:
-        print(f"Error during query splitting: {e}")
-        return [query] # Return original query on error
+    """Splits a user query into potential sub-queries using regex."""
+    # This regex splits on common separators like comma, semicolon, and conjunctions followed by interrogative words
+    parts = re.split(r',|;|\band\s+(?:who|what|where|when|why|how|is|are|can|tell me about)\b', query, flags=re.IGNORECASE)
+    # Filter out empty strings and strip whitespace
+    parts = [part.strip() for part in parts if part and part.strip()]
+
+    # If splitting didn't produce multiple meaningful parts, return the original query
+    if len(parts) <= 1:
+        return [query]
+
+    return parts
 
 # --- Pass 1 System Prompt ---
 pass1_instructions_action = """You are a helpful assistant for a business. Your primary goal in this first step is to analyze the user's query and decide which actions are needed to answer it.
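One behavioral note on the new regex-based `split_query`: the old pattern wrapped the interrogative words in a capturing group, so `re.split` returned them as extra parts (hence the old `part is not None` filter), while the new `(?:...)` group is non-capturing, and the matched conjunction plus interrogative word are simply consumed. A quick sketch of the new behavior:

```python
import re

def split_query(query):
    """Regex-based splitter, as introduced in this commit."""
    parts = re.split(r',|;|\band\s+(?:who|what|where|when|why|how|is|are|can|tell me about)\b',
                     query, flags=re.IGNORECASE)
    parts = [part.strip() for part in parts if part and part.strip()]
    return parts if len(parts) > 1 else [query]

print(split_query("What services do you offer, and what are your opening hours?"))
# ['What services do you offer', 'are your opening hours?']  <- 'and what' was consumed by the split
print(split_query("Tell me about pricing"))
# ['Tell me about pricing']  <- single part, original query returned unchanged
```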
@@ -300,26 +264,24 @@ When search results were used to answer the question, list the URLs from the search results used.
 """
 
 # --- Main Inference Function for Gradio ---
-# This function will be called every time the user submits a query
-# chat_history is now a parameter managed by Gradio's State
 def respond(user_input, chat_history):
     """
     Processes user input, performs actions (lookup/search), and generates a response.
     Manages chat history within Gradio state.
     """
-    # Check if models loaded successfully
-    if model is None or tokenizer is None or embedder is None or nlp is None:
-        return "", chat_history + [(user_input, "Sorry, the application failed to load necessary components. Please try again later or contact the administrator.")] # Return empty string for input, updated history
+    # Check if models loaded successfully (Removed nlp from this check)
+    if model is None or tokenizer is None or embedder is None:
+        return "", chat_history + [(user_input, "Sorry, the application failed to load necessary components. Please try again later or contact the administrator.")]
 
     original_user_input = user_input
 
     # Initialize action results containers for this turn
     search_results_dicts = []
     business_lookup_results_formatted = []
-    response_pass1_raw = "" # To store the raw actions generated by Pass 1
+    response_pass1_raw = ""
 
     # --- Pre-Pass 1: Programmatic Business Info Check for Query Parts ---
-    query_parts = split_query(original_user_input)
+    query_parts = split_query(original_user_input) # This now uses the regex split
    business_check_results = []
    overall_pre_pass1_score = 0.0
@@ -357,15 +319,14 @@ def respond(user_input, chat_history):
 
 
     if is_likely_direct_answer:
         print("Programmatically determined likely direct answer.")
-        response_pass1_raw = f"ACTION: ANSWER_DIRECTLY: " # Signal Pass 2
+        response_pass1_raw = f"ACTION: ANSWER_DIRECTLY: "
 
     else:
         pass1_user_message_content = pass1_instructions_action.format(
             business_check_summary=business_check_summary,
-            PRE_PASS1_BUSINESS_PART_LOOKUP_THRESHOLD=PRE_PASS1_BUSINESS_PART_LOOKUP_THRESHOLD # Pass threshold to prompt
+            PRE_PASS1_BUSINESS_PART_LOOKUP_THRESHOLD=PRE_PASS1_BUSINESS_PART_LOOKUP_THRESHOLD
         ) + "\n\nUser Query: " + user_input
 
-        # Create a temporary history for Pass 1 focusing only on the current turn's user query and instructions
         temp_chat_history_pass1 = [{"role": "user", "content": pass1_user_message_content}]
 
         try:
@@ -374,9 +335,6 @@ def respond(user_input, chat_history):
                 tokenize=False,
                 add_generation_prompt=True
             )
-            # print("\n--- Pass 1 Prompt ---") # Debug print
-            # print(prompt_pass1)
-            # print("---------------------")
 
             generation_config_pass1 = GenerationConfig(
                 max_new_tokens=200,
@@ -387,8 +345,8 @@ def respond(user_input, chat_history):
                 use_cache=True
             )
 
-            input_ids_pass1 = tokenizer(prompt_pass1, return_tensors="pt").input_ids # Removed .to(model.device) as device_map="auto" handles it
-            if model and input_ids_pass1.numel() > 0: # Added check for model
+            input_ids_pass1 = tokenizer(prompt_pass1, return_tensors="pt").input_ids
+            if model and input_ids_pass1.numel() > 0:
                 outputs_pass1 = model.generate(
                     input_ids=input_ids_pass1,
                     generation_config=generation_config_pass1,
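The dropped `.to(model.device)` is generally harmless on a CPU-only Space, where `device_map="auto"` resolves to CPU and the tokenizer's tensors already live there. If the Space ever moves to a GPU, a more defensive pattern would be needed; a sketch reusing the surrounding variables (assumes a single-device placement, not accelerate-style sharding):

```python
# Defensive variant (not what this commit does): keep inputs on the model's device.
inputs = tokenizer(prompt_pass1, return_tensors="pt")
device = getattr(model, "device", "cpu")  # single-device models expose .device
input_ids = inputs.input_ids.to(device)
outputs = model.generate(input_ids=input_ids, generation_config=generation_config_pass1)
```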
@@ -398,23 +356,15 @@ def respond(user_input, chat_history):
                 generated_tokens_pass1 = outputs_pass1[0, prompt_length_pass1:]
                 response_pass1_raw = tokenizer.decode(generated_tokens_pass1, skip_special_tokens=True).strip()
             else:
-                response_pass1_raw = "" # No actions generated
+                response_pass1_raw = ""
         else:
-            response_pass1_raw = "" # Empty input or model not loaded
-
-            # print("\n--- Raw Pass 1 Response ---") # Debug print
-            # print(response_pass1_raw)
-            # print("--------------------------")
-
+            response_pass1_raw = ""
 
         except Exception as e:
             print(f"Error during Pass 1 (Action Identification): {e}")
-            # If Pass 1 fails, fallback to attempting a direct answer in Pass 2
             response_pass1_raw = f"ACTION: ANSWER_DIRECTLY: Error in Pass 1 - {e}"
 
-
     # --- Parse Model's Requested Actions with Validation ---
-    # Always parse even if flagged for direct answer to handle potential Pass 1 errors
     if response_pass1_raw:
         lines = response_pass1_raw.strip().split('\n')
         for line in lines:
@@ -422,7 +372,6 @@ def respond(user_input, chat_history):
             if line.startswith(SEARCH_MARKER):
                 query = line[len(SEARCH_MARKER):].strip()
                 if query:
-                    # Validate SEARCH Action
                     _, score = retrieve_business_info(query, data, embeddings, embedder, threshold=0.0)
                     if score < SEARCH_VALIDATION_THRESHOLD:
                         requested_actions.append(("SEARCH", query))
@@ -432,8 +381,7 @@ def respond(user_input, chat_history):
             elif line.startswith(BUSINESS_LOOKUP_MARKER):
                 query = line[len(BUSINESS_LOOKUP_MARKER):].strip()
                 if query:
-                    # Validate Business Lookup Query
-                    match, score = retrieve_business_info(query, data, embeddings, embedder, threshold=0.0) # Use low threshold for scoring
+                    match, score = retrieve_business_info(query, data, embeddings, embedder, threshold=0.0)
                     if score > BUSINESS_LOOKUP_VALIDATION_THRESHOLD:
                         requested_actions.append(("LOOKUP_BUSINESS_INFO", query))
                         print(f"Validated Business Lookup Action for '{query}' (Score: {score:.4f})")
@@ -441,13 +389,11 @@ def respond(user_input, chat_history):
                         print(f"Rejected Business Lookup Action for '{query}' (Score: {score:.4f}) - Below validation threshold.")
             elif line.startswith(ANSWER_DIRECTLY_MARKER):
                 answer = line[len(ANSWER_DIRECTLY_MARKER):].strip()
-                answer_directly_provided = answer if answer else original_user_input # Use explicit answer if provided, else original query hint
-                requested_actions = [] # Clear other actions if DIRECT_ANSWER is given
-                break # Exit action parsing loop
+                answer_directly_provided = answer if answer else original_user_input
+                requested_actions = []
+                break
 
     # --- Execute Actions (Search and Lookup) ---
-    # Only execute actions if ANSWER_DIRECTLY was NOT the primary outcome of Pass 1
-    # and there are validated requested actions.
     context_for_pass2 = ""
 
     if requested_actions:
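Pass 1 is expected to emit one action per line using the marker constants defined earlier in app.py (outside this diff). Assuming marker values of the form shown below (inferred from the `ACTION: ANSWER_DIRECTLY:` string above; the exact constants are an assumption), the parsing loop reduces to:

```python
# Assumed marker values; the real constants live earlier in app.py, outside this diff.
SEARCH_MARKER = "ACTION: SEARCH:"
BUSINESS_LOOKUP_MARKER = "ACTION: LOOKUP_BUSINESS_INFO:"
ANSWER_DIRECTLY_MARKER = "ACTION: ANSWER_DIRECTLY:"

response_pass1_raw = """ACTION: LOOKUP_BUSINESS_INFO: opening hours
ACTION: SEARCH: weather in Dar es Salaam today"""

requested_actions = []
for line in response_pass1_raw.strip().split("\n"):
    line = line.strip()
    if line.startswith(SEARCH_MARKER):
        requested_actions.append(("SEARCH", line[len(SEARCH_MARKER):].strip()))
    elif line.startswith(BUSINESS_LOOKUP_MARKER):
        requested_actions.append(("LOOKUP_BUSINESS_INFO", line[len(BUSINESS_LOOKUP_MARKER):].strip()))
    elif line.startswith(ANSWER_DIRECTLY_MARKER):
        requested_actions = []  # a direct answer clears any other actions
        break

print(requested_actions)
# [('LOOKUP_BUSINESS_INFO', 'opening hours'), ('SEARCH', 'weather in Dar es Salaam today')]
```

(The real loop additionally validates each query against the business embeddings before accepting it, as shown above.)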
@@ -464,7 +410,7 @@ def respond(user_input, chat_history):
 
             elif action_type == "LOOKUP_BUSINESS_INFO":
                 print(f"Performing business info lookup for: '{query}'")
-                match, score = retrieve_business_info(query, data, embeddings, embedder, threshold=retrieve_business_info.__defaults__[0]) # Use default threshold for retrieval
+                match, score = retrieve_business_info(query, data, embeddings, embedder, threshold=retrieve_business_info.__defaults__[0])
                 print(f"Actual lookup score for '{query}': {score:.4f} (Threshold: {retrieve_business_info.__defaults__[0]})")
                 if match:
                     formatted_match = f"""Service: {match.get('Service', 'N/A')}
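The `retrieve_business_info.__defaults__[0]` expression used here simply reuses the function's own default `threshold` (0.50) instead of repeating the literal. For illustration:

```python
def retrieve_business_info(query, data, embeddings, embedder, threshold=0.50):
    ...

# __defaults__ is a tuple of the function's default argument values, left to right.
print(retrieve_business_info.__defaults__)     # (0.5,)
print(retrieve_business_info.__defaults__[0])  # 0.5
```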
@@ -493,20 +439,16 @@ Available: {match.get('Available', 'N/A')}"""
         context_for_pass2 = "Note: No relevant information was found in Business Information or via Search for your query."
         print("Note: No results were found for the requested actions.")
 
-    # If ANSWER_DIRECTLY was determined (either programmatically or by Pass 1 model output)
+    # If ANSWER_DIRECTLY was determined
     if answer_directly_provided is not None:
         print(f"Handling as direct answer: {answer_directly_provided}")
-        # Provide a simple context indicating it's a direct answer scenario
         context_for_pass2 = "Note: This query is a simple request or greeting."
         if answer_directly_provided != original_user_input and answer_directly_provided != "":
             context_for_pass2 += f" Initial suggestion from action step: {answer_directly_provided}"
-        # Ensure no search/lookup results are included if it was flagged as direct answer
         search_results_dicts = []
         business_lookup_results_formatted = []
 
-
-    # If no actions were requested or direct answer flagged, and no results found...
-    # This handles cases where Pass 1 failed or generated nothing useful
+    # If no actions or direct answer, and no results
     if not requested_actions and answer_directly_provided is None:
         if response_pass1_raw.strip():
             print("Warning: Pass 1 did not result in valid actions or a direct answer.")
@@ -514,42 +456,29 @@ Available: {match.get('Available', 'N/A')}"""
         else:
             print("Warning: Pass 1 generated an empty response.")
             context_for_pass2 = "Error: Pass 1 generated an empty response."
-        # In this case, we will still try Pass 2 with the limited context
-
 
     # --- Pass 2: Synthesize and Respond ---
-    final_response = "Sorry, I couldn't generate a response." # Default response on error
+    final_response = "Sorry, I couldn't generate a response."
 
     if model is not None and tokenizer is not None:
         pass2_user_message_content = pass2_instructions_synthesize + "\n\nOriginal User Query: " + original_user_input + "\n\n" + context_for_pass2
 
-        # --- Chat History Management for Pass 2 ---
-        # Gradio's chat history state is [(User1, Bot1), (User2, Bot2), ...]
-        # We need to format the history correctly for the model template
-        # The Pass 2 prompt should build upon the *actual* conversation history, not just the Pass 2 context message.
-        # Let's build the chat history for the model template
         model_chat_history = []
         for user_msg, bot_msg in chat_history:
             model_chat_history.append({"role": "user", "content": user_msg})
             model_chat_history.append({"role": "assistant", "content": bot_msg})
 
-        # Add the *current* user query and the Pass 2 specific content as the latest turn
-        # The Pass 2 instructions and context are part of the *current* user turn's input to the model
         model_chat_history.append({"role": "user", "content": pass2_user_message_content})
 
         try:
             prompt_pass2 = tokenizer.apply_chat_template(
                 model_chat_history,
                 tokenize=False,
-                add_generation_prompt=True # Add the assistant prompt token to start the response
+                add_generation_prompt=True
             )
-            # print("\n--- Pass 2 Prompt ---") # Debug print
-            # print(prompt_pass2)
-            # print("---------------------")
-
 
             generation_config_pass2 = GenerationConfig(
-                max_new_tokens=1500, # Generate a longer response
+                max_new_tokens=1500,
                 do_sample=True,
                 temperature=0.7,
                 top_k=50,
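The history handling above turns Gradio's `[(user, bot), ...]` tuples into the role/content messages that `apply_chat_template` expects. A self-contained sketch (the model id is illustrative; any tokenizer with a chat template behaves the same way):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # illustrative chat model

chat_history = [("Hi", "Hello! How can I help?")]
messages = []
for user_msg, bot_msg in chat_history:
    messages.append({"role": "user", "content": user_msg})
    messages.append({"role": "assistant", "content": bot_msg})
messages.append({"role": "user", "content": "What are your opening hours?"})

# add_generation_prompt=True appends the assistant turn marker so generation starts a reply.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```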
@@ -560,8 +489,8 @@ Available: {match.get('Available', 'N/A')}"""
                 use_cache=True
             )
 
-            input_ids_pass2 = tokenizer(prompt_pass2, return_tensors="pt").input_ids # Removed .to(model.device)
-            if model and input_ids_pass2.numel() > 0: # Added check for model
+            input_ids_pass2 = tokenizer(prompt_pass2, return_tensors="pt").input_ids
+            if model and input_ids_pass2.numel() > 0:
                 outputs_pass2 = model.generate(
                     input_ids=input_ids_pass2,
                     generation_config=generation_config_pass2,
@@ -572,19 +501,16 @@ Available: {match.get('Available', 'N/A')}"""
                 generated_tokens_pass2 = outputs_pass2[0, prompt_length_pass2:]
                 final_response = tokenizer.decode(generated_tokens_pass2, skip_special_tokens=True).strip()
             else:
-                final_response = "..." # Indicate potentially empty response
+                final_response = "..."
         else:
-            final_response = "Error: Model or empty input for Pass 2." # Indicate model not loaded or empty input
-
+            final_response = "Error: Model or empty input for Pass 2."
 
         except Exception as gen_error:
             print(f"Error during model generation in Pass 2: {gen_error}")
             final_response = "Error generating response in Pass 2."
 
-
         # --- Post-process Final Response from Pass 2 ---
         cleaned_response = final_response
-        # Filter out the Pass 2 instructions and context markers that might bleed through
         lines = cleaned_response.split('\n')
         cleaned_lines = [line for line in lines if not line.strip().lower().startswith("business information")
                          and not line.strip().lower().startswith("search results")
@@ -594,37 +520,27 @@ Available: {match.get('Available', 'N/A')}"""
 
         cleaned_response = "\n".join(cleaned_lines).strip()
 
-        # Extract and list URLs from the search results that were actually used
-        # This assumes the model uses the provided snippets with URLs
         urls_to_list = [result.get('href') for result in search_results_dicts if result.get('href')]
-        urls_to_list = list(dict.fromkeys(urls_to_list)) # Remove duplicates
+        urls_to_list = list(dict.fromkeys(urls_to_list))
 
-        # Only add Sources if search was performed AND results were found
        if search_results_dicts and urls_to_list:
             cleaned_response += "\n\nSources:\n" + "\n".join(urls_to_list)
 
         final_response = cleaned_response
 
-        # Check if the final response is empty or just whitespace after cleaning
         if not final_response.strip():
             final_response = "Sorry, I couldn't generate a meaningful response based on the information found."
             print("Warning: Final response was empty after cleaning.")
 
-    else: # Model or tokenizer not loaded (this check is at the very beginning of the function)
+    else:
         final_response = "Sorry, the core language model is not available."
         print("Error: LLM model or tokenizer not loaded for Pass 2.")
 
-
     # --- Update Chat History for Gradio ---
-    # Append the user's original message and the final bot response to the history state
-    # The format is (user_input, bot_response)
     updated_chat_history = chat_history + [(original_user_input, final_response)]
 
-    # Optional: Manage history length
-    max_history_pairs = 10 # Keep last 10 turns (20 messages total)
+    max_history_pairs = 10
     if len(updated_chat_history) > max_history_pairs:
         updated_chat_history = updated_chat_history[-max_history_pairs:]
-    # print(f"History truncated. Keeping last {len(updated_chat_history)} turns.") # Debug print
 
-    # Return the updated history state and an empty string for the input box
     return "", updated_chat_history
 