dolphinium committed
Commit 840c57d · 1 Parent(s): c3741ac

Add history to the chatbot and fix errors in the Solr query generation prompt. TODO: fix code generation for visualizations.

Files changed (1):
  1. app.py +120 -80
app.py CHANGED
@@ -18,6 +18,7 @@ from IPython.display import display, Markdown
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
 
 # --- SSH Tunnel Configuration ---
+# It's recommended to load secrets securely, e.g., from environment variables
 SSH_HOST = os.environ.get('SSH_HOST')
 SSH_PORT = 5322
 SSH_USER = os.environ.get('SSH_USER')
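Note: the tunnel itself is created elsewhere in the file and is not shown in this diff. For reviewers unfamiliar with the setup, here is a minimal sketch of how settings like these are typically used with the `sshtunnel` package; `SSH_PASS` and the remote Solr port `8983` are assumptions for illustration, not values from this commit.

```python
import os
from sshtunnel import SSHTunnelForwarder

# Hypothetical sketch: forward a local port to Solr through the SSH host.
# Password auth and the remote port 8983 are assumed; the real app may differ.
tunnel = SSHTunnelForwarder(
    (os.environ.get('SSH_HOST'), 5322),
    ssh_username=os.environ.get('SSH_USER'),
    ssh_password=os.environ.get('SSH_PASS'),
    remote_bind_address=('127.0.0.1', 8983),
)
tunnel.start()
print(f"Solr reachable at http://127.0.0.1:{tunnel.local_bind_port}/solr")
```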
@@ -35,7 +36,7 @@ SOLR_PASS = os.environ.get('SOLR_PASS')
 try:
     genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
 except Exception as e:
-    print(f"❌ Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in Colab Secrets.")
+    print(f"❌ Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in your environment.")
 
 # --- Global Variables ---
 ssh_tunnel_server = None
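One caveat for this block: a missing environment variable yields `None` rather than raising, and `genai.configure` may not fail until the first request. A small sketch (not part of this commit) of an explicit up-front check:

```python
import os

# Sketch only: validate the secret up front instead of waiting for the first
# API call to fail with a less obvious error.
api_key = os.environ.get('GEMINI_API_KEY')
if not api_key:
    raise RuntimeError("GEMINI_API_KEY is not set in the environment")
```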
@@ -62,7 +63,7 @@ try:
     print(f"✅ Solr connection successful on core '{SOLR_CORE_NAME}'.")
 
     # 3. Initialize the LLM
-    llm_model = genai.GenerativeModel('gemini-2.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
+    llm_model = genai.GenerativeModel('gemini-1.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
     print(f"✅ LLM Model '{llm_model.model_name}' initialized.")
 
     print("✅ System Initialized Successfully.")
@@ -223,49 +224,70 @@ formatted_field_info = format_metadata_for_prompt(field_metadata)
 
 def parse_suggestions_from_report(report_text):
     """Extracts numbered suggestions from the report's markdown text."""
-    # This function remains useful for potentially allowing users to reference suggestions by number, even if we don't force it.
-    suggestions_match = re.search(r"### Suggestions for Further Exploration\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
+    suggestions_match = re.search(r"### (?:Deeper Dive: Suggested Follow-up Analyses|Suggestions for Further Exploration)\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
     if not suggestions_match: return []
     suggestions_text = suggestions_match.group(1)
     suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
     return [s.strip() for s in suggestions]
 
 
-# NEW: Heavily revised prompt for better accuracy using a few-shot example.
-def llm_generate_solr_query(natural_language_query, field_metadata):
-    """Generates a Solr query and facet JSON from a natural language query."""
+def llm_generate_solr_query_with_history(natural_language_query, field_metadata, chat_history):
+    """Generates a Solr query and facet JSON from a natural language query, considering the conversation history."""
+    # Format the chat history for the prompt
+    formatted_history = ""
+    for user_msg, bot_msg in chat_history:
+        # We only need the user's queries for context, not the bot's detailed responses.
+        if user_msg:
+            # CORRECTED: Properly formatted f-string with a newline character
+            formatted_history += f"- User: \"{user_msg}\"\n"
 
     prompt = f"""
 You are an expert Solr query engineer who converts natural language questions into precise Solr JSON Facet API query objects. Your primary goal is to create a valid JSON object with `query` and `json.facet` keys.
 
 ---
-### CONTEXT & RULES
+### CONVERSATIONAL CONTEXT & RULES
 
-1. **Today's Date for Calculations**: {datetime.datetime.now().date().strftime("%Y-%m-%d")}
-2. **Field Usage**: You MUST use the fields described in the 'Field Definitions' section. Pay close attention to the definitions to select the correct field.
-3. **Facet vs. Query Field Distinction**: This is critical.
+1. **Today's Date for Calculations**: 2025-07-16
+2. **Allowed Facet Types**: The `type` key for any facet MUST be one of the following: `terms`, `query`, or `range`. **Do not use `date_histogram`**. For time-series analysis, use a `range` facet on a date field.
+3. **Field Usage**: You MUST use the fields described in the 'Field Definitions' section. Pay close attention to the definitions to select the correct field.
+4. **Facet vs. Query Field Distinction**: This is critical.
    * For searching in the main `query` parameter, ALWAYS use the multi-valued search fields (ending in `_s`, like `company_name_s`) to get comprehensive results.
   * For grouping in a `terms` facet, ALWAYS use the canonical, single-value field (e.g., `company_name`, `molecule_name`) to ensure unique and accurate grouping.
-4. **No `count(*)`**: Do NOT use functions like `count(*)`. The default facet bucket count is sufficient for counting documents.
-5. **Allowed Aggregations**: For statistical facets (`stats` or `stat` type), only use these functions: `sum`, `avg`, `min`, `max`, `unique`. The primary metric field is `total_deal_value_in_million`.
-6. **Term Facet Limits**: Every `terms` facet MUST include a `limit` key. Default to `limit: 10` unless the user specifies a different number of top results.
-7. **Output Format**: Your final output must be a single, raw JSON object and nothing else.
+5. **No `count(*)`**: Do NOT use functions like `count(*)`. The default facet bucket count is sufficient for counting documents.
+6. **Allowed Aggregations**: For statistical facets, only use these functions: `sum`, `avg`, `min`, `max`, `unique`. The primary metric field is `total_deal_value_in_million`. The aggregation MUST be a simple string like `"sum(total_deal_value_in_million)"` and not a nested JSON object.
+7. **Term Facet Limits**: Every `terms` facet MUST include a `limit` key. Default to `limit: 10` unless the user specifies a different number of top results.
+8. **Output Format**: Your final output must be a single, raw JSON object and nothing else. Do not add comments, explanations, or markdown formatting like ```json.
 
 ---
 ### FIELD DEFINITIONS (Your Source of Truth)
 
-{formatted_field_info}
+`{formatted_field_info}`
 ---
-### EXAMPLE
+### CHAT HISTORY
+`{formatted_history}`
+---
+### EXAMPLE OF A FOLLOW-UP QUERY
+
+**Initial User Query:** "What are the infections news in this year?"
+```json
+{{
+  "query": "date_year:2025 AND therapeutic_category_s:infections",
+  "json.facet": {{
+    "infections_news_by_type": {{
+      "type": "terms",
+      "field": "news_type",
+      "limit": 10
+    }}
+  }}
+}}
+```
 
-**User Query:** "What are the infection news in this year, specifically comparing deal values for injection vs oral routes?"
+**Follow-up User Query:** "Compare deal values for injection vs oral."
 
-**Correct JSON Output:**
+**Correct JSON Output for the Follow-up:**
 ```json
 {{
-  "query": "therapeutic_category_s:infections AND date_year:{datetime.datetime.now().year} AND total_deal_value_in_million:[0 TO *]",
+  "query": "therapeutic_category_s:infections AND date_year:2025 AND total_deal_value_in_million:[0 TO *]",
   "json.facet": {{
     "injection_deals": {{
       "type": "query",
@@ -287,44 +309,46 @@ You are an expert Solr query engineer who converts natural language questions in
 ---
 ### YOUR TASK
 
-Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above.
+Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above and considering the chat history.
 
-**User Query:** "{natural_language_query}"
+**Current User Query:** `{natural_language_query}`
 """
     try:
-        # Assuming llm_model is your generative model client
         response = llm_model.generate_content(prompt)
+        # Using a more robust regex to clean the response
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         return json.loads(cleaned_text)
     except Exception as e:
-        print(f"Error in llm_generate_solr_query: {e}\nRaw Response:\n{response.text if 'response' in locals() else 'N/A'}")
+        raw_response_text = response.text if 'response' in locals() else 'N/A'
+        print(f"Error in llm_generate_solr_query_with_history: {e}\nRaw Response:\n{raw_response_text}")
         return None
 
+
 def llm_generate_visualization_code(query_context, facet_data):
     """Generates Python code for visualization based on query and data."""
     prompt = f"""
-You are a Python Data Visualization expert specializing in Matplotlib and Seaborn.
-Your task is to generate Python code to create a single, insightful visualization.
-
-**Context:**
-1. **User's Analytical Goal:** "{query_context}"
-2. **Aggregated Data (from Solr Facets):**
-```json
-{json.dumps(facet_data, indent=2)}
-```
-
-**Instructions:**
-1. **Goal:** Write Python code to generate a chart that best visualizes the answer to the user's goal using the provided data.
-2. **Data Access:** The data is available in a Python dictionary named `facet_data`. Your code must parse this dictionary.
-3. **Code Requirements:**
-    * Start with `import matplotlib.pyplot as plt` and `import seaborn as sns`.
-    * Use `plt.style.use('seaborn-v0_8-whitegrid')` and `fig, ax = plt.subplots(figsize=(12, 7))`. Plot using the `ax` object.
-    * Always include a clear `ax.set_title(...)`, `ax.set_xlabel(...)`, and `ax.set_ylabel(...)`.
-    * Dynamically find the primary facet key and extract the 'buckets'.
-    * For each bucket, extract the 'val' (label) and the relevant metric ('count' or a nested metric).
-    * Use `plt.tight_layout()` and rotate x-axis labels if needed.
-4. **Output Format:** ONLY output raw Python code. Do not wrap it. Do not include `plt.show()` or any explanation.
-"""
+You are a Python Data Visualization expert specializing in Matplotlib and Seaborn.
+Your task is to generate Python code to create a single, insightful visualization.
+
+**Context:**
+1. **User's Analytical Goal:** "{query_context}"
+2. **Aggregated Data (from Solr Facets):**
+```json
+{json.dumps(facet_data, indent=2)}
+```
+
+**Instructions:**
+1. **Goal:** Write Python code to generate a chart that best visualizes the answer to the user's goal using the provided data.
+2. **Data Access:** The data is available in a Python dictionary named `facet_data`. Your code must parse this dictionary.
+3. **Code Requirements:**
+    * Start with `import matplotlib.pyplot as plt` and `import seaborn as sns`.
+    * Use `plt.style.use('seaborn-v0_8-whitegrid')` and `fig, ax = plt.subplots(figsize=(12, 7))`. Plot using the `ax` object.
+    * Always include a clear `ax.set_title(...)`, `ax.set_xlabel(...)`, and `ax.set_ylabel(...)`.
+    * Dynamically find the primary facet key and extract the 'buckets'.
+    * For each bucket, extract the 'val' (label) and the relevant metric ('count' or a nested metric).
+    * Use `plt.tight_layout()` and rotate x-axis labels if needed.
+4. **Output Format:** ONLY output raw Python code. Do not wrap it in ```python ... ```. Do not include `plt.show()` or any explanation.
+"""
     try:
         response = llm_model.generate_content(prompt)
        code = re.sub(r'^```python\s*|\s*```$', '', response.text, flags=re.MULTILINE)
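The fence-stripping regex used in `llm_generate_solr_query_with_history` above is easy to verify in isolation; a minimal, self-contained check:

```python
import json
import re

# Minimal check of the fence-stripping step: the same regex as above, applied
# to a typical fenced model response.
raw = '```json\n{"query": "*:*", "json.facet": {}}\n```'
cleaned = re.sub(r'```json\s*|\s*```', '', raw, flags=re.MULTILINE | re.DOTALL).strip()
assert json.loads(cleaned) == {"query": "*:*", "json.facet": {}}
```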
@@ -339,19 +363,20 @@ def execute_viz_code_and_get_path(viz_code, facet_data):
     try:
         if not os.path.exists('/tmp/plots'): os.makedirs('/tmp/plots')
         plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
-        exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns}
+        # The exec environment needs access to the required libraries and the data
+        exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns, 'pd': pd}
         exec(viz_code, exec_globals)
         fig = exec_globals.get('fig')
         if fig:
             fig.savefig(plot_path, bbox_inches='tight')
-            plt.close(fig)
+            plt.close(fig)  # Important to free up memory
             return plot_path
         return None
     except Exception as e:
         print(f"ERROR executing visualization code: {e}\n---Code---\n{viz_code}")
         return None
 
-# NEW: Enhanced prompt based on expert feedback for a more strategic and insightful report.
+
 def llm_generate_summary_and_suggestions_stream(query_context, facet_data):
     """
     Yields a streaming analytical report and strategic, context-aware suggestions for further exploration.
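The contract between the generated snippet and `execute_viz_code_and_get_path` is implicit: the snippet must leave a `fig` object behind in the globals it runs under. A self-contained sketch of that contract, with made-up chart data:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend, so no display is required
import matplotlib.pyplot as plt

# Sketch of the exec contract used above: the generated code is expected to
# define `fig`; the caller then retrieves and saves it. Sample data only.
viz_code = (
    "import matplotlib.pyplot as plt\n"
    "fig, ax = plt.subplots()\n"
    "ax.bar(['injection', 'oral'], [120, 85])\n"
    "ax.set_title('Deal value by route (sample data)')\n"
)
exec_globals = {"plt": plt}
exec(viz_code, exec_globals)
fig = exec_globals.get("fig")
if fig:
    fig.savefig("/tmp/sample_plot.png", bbox_inches="tight")
    plt.close(fig)
```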
@@ -404,7 +429,6 @@ After the report, create a final section titled `### Deeper Dive: Suggested Foll
 Generate the full report and the strategic suggestions based on the user's question and the data provided.
 """
     try:
-        # Assuming llm_model is your generative model client
         response_stream = llm_model.generate_content(prompt, stream=True)
         for chunk in response_stream:
             yield chunk.text
@@ -412,17 +436,21 @@ Generate the full report and the strategic suggestions based on the user's quest
         print(f"Error in llm_generate_summary_and_suggestions_stream: {e}")
         yield "Sorry, I was unable to generate a summary for this data."
 
-# CHANGED: Reworked the entire function for a simpler, more flexible user-driven flow.
+# CORRECTED: Only one, correctly implemented version of this function remains.
 def process_analysis_flow(user_input, history, state):
     """
     A generator that manages the conversation and yields tuples of UI updates for Gradio.
-    This version treats any user input as a new query.
+    This version treats any user input as a new query and considers conversation history.
     """
     # Initialize state on the first run
     if state is None:
         state = {'query_count': 0, 'last_suggestions': []}
 
-    # Reset UI components for the new analysis
+    # If history is None (from a reset), initialize it as an empty list
+    if history is None:
+        history = []
+
+    # Reset UI components for the new analysis, but keep chat history
     yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False))
 
     query_context = user_input.strip()
@@ -435,8 +463,8 @@ def process_analysis_flow(user_input, history, state):
     history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating Solr query...*"))
     yield (history, state, None, None, None, None)
 
-    # 2. Generate Solr Query
-    llm_solr_obj = llm_generate_solr_query(query_context, field_metadata)
+    # 2. Generate Solr Query with history
+    llm_solr_obj = llm_generate_solr_query_with_history(query_context, field_metadata, history)
     if not llm_solr_obj or 'query' not in llm_solr_obj or 'json.facet' not in llm_solr_obj:
         history.append((None, "I'm sorry, I couldn't generate a valid Solr query for that request. Please try rephrasing your question."))
         yield (history, state, None, None, None, None)
@@ -482,15 +510,21 @@ def process_analysis_flow(user_input, history, state):
     yield (history, state, output_plot, output_report, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
 
     report_text = ""
+    # The history object is not modified during streaming, so we pass it once;
+    # the yield during streaming only updates the report text.
+    stream_history = history[:]  # Make a copy
     for chunk in llm_generate_summary_and_suggestions_stream(query_context, facet_data):
         report_text += chunk
-        yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
+        yield (stream_history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
+
+    # Update the main history with the final report text
+    history.append((None, report_text))
 
     # 6. Finalize and prompt for next action
     state['query_count'] += 1
     state['last_suggestions'] = parse_suggestions_from_report(report_text)
 
-    next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question, pick a suggestion, or ask something new."
+    next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question, or ask something new."
     history.append((None, next_prompt))
     yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
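The `stream_history = history[:]` copy deserves a note: it snapshots the list so the tuples yielded during streaming do not observe the `history.append` that follows. A two-line illustration with placeholder messages:

```python
# Why the shallow copy matters: later appends to `history` are not visible
# through the snapshot yielded during streaming.
history = [("show top deals", "Analyzing...")]
stream_history = history[:]             # snapshot taken before streaming
history.append((None, "final report"))  # happens after the stream ends
print(len(stream_history), len(history))  # -> 1 2
```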
@@ -500,21 +534,19 @@ def process_analysis_flow(user_input, history, state):
         print(f"Error during analysis execution: {e}")
         yield (history, state, None, None, gr.update(value=formatted_query, visible=True), None)
 
+
+# --- Gradio UI ---
 with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
     state = gr.State()
 
     gr.Markdown("# 💊 PharmaCircle AI Data Analyst")
-    # CHANGED: Updated introductory text for the new workflow.
     gr.Markdown("Ask a question to begin your analysis. I will generate a Solr query, retrieve the data, create a visualization, and write a report. You can then ask follow-up questions freely.")
 
     with gr.Row():
        with gr.Column(scale=1):
-            chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True)
-            # CHANGED: Updated placeholder to encourage free-form questions.
+            chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True, avatar_images=(None, "https://pharma-circle.com/images/favicon.png"))
             msg_textbox = gr.Textbox(placeholder="Ask a question, e.g., 'Show me the top 5 companies by total deal value in 2023'", label="Your Question", interactive=True)
             with gr.Row():
-                # REMOVED: The "Start Initial Analysis" button.
-                # CHANGED: The "Clear" button is now the primary action button besides submitting text.
                 clear_button = gr.Button("🔄 Start New Analysis", variant="primary")
 
         with gr.Column(scale=2):
@@ -526,28 +558,36 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}")
         report_display = gr.Markdown("Report will be streamed here...", visible=False)
 
     # --- Event Wiring ---
-    # REMOVED: The click handler for the old start button.
-
-    # This is now the main event handler for all user queries.
-    msg_textbox.submit(
-        fn=process_analysis_flow,
-        inputs=[msg_textbox, chatbot, state],
-        outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display]
-    )
-
     def reset_all():
-        # This function now correctly resets the UI for a completely new session.
+        """Resets the entire UI for a new analysis session."""
         return (
-            None,  # chatbot
-            None,  # state
-            "",    # msg_textbox
+            [],    # chatbot (cleared)
+            None,  # state (reset)
+            "",    # msg_textbox (cleared)
             gr.update(value=None, visible=False),  # plot_display
             gr.update(value=None, visible=False),  # report_display
-            gr.update(value=None, visible=False),  # solr_query_display
-            gr.update(value=None, visible=False)   # solr_data_display
+            gr.update(value=None, visible=False),  # solr_query_display
+            gr.update(value=None, visible=False)   # solr_data_display
         )
 
-    clear_button.click(fn=reset_all, inputs=None, outputs=[chatbot, state, msg_textbox, plot_display, report_display, solr_query_display, solr_data_display], queue=False)
+    # Main event handler for all user queries
+    msg_textbox.submit(
+        fn=process_analysis_flow,
+        inputs=[msg_textbox, chatbot, state],
+        outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display],
+    ).then(
+        lambda: gr.update(value=""),
+        None,
+        [msg_textbox],
+        queue=False,
+    )
+
+    clear_button.click(
+        fn=reset_all,
+        inputs=None,
+        outputs=[chatbot, state, msg_textbox, plot_display, report_display, solr_query_display, solr_data_display],
+        queue=False
+    )
 
 if is_initialized:
     demo.queue().launch(debug=True, share=True)
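One Gradio detail worth calling out in this hunk: the tuple returned by `reset_all` maps positionally onto the `outputs` list of `clear_button.click`, so the two orderings must stay in sync. A standalone sketch of that mapping, using placeholder names rather than the real components:

```python
# Sketch of the positional contract between reset_all's return tuple and the
# `outputs` list wired to clear_button.click. Names are for illustration only.
def reset_all_sketch():
    return ([], None, "", None, None, None, None)

outputs_order = [
    "chatbot", "state", "msg_textbox", "plot_display",
    "report_display", "solr_query_display", "solr_data_display",
]
for name, value in zip(outputs_order, reset_all_sketch()):
    print(f"{name} <- {value!r}")
```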
 