dolphinium committed
Commit 840c57d · 1 Parent(s): c3741ac

Add history to the chatbot and fix errors in the Solr query generation prompt. TODO: fix code generation for visualizations.

Files changed (1):
  1. app.py +120 -80
app.py CHANGED
@@ -18,6 +18,7 @@ from IPython.display import display, Markdown
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
 
 # --- SSH Tunnel Configuration ---
+# It's recommended to load secrets securely, e.g., from environment variables
 SSH_HOST = os.environ.get('SSH_HOST')
 SSH_PORT = 5322
 SSH_USER = os.environ.get('SSH_USER')
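Note: the tunnel itself is created elsewhere in the file and is not shown in this diff. For reviewers unfamiliar with the setup, here is a minimal sketch of how settings like these are typically used with the `sshtunnel` package; `SSH_PASS` and the remote Solr port `8983` are assumptions for illustration, not values from this commit.

```python
import os
from sshtunnel import SSHTunnelForwarder

# Hypothetical sketch: forward a local port to Solr through the SSH host.
# Password auth and the remote port 8983 are assumed; the real app may differ.
tunnel = SSHTunnelForwarder(
    (os.environ.get('SSH_HOST'), 5322),
    ssh_username=os.environ.get('SSH_USER'),
    ssh_password=os.environ.get('SSH_PASS'),
    remote_bind_address=('127.0.0.1', 8983),
)
tunnel.start()
print(f"Solr reachable at http://127.0.0.1:{tunnel.local_bind_port}/solr")
```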
@@ -35,7 +36,7 @@ SOLR_PASS = os.environ.get('SOLR_PASS')
 try:
     genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
 except Exception as e:
-    print(f"❌ Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in Colab Secrets.")
+    print(f"❌ Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in your environment.")
 
 # --- Global Variables ---
 ssh_tunnel_server = None
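One caveat for this block: a missing environment variable yields `None` rather than raising, and `genai.configure` may not fail until the first request. A small sketch (not part of this commit) of an explicit up-front check:

```python
import os

# Sketch only: validate the secret up front instead of waiting for the first
# API call to fail with a less obvious error.
api_key = os.environ.get('GEMINI_API_KEY')
if not api_key:
    raise RuntimeError("GEMINI_API_KEY is not set in the environment")
```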
@@ -62,7 +63,7 @@ try:
     print(f"✅ Solr connection successful on core '{SOLR_CORE_NAME}'.")
 
     # 3. Initialize the LLM
-    llm_model = genai.GenerativeModel('gemini-2.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
+    llm_model = genai.GenerativeModel('gemini-1.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
     print(f"✅ LLM Model '{llm_model.model_name}' initialized.")
 
     print("✅ System Initialized Successfully.")
@@ -223,49 +224,70 @@ formatted_field_info = format_metadata_for_prompt(field_metadata)
 
 def parse_suggestions_from_report(report_text):
     """Extracts numbered suggestions from the report's markdown text."""
-    # This function remains useful for potentially allowing users to reference suggestions by number, even if we don't force it.
-    suggestions_match = re.search(r"### Suggestions for Further Exploration\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
+    suggestions_match = re.search(r"### (?:Deeper Dive: Suggested Follow-up Analyses|Suggestions for Further Exploration)\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
     if not suggestions_match: return []
     suggestions_text = suggestions_match.group(1)
     suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
     return [s.strip() for s in suggestions]
 
 
-# NEW: Heavily revised prompt for better accuracy using a few-shot example.
-def llm_generate_solr_query(natural_language_query, field_metadata):
-    """Generates a Solr query and facet JSON from a natural language query."""
+def llm_generate_solr_query_with_history(natural_language_query, field_metadata, chat_history):
+    """Generates a Solr query and facet JSON from a natural language query, considering the conversation history."""
+    # Format the chat history for the prompt
+    formatted_history = ""
+    for user_msg, bot_msg in chat_history:
+        # We only need the user's queries for context, not the bot's detailed responses.
+        if user_msg:
+            # CORRECTED: Properly formatted f-string with a newline character
+            formatted_history += f"- User: \"{user_msg}\"\n"
 
     prompt = f"""
 You are an expert Solr query engineer who converts natural language questions into precise Solr JSON Facet API query objects. Your primary goal is to create a valid JSON object with `query` and `json.facet` keys.
 
 ---
-### CONTEXT & RULES
+### CONVERSATIONAL CONTEXT & RULES
 
-1. **Today's Date for Calculations**: {datetime.datetime.now().date().strftime("%Y-%m-%d")}
-2. **Field Usage**: You MUST use the fields described in the 'Field Definitions' section. Pay close attention to the definitions to select the correct field.
-3. **Facet vs. Query Field Distinction**: This is critical.
+1. **Today's Date for Calculations**: 2025-07-16
+2. **Allowed Facet Types**: The `type` key for any facet MUST be one of the following: `terms`, `query`, or `range`. **Do not use `date_histogram`**. For time-series analysis, use a `range` facet on a date field.
+3. **Field Usage**: You MUST use the fields described in the 'Field Definitions' section. Pay close attention to the definitions to select the correct field.
+4. **Facet vs. Query Field Distinction**: This is critical.
    * For searching in the main `query` parameter, ALWAYS use the multi-valued search fields (ending in `_s`, like `company_name_s`) to get comprehensive results.
   * For grouping in a `terms` facet, ALWAYS use the canonical, single-value field (e.g., `company_name`, `molecule_name`) to ensure unique and accurate grouping.
-4. **No `count(*)`**: Do NOT use functions like `count(*)`. The default facet bucket count is sufficient for counting documents.
-5. **Allowed Aggregations**: For statistical facets (`stats` or `stat` type), only use these functions: `sum`, `avg`, `min`, `max`, `unique`. The primary metric field is `total_deal_value_in_million`.
-6. **Term Facet Limits**: Every `terms` facet MUST include a `limit` key. Default to `limit: 10` unless the user specifies a different number of top results.
-7. **Output Format**: Your final output must be a single, raw JSON object and nothing else.
+5. **No `count(*)`**: Do NOT use functions like `count(*)`. The default facet bucket count is sufficient for counting documents.
+6. **Allowed Aggregations**: For statistical facets, only use these functions: `sum`, `avg`, `min`, `max`, `unique`. The primary metric field is `total_deal_value_in_million`. The aggregation MUST be a simple string like `"sum(total_deal_value_in_million)"` and not a nested JSON object.
+7. **Term Facet Limits**: Every `terms` facet MUST include a `limit` key. Default to `limit: 10` unless the user specifies a different number of top results.
+8. **Output Format**: Your final output must be a single, raw JSON object and nothing else. Do not add comments, explanations, or markdown formatting like ```json.
 
 ---
 ### FIELD DEFINITIONS (Your Source of Truth)
 
-{formatted_field_info}
+`{formatted_field_info}`
 ---
-### EXAMPLE
+### CHAT HISTORY
+`{formatted_history}`
+---
+### EXAMPLE OF A FOLLOW-UP QUERY
+
+**Initial User Query:** "What are the infections news in this year?"
+```json
+{{
+  "query": "date_year:2025 AND therapeutic_category_s:infections",
+  "json.facet": {{
+    "infections_news_by_type": {{
+      "type": "terms",
+      "field": "news_type",
+      "limit": 10
+    }}
+  }}
+}}
+```
 
-**User Query:** "What are the infection news in this year, specifically comparing deal values for injection vs oral routes?"
+**Follow-up User Query:** "Compare deal values for injection vs oral."
 
-**Correct JSON Output:**
+**Correct JSON Output for the Follow-up:**
 ```json
 {{
-  "query": "therapeutic_category_s:infections AND date_year:{datetime.datetime.now().year} AND total_deal_value_in_million:[0 TO *]",
+  "query": "therapeutic_category_s:infections AND date_year:2025 AND total_deal_value_in_million:[0 TO *]",
   "json.facet": {{
     "injection_deals": {{
       "type": "query",
@@ -287,44 +309,46 @@ You are an expert Solr query engineer who converts natural language questions in
 ---
 ### YOUR TASK
 
-Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above.
+Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above and considering the chat history.
 
-**User Query:** "{natural_language_query}"
+**Current User Query:** `{natural_language_query}`
 """
     try:
-        # Assuming llm_model is your generative model client
         response = llm_model.generate_content(prompt)
+        # Using a more robust regex to clean the response
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         return json.loads(cleaned_text)
     except Exception as e:
-        print(f"Error in llm_generate_solr_query: {e}\nRaw Response:\n{response.text if 'response' in locals() else 'N/A'}")
+        raw_response_text = response.text if 'response' in locals() else 'N/A'
+        print(f"Error in llm_generate_solr_query_with_history: {e}\nRaw Response:\n{raw_response_text}")
         return None
 
+
 def llm_generate_visualization_code(query_context, facet_data):
     """Generates Python code for visualization based on query and data."""
     prompt = f"""
-You are a Python Data Visualization expert specializing in Matplotlib and Seaborn.
-Your task is to generate Python code to create a single, insightful visualization.
-
-**Context:**
-1. **User's Analytical Goal:** "{query_context}"
-2. **Aggregated Data (from Solr Facets):**
-```json
-{json.dumps(facet_data, indent=2)}
-```
-
-**Instructions:**
-1. **Goal:** Write Python code to generate a chart that best visualizes the answer to the user's goal using the provided data.
-2. **Data Access:** The data is available in a Python dictionary named `facet_data`. Your code must parse this dictionary.
-3. **Code Requirements:**
-    * Start with `import matplotlib.pyplot as plt` and `import seaborn as sns`.
-    * Use `plt.style.use('seaborn-v0_8-whitegrid')` and `fig, ax = plt.subplots(figsize=(12, 7))`. Plot using the `ax` object.
-    * Always include a clear `ax.set_title(...)`, `ax.set_xlabel(...)`, and `ax.set_ylabel(...)`.
-    * Dynamically find the primary facet key and extract the 'buckets'.
-    * For each bucket, extract the 'val' (label) and the relevant metric ('count' or a nested metric).
-    * Use `plt.tight_layout()` and rotate x-axis labels if needed.
-4. **Output Format:** ONLY output raw Python code. Do not wrap it. Do not include `plt.show()` or any explanation.
-"""
+You are a Python Data Visualization expert specializing in Matplotlib and Seaborn.
+Your task is to generate Python code to create a single, insightful visualization.
+
+**Context:**
+1. **User's Analytical Goal:** "{query_context}"
+2. **Aggregated Data (from Solr Facets):**
+```json
+{json.dumps(facet_data, indent=2)}
+```
+
+**Instructions:**
+1. **Goal:** Write Python code to generate a chart that best visualizes the answer to the user's goal using the provided data.
+2. **Data Access:** The data is available in a Python dictionary named `facet_data`. Your code must parse this dictionary.
+3. **Code Requirements:**
+    * Start with `import matplotlib.pyplot as plt` and `import seaborn as sns`.
+    * Use `plt.style.use('seaborn-v0_8-whitegrid')` and `fig, ax = plt.subplots(figsize=(12, 7))`. Plot using the `ax` object.
+    * Always include a clear `ax.set_title(...)`, `ax.set_xlabel(...)`, and `ax.set_ylabel(...)`.
+    * Dynamically find the primary facet key and extract the 'buckets'.
+    * For each bucket, extract the 'val' (label) and the relevant metric ('count' or a nested metric).
+    * Use `plt.tight_layout()` and rotate x-axis labels if needed.
+4. **Output Format:** ONLY output raw Python code. Do not wrap it in ```python ... ```. Do not include `plt.show()` or any explanation.
+"""
     try:
         response = llm_model.generate_content(prompt)
        code = re.sub(r'^```python\s*|\s*```$', '', response.text, flags=re.MULTILINE)
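The fence-stripping regex used in `llm_generate_solr_query_with_history` above is easy to verify in isolation; a minimal, self-contained check:

```python
import json
import re

# Minimal check of the fence-stripping step: the same regex as above, applied
# to a typical fenced model response.
raw = '```json\n{"query": "*:*", "json.facet": {}}\n```'
cleaned = re.sub(r'```json\s*|\s*```', '', raw, flags=re.MULTILINE | re.DOTALL).strip()
assert json.loads(cleaned) == {"query": "*:*", "json.facet": {}}
```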
@@ -339,19 +363,20 @@ def execute_viz_code_and_get_path(viz_code, facet_data):
     try:
         if not os.path.exists('/tmp/plots'): os.makedirs('/tmp/plots')
         plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
-        exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns}
+        # The exec environment needs access to the required libraries and the data
+        exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns, 'pd': pd}
         exec(viz_code, exec_globals)
         fig = exec_globals.get('fig')
         if fig:
             fig.savefig(plot_path, bbox_inches='tight')
-            plt.close(fig)
+            plt.close(fig)  # Important to free up memory
             return plot_path
         return None
     except Exception as e:
         print(f"ERROR executing visualization code: {e}\n---Code---\n{viz_code}")
         return None
 
-# NEW: Enhanced prompt based on expert feedback for a more strategic and insightful report.
+
 def llm_generate_summary_and_suggestions_stream(query_context, facet_data):
     """
     Yields a streaming analytical report and strategic, context-aware suggestions for further exploration.
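The contract between the generated snippet and `execute_viz_code_and_get_path` is implicit: the snippet must leave a `fig` object behind in the globals it runs under. A self-contained sketch of that contract, with made-up chart data:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend, so no display is required
import matplotlib.pyplot as plt

# Sketch of the exec contract used above: the generated code is expected to
# define `fig`; the caller then retrieves and saves it. Sample data only.
viz_code = (
    "import matplotlib.pyplot as plt\n"
    "fig, ax = plt.subplots()\n"
    "ax.bar(['injection', 'oral'], [120, 85])\n"
    "ax.set_title('Deal value by route (sample data)')\n"
)
exec_globals = {"plt": plt}
exec(viz_code, exec_globals)
fig = exec_globals.get("fig")
if fig:
    fig.savefig("/tmp/sample_plot.png", bbox_inches="tight")
    plt.close(fig)
```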
@@ -404,7 +429,6 @@ After the report, create a final section titled `### Deeper Dive: Suggested Foll
 Generate the full report and the strategic suggestions based on the user's question and the data provided.
 """
     try:
-        # Assuming llm_model is your generative model client
         response_stream = llm_model.generate_content(prompt, stream=True)
         for chunk in response_stream:
             yield chunk.text
@@ -412,17 +436,21 @@ Generate the full report and the strategic suggestions based on the user's quest
         print(f"Error in llm_generate_summary_and_suggestions_stream: {e}")
         yield "Sorry, I was unable to generate a summary for this data."
 
-# CHANGED: Reworked the entire function for a simpler, more flexible user-driven flow.
+# CORRECTED: Only one, correctly implemented version of this function remains.
 def process_analysis_flow(user_input, history, state):
     """
     A generator that manages the conversation and yields tuples of UI updates for Gradio.
-    This version treats any user input as a new query.
+    This version treats any user input as a new query and considers conversation history.
     """
     # Initialize state on the first run
     if state is None:
         state = {'query_count': 0, 'last_suggestions': []}
 
-    # Reset UI components for the new analysis
+    # If history is None (from a reset), initialize it as an empty list
+    if history is None:
+        history = []
+
+    # Reset UI components for the new analysis, but keep chat history
     yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False))
 
     query_context = user_input.strip()
@@ -435,8 +463,8 @@ def process_analysis_flow(user_input, history, state):
     history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating Solr query...*"))
     yield (history, state, None, None, None, None)
 
-    # 2. Generate Solr Query
-    llm_solr_obj = llm_generate_solr_query(query_context, field_metadata)
+    # 2. Generate Solr Query with history
+    llm_solr_obj = llm_generate_solr_query_with_history(query_context, field_metadata, history)
     if not llm_solr_obj or 'query' not in llm_solr_obj or 'json.facet' not in llm_solr_obj:
         history.append((None, "I'm sorry, I couldn't generate a valid Solr query for that request. Please try rephrasing your question."))
         yield (history, state, None, None, None, None)
@@ -482,15 +510,21 @@ def process_analysis_flow(user_input, history, state):
     yield (history, state, output_plot, output_report, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
 
     report_text = ""
+    # The history object is not modified during streaming, so we pass it once;
+    # the yield during streaming only updates the report text.
+    stream_history = history[:]  # Make a copy
     for chunk in llm_generate_summary_and_suggestions_stream(query_context, facet_data):
         report_text += chunk
-        yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
+        yield (stream_history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
+
+    # Update the main history with the final report text
+    history.append((None, report_text))
 
     # 6. Finalize and prompt for next action
     state['query_count'] += 1
     state['last_suggestions'] = parse_suggestions_from_report(report_text)
 
-    next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question, pick a suggestion, or ask something new."
+    next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question, or ask something new."
     history.append((None, next_prompt))
     yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
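The `stream_history = history[:]` copy deserves a note: it snapshots the list so the tuples yielded during streaming do not observe the `history.append` that follows. A two-line illustration with placeholder messages:

```python
# Why the shallow copy matters: later appends to `history` are not visible
# through the snapshot yielded during streaming.
history = [("show top deals", "Analyzing...")]
stream_history = history[:]             # snapshot taken before streaming
history.append((None, "final report"))  # happens after the stream ends
print(len(stream_history), len(history))  # -> 1 2
```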
@@ -500,21 +534,19 @@ def process_analysis_flow(user_input, history, state):
         print(f"Error during analysis execution: {e}")
         yield (history, state, None, None, gr.update(value=formatted_query, visible=True), None)
 
+
+# --- Gradio UI ---
 with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
     state = gr.State()
 
     gr.Markdown("# 💊 PharmaCircle AI Data Analyst")
-    # CHANGED: Updated introductory text for the new workflow.
     gr.Markdown("Ask a question to begin your analysis. I will generate a Solr query, retrieve the data, create a visualization, and write a report. You can then ask follow-up questions freely.")
 
     with gr.Row():
        with gr.Column(scale=1):
-            chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True)
-            # CHANGED: Updated placeholder to encourage free-form questions.
+            chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True, avatar_images=(None, "https://pharma-circle.com/images/favicon.png"))
             msg_textbox = gr.Textbox(placeholder="Ask a question, e.g., 'Show me the top 5 companies by total deal value in 2023'", label="Your Question", interactive=True)
             with gr.Row():
-                # REMOVED: The "Start Initial Analysis" button.
-                # CHANGED: The "Clear" button is now the primary action button besides submitting text.
                 clear_button = gr.Button("🔄 Start New Analysis", variant="primary")
 
         with gr.Column(scale=2):
@@ -526,28 +558,36 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}")
         report_display = gr.Markdown("Report will be streamed here...", visible=False)
 
     # --- Event Wiring ---
-    # REMOVED: The click handler for the old start button.
-
-    # This is now the main event handler for all user queries.
-    msg_textbox.submit(
-        fn=process_analysis_flow,
-        inputs=[msg_textbox, chatbot, state],
-        outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display]
-    )
-
     def reset_all():
-        # This function now correctly resets the UI for a completely new session.
+        """Resets the entire UI for a new analysis session."""
         return (
-            None,  # chatbot
-            None,  # state
-            "",    # msg_textbox
+            [],    # chatbot (cleared)
+            None,  # state (reset)
+            "",    # msg_textbox (cleared)
             gr.update(value=None, visible=False),  # plot_display
             gr.update(value=None, visible=False),  # report_display
-            gr.update(value=None, visible=False),  # solr_query_display
-            gr.update(value=None, visible=False)   # solr_data_display
+            gr.update(value=None, visible=False),  # solr_query_display
+            gr.update(value=None, visible=False)   # solr_data_display
         )
 
-    clear_button.click(fn=reset_all, inputs=None, outputs=[chatbot, state, msg_textbox, plot_display, report_display, solr_query_display, solr_data_display], queue=False)
+    # Main event handler for all user queries
+    msg_textbox.submit(
+        fn=process_analysis_flow,
+        inputs=[msg_textbox, chatbot, state],
+        outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display],
+    ).then(
+        lambda: gr.update(value=""),
+        None,
+        [msg_textbox],
+        queue=False,
+    )
+
+    clear_button.click(
+        fn=reset_all,
+        inputs=None,
+        outputs=[chatbot, state, msg_textbox, plot_display, report_display, solr_query_display, solr_data_display],
+        queue=False
+    )
 
 if is_initialized:
     demo.queue().launch(debug=True, share=True)
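One Gradio detail worth calling out in this hunk: the tuple returned by `reset_all` maps positionally onto the `outputs` list of `clear_button.click`, so the two orderings must stay in sync. A standalone sketch of that mapping, using placeholder names rather than the real components:

```python
# Sketch of the positional contract between reset_all's return tuple and the
# `outputs` list wired to clear_button.click. Names are for illustration only.
def reset_all_sketch():
    return ([], None, "", None, None, None, None)

outputs_order = [
    "chatbot", "state", "msg_textbox", "plot_display",
    "report_display", "solr_query_display", "solr_data_display",
]
for name, value in zip(outputs_order, reset_all_sketch()):
    print(f"{name} <- {value!r}")
```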
 