Spaces:

ksg-dfci
/

trial_search_alpha

Running on CPU Upgrade

App Files Files Community

kenlkehl committed on Dec 21, 2024

Commit

0bf83d7

verified ·

1 Parent(s): a45c425

Upload app.py

Browse files

Files changed (1) hide show

app.py +89 -40

app.py CHANGED Viewed

@@ -23,7 +23,6 @@ checker_pipe = pipeline('text-classification', 'ksg-dfci/TrialChecker', tokenize
                         truncation=True, padding='max_length', max_length=512)
 import gradio as gr
 import pandas as pd
 import torch
@@ -37,17 +36,17 @@ import tempfile
 # trial_spaces (DataFrame), embedding_model (SentenceTransformer),
 # trial_space_embeddings (torch.tensor), checker_pipe (transformers pipeline)
-def match_clinical_trials_text(patient_summary: str):
     """
     1) Perform the trial matching and classification.
-    2) Return a free-text representation of the results.
-    3) Also return the DataFrame for CSV export in a second output.
     """
-    # Encode the patient summary
     patient_embedding = embedding_model.encode([patient_summary], convert_to_tensor=True)
     # Compute similarities
-    similarities = F.cosine_similarity(patient_embedding, trial_space_embeddings)
     # Pull top 10
     sorted_similarities, sorted_indices = torch.sort(similarities, descending=True)
@@ -80,7 +79,7 @@ def match_clinical_trials_text(patient_summary: str):
     analysis['trial_checker_result'] = [x['label'] for x in classifier_results]
     analysis['trial_checker_score'] = [x['score'] for x in classifier_results]
-    # Subset of final columns
     out_df = analysis[[
         'patient_summary_query',
         'nct_id',
@@ -92,38 +91,91 @@ def match_clinical_trials_text(patient_summary: str):
         'trial_checker_score'
     ]]
-    # Convert the DataFrame rows into a free-text summary
-    text_output_lines = []
-    for idx, row in out_df.iterrows():
-        text_block = (
-            f"=== Result #{idx + 1} ===\n"
-            f"Patient Summary: {row['patient_summary_query']}\n"
-            f"NCT ID: {row['nct_id']}\n"
-            f"Title: {row['trial_title']}\n"
-            f"Brief Summary: {row['trial_brief_summary']}\n"
-            f"Eligibility Criteria: {row['trial_eligibility_criteria']}\n"
-            f"Trial Space: {row['this_space']}\n"
-            f"Checker Result: {row['trial_checker_result']}\n"
-            f"Checker Score: {row['trial_checker_score']}\n"
-            "------------------------------\n"
-        )
-        text_output_lines.append(text_block)
-    # Combine into a single multi-line string
-    final_text_output = "".join(text_output_lines)
-    # Return (free text, DataFrame for export)
-    return final_text_output, out_df
 def export_results(df: pd.DataFrame):
     """
     Save the results DataFrame to a temporary CSV file and return its path
     so Gradio can provide it as a downloadable file.

     Args:
         df: Results to export. ``None`` is accepted (for example when no
             results have been stored yet) and yields ``None``.

     Returns:
         Path of the CSV file that was written, or ``None`` if ``df`` is None.
         The file is created with ``delete=False``, so it is not removed
         automatically.
     """
     if df is None:
         # Nothing to export; avoid an AttributeError on ``None.to_csv``.
         return None
     # Only the unique path is needed: close the handle before pandas reopens
     # the file by name, so no descriptor is leaked and the write also works
     # on platforms that refuse to reopen a file that is still open.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp:
         csv_path = temp.name
     df.to_csv(csv_path, index=False)
     return csv_path
 custom_css = """
 #input_box textarea {
     width: 600px !important;
@@ -132,47 +184,44 @@ custom_css = """
 """
 with gr.Blocks(css=custom_css) as demo:
     # UI definition: declares the input, result and export components and
     # registers the two click handlers further down.
-    # Display some introductory text
     gr.HTML("""
-    <h3>Alpha Version of Clinical Trial Search (Free Text Output)</h3>
     <p>Based on clinicaltrials.gov cancer trials export 10/31/24</p>
     <p>Queries take approximately 30 seconds to run.</p>
     """)
-    # Input box for the patient summary
     # elem_id matches the "#input_box" selector used in custom_css.
     patient_summary_input = gr.Textbox(
         label="Enter Patient Summary",
         elem_id="input_box",
         value="70M with metastatic lung adenocarcinoma, KRAS G12C mutation, PD-L1 high, previously treated with pembrolizumab."
     )
-    # Button to start matching
     submit_btn = gr.Button("Find Matches")
-    # We'll store the DataFrame in a state for CSV export
     # NOTE(review): gr.State() is created without an initial value, so
     # export_results presumably receives None if "Export Results" is clicked
     # before any search has run -- confirm.
     results_state = gr.State()
-    # Free-text output (multi-line)
-    output_text = gr.Textbox(label="Results (Free Text)", lines=20, interactive=False)
-    # Button to export the CSV
     export_btn = gr.Button("Export Results")
-    # On submit, run match_clinical_trials_text
     submit_btn.click(
-        fn=match_clinical_trials_text,
         inputs=patient_summary_input,
-        outputs=[output_text, results_state]
     )
-    # On export, convert state (DataFrame) to a downloadable CSV
     # The gr.File output component is created inline in this call rather
     # than being bound to a name.
     export_btn.click(
         fn=export_results,
         inputs=results_state,
         outputs=gr.File(label="Download CSV")
     )
-    # Enable queue so there's a visible "Processing..."
     demo.queue()
 if __name__ == "__main__":

                         truncation=True, padding='max_length', max_length=512)
 import gradio as gr
 import pandas as pd
 import torch
 # trial_spaces (DataFrame), embedding_model (SentenceTransformer),
 # trial_space_embeddings (torch.tensor), checker_pipe (transformers pipeline)
+def match_clinical_trials_collapsible(patient_summary: str):
     """
     1) Perform the trial matching and classification.
+    2) Generate an HTML string with collapsible items for each trial.
+    3) Return (collapsible_html, df_for_export).
     """
+    # Encode user input
     patient_embedding = embedding_model.encode([patient_summary], convert_to_tensor=True)
     # Compute similarities
+    similarities = torch.nn.functional.cosine_similarity(patient_embedding, trial_space_embeddings)
     # Pull top 10
     sorted_similarities, sorted_indices = torch.sort(similarities, descending=True)
     analysis['trial_checker_result'] = [x['label'] for x in classifier_results]
     analysis['trial_checker_score'] = [x['score'] for x in classifier_results]
+    # Subset final columns
     out_df = analysis[[
         'patient_summary_query',
         'nct_id',
         'trial_checker_score'
     ]]
+    # Convert DataFrame to collapsible HTML
+    collapsible_html = df_to_collapsible_html(out_df)
+    # Return the HTML plus the DataFrame for CSV export
+    return collapsible_html, out_df
def df_to_collapsible_html(df: pd.DataFrame) -> str:
    """
    Build an HTML string that shows each trial as an accordion-like item.

    Each row of ``df`` becomes a clickable header (running number, NCT ID and
    title) followed by an initially hidden block with the remaining details.
    The header's ``onclick`` calls ``toggleAccordion`` to show or hide it.

    Args:
        df: One row per trial. Must provide the columns ``nct_id``,
            ``trial_title``, ``trial_brief_summary``,
            ``trial_eligibility_criteria``, ``this_space``,
            ``trial_checker_result`` and ``trial_checker_score``.

    Returns:
        The ``<style>`` block, the ``<script>`` block and one wrapping
        ``<div>`` holding all items, concatenated into a single string.
        An empty ``df`` yields an empty wrapping ``<div>``.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value) -> str:
        # Cell values are free text; escape them so characters such as
        # "<", ">" and "&" are displayed literally instead of being parsed
        # as markup.
        return html.escape(str(value))

    # Basic styling for the accordion
    css = """
    <style>
    .accordion-header {
      cursor: pointer;
      background-color: #f2f2f2;
      padding: 8px;
      margin-bottom: 4px;
      border: 1px solid #ccc;
      font-weight: bold;
    }
    .accordion-content {
      display: none;
      border-left: 2px solid #ccc;
      margin-left: 10px;
      padding-left: 10px;
      padding-top: 4px;
      padding-bottom: 4px;
      margin-bottom: 10px;
    }
    </style>
    """
    # JavaScript for toggling the display of each accordion content
    # NOTE(review): browsers do not execute <script> elements that are
    # inserted via innerHTML; confirm that the component rendering this HTML
    # actually runs the script, otherwise toggleAccordion is undefined.
    script = """
    <script>
    function toggleAccordion(contentId) {
      var content = document.getElementById(contentId);
      if (content.style.display === "none" || content.style.display === "") {
        content.style.display = "block";
      } else {
        content.style.display = "none";
      }
    }
    </script>
    """
    # Build the accordion items. Number them by row position rather than by
    # index label: the label of a sorted/subset frame is arbitrary and need
    # not even be an integer.
    accordion_items = []
    for position, (_, row) in enumerate(df.iterrows()):
        content_id = f"accordion-content-{position}"
        header_html = f"""
        <div class="accordion-header" onclick="toggleAccordion('{content_id}')">
          [{position + 1}] NCT ID: {esc(row['nct_id'])} - {esc(row['trial_title'])}
        </div>
        """
        content_html = f"""
        <div id="{content_id}" class="accordion-content">
          <p><strong>Brief Summary:</strong> {esc(row['trial_brief_summary'])}</p>
          <p><strong>Eligibility Criteria:</strong> {esc(row['trial_eligibility_criteria'])}</p>
          <p><strong>Trial Space:</strong> {esc(row['this_space'])}</p>
          <p><strong>Checker Result:</strong> {esc(row['trial_checker_result'])}</p>
          <p><strong>Checker Score:</strong> {esc(row['trial_checker_score'])}</p>
        </div>
        """
        accordion_items.append(header_html + content_html)
    # Combine everything
    full_html = css + script + "<div>" + "".join(accordion_items) + "</div>"
    return full_html
 def export_results(df: pd.DataFrame):
     """
     Save the results DataFrame to a temporary CSV file and return its path
     so Gradio can provide it as a downloadable file.

     Args:
         df: Results to export. ``None`` is accepted (for example when no
             results have been stored yet) and yields ``None``.

     Returns:
         Path of the CSV file that was written, or ``None`` if ``df`` is None.
         The file is created with ``delete=False``, so it is not removed
         automatically.
     """
     import tempfile

     if df is None:
         # Nothing to export; avoid an AttributeError on ``None.to_csv``.
         return None
     # Only the unique path is needed: close the handle before pandas reopens
     # the file by name, so no descriptor is leaked and the write also works
     # on platforms that refuse to reopen a file that is still open.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp:
         csv_path = temp.name
     df.to_csv(csv_path, index=False)
     return csv_path
+# Minimal CSS for the input box
 custom_css = """
 #input_box textarea {
     width: 600px !important;
 """
 with gr.Blocks(css=custom_css) as demo:
     # UI definition: declares the input, result and export components and
     # registers the two click handlers further down.
+    # Intro text
     gr.HTML("""
+    <h3>Alpha Version of Clinical Trial Search (Collapsible Results)</h3>
     <p>Based on clinicaltrials.gov cancer trials export 10/31/24</p>
     <p>Queries take approximately 30 seconds to run.</p>
     """)
     # elem_id matches the "#input_box" selector used in custom_css.
     patient_summary_input = gr.Textbox(
         label="Enter Patient Summary",
         elem_id="input_box",
         value="70M with metastatic lung adenocarcinoma, KRAS G12C mutation, PD-L1 high, previously treated with pembrolizumab."
     )
     submit_btn = gr.Button("Find Matches")
+    # We'll store the DataFrame in a state for CSV export.
     # NOTE(review): gr.State() is created without an initial value, so
     # export_results presumably receives None if "Export Results" is clicked
     # before any search has run -- confirm.
     results_state = gr.State()
+    # Display the collapsible results in a gr.HTML component
+    output_html = gr.HTML(label="Results")
     export_btn = gr.Button("Export Results")
+    # On "Find Matches", produce (collapsible_html, df)
     # The two return values are routed, in order, to output_html and
     # results_state.
     submit_btn.click(
+        fn=match_clinical_trials_collapsible,
         inputs=patient_summary_input,
+        outputs=[output_html, results_state]
     )
+    # On "Export Results", convert state (DataFrame) to a downloadable CSV
     # The gr.File output component is created inline in this call rather
     # than being bound to a name.
     export_btn.click(
         fn=export_results,
         inputs=results_state,
         outputs=gr.File(label="Download CSV")
     )
+    # Enable queue for "Processing..." feedback
     demo.queue()
 if __name__ == "__main__":