Spaces:

ksg-dfci
/

trial_search_alpha

Running on CPU Upgrade

App Files Files Community

kenlkehl committed on Dec 20, 2024

Commit

3815070

verified ·

1 Parent(s): 49dc990

Upload app.py

Browse files

Files changed (1) hide show

app.py +32 -13

app.py CHANGED Viewed

@@ -21,13 +21,7 @@ tokenizer = AutoTokenizer.from_pretrained("roberta-large")
 checker_pipe = pipeline('text-classification', 'ksg-dfci/TrialChecker', tokenizer=tokenizer,
                         truncation=True, padding='max_length', max_length=512)
-import gradio as gr
-import pandas as pd
-import torch
-import torch.nn.functional as F
-from sentence_transformers import SentenceTransformer
-from safetensors import safe_open
-from transformers import pipeline, AutoTokenizer
 # We assume the following objects have already been loaded:
 # trial_spaces (DataFrame), embedding_model (SentenceTransformer),
@@ -51,7 +45,7 @@ def match_clinical_trials(patient_summary: str):
     relevant_eligibility_criteria = trial_spaces.iloc[top_indices].eligibility_criteria
     analysis = pd.DataFrame({
-        'patient_summary': patient_summary,
         'this_space': relevant_spaces,
         'nct_id': relevant_nctid,
         'trial_title': relevant_title,
@@ -59,15 +53,16 @@ def match_clinical_trials(patient_summary: str):
         'trial_eligibility_criteria': relevant_eligibility_criteria
     }).reset_index(drop=True)
-    analysis['pt_trial_pair'] = analysis['this_space'] + "\nNow here is the patient summary:" + analysis['patient_summary']
     # Run checker pipeline
     classifier_results = checker_pipe(analysis.pt_trial_pair.tolist())
     analysis['trial_checker_result'] = [x['label'] for x in classifier_results]
     analysis['trial_checker_score'] = [x['score'] for x in classifier_results]
-    # Return a subset of columns that are most relevant
     return analysis[[
         'nct_id',
         'trial_title',
         'trial_brief_summary',
@@ -76,6 +71,12 @@ def match_clinical_trials(patient_summary: str):
         'trial_checker_score'
     ]]
 custom_css = """
 #input_box textarea {
     width: 600px !important;
@@ -100,10 +101,21 @@ custom_css = """
 with gr.Blocks(css=custom_css) as demo:
     gr.HTML("<h3>Clinical Trial Matcher</h3>")
-    patient_summary_input = gr.Textbox(label="Enter Patient Summary", elem_id="input_box")
     submit_btn = gr.Button("Find Matches")
     output_df = gr.DataFrame(
         headers=[
             "nct_id",
             "trial_title",
             "trial_brief_summary",
@@ -113,11 +125,18 @@ with gr.Blocks(css=custom_css) as demo:
         ],
         elem_id="output_df"
     )
     submit_btn.click(fn=match_clinical_trials,
                      inputs=patient_summary_input,
-                     outputs=output_df)
-if __name__ == "__main__":
     demo.launch()

 checker_pipe = pipeline('text-classification', 'ksg-dfci/TrialChecker', tokenizer=tokenizer,
                         truncation=True, padding='max_length', max_length=512)
 # We assume the following objects have already been loaded:
 # trial_spaces (DataFrame), embedding_model (SentenceTransformer),
     relevant_eligibility_criteria = trial_spaces.iloc[top_indices].eligibility_criteria
     analysis = pd.DataFrame({
+        'patient_summary_query': patient_summary,
         'this_space': relevant_spaces,
         'nct_id': relevant_nctid,
         'trial_title': relevant_title,
         'trial_eligibility_criteria': relevant_eligibility_criteria
     }).reset_index(drop=True)
+    analysis['pt_trial_pair'] = analysis['this_space'] + "\nNow here is the patient summary:" + analysis['patient_summary_query']
     # Run checker pipeline
     classifier_results = checker_pipe(analysis.pt_trial_pair.tolist())
     analysis['trial_checker_result'] = [x['label'] for x in classifier_results]
     analysis['trial_checker_score'] = [x['score'] for x in classifier_results]
+    # Return the final subset of columns including patient_summary_query as first column
     return analysis[[
+        'patient_summary_query',
         'nct_id',
         'trial_title',
         'trial_brief_summary',
         'trial_checker_score'
     ]]
def export_results(df: pd.DataFrame):
    """Write the results table to a temporary CSV file and return its path.

    Args:
        df: Results table to export. ``None`` is tolerated so that calling
            this before any results exist does not raise.

    Returns:
        Filesystem path of the newly written ``.csv`` file, or ``None`` when
        ``df`` is ``None`` (nothing to export).
    """
    # Imported locally because no top-level ``import tempfile`` is visible in
    # this part of the file; a function-scope import keeps the block
    # self-contained either way.
    import tempfile

    if df is None:
        return None

    # Reserve a unique path, then close the handle *before* pandas reopens the
    # file by name. Leaving it open leaks a file descriptor per export and
    # prevents the second open on platforms that lock open files (Windows).
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp:
        csv_path = temp.name

    df.to_csv(csv_path, index=False)
    return csv_path
 custom_css = """
 #input_box textarea {
     width: 600px !important;
 with gr.Blocks(css=custom_css) as demo:
     gr.HTML("<h3>Clinical Trial Matcher</h3>")
+    patient_summary_input = gr.Textbox(
+        label="Enter Patient Summary",
+        elem_id="input_box",
+        value="70M with metastatic lung adenocarcinoma, KRAS G12C mutation, PD-L1 high, previously treated with pembrolizumab."
+    )
     submit_btn = gr.Button("Find Matches")
+    # We'll store the DataFrame in a state so we can export it after generation
+    results_state = gr.State()
     output_df = gr.DataFrame(
         headers=[
+            "patient_summary_query",
             "nct_id",
             "trial_title",
             "trial_brief_summary",
         ],
         elem_id="output_df"
     )
+    export_btn = gr.Button("Export Results")
+    # On "Find Matches", show the DataFrame and store it in state
     submit_btn.click(fn=match_clinical_trials,
                      inputs=patient_summary_input,
+                     outputs=[output_df, results_state])
+    # On "Export Results", use the state to create and return a CSV file
+    export_btn.click(fn=export_results,
+                     inputs=results_state,
+                     outputs=gr.File(label="Download CSV"))
# Launch the app only when this file is run as a script. The interpreter sets
# __name__ to "__main__" in that case; comparing against 'main' is never true,
# which would leave demo.launch() unreachable.
if __name__ == "__main__":
    demo.launch()