Spaces:

gloignon
/

textographe

Sleeping

App Files Files Community

gloignon committed on Sep 22, 2024

Commit

6119c28

verified ·

1 Parent(s): 11f43b6

Trying a new generic method for input pairs

Browse files

Files changed (1) hide show

app.py +47 -28

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-import numpy as np
 import pandas as pd
 import plotly.express as px
 from sklearn.decomposition import PCA
@@ -9,21 +8,28 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
-# Modify the Gradio interface to accept a list of identifiers and texts
-def compute_pca(data):
-    # data is expected to be a list of dictionaries with 'Identifier' and 'Text' keys
     df = pd.DataFrame(data, columns=["Identifier", "Text"])
     # Remove rows where 'Identifier' or 'Text' is empty or contains only whitespace
     valid_entries = df[
-        (df['Identifier'].str.strip() != '') &
-        (df['Text'].str.strip() != '')
     ]
     if valid_entries.empty:
         return gr.Plot.update(value=None, label="No data to process. Please fill in the boxes.")
     # Generate embeddings
     embeddings = model.encode(valid_entries['Text'].tolist())
@@ -38,50 +44,63 @@ def compute_pca(data):
     # Plot the PCA result with identifiers as labels
     fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
     return fig
 def text_editor_app():
     with gr.Blocks() as demo:
         identifier_inputs = []
         text_inputs = []
         gr.Markdown("### Enter at least two identifier-text pairs:")
-        for i in range(4):  # Assuming we have 4 entries
-            with gr.Column():
-                id_input = gr.Textbox(label=f"Identifier {i+1}")
-                text_input = gr.Textbox(label=f"Text {i+1}")
                 identifier_inputs.append(id_input)
                 text_inputs.append(text_input)
-            gr.Markdown("---")  # Add a horizontal rule to create a break
-        # Button to run the analysis
-        analyze_button = gr.Button("Run Analysis")
-        # Output plot
         output_plot = gr.Plot(label="PCA Visualization")
         # Function to collect inputs and process them
         def collect_inputs(*args):
-            # args will be identifier1, text1, identifier2, text2, ..., identifier4, text4
-            # So we need to pair them up
             data = []
             for i in range(0, len(args), 2):
                 identifier = args[i]
                 text = args[i+1]
                 data.append([identifier, text])
-            return compute_pca(data)
         inputs = []
         for id_input, text_input in zip(identifier_inputs, text_inputs):
             inputs.extend([id_input, text_input])
         analyze_button.click(fn=collect_inputs, inputs=inputs, outputs=output_plot)
     return demo
 # Launch the app
 text_editor_app().launch()

 import gradio as gr
 import pandas as pd
 import plotly.express as px
 from sklearn.decomposition import PCA
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
+def compute_pca(*args):
+    # args will be identifier1, text1, identifier2, text2, ..., identifierN, textN
+    # Pair them up
+    data = []
+    for i in range(0, len(args), 2):
+        identifier = args[i]
+        text = args[i+1]
+        data.append([identifier, text])
     df = pd.DataFrame(data, columns=["Identifier", "Text"])
     # Remove rows where 'Identifier' or 'Text' is empty or contains only whitespace
     valid_entries = df[
+        (df['Identifier'].astype(str).str.strip() != '') &
+        (df['Text'].astype(str).str.strip() != '')
     ]
     if valid_entries.empty:
         return gr.Plot.update(value=None, label="No data to process. Please fill in the boxes.")
+    if len(valid_entries) < 2:
+        return gr.Plot.update(value=None, label="At least two texts are required to perform PCA.")
     # Generate embeddings
     embeddings = model.encode(valid_entries['Text'].tolist())
     # Plot the PCA result with identifiers as labels
     fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
+    fig.update_traces(textposition='top center')
     return fig
 def text_editor_app():
     with gr.Blocks() as demo:
+        num_pairs_visible = gr.State(value=4)
         identifier_inputs = []
         text_inputs = []
+        pair_rows = []
         gr.Markdown("### Enter at least two identifier-text pairs:")
+        with gr.Column() as input_column:
+            for i in range(10):  # Max 10 pairs
+                with gr.Column(visible=(i < 4)) as pair_row:
+                    id_input = gr.Textbox(label=f"Identifier {i+1}")
+                    text_input = gr.Textbox(label=f"Text {i+1}")
+                    gr.Markdown("---")  # Add a horizontal rule to create a break
                 identifier_inputs.append(id_input)
                 text_inputs.append(text_input)
+                pair_rows.append(pair_row)
+        add_pair_btn = gr.Button("Add Text")
+        analyze_button = gr.Button("Run Analysis")
         output_plot = gr.Plot(label="PCA Visualization")
+        def add_pair(num_visible):
+            if num_visible >= len(pair_rows):
+                return [gr.update()] * len(pair_rows) + [num_visible]  # No more pairs to show
+            updates = []
+            for idx, pair_row in enumerate(pair_rows):
+                if idx < num_visible + 1:
+                    updates.append(gr.update(visible=True))
+                else:
+                    updates.append(gr.update())
+            num_visible += 1
+            return updates + [num_visible]
+        add_pair_btn.click(fn=add_pair, inputs=num_pairs_visible, outputs=pair_rows + [num_pairs_visible])
         # Function to collect inputs and process them
         def collect_inputs(*args):
             data = []
             for i in range(0, len(args), 2):
                 identifier = args[i]
                 text = args[i+1]
                 data.append([identifier, text])
+            return compute_pca(*args)
         inputs = []
         for id_input, text_input in zip(identifier_inputs, text_inputs):
             inputs.extend([id_input, text_input])
         analyze_button.click(fn=collect_inputs, inputs=inputs, outputs=output_plot)
     return demo
 # Launch the app
 text_editor_app().launch()