Spaces:

gloignon
/

textographe

Sleeping

App Files Files Community

gloignon committed on Sep 23, 2024

Commit

76026d0

verified ·

1 Parent(s): 2834d04

back to working version

Browse files

Files changed (1) hide show

app.py +29 -47

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 from sklearn.decomposition import PCA
@@ -8,28 +9,21 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
-def compute_pca(*args):
-    # args will be identifier1, text1, identifier2, text2, ..., identifierN, textN
-    # Pair them up
-    data = []
-    for i in range(0, len(args), 2):
-        identifier = args[i]
-        text = args[i+1]
-        data.append([identifier, text])
     df = pd.DataFrame(data, columns=["Identifier", "Text"])
     # Remove rows where 'Identifier' or 'Text' is empty or contains only whitespace
     valid_entries = df[
-        (df['Identifier'].astype(str).str.strip() != '') &
-        (df['Text'].astype(str).str.strip() != '')
     ]
     if valid_entries.empty:
         return gr.Plot.update(value=None, label="No data to process. Please fill in the boxes.")
-    if len(valid_entries) < 2:
-        return gr.Plot.update(value=None, label="At least two texts are required to perform PCA.")
     # Generate embeddings
     embeddings = model.encode(valid_entries['Text'].tolist())
@@ -44,62 +38,50 @@ def compute_pca(*args):
     # Plot the PCA result with identifiers as labels
     fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
-    fig.update_traces(textposition='top center')
     return fig
 def text_editor_app():
     with gr.Blocks() as demo:
         identifier_inputs = []
         text_inputs = []
-        pair_rows = []
         gr.Markdown("### Enter at least two identifier-text pairs:")
-        add_pair_btn = gr.Button("Add Text")
-        analyze_button = gr.Button("Run Analysis")
-        output_plot = gr.Plot(label="PCA Visualization")
-        max_pairs = 10  # Maximum number of pairs
-        initial_pairs = 4  # Initial number of visible pairs
-        # Create the input pairs
-        for i in range(max_pairs):
-            with gr.Column(visible=(i < initial_pairs)) as pair_row:
                 id_input = gr.Textbox(label=f"Identifier {i+1}")
                 text_input = gr.Textbox(label=f"Text {i+1}")
-                gr.Markdown("---")  # Add a horizontal rule to create a break
-            identifier_inputs.append(id_input)
-            text_inputs.append(text_input)
-            pair_rows.append(pair_row)
-        # Function to add a new pair
-        def add_pair():
-            # Find the next invisible pair and make it visible
-            for pair_row in pair_rows:
-                if not pair_row.visible:
-                    return gr.update(visible=True, value=None, interactive=True, component=pair_row)
-            return None  # No more pairs to show
-        # Connect the add_pair function to the button
-        add_pair_btn.click(fn=add_pair, inputs=None, outputs=pair_rows)
         # Function to collect inputs and process them
         def collect_inputs(*args):
             data = []
             for i in range(0, len(args), 2):
                 identifier = args[i]
                 text = args[i+1]
                 data.append([identifier, text])
-            return compute_pca(*args)
-        # Combine all inputs
         inputs = []
         for id_input, text_input in zip(identifier_inputs, text_inputs):
             inputs.extend([id_input, text_input])
         analyze_button.click(fn=collect_inputs, inputs=inputs, outputs=output_plot)
     return demo
 # Launch the app
 text_editor_app().launch()

 import gradio as gr
+import numpy as np
 import pandas as pd
 import plotly.express as px
 from sklearn.decomposition import PCA
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
+# Modify the Gradio interface to accept a list of identifiers and texts
+def compute_pca(data):
+    # data is expected to be a list of [identifier, text] pairs, as built by collect_inputs
     df = pd.DataFrame(data, columns=["Identifier", "Text"])
     # Remove rows where 'Identifier' or 'Text' is empty or contains only whitespace
     valid_entries = df[
+        (df['Identifier'].str.strip() != '') &
+        (df['Text'].str.strip() != '')
     ]
     if valid_entries.empty:
         return gr.Plot.update(value=None, label="No data to process. Please fill in the boxes.")
     # Generate embeddings
     embeddings = model.encode(valid_entries['Text'].tolist())
     # Plot the PCA result with identifiers as labels
     fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
     return fig
 def text_editor_app():
     with gr.Blocks() as demo:
         identifier_inputs = []
         text_inputs = []
         gr.Markdown("### Enter at least two identifier-text pairs:")
+        for i in range(4):  # Assuming we have 4 entries
+            with gr.Column():
                 id_input = gr.Textbox(label=f"Identifier {i+1}")
                 text_input = gr.Textbox(label=f"Text {i+1}")
+                identifier_inputs.append(id_input)
+                text_inputs.append(text_input)
+            gr.Markdown("---")  # Add a horizontal rule to create a break
+        # Button to run the analysis
+        analyze_button = gr.Button("Run Analysis")
+        # Output plot
+        output_plot = gr.Plot(label="PCA Visualization")
         # Function to collect inputs and process them
         def collect_inputs(*args):
+            # args will be identifier1, text1, identifier2, text2, ..., identifier4, text4
+            # So we need to pair them up
             data = []
             for i in range(0, len(args), 2):
                 identifier = args[i]
                 text = args[i+1]
                 data.append([identifier, text])
+            return compute_pca(data)
         inputs = []
         for id_input, text_input in zip(identifier_inputs, text_inputs):
             inputs.extend([id_input, text_input])
         analyze_button.click(fn=collect_inputs, inputs=inputs, outputs=output_plot)
     return demo
 # Launch the app
 text_editor_app().launch()