Spaces:

gloignon
/

textographe

Sleeping

App Files Files Community

gloignon commited on Sep 21, 2024

Commit

68dda59

verified ·

1 Parent(s): 6195096

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -15

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to calculate embeddings and PCA
-def compute_pca(texts):
     # Generate embeddings
     embeddings = model.encode(texts)
@@ -19,39 +19,50 @@ def compute_pca(texts):
     # Create DataFrame for visualization
     df = pd.DataFrame({
         'Text': texts,
         'PC1': pca_result[:, 0],
         'PC2': pca_result[:, 1]
     })
-    # Plot the PCA result
-    fig = px.scatter(df, x='PC1', y='PC2', text='Text', title='PCA of Text Embeddings')
     return fig
 # Define Gradio app layout and interactions
 def text_editor_app():
     with gr.Blocks() as demo:
-        # Text box to input texts
-        text_input = gr.Textbox(lines=10, placeholder="Enter or paste your texts here, one per line...", label="Text Inputs")
-        # Display the list of texts
-        texts = gr.Dataframe(headers=["Texts"], label="Text List", interactive=True)
-        # Button to process texts
         submit_button = gr.Button("Compute Embeddings and PCA")
         # Output plot
         output_plot = gr.Plot(label="PCA Visualization")
-        # Define button click interaction
-        def process_texts(text_input):
-            # Split input texts by newline
             text_list = text_input.strip().split('\n')
-            # Return a new Dataframe instance instead of updating the existing one
-            return [[t] for t in text_list]
-        submit_button.click(fn=lambda x: compute_pca([t[0] for t in x]), inputs=texts, outputs=output_plot)
-        text_input.change(fn=process_texts, inputs=text_input, outputs=texts)
     return demo

 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to calculate embeddings and PCA
+def compute_pca(texts, ids):
     # Generate embeddings
     embeddings = model.encode(texts)
     # Create DataFrame for visualization
     df = pd.DataFrame({
+        'ID': ids,
         'Text': texts,
         'PC1': pca_result[:, 0],
         'PC2': pca_result[:, 1]
     })
+    # Plot the PCA result with identifiers as labels
+    fig = px.scatter(df, x='PC1', y='PC2', text='ID', title='PCA of Text Embeddings')
     return fig
 # Define Gradio app layout and interactions
 def text_editor_app():
     with gr.Blocks() as demo:
+        # Input fields for text and identifier
+        text_input = gr.Textbox(lines=5, placeholder="Enter or paste your texts here, one per line...", label="Text Inputs")
+        id_input = gr.Textbox(lines=5, placeholder="Enter an identifier for each text, one per line...", label="Identifier Inputs")
+        # Display the list of texts with identifiers
+        texts_df = gr.Dataframe(headers=["ID", "Text"], label="Text List with Identifiers", interactive=True)
+        # Button to process texts and identifiers
         submit_button = gr.Button("Compute Embeddings and PCA")
         # Output plot
         output_plot = gr.Plot(label="PCA Visualization")
+        # Function to process input texts and identifiers
+        def process_texts_and_ids(text_input, id_input):
+            # Split input texts and identifiers by newline
             text_list = text_input.strip().split('\n')
+            id_list = id_input.strip().split('\n')
+            # Ensure both lists are of equal length
+            if len(text_list) != len(id_list):
+                return gr.update(value=[], error="Number of texts and identifiers must match.")
+            # Return a new DataFrame instance with both text and identifiers
+            return [[id_list[i], text_list[i]] for i in range(len(text_list))]
+        # Define the button click interaction
+        submit_button.click(fn=lambda x: compute_pca([t[1] for t in x], [t[0] for t in x]), inputs=texts_df, outputs=output_plot)
+        # Update DataFrame with texts and identifiers
+        text_input.change(fn=process_texts_and_ids, inputs=[text_input, id_input], outputs=texts_df)
+        id_input.change(fn=process_texts_and_ids, inputs=[text_input, id_input], outputs=texts_df)
     return demo