Spaces:

gloignon
/

textographe

Sleeping

App Files Files Community

gloignon committed on Sep 21, 2024

Commit

1b5bc8f

verified ·

1 Parent(s): 1014197

Trying new generic method

Browse files

Files changed (1) hide show

app.py +17 -36

app.py CHANGED Viewed

@@ -9,64 +9,45 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
-def compute_pca(id1, text1, id2, text2, id3, text3, id4, text4):
-    # Collect identifiers and texts into lists
-    identifiers = [id1, id2, id3, id4]
-    texts = [text1, text2, text3, text4]
-    # Filter out any empty inputs
-    valid_entries = [(id, text) for id, text in zip(identifiers, texts) if id and text]
-    if not valid_entries:
         return gr.Plot.update(value=None, label="No data to process. Please fill in the boxes.")
-    # Unzip identifiers and texts
-    identifiers, texts = zip(*valid_entries)
     # Generate embeddings
-    embeddings = model.encode(texts)
     # Perform PCA to reduce to 2 dimensions
     pca = PCA(n_components=2)
     pca_result = pca.fit_transform(embeddings)
-    # Create DataFrame for visualization
-    result_df = pd.DataFrame({
-        'Identifier': identifiers,
-        'PC1': pca_result[:, 0],
-        'PC2': pca_result[:, 1]
-    })
     # Plot the PCA result with identifiers as labels
-    fig = px.scatter(result_df, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
     return fig
-# Gradio interface
 def text_editor_app():
     with gr.Blocks() as demo:
-        # Input boxes for four identifier-text pairs
-        with gr.Row():
-            id1 = gr.Textbox(label="Identifier 1")
-            text1 = gr.Textbox(label="Text 1")
-        with gr.Row():
-            id2 = gr.Textbox(label="Identifier 2")
-            text2 = gr.Textbox(label="Text 2")
-        with gr.Row():
-            id3 = gr.Textbox(label="Identifier 3")
-            text3 = gr.Textbox(label="Text 3")
-        with gr.Row():
-            id4 = gr.Textbox(label="Identifier 4")
-            text4 = gr.Textbox(label="Text 4")
         # Button to run the analysis
         analyze_button = gr.Button("Run Analysis")
         # Output plot
         output_plot = gr.Plot(label="PCA Visualization")
         # Run analysis when the button is clicked
-        analyze_button.click(fn=compute_pca, inputs=[id1, text1, id2, text2, id3, text3, id4, text4], outputs=output_plot)
     return demo
 # Launch the app
 text_editor_app().launch()

 model = SentenceTransformer('all-MiniLM-L6-v2')
 # Function to compute document embeddings and apply PCA
 def compute_pca(data):
     """Embed each text and project the embeddings onto two principal components.

     Args:
         data: Tabular input with 'Identifier' and 'Text' columns; anything
             ``pd.DataFrame`` accepts (a DataFrame, a list of dicts, ...).

     Returns:
         A Plotly scatter figure whose points are labelled by identifier, or
         a Gradio update that clears the plot and shows a message when fewer
         than two usable rows are supplied.
     """
     valid_entries = pd.DataFrame(data).dropna()
     if not valid_entries.empty:
         # Blank cells may arrive as empty strings rather than NaN, so
         # dropna() alone would let them through to the encoder.
         filled = (
             valid_entries['Identifier'].astype(str).str.strip().ne('')
             & valid_entries['Text'].astype(str).str.strip().ne('')
         )
         valid_entries = valid_entries[filled]
     if valid_entries.empty:
         # gr.update() works on Gradio 3.x and later; the per-component
         # gr.Plot.update() classmethod is not available on Gradio 4+.
         return gr.update(value=None, label="No data to process. Please fill in the boxes.")
     if len(valid_entries) < 2:
         # PCA(n_components=2) raises ValueError when given a single sample.
         return gr.update(value=None, label="At least two texts are needed for a 2-D PCA.")
     # Generate embeddings
     embeddings = model.encode(valid_entries['Text'].astype(str).tolist())
     # Perform PCA to reduce to 2 dimensions
     pca_result = PCA(n_components=2).fit_transform(embeddings)
     # Build a new frame instead of writing into the filtered one, which
     # avoids pandas' chained-assignment warning.
     plot_df = valid_entries.assign(PC1=pca_result[:, 0], PC2=pca_result[:, 1])
     # Plot the PCA result with identifiers as labels
     fig = px.scatter(plot_df, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
     return fig
 def text_editor_app():
     """Assemble the Gradio Blocks UI: an input table, a run button and a plot.

     Returns:
         The constructed ``gr.Blocks`` app; launching it is left to the caller.
     """
     with gr.Blocks() as demo:
         # Two-column table holding the identifier/text pairs
         table = gr.Dataframe(
             headers=["Identifier", "Text"],
             datatype=["str", "str"],
             label="Input Data",
         )
         # Trigger for the analysis
         run_btn = gr.Button("Run Analysis")
         # Area where the resulting figure is rendered
         plot = gr.Plot(label="PCA Visualization")
         # On click, pass the table to compute_pca and display what it returns
         run_btn.click(fn=compute_pca, inputs=[table], outputs=plot)
     return demo
 # Launch the app
 text_editor_app().launch()