gloignon committed on
Commit
68dda59
·
verified ·
1 Parent(s): 6195096

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -15
app.py CHANGED
@@ -9,7 +9,7 @@ from sentence_transformers import SentenceTransformer
9
  model = SentenceTransformer('all-MiniLM-L6-v2')
10
 
11
  # Function to calculate embeddings and PCA
12
- def compute_pca(texts):
13
  # Generate embeddings
14
  embeddings = model.encode(texts)
15
 
@@ -19,39 +19,50 @@ def compute_pca(texts):
19
 
20
  # Create DataFrame for visualization
21
  df = pd.DataFrame({
 
22
  'Text': texts,
23
  'PC1': pca_result[:, 0],
24
  'PC2': pca_result[:, 1]
25
  })
26
 
27
- # Plot the PCA result
28
- fig = px.scatter(df, x='PC1', y='PC2', text='Text', title='PCA of Text Embeddings')
29
  return fig
30
 
31
  # Define Gradio app layout and interactions
32
  def text_editor_app():
33
  with gr.Blocks() as demo:
34
- # Text box to input texts
35
- text_input = gr.Textbox(lines=10, placeholder="Enter or paste your texts here, one per line...", label="Text Inputs")
 
36
 
37
- # Display the list of texts
38
- texts = gr.Dataframe(headers=["Texts"], label="Text List", interactive=True)
39
 
40
- # Button to process texts
41
  submit_button = gr.Button("Compute Embeddings and PCA")
42
 
43
  # Output plot
44
  output_plot = gr.Plot(label="PCA Visualization")
45
 
46
- # Define button click interaction
47
- def process_texts(text_input):
48
- # Split input texts by newline
49
  text_list = text_input.strip().split('\n')
50
- # Return a new Dataframe instance instead of updating the existing one
51
- return [[t] for t in text_list]
 
 
 
 
 
 
52
 
53
- submit_button.click(fn=lambda x: compute_pca([t[0] for t in x]), inputs=texts, outputs=output_plot)
54
- text_input.change(fn=process_texts, inputs=text_input, outputs=texts)
 
 
 
55
 
56
  return demo
57
 
 
9
  model = SentenceTransformer('all-MiniLM-L6-v2')
10
 
11
  # Function to calculate embeddings and PCA
12
+ def compute_pca(texts, ids):
13
  # Generate embeddings
14
  embeddings = model.encode(texts)
15
 
 
19
 
20
  # Create DataFrame for visualization
21
  df = pd.DataFrame({
22
+ 'ID': ids,
23
  'Text': texts,
24
  'PC1': pca_result[:, 0],
25
  'PC2': pca_result[:, 1]
26
  })
27
 
28
+ # Plot the PCA result with identifiers as labels
29
+ fig = px.scatter(df, x='PC1', y='PC2', text='ID', title='PCA of Text Embeddings')
30
  return fig
31
 
32
  # Define Gradio app layout and interactions
33
  def text_editor_app():
34
  with gr.Blocks() as demo:
35
+ # Input fields for text and identifier
36
+ text_input = gr.Textbox(lines=5, placeholder="Enter or paste your texts here, one per line...", label="Text Inputs")
37
+ id_input = gr.Textbox(lines=5, placeholder="Enter an identifier for each text, one per line...", label="Identifier Inputs")
38
 
39
+ # Display the list of texts with identifiers
40
+ texts_df = gr.Dataframe(headers=["ID", "Text"], label="Text List with Identifiers", interactive=True)
41
 
42
+ # Button to process texts and identifiers
43
  submit_button = gr.Button("Compute Embeddings and PCA")
44
 
45
  # Output plot
46
  output_plot = gr.Plot(label="PCA Visualization")
47
 
48
+ # Function to process input texts and identifiers
49
+ def process_texts_and_ids(text_input, id_input):
50
+ # Split input texts and identifiers by newline
51
  text_list = text_input.strip().split('\n')
52
+ id_list = id_input.strip().split('\n')
53
+
54
+ # Ensure both lists are of equal length
55
+ if len(text_list) != len(id_list):
56
+ return gr.update(value=[], error="Number of texts and identifiers must match.")
57
+
58
+ # Return a new DataFrame instance with both text and identifiers
59
+ return [[id_list[i], text_list[i]] for i in range(len(text_list))]
60
 
61
+ # Define the button click interaction
62
+ submit_button.click(fn=lambda x: compute_pca([t[1] for t in x], [t[0] for t in x]), inputs=texts_df, outputs=output_plot)
63
+ # Update DataFrame with texts and identifiers
64
+ text_input.change(fn=process_texts_and_ids, inputs=[text_input, id_input], outputs=texts_df)
65
+ id_input.change(fn=process_texts_and_ids, inputs=[text_input, id_input], outputs=texts_df)
66
 
67
  return demo
68