Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ from sentence_transformers import SentenceTransformer
|
|
9 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
10 |
|
11 |
# Function to calculate embeddings and PCA
|
12 |
-
def compute_pca(texts):
|
13 |
# Generate embeddings
|
14 |
embeddings = model.encode(texts)
|
15 |
|
@@ -19,39 +19,50 @@ def compute_pca(texts):
|
|
19 |
|
20 |
# Create DataFrame for visualization
|
21 |
df = pd.DataFrame({
|
|
|
22 |
'Text': texts,
|
23 |
'PC1': pca_result[:, 0],
|
24 |
'PC2': pca_result[:, 1]
|
25 |
})
|
26 |
|
27 |
-
# Plot the PCA result
|
28 |
-
fig = px.scatter(df, x='PC1', y='PC2', text='
|
29 |
return fig
|
30 |
|
31 |
# Define Gradio app layout and interactions
|
32 |
def text_editor_app():
|
33 |
with gr.Blocks() as demo:
|
34 |
-
#
|
35 |
-
text_input = gr.Textbox(lines=
|
|
|
36 |
|
37 |
-
# Display the list of texts
|
38 |
-
|
39 |
|
40 |
-
# Button to process texts
|
41 |
submit_button = gr.Button("Compute Embeddings and PCA")
|
42 |
|
43 |
# Output plot
|
44 |
output_plot = gr.Plot(label="PCA Visualization")
|
45 |
|
46 |
-
#
|
47 |
-
def
|
48 |
-
# Split input texts by newline
|
49 |
text_list = text_input.strip().split('\n')
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
55 |
|
56 |
return demo
|
57 |
|
|
|
9 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
10 |
|
11 |
# Function to calculate embeddings and PCA
|
12 |
+
def compute_pca(texts, ids):
|
13 |
# Generate embeddings
|
14 |
embeddings = model.encode(texts)
|
15 |
|
|
|
19 |
|
20 |
# Create DataFrame for visualization
|
21 |
df = pd.DataFrame({
|
22 |
+
'ID': ids,
|
23 |
'Text': texts,
|
24 |
'PC1': pca_result[:, 0],
|
25 |
'PC2': pca_result[:, 1]
|
26 |
})
|
27 |
|
28 |
+
# Plot the PCA result with identifiers as labels
|
29 |
+
fig = px.scatter(df, x='PC1', y='PC2', text='ID', title='PCA of Text Embeddings')
|
30 |
return fig
|
31 |
|
32 |
# Define Gradio app layout and interactions
|
33 |
def text_editor_app():
    """Build the Gradio UI for plotting a 2-D PCA of text embeddings.

    The layout has two multi-line textboxes (texts and identifiers, one
    entry per line), an editable table that mirrors them as ``[ID, Text]``
    rows, a button, and a plot. Clicking the button passes the table rows
    to ``compute_pca`` and shows the returned figure.

    Returns:
        The constructed ``gr.Blocks`` instance; the caller launches it.
    """

    def process_texts_and_ids(raw_texts, raw_ids):
        """Pair each text line with its identifier line for the table.

        Returns a list of ``[id, text]`` rows, or an update that clears
        the table when the two inputs have different numbers of lines.
        """
        # splitlines() returns [] for empty input (split('\n') would give
        # [''] and create a phantom blank row) and also copes with '\r\n'.
        text_list = (raw_texts or "").strip().splitlines()
        id_list = (raw_ids or "").strip().splitlines()

        # This runs on every edit of either textbox, so a mismatch is the
        # normal state while the user is still typing. Clear the table
        # quietly; the problem is reported when the button is clicked.
        if len(text_list) != len(id_list):
            return gr.update(value=[])

        return [[identifier, text] for identifier, text in zip(id_list, text_list)]

    def plot_rows(rows):
        """Extract identifiers and texts from the table rows and plot them.

        Raises:
            gr.Error: if the table holds no row with a non-blank text.
        """
        ids = []
        texts = []
        for row in rows or []:
            # Skip malformed rows and the blank placeholder rows an
            # editable table can contain.
            if len(row) < 2 or row[1] is None or not str(row[1]).strip():
                continue
            ids.append(row[0])
            texts.append(str(row[1]))

        if not texts:
            raise gr.Error(
                "Nothing to plot: enter the same number of texts and "
                "identifiers, one per line."
            )
        return compute_pca(texts, ids)

    with gr.Blocks() as demo:
        # Input fields for text and identifier
        text_input = gr.Textbox(
            lines=5,
            placeholder="Enter or paste your texts here, one per line...",
            label="Text Inputs",
        )
        id_input = gr.Textbox(
            lines=5,
            placeholder="Enter an identifier for each text, one per line...",
            label="Identifier Inputs",
        )

        # Display the list of texts with identifiers.
        # type="array" makes Gradio hand callbacks a list of rows; with the
        # default ("pandas") the callback receives a DataFrame, and
        # iterating that yields column names rather than rows.
        texts_df = gr.Dataframe(
            headers=["ID", "Text"],
            label="Text List with Identifiers",
            interactive=True,
            type="array",
        )

        # Button to process texts and identifiers
        submit_button = gr.Button("Compute Embeddings and PCA")

        # Output plot
        output_plot = gr.Plot(label="PCA Visualization")

        # Define the button click interaction
        submit_button.click(fn=plot_rows, inputs=texts_df, outputs=output_plot)

        # Keep the table in sync with both textboxes
        text_input.change(
            fn=process_texts_and_ids,
            inputs=[text_input, id_input],
            outputs=texts_df,
        )
        id_input.change(
            fn=process_texts_and_ids,
            inputs=[text_input, id_input],
            outputs=texts_df,
        )

    return demo
|
68 |
|