Spaces:
Sleeping
Sleeping
File size: 2,341 Bytes
f4d46f6 250aae7 1b5bc8f 7fc6901 1b5bc8f aafdbf2 f4d46f6 1b5bc8f f4d46f6 250aae7 f4d46f6 1b5bc8f 7fc6901 1b5bc8f f4d46f6 68dda59 1b5bc8f f4d46f6 47c5583 807ad31 250aae7 f4d46f6 807ad31 47c5583 807ad31 f4d46f6 1b5bc8f 47c5583 f4d46f6 1014197 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
# Load the pre-trained sentence-transformer model once, at import time, so that
# every compute_pca call reuses the same module-level `model` instance.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Compute sentence embeddings for the submitted texts and project them to 2-D with PCA.
# Receives the identifier/text entries gathered from the Gradio interface.
def compute_pca(data):
    """Embed the submitted texts and return a 2-D PCA scatter plot.

    Args:
        data: The entries to analyse, either as a list of dicts with
            'Identifier' and 'Text' keys or as a list of
            ``(identifier, text)`` pairs.

    Returns:
        A Plotly figure with one labelled point per valid entry, or a
        ``gr.update(...)`` that clears the plot and sets an explanatory label
        when there are too few usable entries to run PCA.
    """
    df = pd.DataFrame(data)
    if df.empty:
        return gr.update(value=None, label="No data to process. Please fill in the boxes.")

    # A list of (identifier, text) pairs yields positional columns 0 and 1;
    # name them so both input shapes share the code path below.
    if list(df.columns) == [0, 1]:
        df.columns = ['Identifier', 'Text']

    # Treat missing values (None/NaN) like empty strings so they are filtered
    # out instead of slipping past the whitespace check.
    id_clean = df['Identifier'].fillna('').astype(str).str.strip()
    text_clean = df['Text'].fillna('').astype(str).str.strip()
    valid_entries = df[(id_clean != '') & (text_clean != '')].reset_index(drop=True)

    if valid_entries.empty:
        # gr.update works on both Gradio 3.x and 4.x; Component.update was removed in 4.0.
        return gr.update(value=None, label="No data to process. Please fill in the boxes.")
    if len(valid_entries) < 2:
        # PCA cannot extract 2 components from a single sample.
        return gr.update(value=None, label="Please fill in at least two entries to run PCA.")

    # Generate embeddings
    embeddings = model.encode(valid_entries['Text'].astype(str).tolist())

    # Perform PCA to reduce to 2 dimensions
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(embeddings)

    # Add PCA results to the DataFrame
    valid_entries['PC1'] = pca_result[:, 0]
    valid_entries['PC2'] = pca_result[:, 1]

    # Plot the PCA result with identifiers as labels
    fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
    return fig
def text_editor_app(num_entries=4):
    """Build the Gradio Blocks interface for the PCA text analysis.

    The interface shows ``num_entries`` identifier/text textbox pairs in one
    row, a "Run Analysis" button, and a plot that displays the result of
    ``compute_pca`` for the filled-in entries.

    Args:
        num_entries: Number of identifier/text input pairs to render.
            Defaults to 4.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks() as demo:
        identifiers = []
        texts = []
        with gr.Row():
            for i in range(num_entries):
                with gr.Column():
                    id_input = gr.Textbox(label=f"Identifier {i+1}")
                    text_input = gr.Textbox(label=f"Text {i+1}")
                    identifiers.append(id_input)
                    texts.append(text_input)
        analyze_button = gr.Button("Run Analysis")
        output_plot = gr.Plot(label="PCA Visualization")

        def collect_inputs(*args):
            # Values arrive in the order of `inputs` below: every identifier
            # first, then every text.
            id_values = args[:num_entries]
            text_values = args[num_entries:]
            # compute_pca indexes the entries by 'Identifier' and 'Text', so
            # build records with those keys; map missing values to ''.
            data = [
                {'Identifier': ident or '', 'Text': text or ''}
                for ident, text in zip(id_values, text_values)
            ]
            return compute_pca(data)

        inputs = identifiers + texts
        analyze_button.click(fn=collect_inputs, inputs=inputs, outputs=output_plot)
    return demo
# Build the interface, then start serving it
demo = text_editor_app()
demo.launch()
|