textographe / app.py
gloignon's picture
Update app.py
bac87b6 verified
raw
history blame
2.33 kB
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
# Sentence-embedding model, instantiated once at import time so that every
# call to compute_pca reuses the same object instead of reloading it.
_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(_MODEL_NAME)
# Compute sentence embeddings for the submitted texts and reduce them to 2-D with PCA.
# The Gradio interface supplies the input as a table of identifiers and texts.
def compute_pca(data):
    """Embed the submitted texts and project them onto two principal components.

    Args:
        data: Tabular input with 'Identifier' and 'Text' columns; anything
            ``pd.DataFrame`` accepts (a DataFrame, a list of dicts, ...).

    Returns:
        A Plotly scatter figure with one labelled point per usable row, or a
        Gradio update that clears the plot and sets an explanatory label when
        fewer than two usable rows were supplied.
    """
    df = pd.DataFrame(data)

    # Normalise both columns to stripped strings before filtering: empty
    # cells may arrive as None/NaN (which would pass a plain `!= ''` test and
    # then break the encoder), and a non-string column has no `.str` accessor.
    identifiers = df['Identifier'].fillna('').astype(str).str.strip()
    texts = df['Text'].fillna('').astype(str).str.strip()
    df = df.assign(Identifier=identifiers, Text=texts)

    # Keep only rows where both the identifier and the text are non-blank.
    has_content = (df['Identifier'] != '') & (df['Text'] != '')
    valid_entries = df[has_content].reset_index(drop=True)

    # gr.update is used rather than the per-component Plot.update
    # classmethod, which is not available in Gradio 4 and later.
    if valid_entries.empty:
        return gr.update(value=None, label="No data to process. Please fill in the boxes.")

    # A 2-component PCA needs at least two samples; fail gracefully instead
    # of letting scikit-learn raise on a single row.
    if len(valid_entries) < 2:
        return gr.update(value=None, label="Please enter at least two texts to run the analysis.")

    # Generate one embedding vector per text.
    embeddings = model.encode(valid_entries['Text'].tolist())

    # Reduce the embeddings to 2 dimensions.
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(embeddings)

    # Attach the projected coordinates to the rows they came from.
    valid_entries['PC1'] = pca_result[:, 0]
    valid_entries['PC2'] = pca_result[:, 1]

    # Scatter plot with each point labelled by its identifier.
    fig = px.scatter(valid_entries, x='PC1', y='PC2', text='Identifier', title='PCA of Text Embeddings')
    return fig
def text_editor_app():
    """Build the Gradio Blocks interface for the text-embedding PCA demo.

    The layout is an editable two-column table (Identifier, Text), a button
    and a plot. Clicking the button passes the table contents to
    ``compute_pca`` and displays whatever it returns in the plot.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks() as demo:
        # Input table with two fixed string columns and an initial row count
        # of 4 that is declared "dynamic". No `placeholder` argument is
        # passed: gr.Dataframe does not define one (Gradio 3.x only warns
        # about it, Gradio 4+ raises TypeError), and the label already tells
        # the user to enter at least two texts.
        data_input = gr.Dataframe(
            headers=["Identifier", "Text"],
            datatype=["str", "str"],
            row_count=(4, "dynamic"),
            col_count=(2, "fixed"),
            label="Input Data (Enter at least two texts)",
            wrap=True,
        )

        # Button that triggers the analysis.
        analyze_button = gr.Button("Run Analysis")

        # Plot area that receives the result of compute_pca.
        output_plot = gr.Plot(label="PCA Visualization")

        # Wire the button: table in, plot out.
        analyze_button.click(
            fn=compute_pca, inputs=data_input, outputs=output_plot
        )
    return demo
# Build the interface, then start serving it.
demo = text_editor_app()
demo.launch()