"""Gradio app for multi-product review sentiment analysis.

Upload a CSV (one product) or an Excel workbook (one sheet per product).
Every row's ``Reviews`` text is classified into one of five sentiment labels,
and the app returns comparison charts, a per-product summary table, and a
downloadable Excel file with the labelled reviews.
"""

import os
import tempfile
from collections import defaultdict

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load model and tokenizer once at import time so every request reuses them.
model_name = "tabularisai/multilingual-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Class index -> human-readable label used throughout the app.
SENTIMENT_LABELS = {
    0: "Very Negative",
    1: "Negative",
    2: "Neutral",
    3: "Positive",
    4: "Very Positive",
}

# Canonical display order (most negative -> most positive) for charts/tables.
SENTIMENT_ORDER = [SENTIMENT_LABELS[i] for i in sorted(SENTIMENT_LABELS)]

# File name offered to the user for the downloadable results workbook.
OUTPUT_FILENAME = "sentiment_analysis_results.xlsx"


def predict_sentiment(texts, batch_size=32):
    """Predict a sentiment label for each text.

    Args:
        texts: A list of review texts (a single string is treated as a
            one-element list). ``None`` becomes an empty string and any other
            non-string value is converted with ``str()``.
        batch_size: Number of texts run through the model per forward pass.
            Batching keeps memory use bounded for large files.

    Returns:
        A list of labels (values of ``SENTIMENT_LABELS``), one per input text,
        in input order. An empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    if isinstance(texts, str):
        texts = [texts]
    # The tokenizer only accepts strings; spreadsheets often contain numbers.
    cleaned = ["" if text is None else str(text) for text in texts]

    labels = []
    for start in range(0, len(cleaned), batch_size):
        batch = cleaned[start:start + batch_size]
        inputs = tokenizer(
            batch,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            logits = model(**inputs).logits
        # Softmax is monotonic, so the argmax of the logits is the argmax of
        # the probabilities.
        predicted = torch.argmax(logits, dim=-1).tolist()
        labels.extend(SENTIMENT_LABELS[index] for index in predicted)
    return labels


def process_single_sheet(df, product_name):
    """Label every review in one dataframe and count the labels.

    Args:
        df: DataFrame with a ``Reviews`` column. A ``Sentiment`` column is
            added to it in place.
        product_name: Name used in the error message when the column is
            missing.

    Returns:
        A ``(df, sentiment_counts)`` tuple where ``sentiment_counts`` is a
        Series mapping each sentiment label to its number of occurrences.

    Raises:
        ValueError: If ``df`` has no ``Reviews`` column.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    # Missing cells become empty strings; numeric cells become their text form.
    reviews = df['Reviews'].fillna("").astype(str)
    sentiments = predict_sentiment(reviews.tolist())
    df['Sentiment'] = sentiments

    # Explicit dtype avoids an inferred (and warned-about) dtype on empty input.
    sentiment_counts = pd.Series(sentiments, dtype="object").value_counts()
    return df, sentiment_counts


def create_comparison_charts(sentiment_results):
    """Build comparison charts and a summary table across products.

    Args:
        sentiment_results: Mapping of product name to a Series of
            ``{sentiment label: count}``.

    Returns:
        A ``(bar_fig, pie_fig, summary_df)`` tuple: a stacked bar chart of
        sentiment counts per product, a pie chart of the overall sentiment
        distribution, and a DataFrame indexed by product with one count column
        per sentiment plus ``Total Reviews`` and ``Positive Ratio`` (percent of
        reviews that are Positive or Very Positive).

    Raises:
        ValueError: If there are no sentiment counts to plot.
    """
    rows = [
        {'Product': product, 'Sentiment': sentiment, 'Count': count}
        for product, sentiment_counts in sentiment_results.items()
        for sentiment, count in sentiment_counts.items()
    ]
    if not rows:
        raise ValueError("No reviews found to analyze.")
    plot_df = pd.DataFrame(rows, columns=['Product', 'Sentiment', 'Count'])

    # Pin the category order so both charts list sentiments the same way.
    category_orders = {'Sentiment': SENTIMENT_ORDER}

    # Create stacked bar chart
    bar_fig = px.bar(
        plot_df,
        x='Product',
        y='Count',
        color='Sentiment',
        title='Sentiment Distribution by Product',
        labels={'Count': 'Number of Reviews'},
        category_orders=category_orders,
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Create pie chart for overall sentiment distribution
    pie_fig = px.pie(
        plot_df,
        values='Count',
        names='Sentiment',
        title='Overall Sentiment Distribution',
        category_orders=category_orders,
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Create summary table. Sum (not the default mean) so counts stay counts.
    summary_df = plot_df.pivot_table(
        values='Count',
        index='Product',
        columns='Sentiment',
        aggfunc='sum',
        fill_value=0,
    ).astype(int)

    # Show sentiment columns from most negative to most positive.
    known = [label for label in SENTIMENT_ORDER if label in summary_df.columns]
    extra = [label for label in summary_df.columns if label not in known]
    summary_df = summary_df[known + extra]

    # Add total reviews column
    summary_df['Total Reviews'] = summary_df.sum(axis=1)

    # Percentage of positive reviews (Positive + Very Positive)
    positive_cols = [
        col for col in ('Positive', 'Very Positive') if col in summary_df.columns
    ]
    summary_df['Positive Ratio'] = (
        summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100
    ).round(2)

    return bar_fig, pie_fig, summary_df


def _unique_sheet_name(base, taken):
    """Return ``base``, or ``base (n)``, so it does not clash with ``taken``."""
    # Excel treats sheet names case-insensitively.
    taken_lower = {str(name).lower() for name in taken}
    candidate = base
    suffix = 1
    while candidate.lower() in taken_lower:
        suffix += 1
        candidate = f"{base} ({suffix})"
    return candidate


def _load_product_frames(file_path):
    """Read the upload into a ``{product name: DataFrame}`` dict."""
    extension = os.path.splitext(file_path)[1].lower()

    if extension == '.csv':
        # A CSV holds a single product; use a default name for it.
        return {"Product": pd.read_csv(file_path)}

    if extension in ('.xlsx', '.xls'):
        # One sheet per product; open the workbook once and always close it.
        with pd.ExcelFile(file_path) as excel_file:
            return {
                sheet_name: excel_file.parse(sheet_name)
                for sheet_name in excel_file.sheet_names
            }

    raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")


def process_file(file_obj):
    """Run sentiment analysis on an uploaded file (Gradio callback).

    Args:
        file_obj: The uploaded file, either as an object with a ``name``
            attribute holding its path or as a path string.

    Returns:
        A ``(bar_chart, pie_chart, summary_table, output_path)`` tuple, where
        ``summary_table`` has the product name as its first column and
        ``output_path`` points to an Excel workbook with one labelled sheet per
        product plus a summary sheet.

    Raises:
        gr.Error: If no file was uploaded or processing fails; the message is
            shown to the user.
    """
    if file_obj is None:
        raise gr.Error("Please upload a CSV or Excel file first.")

    try:
        file_path = os.fspath(getattr(file_obj, "name", file_obj))

        sentiment_results = {}
        all_processed_dfs = {}
        for product_name, df in _load_product_frames(file_path).items():
            processed_df, sentiment_counts = process_single_sheet(df, product_name)
            all_processed_dfs[product_name] = processed_df
            sentiment_results[product_name] = sentiment_counts

        # Create visualizations
        bar_chart, pie_chart, summary_df = create_comparison_charts(sentiment_results)

        # Write to a fresh temp directory so concurrent requests never
        # overwrite each other's results, while keeping a stable file name.
        output_path = os.path.join(tempfile.mkdtemp(), OUTPUT_FILENAME)
        summary_sheet = _unique_sheet_name('Summary', all_processed_dfs)
        with pd.ExcelWriter(output_path) as writer:
            # Save processed data
            for sheet_name, df in all_processed_dfs.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)
            # Save summary
            summary_df.to_excel(writer, sheet_name=summary_sheet, index=True)

        # The UI table does not render the index, so expose the product name
        # as a regular column.
        display_df = summary_df.reset_index().rename_axis(columns=None)

        return bar_chart, pie_chart, display_df, output_path
    except gr.Error:
        raise
    except Exception as exc:
        # Top-level UI boundary: surface the message to the user.
        raise gr.Error(str(exc)) from exc


# Create Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# Multi-Product Review Sentiment Analysis")
    gr.Markdown(
        "Upload a file to analyze sentiments:\n"
        "- For CSV: Single product reviews with 'Reviews' column\n"
        "- For Excel: Multiple sheets, each named after the product, "
        "with 'Reviews' column\n"
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload File (CSV or Excel)",
            file_types=[".csv", ".xlsx", ".xls"],
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze Sentiments")

    with gr.Row():
        bar_plot = gr.Plot(label="Sentiment Distribution by Product")
        pie_plot = gr.Plot(label="Overall Sentiment Distribution")

    with gr.Row():
        summary_table = gr.Dataframe(label="Summary Statistics")

    with gr.Row():
        output_file = gr.File(label="Download Detailed Results")

    analyze_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[bar_plot, pie_plot, summary_table, output_file],
    )

# Launch the interface only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()