import os
import tempfile
from collections import defaultdict

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and the classification model once, at import time, so
# that every call to predict_sentiment() reuses the same module-level objects
# instead of reloading them per request.
model_name = "tabularisai/multilingual-sentiment-analysis"  # checkpoint identifier passed to from_pretrained()
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


def predict_sentiment(texts, batch_size=32):
    """
    Predict a sentiment label for each text in a list.

    Texts are tokenized and scored in fixed-size batches so that memory use
    stays bounded regardless of how many reviews are passed in.

    Args:
        texts: List of strings to classify. An empty list is allowed and
            yields an empty result. A single string is treated as a
            one-element list.
        batch_size: Maximum number of texts sent through the model in one
            forward pass. Must be a positive integer.

    Returns:
        A list with one label per input text, in input order. Each label is
        one of "Very Negative", "Negative", "Neutral", "Positive" or
        "Very Positive".

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Class index produced by the model -> human-readable label.
    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive"
    }

    # A bare string would otherwise be split into characters by list().
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)

    labels = []
    # With no texts the loop body never runs, so the tokenizer and model are
    # not invoked on an empty batch.
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        # Softmax is monotonic, so the argmax of the raw logits selects the
        # same class as the argmax of the probabilities.
        predicted = torch.argmax(outputs.logits, dim=-1).tolist()
        labels.extend(sentiment_map[p] for p in predicted)

    return labels


def process_single_sheet(df, product_name):
    """
    Run sentiment analysis on the 'Reviews' column of one dataframe.

    Args:
        df: DataFrame that must contain a 'Reviews' column. It is not
            modified; the result is built on a copy.
        product_name: Name used in the error message when the 'Reviews'
            column is missing.

    Returns:
        A tuple (result_df, sentiment_counts): result_df is a copy of df with
        an added 'Sentiment' column holding one label per row, and
        sentiment_counts is a Series mapping each label to the number of rows
        that received it.

    Raises:
        ValueError: If df has no 'Reviews' column.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    # Work on a copy so the caller's DataFrame is left untouched.
    result_df = df.copy()

    # Missing cells become empty strings; everything else is coerced to str
    # because the values are handed to a text tokenizer and spreadsheet cells
    # may hold numbers or dates.
    reviews = result_df['Reviews'].fillna("").astype(str)
    sentiments = predict_sentiment(reviews.tolist())
    result_df['Sentiment'] = sentiments

    # Calculate sentiment distribution
    sentiment_counts = pd.Series(sentiments).value_counts()

    return result_df, sentiment_counts


def create_comparison_charts(sentiment_results):
    """
    Build comparison charts and a summary table from per-product counts.

    Args:
        sentiment_results: Mapping of product name to a pandas Series whose
            index holds sentiment labels and whose values hold review counts.

    Returns:
        A tuple (bar_fig, pie_fig, summary_df):
        - bar_fig: bar chart of review counts per product, coloured by
          sentiment.
        - pie_fig: pie chart of the sentiment distribution over all products.
        - summary_df: table indexed by product with one column per sentiment
          label, a 'Total Reviews' column, and a 'Positive Ratio' column
          holding the percentage (0-100) of 'Positive' plus 'Very Positive'
          reviews.
    """
    # Fixed label order so legend order and colour assignment do not depend on
    # which label happens to appear first in the data.
    sentiment_order = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]
    category_orders = {'Sentiment': sentiment_order}

    # Flatten the mapping into one (product, sentiment, count) row per pair.
    rows = [
        (product, sentiment, count)
        for product, sentiment_counts in sentiment_results.items()
        for sentiment, count in sentiment_counts.items()
    ]
    plot_df = pd.DataFrame(rows, columns=['Product', 'Sentiment', 'Count'])

    # Create stacked bar chart
    bar_fig = px.bar(plot_df,
                     x='Product',
                     y='Count',
                     color='Sentiment',
                     title='Sentiment Distribution by Product',
                     labels={'Count': 'Number of Reviews'},
                     category_orders=category_orders,
                     color_discrete_sequence=px.colors.qualitative.Set3)

    # Create pie chart for overall sentiment distribution
    pie_fig = px.pie(plot_df,
                     values='Count',
                     names='Sentiment',
                     title='Overall Sentiment Distribution',
                     category_orders=category_orders,
                     color_discrete_sequence=px.colors.qualitative.Set3)

    # Create summary table. Counts are totals, so aggregate with 'sum' rather
    # than pivot_table's default 'mean'.
    summary_df = plot_df.pivot_table(
        values='Count',
        index='Product',
        columns='Sentiment',
        aggfunc='sum',
        fill_value=0
    ).round(2)

    # Add total reviews column
    summary_df['Total Reviews'] = summary_df.sum(axis=1)

    # Calculate percentage of positive reviews (Positive + Very Positive);
    # only columns that actually occur in the data are summed.
    positive_cols = ['Positive', 'Very Positive']
    positive_cols = [col for col in positive_cols if col in summary_df.columns]
    summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)

    return bar_fig, pie_fig, summary_df


def process_file(file_obj):
    """
    Run sentiment analysis on an uploaded CSV or Excel file.

    A CSV file is treated as a single product named "Product". For an Excel
    workbook, every sheet is processed as one product named after the sheet.
    Each sheet/file must contain a 'Reviews' column.

    Args:
        file_obj: The uploaded file, either an object exposing the file path
            as a `.name` attribute or a plain path string.

    Returns:
        A tuple (bar_chart, pie_chart, summary_table, output_path):
        the two Plotly figures, the summary DataFrame with the product name
        as a regular 'Product' column, and the path of an Excel workbook
        holding the per-product results plus a 'Summary' sheet.

    Raises:
        gr.Error: On any failure (no file, unsupported format, missing
            'Reviews' column, read/write errors); the message is the text of
            the underlying exception.
    """
    try:
        if file_obj is None:
            raise ValueError("No file uploaded. Please upload a CSV or Excel file.")

        # Accept either an object exposing a .name path or a plain path string.
        file_path = str(getattr(file_obj, 'name', file_obj))
        # Compare extensions case-insensitively so e.g. "REVIEWS.CSV" works.
        lowered_path = file_path.lower()
        sentiment_results = {}
        all_processed_dfs = {}

        if lowered_path.endswith('.csv'):
            # Process single CSV file
            df = pd.read_csv(file_path)
            product_name = "Product"  # Default name for CSV
            processed_df, sentiment_counts = process_single_sheet(df, product_name)
            all_processed_dfs[product_name] = processed_df
            sentiment_results[product_name] = sentiment_counts

        elif lowered_path.endswith(('.xlsx', '.xls')):
            # Process multiple sheets in Excel file. The workbook is opened
            # once, every sheet is parsed from that handle, and the handle is
            # closed when the block exits.
            with pd.ExcelFile(file_path) as excel_file:
                for sheet_name in excel_file.sheet_names:
                    df = excel_file.parse(sheet_name)
                    processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
                    all_processed_dfs[sheet_name] = processed_df
                    sentiment_results[sheet_name] = sentiment_counts
        else:
            raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")

        # Create visualizations
        bar_chart, pie_chart, summary_table = create_comparison_charts(sentiment_results)

        # Save results to a new Excel file. Each call writes into its own
        # temporary directory so concurrent requests cannot overwrite each
        # other's output; the directory is left for the OS temp cleanup.
        output_dir = tempfile.mkdtemp(prefix="sentiment_analysis_")
        output_path = os.path.join(output_dir, "sentiment_analysis_results.xlsx")
        with pd.ExcelWriter(output_path) as writer:
            # Save processed data
            for sheet_name, df in all_processed_dfs.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)

            # Save summary
            summary_table.to_excel(writer, sheet_name='Summary', index=True)

        return (
            bar_chart,
            pie_chart,
            # Product names live in the index; expose them as a column so
            # they are visible when the table is displayed.
            summary_table.reset_index(),
            output_path
        )

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # attached for server-side debugging.
        raise gr.Error(str(e)) from e


# Create Gradio interface: a file upload, an "analyze" button, two plots, a
# summary table and a download slot, stacked in rows in that order.
with gr.Blocks() as interface:
    gr.Markdown("# Multi-Product Review Sentiment Analysis")
    gr.Markdown("""
    Upload a file to analyze sentiments:
    - For CSV: Single product reviews with 'Reviews' column
    - For Excel: Multiple sheets, each named after the product, with 'Reviews' column
    """)

    # Input: restricted to the extensions that process_file() accepts.
    with gr.Row():
        file_input = gr.File(
            label="Upload File (CSV or Excel)",
            file_types=[".csv", ".xlsx", ".xls"]
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze Sentiments")

    # Outputs: the two charts share one row.
    with gr.Row():
        bar_plot = gr.Plot(label="Sentiment Distribution by Product")
        pie_plot = gr.Plot(label="Overall Sentiment Distribution")

    with gr.Row():
        summary_table = gr.Dataframe(label="Summary Statistics")

    with gr.Row():
        output_file = gr.File(label="Download Detailed Results")

    # process_file() returns (bar chart, pie chart, summary table, output
    # path); the outputs list below must stay in that same order.
    analyze_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[bar_plot, pie_plot, summary_table, output_file]
    )

# Launch the interface. There is no `if __name__ == "__main__":` guard, so
# this runs whenever the module is executed or imported.
interface.launch()