Spaces:

mxiean
/

G10_TripAdvisor

Sleeping

File size: 6,781 Bytes

import streamlit as st
from transformers import pipeline
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import pandas as pd
from datetime import datetime

# Lookup table from integer rating (0, 1, 2) to the label string shown in the UI.
RATING_MAP = dict(enumerate((
    "Negative (⭐)",
    "Neutral (⭐⭐)",
    "Positive (⭐⭐⭐)",
)))

@st.cache_resource
def load_models():
    """Create the sentiment and NER pipelines used by the app.

    The function is wrapped in ``st.cache_resource``, so the pipelines are
    meant to be built once and reused rather than rebuilt on every call.

    Returns:
        A ``(sentiment_model, ner_model)`` tuple of ``transformers`` pipelines.
    """
    sentiment_checkpoint = "AndrewLi403/CustomModel_tripadvisor_finetuned"
    ner_checkpoint = "dslim/bert-base-NER"

    classifier = pipeline("text-classification", model=sentiment_checkpoint)
    tagger = pipeline("ner", model=ner_checkpoint)
    return classifier, tagger

def analyze_sentiment(text, model):
    """Classify *text* and translate the model output into a rating record.

    Args:
        text: Review text passed straight to ``model``.
        model: Callable returning a list of dicts with ``'label'`` and
            ``'score'`` keys; only the first element is used.

    Returns:
        A dict with ``'rating'`` (the integer after the last underscore of
        the predicted label), ``'label'`` (its ``RATING_MAP`` entry) and
        ``'score'`` (the model's score for that prediction).
    """
    top_prediction = model(text)[0]
    # The numeric class id is whatever follows the final "_" in the label.
    class_suffix = top_prediction['label'].rsplit('_', 1)[-1]
    rating = int(class_suffix)
    return dict(
        rating=rating,
        label=RATING_MAP[rating],
        score=top_prediction['score'],
    )

def extract_aspects(text, model):
    """Extract aspect-like named entities from *text*.

    Word-piece fragments (tokens whose ``'word'`` starts with ``'##'``) are
    glued onto the entity currently being built. Entity tags are normalised
    by dropping a leading ``B-``/``I-`` position marker, so ``'B-ORG'`` and
    ``'ORG'`` are treated alike.

    Args:
        text: Review text passed straight to ``model``.
        model: Callable returning an iterable of dicts with ``'word'`` and
            ``'entity'`` keys (one per tagged token).

    Returns:
        A list of ``{'entity': str, 'type': str}`` dicts, restricted to
        organisation, person and product types. ``'type'`` holds the
        normalised tag (no ``B-``/``I-`` prefix).
    """
    # 'PER' is included next to 'PERSON' because BIO-style taggers commonly
    # abbreviate the person class; the original names are kept for models
    # that emit them verbatim.
    allowed_types = {'PRODUCT', 'ORG', 'PERSON', 'PER'}

    def base_type(tag):
        # "B-ORG" / "I-ORG" -> "ORG"; tags without a position marker pass through.
        return tag[2:] if tag[:2] in ('B-', 'I-') else tag

    aspects = []
    current_entity = ""
    current_type = None

    for entity in model(text):
        word = entity['word']

        if word.startswith('##'):
            if not current_entity:
                # Orphan fragment with no preceding token: start a new entity
                # from it instead of relying on a type that was never set.
                current_type = base_type(entity['entity'])
            current_entity += word[2:]
            continue

        # A full token closes the entity in progress and opens a new one.
        if current_entity:
            aspects.append({'entity': current_entity, 'type': current_type})
        current_entity = word
        current_type = base_type(entity['entity'])

    if current_entity:
        aspects.append({'entity': current_entity, 'type': current_type})

    return [a for a in aspects if a['type'] in allowed_types]

def plot_sentiment_distribution(df):
    """Draw a pie chart of how often each sentiment label occurs.

    Args:
        df: DataFrame with a ``'label'`` column holding ``RATING_MAP`` values.

    Returns:
        The matplotlib ``Figure`` containing the pie chart. If no row carries
        a known label the figure is returned with an empty, hidden axis.
    """
    labels = list(RATING_MAP.values())
    # Tie each colour to its label so a slice keeps its colour even when
    # other classes are absent from the data.
    colour_by_label = dict(zip(labels, ['#ff9999', '#66b3ff', '#99ff99']))

    # reindex() tolerates labels that have not occurred yet; selecting them
    # with .loc[...] raises KeyError (e.g. after the very first review).
    counts = df['label'].value_counts().reindex(labels, fill_value=0)
    # Zero-sized wedges only add overlapping "0.0%" annotations.
    counts = counts[counts > 0]

    fig, ax = plt.subplots()
    if counts.empty:
        ax.axis('off')
        return fig

    counts.plot.pie(
        autopct='%1.1f%%',
        colors=[colour_by_label[label] for label in counts.index],
        ax=ax,
    )
    ax.set_ylabel('')
    return fig

def plot_wordcloud(negative_reviews):
    """Render a word cloud built from the given review texts.

    Args:
        negative_reviews: Iterable of strings; they are joined with single
            spaces into one corpus.

    Returns:
        A 10x5 matplotlib ``Figure`` showing the cloud with the axes hidden.
    """
    cloud_options = {
        'width': 800,
        'height': 400,
        'background_color': 'white',
        'colormap': 'Reds',
    }
    corpus = " ".join(negative_reviews)
    cloud = WordCloud(**cloud_options).generate(corpus)

    figure, axis = plt.subplots(figsize=(10, 5))
    axis.imshow(cloud, interpolation='bilinear')
    axis.axis('off')
    return figure

def _make_record(text, sentiment, aspects):
    """Build one history row from a review and its analysis results."""
    return {
        'text': text,
        'rating': sentiment['rating'],
        'label': sentiment['label'],
        'date': datetime.now(),
        'aspects': aspects,
    }


def _append_history(records):
    """Append analysed review records to ``st.session_state.history``."""
    if not records:
        return
    new_rows = pd.DataFrame(records)
    history = st.session_state.history
    if history.empty:
        # Replace the column-only placeholder rather than concatenating onto
        # it, so the numeric 'rating' column keeps a numeric dtype.
        st.session_state.history = new_rows
    else:
        st.session_state.history = pd.concat(
            [history, new_rows],
            ignore_index=True,
        )


def _run_single_review(sentiment_model, ner_model):
    """Handle the "Single Review" mode: one text box, one analysis."""
    user_input = st.text_area("Enter or paste a restaurant review:", height=150)

    if not st.button("Analyze"):
        return
    if not user_input or not user_input.strip():
        st.warning("Please enter a review")
        return

    with st.spinner("Analyzing..."):
        sentiment = analyze_sentiment(user_input, sentiment_model)
        aspects = extract_aspects(user_input, ner_model)
        _append_history([_make_record(user_input, sentiment, aspects)])

        st.subheader("Analysis Results")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Rating", sentiment['label'])
        with col2:
            st.metric("Confidence", f"{sentiment['score']:.2f}")

        if aspects:
            st.subheader("Identified Aspects")
            for aspect in aspects:
                st.markdown(f"- **{aspect['type']}**: `{aspect['entity']}`")
        else:
            st.info("No specific entities identified")


def _run_batch_analysis(sentiment_model, ner_model):
    """Handle the "Batch Analysis" mode: analyse every review in a CSV."""
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    if 'text' not in df.columns:
        st.error("CSV must contain 'text' column")
        return
    if not st.button("Analyze All"):
        return

    # Missing cells come back from read_csv as NaN (a float); drop them and
    # blank strings so only real review text reaches the models.
    texts = [t for t in df['text'].dropna().astype(str) if t.strip()]
    if not texts:
        st.warning("No non-empty reviews found in the 'text' column")
        return

    progress_bar = st.progress(0)
    records = []
    for done, text in enumerate(texts, start=1):
        sentiment = analyze_sentiment(text, sentiment_model)
        aspects = extract_aspects(text, ner_model)
        records.append(_make_record(text, sentiment, aspects))
        progress_bar.progress(done / len(texts))

    _append_history(records)
    st.success(f"Completed analysis of {len(texts)} reviews")


def _render_history(history):
    """Show the raw history table and the charts derived from it."""
    st.divider()
    st.header("Analysis History")

    with st.expander("View Raw Data"):
        st.dataframe(history)

    st.subheader("Sentiment Distribution")
    fig1 = plot_sentiment_distribution(history)
    st.pyplot(fig1)
    # Figures made through pyplot stay registered until closed; release them
    # so they do not pile up across reruns.
    plt.close(fig1)

    negative_reviews = history[history['rating'] == 0]['text'].tolist()
    if negative_reviews:
        st.subheader("Negative Reviews Word Cloud")
        fig2 = plot_wordcloud(negative_reviews)
        st.pyplot(fig2)
        plt.close(fig2)
    else:
        st.info("No negative reviews yet")

    if len(history) > 1:
        st.subheader("Rating Trend Over Time")
        time_df = history.copy()
        time_df['date'] = pd.to_datetime(time_df['date'])
        # Guard against an object-dtype column before averaging.
        time_df['rating'] = pd.to_numeric(time_df['rating'])
        time_df = time_df.set_index('date').resample('D')['rating'].mean()
        st.line_chart(time_df)


def main():
    """Run the Streamlit app.

    Loads the models, initialises the per-session history frame, dispatches
    to the mode chosen in the sidebar, and renders the history section once
    at least one review has been analysed.
    """
    st.title("Restaurant Review Analyzer")
    st.markdown("Using fine-tuned model for sentiment and aspect analysis")

    sentiment_model, ner_model = load_models()

    st.sidebar.header("Analysis Options")
    analysis_mode = st.sidebar.radio(
        "Select Mode",
        ["Single Review", "Batch Analysis"]
    )

    if 'history' not in st.session_state:
        st.session_state.history = pd.DataFrame(
            columns=['text', 'rating', 'label', 'date', 'aspects']
        )

    if analysis_mode == "Single Review":
        _run_single_review(sentiment_model, ner_model)
    else:
        _run_batch_analysis(sentiment_model, ner_model)

    if not st.session_state.history.empty:
        _render_history(st.session_state.history)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()