Spaces:

mxiean
/

G10_TripAdvisor

Sleeping

App Files Files Community

mxiean committed on Mar 27

Commit

49fc581

verified ·

1 Parent(s): a80b27b

Create app.py

Browse files

Files changed (1) hide show

app.py +219 -0

app.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import streamlit as st
+from transformers import pipeline
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+import pandas as pd
+from datetime import datetime
+# Constants
+# Maps the integer class index parsed from the sentiment model's label
+# suffix (see analyze_sentiment) to the star-rating text shown in the UI.
+# Insertion order matters: plot_sentiment_distribution uses the values in
+# this order when selecting pie slices.
+RATING_MAP = {
+    0: "Negative (⭐)",
+    1: "Neutral (⭐⭐)",
+    2: "Positive (⭐⭐⭐)"
+}
+# Load models
+@st.cache_resource
+def load_models():
+    """Build the two Hugging Face pipelines the app relies on.
+
+    The function is wrapped in ``st.cache_resource``, so the pipelines are
+    meant to be constructed once and reused instead of being rebuilt on
+    every call.
+
+    Returns:
+        A ``(sentiment_model, ner_model)`` tuple: a text-classification
+        pipeline followed by a named-entity-recognition pipeline.
+    """
+    sentiment_checkpoint = "AndrewLi403/CustomModel_tripadvisor_finetuned"
+    ner_checkpoint = "dslim/bert-base-NER"
+    classifier = pipeline("text-classification", model=sentiment_checkpoint)
+    tagger = pipeline("ner", model=ner_checkpoint)
+    return classifier, tagger
+# Sentiment analysis
+def analyze_sentiment(text, model):
+    """Classify a single review and translate the result for display.
+
+    Args:
+        text: The review text handed to ``model``.
+        model: A callable returning a sequence whose first item is a dict
+            with ``'label'`` and ``'score'`` keys. The label must end in
+            ``_<int>`` (for example ``LABEL_2``).
+
+    Returns:
+        A dict with ``'rating'`` (the parsed integer), ``'label'`` (the
+        matching ``RATING_MAP`` text) and ``'score'`` (the model's score).
+
+    Raises:
+        ValueError: If the label suffix is not an integer.
+        KeyError: If the parsed integer is not a key of ``RATING_MAP``.
+    """
+    top_prediction = model(text)[0]
+    # Whatever follows the last underscore is the class index (0, 1, or 2).
+    _, _, suffix = top_prediction['label'].rpartition('_')
+    rating = int(suffix)
+    return dict(
+        rating=rating,
+        label=RATING_MAP[rating],
+        score=top_prediction['score'],
+    )
+# Entity extraction
+def extract_aspects(text, model):
+    """Extract organisation, person and product mentions from a review.
+
+    WordPiece continuation tokens (words starting with ``##``) are glued
+    onto the preceding token. Separate whole-word tokens are NOT merged,
+    so a multi-word name yields one entry per word.
+
+    Token tags may be bare (``ORG``) or BIO-prefixed (``B-ORG``, ``I-PER``);
+    the prefix is stripped before filtering, otherwise BIO-tagged output
+    would never match the accepted types and the result would always be
+    empty.
+
+    Args:
+        text: The review text handed to ``model``.
+        model: A callable returning an iterable of dicts, each with a
+            ``'word'`` and an ``'entity'`` (tag) key.
+
+    Returns:
+        A list of ``{'entity': str, 'type': str}`` dicts where ``type`` is
+        one of ``'PRODUCT'``, ``'ORG'`` or ``'PERSON'``.
+    """
+    # Accepted tag -> type reported to the caller. 'PER' is the abbreviated
+    # person tag used by CoNLL-style label sets.
+    reported_type = {
+        'PRODUCT': 'PRODUCT',
+        'ORG': 'ORG',
+        'PERSON': 'PERSON',
+        'PER': 'PERSON',
+    }
+    merged = []
+    for token in model(text):
+        word = token['word']
+        is_subword = word.startswith('##')
+        if is_subword:
+            word = word[2:]
+        if is_subword and merged:
+            # Continuation piece: extend the entity currently being built.
+            merged[-1]['entity'] += word
+            continue
+        # Either a fresh word, or a continuation piece with nothing before
+        # it (previously this case crashed on an unbound variable).
+        label = token['entity']
+        if label.startswith(('B-', 'I-')):
+            label = label[2:]
+        merged.append({'entity': word, 'type': label})
+    return [
+        {'entity': item['entity'], 'type': reported_type[item['type']]}
+        for item in merged
+        if item['entity'] and item['type'] in reported_type
+    ]
+# Visualization functions
+def plot_sentiment_distribution(df):
+    """Draw a pie chart of how many reviews fall into each sentiment class.
+
+    Args:
+        df: A DataFrame with a ``'label'`` column holding ``RATING_MAP``
+            values.
+
+    Returns:
+        The matplotlib figure containing the pie chart. The axes are left
+        empty when ``df`` holds none of the known labels.
+    """
+    palette = ['#ff9999', '#66b3ff', '#99ff99']
+    ordered_labels = list(RATING_MAP.values())
+    # reindex (unlike .loc with a list) tolerates classes that have not
+    # occurred yet, e.g. right after the very first review is analysed.
+    counts = df['label'].value_counts().reindex(ordered_labels, fill_value=0)
+    present = counts > 0
+    fig, ax = plt.subplots()
+    if present.any():
+        # Drop empty classes so no "0.0%" wedge is drawn, while keeping
+        # each remaining class paired with its own colour.
+        counts[present].plot.pie(
+            autopct='%1.1f%%',
+            colors=[shade for shade, keep in zip(palette, present) if keep],
+            ax=ax,
+        )
+    ax.set_ylabel('')
+    return fig
+def plot_wordcloud(negative_reviews):
+    """Render a word cloud built from the given review texts.
+
+    Args:
+        negative_reviews: An iterable of strings; they are joined with
+            single spaces into one corpus.
+
+    Returns:
+        The matplotlib figure showing the word cloud with axes hidden.
+    """
+    corpus = " ".join(negative_reviews)
+    cloud_builder = WordCloud(
+        width=800,
+        height=400,
+        background_color='white',
+        colormap='Reds',
+    )
+    cloud_image = cloud_builder.generate(corpus)
+    figure, axes = plt.subplots(figsize=(10, 5))
+    axes.imshow(cloud_image, interpolation='bilinear')
+    axes.axis('off')
+    return figure
+# Main app
+def _analyze_review(text, sentiment_model, ner_model):
+    """Run both models on one review; return ``(history_row, sentiment)``."""
+    sentiment = analyze_sentiment(text, sentiment_model)
+    history_row = {
+        'text': text,
+        'rating': sentiment['rating'],
+        'label': sentiment['label'],
+        'date': datetime.now(),
+        'aspects': extract_aspects(text, ner_model),
+    }
+    return history_row, sentiment
+def _append_history(rows):
+    """Append a list of history-row dicts to ``st.session_state.history``."""
+    if not rows:
+        return
+    new_rows = pd.DataFrame(rows)
+    history = st.session_state.history
+    if history.empty:
+        # Skip concatenating with the empty placeholder frame: its
+        # object-dtype columns can leak into the result and turn the
+        # numeric 'rating' column into objects.
+        st.session_state.history = new_rows
+    else:
+        st.session_state.history = pd.concat(
+            [history, new_rows],
+            ignore_index=True
+        )
+def _render_single_review(sentiment_model, ner_model):
+    """Show the single-review form and, on submit, its analysis results."""
+    user_input = st.text_area("Enter or paste a restaurant review:", height=150)
+    if not st.button("Analyze"):
+        return
+    # Whitespace-only input is treated the same as no input.
+    if not user_input or not user_input.strip():
+        st.warning("Please enter a review")
+        return
+    with st.spinner("Analyzing..."):
+        history_row, sentiment = _analyze_review(
+            user_input, sentiment_model, ner_model
+        )
+        _append_history([history_row])
+        # Display results
+        st.subheader("Analysis Results")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.metric("Rating", sentiment['label'])
+        with col2:
+            st.metric("Confidence", f"{sentiment['score']:.2f}")
+        aspects = history_row['aspects']
+        if aspects:
+            st.subheader("Identified Aspects")
+            for aspect in aspects:
+                # The closing quote of this f-string was missing, which made
+                # the whole file fail to parse.
+                st.markdown(f"- **{aspect['type']}**: `{aspect['entity']}`")
+        else:
+            st.info("No specific entities identified")
+def _render_batch(sentiment_model, ner_model):
+    """Show the CSV uploader and, on submit, analyse every review in it."""
+    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
+    if not uploaded_file:
+        return
+    try:
+        df = pd.read_csv(uploaded_file)
+    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
+        st.error(f"Could not read CSV file: {err}")
+        return
+    if 'text' not in df.columns:
+        st.error("CSV must contain 'text' column")
+        return
+    if not st.button("Analyze All"):
+        return
+    # Blank cells are read as NaN (a float) and would crash the models;
+    # skip them along with whitespace-only entries.
+    texts = [str(cell) for cell in df['text'].dropna() if str(cell).strip()]
+    progress_bar = st.progress(0)
+    rows = []
+    for done, text in enumerate(texts, start=1):
+        history_row, _ = _analyze_review(text, sentiment_model, ner_model)
+        rows.append(history_row)
+        progress_bar.progress(done / len(texts))
+    _append_history(rows)
+    st.success(f"Completed analysis of {len(rows)} reviews")
+def _render_history():
+    """Show the accumulated history table and its visualizations."""
+    history = st.session_state.history
+    if history.empty:
+        return
+    st.divider()
+    st.header("Analysis History")
+    # Raw data display
+    with st.expander("View Raw Data"):
+        st.dataframe(history)
+    # Visualizations
+    st.subheader("Sentiment Distribution")
+    st.pyplot(plot_sentiment_distribution(history))
+    # Negative reviews word cloud
+    negative_reviews = history.loc[history['rating'] == 0, 'text'].tolist()
+    if negative_reviews:
+        st.subheader("Negative Reviews Word Cloud")
+        st.pyplot(plot_wordcloud(negative_reviews))
+    else:
+        st.info("No negative reviews yet")
+    # Time trend analysis
+    if len(history) > 1:
+        st.subheader("Rating Trend Over Time")
+        trend = history[['date', 'rating']].copy()
+        trend['date'] = pd.to_datetime(trend['date'])
+        # Force a numeric dtype so the daily mean cannot fail on an
+        # object-dtype column.
+        trend['rating'] = pd.to_numeric(trend['rating'])
+        st.line_chart(trend.set_index('date').resample('D')['rating'].mean())
+def main():
+    """Render the Streamlit page: mode selection, analysis, and history.
+
+    Loads the models, lets the user choose between analysing one review or
+    a CSV of reviews (which must have a ``text`` column), stores each
+    result in ``st.session_state.history`` and then renders the history
+    table, sentiment pie chart, negative-review word cloud and daily
+    rating trend.
+    """
+    st.title("Restaurant Review Analyzer")
+    st.markdown("Using fine-tuned model for sentiment and aspect analysis")
+    # Initialize models
+    sentiment_model, ner_model = load_models()
+    # Sidebar controls
+    st.sidebar.header("Analysis Options")
+    analysis_mode = st.sidebar.radio(
+        "Select Mode",
+        ["Single Review", "Batch Analysis"]
+    )
+    # Initialize session state
+    if 'history' not in st.session_state:
+        st.session_state.history = pd.DataFrame(
+            columns=['text', 'rating', 'label', 'date', 'aspects']
+        )
+    if analysis_mode == "Single Review":
+        _render_single_review(sentiment_model, ner_model)
+    else:
+        _render_batch(sentiment_model, ner_model)
+    # Display historical data and visualizations
+    _render_history()
+# Entry-point guard: build the page only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()