Spaces:

mxiean
/

G10_TripAdvisor

Sleeping

App Files Files Community

mxiean committed on Mar 27

Commit

5073adc

verified ·

1 Parent(s): db24cbc

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -107

app.py CHANGED Viewed

@@ -1,16 +1,7 @@
 import streamlit as st
 from transformers import pipeline
-import matplotlib.pyplot as plt
-from wordcloud import WordCloud
 import pandas as pd
 from datetime import datetime
-from collections import Counter
-import re
-from nltk.corpus import stopwords
-import nltk
-# Download NLTK stopwords (first-time only)
-nltk.download('stopwords')
 # Constants
 RATING_MAP = {
@@ -21,125 +12,70 @@ RATING_MAP = {
 @st.cache_resource
 def load_models():
     sentiment_model = pipeline(
         "text-classification",
         model="AndrewLi403/CustomModel_tripadvisor_finetuned"
     )
-    return sentiment_model
-def preprocess_text(text):
-    """Clean and tokenize English text"""
-    # Convert to lowercase
-    text = text.lower()
-    # Remove special characters
-    text = re.sub(r'[^\w\s]', '', text)
-    # Tokenize
-    words = text.split()
-    # Remove stopwords
-    stop_words = set(stopwords.words('english'))
-    words = [w for w in words if w not in stop_words and len(w) > 2]
-    return words
-def analyze_sentiment(text, model):
-    result = model(text)[0]
-    rating = int(result['label'].split('_')[-1])
-    return {
-        'rating': rating,
-        'label': RATING_MAP[rating],
-        'score': result['score']
-    }
-def generate_wordcloud(text, sentiment):
-    """Generate word cloud from English text"""
-    words = preprocess_text(text)
-    word_freq = Counter(words)
-    wc = WordCloud(
-        width=800,
-        height=400,
-        background_color='white',
-        colormap='Reds' if sentiment['rating'] == 0 else 'Greens',
-        collocations=False  # Better for single documents
-    ).generate_from_frequencies(word_freq)
-    fig, ax = plt.subplots(figsize=(10, 5))
-    ax.imshow(wc, interpolation='bilinear')
-    ax.axis('off')
-    return fig
-def display_top_keywords(text, n=10):
-    """Show most frequent keywords"""
-    words = preprocess_text(text)
-    counter = Counter(words)
-    top_words = counter.most_common(n)
-    st.subheader(f"Top {n} Keywords")
-    cols = st.columns(2)
-    for i, (word, count) in enumerate(top_words):
-        cols[i%2].metric(f"{word.title()}", f"{count} mentions")
 def main():
-    st.title("Tripadvisor Hotel Review Analyzer")
-    st.markdown("Instant sentiment and keyword analysis for English reviews")
-    if 'model' not in st.session_state:
-        st.session_state.model = load_models()
-    user_input = st.text_area("Paste your English review here:", height=150)
-    if st.button("Analyze Review"):
-        if user_input:
             with st.spinner("Analyzing..."):
-                # Sentiment analysis
-                sentiment = analyze_sentiment(user_input, st.session_state.model)
                 # Display results
                 st.subheader("Analysis Results")
                 col1, col2 = st.columns(2)
                 with col1:
-                    st.metric("Overall Rating", sentiment['label'])
-                with col2:
-                    st.metric("Confidence Score", f"{sentiment['score']:.0%}")
-                # Generate visualizations
-                st.subheader("Keyword Visualization")
-                tab1, tab2 = st.tabs(["Word Cloud", "Top Keywords"])
-                with tab1:
-                    fig = generate_wordcloud(user_input, sentiment)
-                    st.pyplot(fig)
-                with tab2:
-                    display_top_keywords(user_input)
-                # Store in session history
-                if 'history' not in st.session_state:
-                    st.session_state.history = []
-                st.session_state.history.append({
-                    'text': user_input[:100] + "..." if len(user_input) > 100 else user_input,
-                    'rating': sentiment['rating'],
-                    'date': datetime.now().strftime("%Y-%m-%d %H:%M")
-                })
         else:
-            st.warning("Please enter a review to analyze")
-    # Display history if exists
-    if 'history' in st.session_state and st.session_state.history:
-        st.divider()
-        with st.expander("Recent Analyses (Last 5)"):
-            history_df = pd.DataFrame(st.session_state.history[-5:])
-            st.dataframe(
-                history_df,
-                column_config={
-                    "text": "Review Excerpt",
-                    "rating": st.column_config.NumberColumn(
-                        "Rating",
-                        format="%d ⭐",
-                    ),
-                    "date": "Analyzed At"
-                },
-                hide_index=True
-            )
 if __name__ == "__main__":
     main()

 import streamlit as st
 from transformers import pipeline
 import pandas as pd
 from datetime import datetime
 # Constants
 RATING_MAP = {
 @st.cache_resource
 def load_models():
+    # Build both Hugging Face pipelines; the st.cache_resource decorator is there so they are not rebuilt on every rerun. First: the sentiment model.
     sentiment_model = pipeline(
         "text-classification",
         model="AndrewLi403/CustomModel_tripadvisor_finetuned"
     )
+    # Load fake review detection model (original note: the pipeline applies the score activation itself - TODO confirm for this model)
+    fake_detector = pipeline(
+        "text-classification",
+        model="filippoferrari/finetuning-fake-reviews-detector-model"
+    )
+    return sentiment_model, fake_detector
+def analyze_review(text, sentiment_model, fake_detector):
+    # Sentiment analysis: take the first prediction and parse the integer after the last "_" in its label (assumes labels shaped like "LABEL_3" - TODO confirm); int() raises ValueError otherwise
+    sentiment_result = sentiment_model(text)[0]
+    rating = int(sentiment_result['label'].split('_')[-1])
+    # Fake detection: NOTE(review) this assumes the detector emits the literal label 'FAKE'; if the model uses generic names (e.g. LABEL_0/LABEL_1) is_fake is always False - verify against the model's id2label
+    fake_result = fake_detector(text)[0]
+    is_fake = fake_result['label'] == 'FAKE'
+    return {
+        'sentiment': RATING_MAP[rating],
+        'sentiment_score': sentiment_result['score'],
+        'is_fake': is_fake,
+        'fake_score': fake_result['score']
+    }
 def main():
+    st.title("Hotel Review Analyzer")
+    st.markdown("Analyze sentiment and detect fake reviews")
+    # Load models (load_models is decorated with st.cache_resource, so both pipelines come back as one tuple)
+    sentiment_model, fake_detector = load_models()
+    # Input: free-text review; analysis only runs on button press, and an empty string takes the st.error branch
+    review_text = st.text_area("Paste your hotel review here:", height=150)
+    if st.button("Analyze"):
+        if review_text:
             with st.spinner("Analyzing..."):
+                # Get analysis results: dict with 'sentiment', 'sentiment_score', 'is_fake' and 'fake_score' keys
+                results = analyze_review(review_text, sentiment_model, fake_detector)
                 # Display results as two side-by-side metrics; each model's score is shown as the metric delta with two decimals
                 st.subheader("Analysis Results")
                 col1, col2 = st.columns(2)
                 with col1:
+                    st.metric("Sentiment Rating",
+                            results['sentiment'],
+                            delta=f"{results['sentiment_score']:.2f}")
+                with col2:
+                    st.metric("Authenticity",
+                            "SUSPICIOUS" if results['is_fake'] else "GENUINE",
+                            delta=f"{results['fake_score']:.2f}",
+                            delta_color="inverse" if results['is_fake'] else "normal")
+                # Warning for fake reviews: extra banner shown only when the detector flagged the review
+                if results['is_fake']:
+                    st.warning("⚠️ This review shows characteristics of potentially fake content!")
         else:
+            st.error("Please enter a review to analyze")
 if __name__ == "__main__":
     main()