mxiean committed on
Commit
5073adc
·
verified ·
1 Parent(s): db24cbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -107
app.py CHANGED
@@ -1,16 +1,7 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
- import matplotlib.pyplot as plt
4
- from wordcloud import WordCloud
5
  import pandas as pd
6
  from datetime import datetime
7
- from collections import Counter
8
- import re
9
- from nltk.corpus import stopwords
10
- import nltk
11
-
12
- # Download NLTK stopwords (first-time only)
13
- nltk.download('stopwords')
14
 
15
  # Constants
16
  RATING_MAP = {
@@ -21,125 +12,70 @@ RATING_MAP = {
21
 
22
  @st.cache_resource
23
  def load_models():
 
24
  sentiment_model = pipeline(
25
  "text-classification",
26
  model="AndrewLi403/CustomModel_tripadvisor_finetuned"
27
  )
28
- return sentiment_model
29
-
30
- def preprocess_text(text):
31
- """Clean and tokenize English text"""
32
- # Convert to lowercase
33
- text = text.lower()
34
- # Remove special characters
35
- text = re.sub(r'[^\w\s]', '', text)
36
- # Tokenize
37
- words = text.split()
38
- # Remove stopwords
39
- stop_words = set(stopwords.words('english'))
40
- words = [w for w in words if w not in stop_words and len(w) > 2]
41
- return words
42
-
43
- def analyze_sentiment(text, model):
44
- result = model(text)[0]
45
- rating = int(result['label'].split('_')[-1])
46
- return {
47
- 'rating': rating,
48
- 'label': RATING_MAP[rating],
49
- 'score': result['score']
50
- }
51
 
52
- def generate_wordcloud(text, sentiment):
53
- """Generate word cloud from English text"""
54
- words = preprocess_text(text)
55
- word_freq = Counter(words)
56
 
57
- wc = WordCloud(
58
- width=800,
59
- height=400,
60
- background_color='white',
61
- colormap='Reds' if sentiment['rating'] == 0 else 'Greens',
62
- collocations=False # Better for single documents
63
- ).generate_from_frequencies(word_freq)
64
-
65
- fig, ax = plt.subplots(figsize=(10, 5))
66
- ax.imshow(wc, interpolation='bilinear')
67
- ax.axis('off')
68
- return fig
69
-
70
- def display_top_keywords(text, n=10):
71
- """Show most frequent keywords"""
72
- words = preprocess_text(text)
73
- counter = Counter(words)
74
- top_words = counter.most_common(n)
75
 
76
- st.subheader(f"Top {n} Keywords")
77
- cols = st.columns(2)
78
- for i, (word, count) in enumerate(top_words):
79
- cols[i%2].metric(f"{word.title()}", f"{count} mentions")
 
 
80
 
81
  def main():
82
- st.title("Tripadvisor Hotel Review Analyzer")
83
- st.markdown("Instant sentiment and keyword analysis for English reviews")
84
 
85
- if 'model' not in st.session_state:
86
- st.session_state.model = load_models()
87
 
88
- user_input = st.text_area("Paste your English review here:", height=150)
 
89
 
90
- if st.button("Analyze Review"):
91
- if user_input:
92
  with st.spinner("Analyzing..."):
93
- # Sentiment analysis
94
- sentiment = analyze_sentiment(user_input, st.session_state.model)
95
 
96
  # Display results
97
  st.subheader("Analysis Results")
 
98
  col1, col2 = st.columns(2)
99
  with col1:
100
- st.metric("Overall Rating", sentiment['label'])
101
- with col2:
102
- st.metric("Confidence Score", f"{sentiment['score']:.0%}")
103
 
104
- # Generate visualizations
105
- st.subheader("Keyword Visualization")
106
- tab1, tab2 = st.tabs(["Word Cloud", "Top Keywords"])
107
-
108
- with tab1:
109
- fig = generate_wordcloud(user_input, sentiment)
110
- st.pyplot(fig)
111
-
112
- with tab2:
113
- display_top_keywords(user_input)
114
 
115
- # Store in session history
116
- if 'history' not in st.session_state:
117
- st.session_state.history = []
118
- st.session_state.history.append({
119
- 'text': user_input[:100] + "..." if len(user_input) > 100 else user_input,
120
- 'rating': sentiment['rating'],
121
- 'date': datetime.now().strftime("%Y-%m-%d %H:%M")
122
- })
123
  else:
124
- st.warning("Please enter a review to analyze")
125
-
126
- # Display history if exists
127
- if 'history' in st.session_state and st.session_state.history:
128
- st.divider()
129
- with st.expander("Recent Analyses (Last 5)"):
130
- history_df = pd.DataFrame(st.session_state.history[-5:])
131
- st.dataframe(
132
- history_df,
133
- column_config={
134
- "text": "Review Excerpt",
135
- "rating": st.column_config.NumberColumn(
136
- "Rating",
137
- format="%d ⭐",
138
- ),
139
- "date": "Analyzed At"
140
- },
141
- hide_index=True
142
- )
143
 
144
  if __name__ == "__main__":
145
  main()
 
1
  import streamlit as st
2
  from transformers import pipeline
 
 
3
  import pandas as pd
4
  from datetime import datetime
 
 
 
 
 
 
 
5
 
6
  # Constants
7
  RATING_MAP = {
 
12
 
@st.cache_resource
def load_models():
    """Build the two text-classification pipelines used by the app.

    The function is decorated with ``st.cache_resource``, so Streamlit
    caches the returned objects instead of rebuilding them on each call.

    Returns:
        A ``(sentiment_model, fake_detector)`` tuple of pipeline objects.
    """
    task = "text-classification"

    # Sentiment / rating classifier for hotel reviews.
    sentiment_model = pipeline(
        task,
        model="AndrewLi403/CustomModel_tripadvisor_finetuned",
    )

    # Fake-review detector.
    fake_detector = pipeline(
        task,
        model="filippoferrari/finetuning-fake-reviews-detector-model",
    )

    return sentiment_model, fake_detector
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
def analyze_review(text, sentiment_model, fake_detector):
    """Classify one review for sentiment and for authenticity.

    Args:
        text: The review text to analyze.
        sentiment_model: Callable (a text-classification pipeline) that
            returns a list of ``{'label': ..., 'score': ...}`` dicts. The
            label must end in ``_<int>``; that integer is looked up in
            ``RATING_MAP``.
        fake_detector: Callable with the same return shape whose label is
            compared against ``'FAKE'``.

        Both callables are invoked with ``truncation=True`` and so must
        accept that keyword.

    Returns:
        A dict with keys ``'sentiment'`` (the ``RATING_MAP`` entry),
        ``'sentiment_score'``, ``'is_fake'`` (bool) and ``'fake_score'``.

    Raises:
        ValueError: If the sentiment label does not end in an integer.
        KeyError: If the parsed rating is not a key of ``RATING_MAP``.
    """
    # Without truncation, a review longer than the model's maximum sequence
    # length makes the pipeline raise instead of returning a prediction.
    sentiment_result = sentiment_model(text, truncation=True)[0]
    rating = int(sentiment_result['label'].split('_')[-1])

    fake_result = fake_detector(text, truncation=True)[0]
    # Normalise case/whitespace so 'fake' / 'Fake' are recognised as well.
    # NOTE(review): confirm the detector's label names; if its config emits
    # generic 'LABEL_n' labels, this comparison is always False.
    is_fake = str(fake_result['label']).strip().upper() == 'FAKE'

    return {
        'sentiment': RATING_MAP[rating],
        'sentiment_score': sentiment_result['score'],
        'is_fake': is_fake,
        'fake_score': fake_result['score'],
    }
42
 
def main():
    """Render the Streamlit page and run the analysis on demand.

    Shows a title, a text area and an "Analyze" button. When the button is
    pressed with non-blank text, the review is passed to ``analyze_review``
    and the sentiment and authenticity results are shown as two metrics,
    followed by a warning if the review is flagged as fake. Blank input
    produces an error message instead.
    """
    st.title("Hotel Review Analyzer")
    st.markdown("Analyze sentiment and detect fake reviews")

    # Load models
    sentiment_model, fake_detector = load_models()

    # Input
    review_text = st.text_area("Paste your hotel review here:", height=150)

    if not st.button("Analyze"):
        return

    # Strip first so whitespace-only input is rejected rather than being
    # sent to both models.
    cleaned_text = (review_text or "").strip()
    if not cleaned_text:
        st.error("Please enter a review to analyze")
        return

    with st.spinner("Analyzing..."):
        results = analyze_review(cleaned_text, sentiment_model, fake_detector)

    # Display results
    st.subheader("Analysis Results")

    col1, col2 = st.columns(2)
    with col1:
        st.metric(
            "Sentiment Rating",
            results['sentiment'],
            delta=f"{results['sentiment_score']:.2f}",
        )

    with col2:
        st.metric(
            "Authenticity",
            "SUSPICIOUS" if results['is_fake'] else "GENUINE",
            delta=f"{results['fake_score']:.2f}",
            delta_color="inverse" if results['is_fake'] else "normal",
        )

    # Warning for fake reviews
    if results['is_fake']:
        st.warning("⚠️ This review shows characteristics of potentially fake content!")


if __name__ == "__main__":
    main()