mxiean committed on
Commit
49fc581
·
verified ·
1 Parent(s): a80b27b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -0
app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import matplotlib.pyplot as plt
4
+ from wordcloud import WordCloud
5
+ import pandas as pd
6
+ from datetime import datetime
7
+
8
# Constants
# Maps the integer class index parsed from the sentiment model's label
# (see analyze_sentiment) to the display string shown in the UI.
RATING_MAP = {
    0: "Negative (⭐)",
    1: "Neutral (⭐⭐)",
    2: "Positive (⭐⭐⭐)",
}
14
+
15
# Load models
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    Wrapped in ``st.cache_resource`` so the pipelines are reused instead of
    being rebuilt each time the function is called.

    Returns:
        A ``(sentiment_model, ner_model)`` tuple: a "text-classification"
        pipeline and a "ner" pipeline, in that order.
    """
    sentiment_model = pipeline(
        "text-classification",
        model="AndrewLi403/CustomModel_tripadvisor_finetuned",
    )
    ner_model = pipeline("ner", model="dslim/bert-base-NER")
    return sentiment_model, ner_model
24
+
25
# Sentiment analysis
def analyze_sentiment(text, model):
    """Classify *text* and translate the prediction into a rating record.

    Args:
        text: The review text to classify.
        model: A text-classification pipeline. It is called as
            ``model(text, truncation=True)`` and must return a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        A dict with ``'rating'`` (the integer after the last ``_`` in the
        predicted label), ``'label'`` (the matching ``RATING_MAP`` entry) and
        ``'score'`` (the model's score for that prediction).

    Raises:
        ValueError: If the predicted label does not end in an integer.
        KeyError: If the parsed rating has no entry in ``RATING_MAP``.
    """
    # truncation=True is forwarded to the tokenizer so a review longer than
    # the model's maximum sequence length is cut down instead of making the
    # forward pass fail.
    result = model(text, truncation=True)[0]
    rating = int(result['label'].rsplit('_', 1)[-1])  # Get 0, 1, or 2
    return {
        'rating': rating,
        'label': RATING_MAP[rating],
        'score': result['score'],
    }
34
+
35
# Entity extraction
def extract_aspects(text, model):
    """Run NER over *text* and return the entities of interest.

    WordPiece continuation tokens (words starting with ``##``) are glued onto
    the preceding token so each returned entity is a whole word.

    Args:
        text: The review text to scan.
        model: A token-classification pipeline. ``model(text)`` must return an
            iterable of mappings with ``'word'`` and ``'entity'`` keys.

    Returns:
        A list of ``{'entity': str, 'type': str}`` dicts whose type is one of
        ``'PRODUCT'``, ``'ORG'`` or ``'PERSON'``, in the order the model
        reported them.
    """
    wanted_types = {'PRODUCT', 'ORG', 'PERSON'}
    # Short tag names some models use, mapped to the names filtered on below.
    type_aliases = {'PER': 'PERSON'}

    def normalize(tag):
        # Token-level NER models (e.g. dslim/bert-base-NER) report BIO tags
        # such as "B-ORG" / "I-PER"; strip the prefix so they can match.
        if tag[:2] in ('B-', 'I-'):
            tag = tag[2:]
        return type_aliases.get(tag, tag)

    merged = []
    for token in model(text):
        word = token['word']
        if word.startswith('##'):
            word = word[2:]
            if merged:
                # Continuation piece: extend the word currently being built.
                merged[-1]['entity'] += word
                continue
            # A continuation piece with nothing before it starts its own
            # entity, typed by its own tag.
        merged.append({'entity': word, 'type': normalize(token['entity'])})

    return [
        aspect for aspect in merged
        if aspect['entity'] and aspect['type'] in wanted_types
    ]
61
+
62
# Visualization functions
def plot_sentiment_distribution(df):
    """Draw a pie chart of how often each sentiment label occurs.

    Args:
        df: DataFrame with a ``'label'`` column holding ``RATING_MAP`` values.

    Returns:
        The matplotlib figure containing the pie chart. If none of the
        ``RATING_MAP`` labels occur in *df*, the figure has an empty,
        hidden axis.
    """
    palette = ['#ff9999', '#66b3ff', '#99ff99']
    labels = list(RATING_MAP.values())

    # reindex (rather than .loc with a list) tolerates labels that have not
    # occurred yet; .loc raises KeyError as soon as one is missing.
    counts = df['label'].value_counts().reindex(labels, fill_value=0)
    present = counts > 0

    fig, ax = plt.subplots()
    if not present.any():
        ax.axis('off')
        return fig

    # Plot only non-zero slices, keeping each label paired with its colour.
    counts[present].plot.pie(
        autopct='%1.1f%%',
        colors=[color for color, keep in zip(palette, present) if keep],
        ax=ax,
    )
    ax.set_ylabel('')
    return fig
72
+
73
def plot_wordcloud(negative_reviews):
    """Render a word cloud built from the given review texts.

    Args:
        negative_reviews: Iterable of review strings; they are joined with
            single spaces before being handed to ``WordCloud.generate``.

    Returns:
        The matplotlib figure showing the word cloud with its axis hidden.
    """
    text = " ".join(negative_reviews)
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        colormap='Reds',
    ).generate(text)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    return fig
86
+
87
# Main app
def _append_history(records):
    """Append a list of result dicts to ``st.session_state.history``."""
    new_rows = pd.DataFrame(records)
    if new_rows.empty:
        return
    history = st.session_state.history
    if history.empty:
        # Adopt the new rows directly: concatenating onto the empty
        # placeholder frame is deprecated in recent pandas and can leave
        # 'rating' as an object column.
        st.session_state.history = new_rows
    else:
        st.session_state.history = pd.concat(
            [history, new_rows],
            ignore_index=True
        )


def _analyze_review(text, sentiment_model, ner_model):
    """Analyze one review; return ``(sentiment, history_record)``."""
    sentiment = analyze_sentiment(text, sentiment_model)
    record = {
        'text': text,
        'rating': sentiment['rating'],
        'label': sentiment['label'],
        'date': datetime.now(),
        'aspects': extract_aspects(text, ner_model),
    }
    return sentiment, record


def _run_single_review(sentiment_model, ner_model):
    """Render the single-review form and show the result of one analysis."""
    user_input = st.text_area("Enter or paste a restaurant review:", height=150)

    if not st.button("Analyze"):
        return
    # Whitespace-only input carries nothing to analyze.
    if not (user_input or "").strip():
        st.warning("Please enter a review")
        return

    with st.spinner("Analyzing..."):
        sentiment, record = _analyze_review(user_input, sentiment_model, ner_model)
        _append_history([record])

    # Display results
    st.subheader("Analysis Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Rating", sentiment['label'])
    with col2:
        st.metric("Confidence", f"{sentiment['score']:.2f}")

    aspects = record['aspects']
    if aspects:
        st.subheader("Identified Aspects")
        for aspect in aspects:
            st.markdown(f"- **{aspect['type']}**: `{aspect['entity']}`")
    else:
        st.info("No specific entities identified")


def _run_batch_analysis(sentiment_model, ner_model):
    """Render the CSV upload form and analyze every review in the file."""
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    if 'text' not in df.columns:
        st.error("CSV must contain 'text' column")
        return
    if not st.button("Analyze All"):
        return

    # Empty CSV cells are read as NaN (a float), which the models cannot
    # take; drop them and blank strings, and coerce the rest to str.
    texts = [t for t in df['text'].dropna().astype(str) if t.strip()]
    if not texts:
        st.warning("No reviews found in 'text' column")
        return

    progress_bar = st.progress(0)
    results = []
    for done, text in enumerate(texts, start=1):
        _, record = _analyze_review(text, sentiment_model, ner_model)
        results.append(record)
        progress_bar.progress(done / len(texts))

    _append_history(results)
    st.success(f"Completed analysis of {len(texts)} reviews")


def _render_history():
    """Show the raw history table and the charts derived from it."""
    history = st.session_state.history
    if history.empty:
        return

    st.divider()
    st.header("Analysis History")

    # Raw data display
    with st.expander("View Raw Data"):
        st.dataframe(history)

    # Visualizations
    st.subheader("Sentiment Distribution")
    fig1 = plot_sentiment_distribution(history)
    st.pyplot(fig1)
    # Close after rendering so figures do not pile up across reruns.
    plt.close(fig1)

    # Negative reviews word cloud
    negative_reviews = history.loc[history['rating'] == 0, 'text'].tolist()
    if negative_reviews:
        st.subheader("Negative Reviews Word Cloud")
        fig2 = plot_wordcloud(negative_reviews)
        st.pyplot(fig2)
        plt.close(fig2)
    else:
        st.info("No negative reviews yet")

    # Time trend analysis
    if len(history) > 1:
        st.subheader("Rating Trend Over Time")
        time_df = history.copy()
        time_df['date'] = pd.to_datetime(time_df['date'])
        # Guard against an object-dtype column, which mean() rejects.
        time_df['rating'] = pd.to_numeric(time_df['rating'])
        daily_mean = time_df.set_index('date').resample('D')['rating'].mean()
        st.line_chart(daily_mean)


def main():
    """Run the Streamlit app: page header, mode selection, analysis, history.

    Loads the models through ``load_models``, creates the ``history``
    DataFrame in ``st.session_state`` on first use, runs the form for the
    mode picked in the sidebar, then renders the accumulated history.
    """
    st.title("Restaurant Review Analyzer")
    st.markdown("Using fine-tuned model for sentiment and aspect analysis")

    # Initialize models
    sentiment_model, ner_model = load_models()

    # Sidebar controls
    st.sidebar.header("Analysis Options")
    analysis_mode = st.sidebar.radio(
        "Select Mode",
        ["Single Review", "Batch Analysis"]
    )

    # Initialize session state
    if 'history' not in st.session_state:
        st.session_state.history = pd.DataFrame(
            columns=['text', 'rating', 'label', 'date', 'aspects']
        )

    if analysis_mode == "Single Review":
        _run_single_review(sentiment_model, ner_model)
    else:
        _run_batch_analysis(sentiment_model, ner_model)

    # Display historical data and visualizations
    _render_history()
217
+
218
if __name__ == "__main__":
    main()