G10_TripAdvisor / app.py
mxiean's picture
Update app.py
598d648 verified
raw
history blame
6.78 kB
import streamlit as st
from transformers import pipeline
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import pandas as pd
from datetime import datetime
# Constants
# Display label for each integer rating parsed from the sentiment model's
# output label (see analyze_sentiment); insertion order is 0, 1, 2.
RATING_MAP = dict(enumerate((
    "Negative (⭐)",
    "Neutral (⭐⭐)",
    "Positive (⭐⭐⭐)",
)))
@st.cache_resource
def load_models():
    """Create the two Hugging Face pipelines the app relies on.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned pipelines instead of rebuilding them on every script rerun.

    Returns:
        A ``(sentiment_model, ner_model)`` tuple: a ``text-classification``
        pipeline and a ``ner`` pipeline, in that order.
    """
    return (
        pipeline(
            task="text-classification",
            model="AndrewLi403/CustomModel_tripadvisor_finetuned",
        ),
        pipeline(task="ner", model="dslim/bert-base-NER"),
    )
def analyze_sentiment(text, model):
    """Classify one review and translate the prediction into a rating.

    Args:
        text: The review text to classify.
        model: A text-classification pipeline (or compatible callable) that
            returns a list of ``{'label': ..., 'score': ...}`` dicts. The
            label must end in ``_<int>`` (e.g. ``LABEL_2``), because the
            integer after the last underscore is used as the rating.

    Returns:
        A dict with keys ``'rating'`` (int), ``'label'`` (the display string
        from ``RATING_MAP``) and ``'score'`` (the model's confidence).

    Raises:
        ValueError: If the label does not end in an integer.
        KeyError: If the parsed rating is not a key of ``RATING_MAP``.
    """
    # Ask the tokenizer to truncate: reviews longer than the model's maximum
    # sequence length otherwise make the pipeline raise instead of returning
    # a prediction.
    result = model(text, truncation=True)[0]
    rating = int(result['label'].split('_')[-1])
    return {
        'rating': rating,
        'label': RATING_MAP[rating],
        'score': result['score'],
    }
def extract_aspects(text, model):
    """Extract named entities from a review and merge their token pieces.

    The NER pipeline returns one dict per tagged token. This function
    stitches WordPiece continuations (``##xyz``) back onto the preceding
    token, joins consecutive ``I-`` tokens of the same type into a single
    multi-word entity, and strips the BIO prefix (``B-``/``I-``) from tags.

    Args:
        text: The review text to analyse.
        model: A token-classification pipeline (or compatible callable)
            returning dicts with at least ``'word'`` and ``'entity'`` keys.
            An optional ``'index'`` key (token position) is used, when
            present, to avoid merging tokens that are not adjacent.

    Returns:
        A list of ``{'entity': str, 'type': str}`` dicts, restricted to
        product, organisation and person entities. ``type`` is the tag
        without its BIO prefix (e.g. ``'ORG'``, ``'PER'``).
    """
    # 'PER' is included because BIO-tagged models such as dslim/bert-base-NER
    # label people as B-PER / I-PER rather than PERSON.
    kept_types = {'PRODUCT', 'ORG', 'PERSON', 'PER'}

    aspects = []
    current_entity = ""
    current_type = None
    last_index = None

    for token in model(text):
        word = token['word']
        tag = token['entity']

        # Split "B-ORG" / "I-ORG" into prefix and base type; tags without a
        # BIO prefix are kept unchanged.
        prefix, _, base = tag.partition('-')
        if prefix in ('B', 'I') and base:
            label = base
        else:
            prefix, label = '', tag

        # Without position info, fall back to assuming adjacency.
        index = token.get('index')
        adjacent = (
            last_index is None or index is None or index == last_index + 1
        )
        last_index = index

        is_subword = word.startswith('##')
        if is_subword:
            word = word[2:]

        if current_entity and adjacent:
            if is_subword:
                # WordPiece continuation of the same word.
                current_entity += word
                continue
            if prefix == 'I' and label == current_type:
                # Next word of the same multi-word entity.
                current_entity += ' ' + word
                continue

        # Anything else starts a new entity; flush the one in progress.
        if current_entity:
            aspects.append({'entity': current_entity, 'type': current_type})
        current_entity = word
        current_type = label

    if current_entity:
        aspects.append({'entity': current_entity, 'type': current_type})

    return [a for a in aspects if a['type'] in kept_types]
def plot_sentiment_distribution(df):
    """Draw a pie chart of how many reviews fall under each rating label.

    Args:
        df: DataFrame with a ``'label'`` column holding ``RATING_MAP``
            display labels.

    Returns:
        The matplotlib ``Figure`` containing the pie chart. Slices appear in
        ``RATING_MAP`` order and each label always gets the same colour.
        Labels with no reviews are omitted; if no row carries a known label,
        the figure only shows a short "no data" message.
    """
    labels = list(RATING_MAP.values())
    # Tie each label to a fixed colour so a missing category does not shift
    # the colours of the remaining slices.
    slice_colors = dict(zip(labels, ['#ff9999', '#66b3ff', '#99ff99']))

    # reindex (unlike .loc with a list) tolerates labels that are absent from
    # the data, which is the normal case until every rating has been seen.
    counts = df['label'].value_counts().reindex(labels, fill_value=0)
    counts = counts[counts > 0]

    fig, ax = plt.subplots()
    if counts.empty:
        ax.text(0.5, 0.5, "No sentiment data", ha='center', va='center')
        ax.axis('off')
        return fig

    counts.plot.pie(
        autopct='%1.1f%%',
        colors=[slice_colors[label] for label in counts.index],
        ax=ax,
    )
    ax.set_ylabel('')
    return fig
def plot_wordcloud(negative_reviews):
    """Render a word cloud built from the given review texts.

    Args:
        negative_reviews: Iterable of review strings; they are joined with
            single spaces into one document before the cloud is generated.

    Returns:
        A 10x5 matplotlib ``Figure`` showing the cloud with its axes hidden.
    """
    combined_text = " ".join(negative_reviews)
    cloud_options = {
        'width': 800,
        'height': 400,
        'background_color': 'white',
        'colormap': 'Reds',
    }
    cloud = WordCloud(**cloud_options).generate(combined_text)

    figure, axes = plt.subplots(figsize=(10, 5))
    axes.imshow(cloud, interpolation='bilinear')
    axes.axis('off')
    return figure
def _append_history(records):
    """Append analysed review records (a list of dicts) to the session history."""
    if not records:
        return
    new_rows = pd.DataFrame(records)
    history = st.session_state.history
    if history.empty:
        # Adopt the new rows directly: concatenating onto the empty
        # placeholder frame would coerce every column to object dtype.
        st.session_state.history = new_rows
    else:
        st.session_state.history = pd.concat(
            [history, new_rows],
            ignore_index=True,
        )


def _analyze_review(text, sentiment_model, ner_model):
    """Run both models on one review; return (sentiment, aspects, history record)."""
    sentiment = analyze_sentiment(text, sentiment_model)
    aspects = extract_aspects(text, ner_model)
    record = {
        'text': text,
        'rating': sentiment['rating'],
        'label': sentiment['label'],
        'date': datetime.now(),
        'aspects': aspects,
    }
    return sentiment, aspects, record


def _render_single_review(sentiment_model, ner_model):
    """Show the single-review form and, on submit, the analysis of that review."""
    user_input = st.text_area("Enter or paste a restaurant review:", height=150)
    if not st.button("Analyze"):
        return
    # Treat whitespace-only input the same as no input.
    if not user_input or not user_input.strip():
        st.warning("Please enter a review")
        return

    with st.spinner("Analyzing..."):
        sentiment, aspects, record = _analyze_review(
            user_input, sentiment_model, ner_model
        )
        _append_history([record])

    st.subheader("Analysis Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Rating", sentiment['label'])
    with col2:
        st.metric("Confidence", f"{sentiment['score']:.2f}")

    if aspects:
        st.subheader("Identified Aspects")
        for aspect in aspects:
            st.markdown(f"- **{aspect['type']}**: `{aspect['entity']}`")
    else:
        st.info("No specific entities identified")


def _render_batch(sentiment_model, ner_model):
    """Show the CSV uploader and, on submit, analyse every review in the file."""
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    if 'text' not in df.columns:
        st.error("CSV must contain 'text' column")
        return
    if not st.button("Analyze All"):
        return

    # Skip missing and blank cells: the models expect non-empty strings and
    # pandas represents empty CSV cells as float NaN.
    texts = [str(t) for t in df['text'].dropna() if str(t).strip()]
    if not texts:
        st.warning("No reviews found in the 'text' column")
        return

    progress_bar = st.progress(0)
    results = []
    for done, text in enumerate(texts, start=1):
        _, _, record = _analyze_review(text, sentiment_model, ner_model)
        results.append(record)
        progress_bar.progress(done / len(texts))

    _append_history(results)
    st.success(f"Completed analysis of {len(texts)} reviews")


def _render_history():
    """Render the raw data, charts and trend for everything analysed so far."""
    history = st.session_state.history
    if history.empty:
        return

    st.divider()
    st.header("Analysis History")
    with st.expander("View Raw Data"):
        st.dataframe(history)

    st.subheader("Sentiment Distribution")
    fig1 = plot_sentiment_distribution(history)
    st.pyplot(fig1)
    # Streamlit reruns this script on every interaction; close figures after
    # rendering so pyplot does not keep them all alive.
    plt.close(fig1)

    negative_reviews = history.loc[history['rating'] == 0, 'text'].tolist()
    if negative_reviews:
        st.subheader("Negative Reviews Word Cloud")
        try:
            fig2 = plot_wordcloud(negative_reviews)
        except ValueError:
            # WordCloud raises ValueError when the text contains no usable
            # words (e.g. only punctuation or stop words).
            st.info("Not enough text to build a word cloud")
        else:
            st.pyplot(fig2)
            plt.close(fig2)
    else:
        st.info("No negative reviews yet")

    if len(history) > 1:
        st.subheader("Rating Trend Over Time")
        time_df = history[['date', 'rating']].copy()
        time_df['date'] = pd.to_datetime(time_df['date'])
        # Make sure the mean is computed on a numeric column even if the
        # history frame stored ratings with object dtype.
        time_df['rating'] = pd.to_numeric(time_df['rating'])
        daily_mean = time_df.set_index('date').resample('D')['rating'].mean()
        st.line_chart(daily_mean)


def main():
    """Streamlit entry point: build the page and dispatch on the chosen mode.

    Loads the models, keeps a per-session ``history`` DataFrame (columns
    ``text``, ``rating``, ``label``, ``date``, ``aspects``) in
    ``st.session_state``, runs either the single-review or the batch CSV
    workflow, and finally renders the accumulated history.
    """
    st.title("Restaurant Review Analyzer")
    st.markdown("Using fine-tuned model for sentiment and aspect analysis")

    sentiment_model, ner_model = load_models()

    st.sidebar.header("Analysis Options")
    analysis_mode = st.sidebar.radio(
        "Select Mode",
        ["Single Review", "Batch Analysis"],
    )

    if 'history' not in st.session_state:
        st.session_state.history = pd.DataFrame(
            columns=['text', 'rating', 'label', 'date', 'aspects']
        )

    if analysis_mode == "Single Review":
        _render_single_review(sentiment_model, ner_model)
    else:
        _render_batch(sentiment_model, ner_model)

    _render_history()
# Build the page only when this file is executed as the main script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()