# Hugging Face Space (status: Sleeping)
import streamlit as st | |
from transformers import pipeline | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
import pandas as pd | |
from datetime import datetime | |
# Constants
# Human-readable labels, listed in class-index order so that the position of
# each label is the integer rating it describes.
_RATING_LABELS = (
    "Negative (⭐)",
    "Neutral (⭐⭐)",
    "Positive (⭐⭐⭐)",
)

# Maps the integer rating (0, 1, 2) to its display label.
RATING_MAP = dict(enumerate(_RATING_LABELS))
@st.cache_resource
def load_models():
    """Build the sentiment and NER pipelines, reusing them across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached loader would re-instantiate both transformer pipelines each
    time the user clicks a button. ``st.cache_resource`` keeps one shared
    instance of the returned objects and hands it back on later calls.

    Returns:
        tuple: ``(sentiment_model, ner_model)`` where the first element is a
        ``"text-classification"`` pipeline and the second a ``"ner"``
        pipeline.
    """
    sentiment_model = pipeline(
        "text-classification",
        model="AndrewLi403/CustomModel_tripadvisor_finetuned",
    )
    ner_model = pipeline("ner", model="dslim/bert-base-NER")
    return sentiment_model, ner_model
def analyze_sentiment(text, model):
    """Classify one review and translate the prediction into a rating.

    Args:
        text: The review text to classify.
        model: A text-classification pipeline. It is called as
            ``model(text, truncation=True)`` and must return a sequence whose
            first item is a dict with ``'label'`` and ``'score'`` keys.

    Returns:
        dict: ``{'rating': int, 'label': str, 'score': float}`` where
        ``'label'`` is the display string from ``RATING_MAP``.

    Raises:
        ValueError: If the text after the last ``'_'`` in the predicted label
            is not an integer.
        KeyError: If the parsed rating is not a key of ``RATING_MAP``.
    """
    # truncation=True makes the tokenizer cut over-long reviews down to the
    # model's maximum sequence length instead of failing on them.
    result = model(text, truncation=True)[0]

    # NOTE(review): assumes labels look like "LABEL_<n>" -- confirm against
    # the fine-tuned model's config.
    rating = int(result['label'].split('_')[-1])
    return {
        'rating': rating,
        'label': RATING_MAP[rating],
        'score': result['score'],
    }
def _split_iob_tag(tag):
    """Split an IOB tag such as ``'B-ORG'`` into ``('B', 'ORG')``.

    Tags without a ``B-``/``I-`` prefix come back unchanged as ``('', tag)``.
    """
    prefix, sep, bare = tag.partition('-')
    if sep and prefix in ('B', 'I'):
        return prefix, bare
    return '', tag


def extract_aspects(text, model):
    """Extract named entities of interest from a review.

    Token-level NER output is stitched back into whole entities:

    * word pieces starting with ``'##'`` are glued onto the current entity;
    * ``I-`` tagged words of the same type that directly follow the current
      entity are appended with a space (``Burger`` + ``King``);
    * anything else starts a new entity.

    Args:
        text: The review text to analyse.
        model: A callable returning an iterable of dicts with at least
            ``'word'`` and ``'entity'`` keys; an optional ``'index'`` key
            (token position) is used to check adjacency.

    Returns:
        list[dict]: ``{'entity': str, 'type': str}`` items whose type is one
        of ``PRODUCT``, ``ORG`` or ``PERSON``. Types are reported without the
        ``B-``/``I-`` prefix and ``PER`` is reported as ``PERSON``.
    """
    wanted_types = {'PRODUCT', 'ORG', 'PERSON'}
    # CoNLL-style tag sets abbreviate PERSON as PER; without this alias (and
    # without stripping the B-/I- prefix) the filter below never matches.
    type_aliases = {'PER': 'PERSON'}

    aspects = []
    current_entity = ""
    current_type = None
    last_index = None

    for token in model(text):
        word = token['word']
        prefix, bare_type = _split_iob_tag(token['entity'])
        token_type = type_aliases.get(bare_type, bare_type)

        # Tokens tagged "O" may have been dropped by the pipeline, so two
        # consecutive results are not necessarily neighbours in the text.
        index = token.get('index')
        adjacent = (
            index is None
            or last_index is None
            or index == last_index + 1
        )
        last_index = index

        if word.startswith('##'):
            # A leading word piece with nothing before it starts an entity
            # of its own rather than referencing an unset type.
            if not current_entity:
                current_type = token_type
            current_entity += word[2:]
            continue

        if (
            prefix == 'I'
            and current_entity
            and token_type == current_type
            and adjacent
        ):
            current_entity += ' ' + word
            continue

        if current_entity:
            aspects.append({'entity': current_entity, 'type': current_type})
        current_entity = word
        current_type = token_type

    if current_entity:
        aspects.append({'entity': current_entity, 'type': current_type})

    return [a for a in aspects if a['type'] in wanted_types]
def plot_sentiment_distribution(df):
    """Draw a pie chart of how many reviews fall under each rating label.

    Args:
        df: DataFrame with a ``'label'`` column holding ``RATING_MAP`` values.

    Returns:
        matplotlib.figure.Figure: The figure containing the pie chart. If no
        row carries a known label the axes are left blank.
    """
    labels = list(RATING_MAP.values())
    # Tie each label to a fixed colour so a slice keeps its colour even when
    # other labels are absent from the data.
    palette = dict(zip(labels, ['#ff9999', '#66b3ff', '#99ff99']))

    # ``.loc[labels]`` raises KeyError as soon as one label has no reviews
    # (e.g. right after the first analysis); ``reindex`` fills those with 0.
    counts = df['label'].value_counts().reindex(labels, fill_value=0)
    # Drop empty slices so the chart does not show "0.0%" wedges.
    counts = counts[counts > 0]

    fig, ax = plt.subplots()
    if counts.empty:
        ax.axis('off')
        return fig

    counts.plot.pie(
        autopct='%1.1f%%',
        colors=[palette[label] for label in counts.index],
        ax=ax,
    )
    ax.set_ylabel('')
    return fig
def plot_wordcloud(negative_reviews):
    """Render a word cloud from a collection of review texts.

    Args:
        negative_reviews: Iterable of strings; they are joined with single
            spaces into one corpus before the cloud is generated.

    Returns:
        matplotlib.figure.Figure: A 10x5 inch figure showing the cloud with
        the axes hidden.
    """
    corpus = " ".join(negative_reviews)

    cloud_options = {
        'width': 800,
        'height': 400,
        'background_color': 'white',
        'colormap': 'Reds',
    }
    cloud = WordCloud(**cloud_options).generate(corpus)

    figure, axes = plt.subplots(figsize=(10, 5))
    axes.imshow(cloud, interpolation='bilinear')
    axes.axis('off')
    return figure
def _history_row(text, sentiment, aspects):
    """Build one history record from a review and its analysis results."""
    return {
        'text': text,
        'rating': sentiment['rating'],
        'label': sentiment['label'],
        'date': datetime.now(),
        'aspects': aspects,
    }


def _append_history(rows):
    """Append a list of history records to the session-wide history table."""
    new_rows = pd.DataFrame(rows)
    if st.session_state.history.empty:
        # Replacing the empty placeholder (instead of concatenating onto it)
        # keeps the proper column dtypes, e.g. an integer 'rating' column.
        st.session_state.history = new_rows
    else:
        st.session_state.history = pd.concat(
            [st.session_state.history, new_rows],
            ignore_index=True,
        )


def _run_single_review(sentiment_model, ner_model):
    """Render the single-review form and show the result for one review."""
    user_input = st.text_area("Enter or paste a restaurant review:", height=150)
    if not st.button("Analyze"):
        return

    # Whitespace-only input carries nothing to analyse.
    if not user_input or not user_input.strip():
        st.warning("Please enter a review")
        return

    with st.spinner("Analyzing..."):
        sentiment = analyze_sentiment(user_input, sentiment_model)
        aspects = extract_aspects(user_input, ner_model)
        _append_history([_history_row(user_input, sentiment, aspects)])

    st.subheader("Analysis Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Rating", sentiment['label'])
    with col2:
        st.metric("Confidence", f"{sentiment['score']:.2f}")

    if aspects:
        st.subheader("Identified Aspects")
        for aspect in aspects:
            st.markdown(f"- **{aspect['type']}**: `{aspect['entity']}`")
    else:
        st.info("No specific entities identified")


def _run_batch_analysis(sentiment_model, ner_model):
    """Render the CSV uploader and analyse every review in the file."""
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    if 'text' not in df.columns:
        st.error("CSV must contain 'text' column")
        return

    if not st.button("Analyze All"):
        return

    # Empty cells are read as NaN and numeric-looking cells as numbers;
    # neither can be fed to the models, so skip the former and stringify
    # the latter.
    texts = df['text'].dropna().astype(str).tolist()

    progress_bar = st.progress(0)
    results = []
    for done, text in enumerate(texts, start=1):
        sentiment = analyze_sentiment(text, sentiment_model)
        aspects = extract_aspects(text, ner_model)
        results.append(_history_row(text, sentiment, aspects))
        progress_bar.progress(done / len(texts))

    if results:
        _append_history(results)
    # Report the number of reviews actually analysed, not rows uploaded.
    st.success(f"Completed analysis of {len(results)} reviews")


def _render_history(history):
    """Render the raw table, charts and trend line for the given history."""
    st.divider()
    st.header("Analysis History")
    with st.expander("View Raw Data"):
        st.dataframe(history)

    st.subheader("Sentiment Distribution")
    st.pyplot(plot_sentiment_distribution(history))

    negative_reviews = history.loc[history['rating'] == 0, 'text'].tolist()
    if negative_reviews:
        st.subheader("Negative Reviews Word Cloud")
        st.pyplot(plot_wordcloud(negative_reviews))
    else:
        st.info("No negative reviews yet")

    if len(history) > 1:
        st.subheader("Rating Trend Over Time")
        time_df = history.copy()
        time_df['date'] = pd.to_datetime(time_df['date'])
        # Guard against an object-dtype column before averaging.
        time_df['rating'] = pd.to_numeric(time_df['rating'])
        time_df = time_df.set_index('date').resample('D')['rating'].mean()
        st.line_chart(time_df)


def main():
    """Run the Streamlit app: page setup, mode selection and history view.

    Loads both models, lets the user pick single-review or batch (CSV) mode
    from the sidebar, stores every analysed review in
    ``st.session_state.history`` and, once that history is non-empty, shows
    the raw data, the sentiment pie chart, the negative-review word cloud
    and the daily mean-rating trend.
    """
    st.title("Restaurant Review Analyzer")
    st.markdown("Using fine-tuned model for sentiment and aspect analysis")

    sentiment_model, ner_model = load_models()

    st.sidebar.header("Analysis Options")
    analysis_mode = st.sidebar.radio(
        "Select Mode",
        ["Single Review", "Batch Analysis"],
    )

    if 'history' not in st.session_state:
        st.session_state.history = pd.DataFrame(
            columns=['text', 'rating', 'label', 'date', 'aspects']
        )

    if analysis_mode == "Single Review":
        _run_single_review(sentiment_model, ner_model)
    else:
        _run_batch_analysis(sentiment_model, ner_model)

    if not st.session_state.history.empty:
        _render_history(st.session_state.history)
# Script entry point: build the UI only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()