Spaces:

mxiean
/

G10_TripAdvisor

Sleeping

App Files Files Community

G10_TripAdvisor / app.py

mxiean

Update app.py

598d648 verified 5 months ago

raw

history blame

6.78 kB

	import streamlit as st
	from transformers import pipeline
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	import pandas as pd
	from datetime import datetime

	# Lookup table from the integer class id parsed out of the sentiment
	# model's label to the star-rating text shown in the UI.
	RATING_MAP = {
	    0: "Negative (⭐)",
	    1: "Neutral (⭐⭐)",
	    2: "Positive (⭐⭐⭐)",
	}

	@st.cache_resource
	def load_models():
	    """Create the two Hugging Face pipelines the app relies on.

	    The function is wrapped in ``st.cache_resource``, so the pipelines are
	    meant to be built once and shared rather than rebuilt on every call.

	    Returns:
	        tuple: ``(sentiment_model, ner_model)`` where the first item is a
	        "text-classification" pipeline for the fine-tuned TripAdvisor model
	        and the second is an "ner" pipeline for ``dslim/bert-base-NER``.
	    """
	    # Tuple elements are evaluated left to right, so the sentiment pipeline
	    # is still constructed before the NER pipeline.
	    return (
	        pipeline(
	            "text-classification",
	            model="AndrewLi403/CustomModel_tripadvisor_finetuned",
	        ),
	        pipeline("ner", model="dslim/bert-base-NER"),
	    )

	def analyze_sentiment(text, model):
	    """Classify one review and convert the model output into a rating.

	    Args:
	        text: Review text to classify.
	        model: Text-classification pipeline (or a compatible callable) that
	            accepts a ``truncation`` keyword and returns a list of dicts
	            with ``'label'`` and ``'score'`` keys.

	    Returns:
	        dict: ``'rating'`` (the integer after the last underscore of the
	        predicted label), ``'label'`` (the matching ``RATING_MAP`` text) and
	        ``'score'`` (the score reported by the model).

	    Raises:
	        ValueError: If the predicted label does not end in an integer.
	        KeyError: If the parsed rating has no entry in ``RATING_MAP``.
	    """
	    # Ask the pipeline to truncate over-long reviews; without this, input
	    # longer than the model's maximum sequence length fails inside the model.
	    result = model(text, truncation=True)[0]
	    rating = int(result['label'].rsplit('_', 1)[-1])
	    return {
	        'rating': rating,
	        'label': RATING_MAP[rating],
	        'score': result['score'],
	    }

	def extract_aspects(text, model):
	    """Extract named entities of interest from a review.

	    Word-piece tokens (those whose ``'word'`` starts with ``##``) are glued
	    onto the preceding token to rebuild whole words. Separate word-level
	    tokens are NOT merged, so a multi-word name yields one aspect per word.

	    Args:
	        text: Review text to analyse.
	        model: Callable returning an iterable of dicts with ``'word'`` and
	            ``'entity'`` keys (e.g. an ungrouped "ner" pipeline).

	    Returns:
	        list[dict]: ``{'entity': <word>, 'type': <tag>}`` entries whose tag is
	        one of PRODUCT, ORG, PERSON or PER. A leading ``B-``/``I-`` BIO
	        prefix is removed from the tag before filtering, so ``B-ORG`` is
	        reported as ``ORG``.
	    """
	    # 'PER' is accepted next to 'PERSON' because CoNLL-style NER models
	    # (such as the one loaded by this app) use the short tag.
	    wanted_types = {'PRODUCT', 'ORG', 'PERSON', 'PER'}

	    aspects = []
	    current_entity = ""
	    current_type = None

	    for entity in model(text):
	        word = entity['word']
	        is_word_piece = word.startswith('##')

	        if is_word_piece and current_entity:
	            # Continuation of the word currently being assembled.
	            current_entity += word[2:]
	            continue

	        if current_entity:
	            aspects.append({'entity': current_entity, 'type': current_type})

	        # A word piece with nothing before it starts a new entity instead of
	        # leaving the type undefined (previously an unbound-variable crash).
	        current_entity = word[2:] if is_word_piece else word
	        tag = entity['entity']
	        # Drop the BIO prefix so 'B-ORG' / 'I-ORG' compare equal to 'ORG'.
	        current_type = tag[2:] if tag.startswith(('B-', 'I-')) else tag

	    if current_entity:
	        aspects.append({'entity': current_entity, 'type': current_type})

	    return [a for a in aspects if a['type'] in wanted_types]

	def plot_sentiment_distribution(df):
	    """Draw a pie chart of how many reviews fall into each sentiment class.

	    Args:
	        df: DataFrame with a ``'label'`` column holding ``RATING_MAP`` values.

	    Returns:
	        The Matplotlib figure containing the pie chart. The figure is left
	        without a pie when ``df`` has no rows with a known label.
	    """
	    class_order = list(RATING_MAP.values())
	    colour_for = dict(zip(class_order, ['#ff9999', '#66b3ff', '#99ff99']))

	    # reindex() instead of .loc[list]: .loc raises KeyError as soon as one
	    # class is missing from the data (e.g. after the very first review).
	    counts = df['label'].value_counts().reindex(class_order, fill_value=0)
	    # Drop empty classes so the chart has no zero-width wedges; colours are
	    # looked up per label so each class keeps its own colour.
	    counts = counts[counts > 0]

	    fig, ax = plt.subplots()
	    if not counts.empty:
	        counts.plot.pie(
	            autopct='%1.1f%%',
	            colors=[colour_for[label] for label in counts.index],
	            ax=ax,
	        )
	    ax.set_ylabel('')
	    return fig

	def plot_wordcloud(negative_reviews):
	    """Render a word cloud from a collection of review texts.

	    Args:
	        negative_reviews: Iterable of strings; they are joined with single
	            spaces into one text before the cloud is generated.

	    Returns:
	        A 10x5 inch Matplotlib figure showing the cloud with its axes hidden.
	    """
	    corpus = " ".join(negative_reviews)
	    cloud_settings = dict(
	        width=800,
	        height=400,
	        background_color='white',
	        colormap='Reds',
	    )
	    cloud = WordCloud(**cloud_settings).generate(corpus)

	    figure, axes = plt.subplots(figsize=(10, 5))
	    axes.imshow(cloud, interpolation='bilinear')
	    axes.axis('off')
	    return figure

	def main():
	    """Streamlit entry point: collect reviews, analyse them, show history.

	    "Single Review" mode analyses the text typed into the text area and
	    shows its rating, confidence and extracted aspects. "Batch Analysis"
	    mode analyses every non-empty cell of the ``text`` column of an uploaded
	    CSV. All results accumulate in ``st.session_state.history`` and are
	    summarised below the input area.
	    """
	    st.title("Restaurant Review Analyzer")
	    st.markdown("Using fine-tuned model for sentiment and aspect analysis")

	    sentiment_model, ner_model = load_models()

	    st.sidebar.header("Analysis Options")
	    analysis_mode = st.sidebar.radio(
	        "Select Mode",
	        ["Single Review", "Batch Analysis"]
	    )

	    if 'history' not in st.session_state:
	        st.session_state.history = pd.DataFrame(
	            columns=['text', 'rating', 'label', 'date', 'aspects']
	        )

	    if analysis_mode == "Single Review":
	        user_input = st.text_area("Enter or paste a restaurant review:", height=150)

	        if st.button("Analyze"):
	            # Whitespace-only input counts as "nothing entered".
	            if user_input and user_input.strip():
	                with st.spinner("Analyzing..."):
	                    record, sentiment = _build_record(
	                        user_input, sentiment_model, ner_model
	                    )
	                    _append_history([record])
	                aspects = record['aspects']

	                st.subheader("Analysis Results")
	                col1, col2 = st.columns(2)
	                with col1:
	                    st.metric("Rating", sentiment['label'])
	                with col2:
	                    st.metric("Confidence", f"{sentiment['score']:.2f}")

	                if aspects:
	                    st.subheader("Identified Aspects")
	                    for aspect in aspects:
	                        # The closing quote was missing here, which made the
	                        # whole file a syntax error.
	                        st.markdown(f"- {aspect['type']}: `{aspect['entity']}`")
	                else:
	                    st.info("No specific entities identified")
	            else:
	                st.warning("Please enter a review")

	    else:
	        uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

	        if uploaded_file:
	            df = pd.read_csv(uploaded_file)
	            if 'text' not in df.columns:
	                st.error("CSV must contain 'text' column")
	            elif st.button("Analyze All"):
	                # Empty cells are read as NaN, which the models cannot
	                # process; skip them and make every remaining value a str.
	                texts = df['text'].dropna().astype(str).tolist()
	                progress_bar = st.progress(0)
	                results = []

	                for i, text in enumerate(texts):
	                    record, _ = _build_record(text, sentiment_model, ner_model)
	                    results.append(record)
	                    progress_bar.progress((i + 1) / len(texts))

	                _append_history(results)
	                # Report the rows actually analysed, not the raw row count.
	                st.success(f"Completed analysis of {len(results)} reviews")

	    if not st.session_state.history.empty:
	        _render_history(st.session_state.history)


	def _build_record(text, sentiment_model, ner_model):
	    """Run both models on one review; return ``(history_row, sentiment)``."""
	    sentiment = analyze_sentiment(text, sentiment_model)
	    aspects = extract_aspects(text, ner_model)
	    record = {
	        'text': text,
	        'rating': sentiment['rating'],
	        'label': sentiment['label'],
	        'date': datetime.now(),
	        'aspects': aspects,
	    }
	    return record, sentiment


	def _append_history(records):
	    """Append a list of history rows to ``st.session_state.history``."""
	    if not records:
	        return
	    new_rows = pd.DataFrame(records)
	    history = st.session_state.history
	    if history.empty:
	        # Replace the empty placeholder instead of concatenating with it, so
	        # its object-dtype columns do not leak into the 'rating' column.
	        st.session_state.history = new_rows
	    else:
	        st.session_state.history = pd.concat(
	            [history, new_rows],
	            ignore_index=True
	        )


	def _render_history(history):
	    """Show the raw table, sentiment pie, negative word cloud and trend."""
	    st.divider()
	    st.header("Analysis History")

	    with st.expander("View Raw Data"):
	        st.dataframe(history)

	    st.subheader("Sentiment Distribution")
	    st.pyplot(plot_sentiment_distribution(history))

	    negative_reviews = history[history['rating'] == 0]['text'].tolist()

	    if negative_reviews:
	        st.subheader("Negative Reviews Word Cloud")
	        st.pyplot(plot_wordcloud(negative_reviews))
	    else:
	        st.info("No negative reviews yet")

	    if len(history) > 1:
	        st.subheader("Rating Trend Over Time")
	        time_df = history.copy()
	        time_df['date'] = pd.to_datetime(time_df['date'])
	        # Make sure the ratings are numeric before averaging per day.
	        time_df['rating'] = pd.to_numeric(time_df['rating'])
	        daily_mean = time_df.set_index('date').resample('D')['rating'].mean()
	        st.line_chart(daily_mean)

	# Start the app only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()