Spaces:

mxiean
/

G10_TripAdvisor

Sleeping

App Files Files Community

mxiean committed on Mar 27

Commit

3515dc3

verified ·

1 Parent(s): f130149

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -18

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import pipeline
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 import pandas as pd
@@ -19,15 +19,40 @@ def load_models():
         model="AndrewLi403/CustomModel_tripadvisor_finetuned"
     )
     ner_model = pipeline("ner", model="dslim/bert-base-NER")
-    return sentiment_model, ner_model
-def analyze_sentiment(text, model):
-    result = model(text)[0]
-    rating = int(result['label'].split('_')[-1])
     return {
-        'rating': rating,
-        'label': RATING_MAP[rating],
-        'score': result['score']
     }
 def extract_aspects(text, model):
@@ -57,22 +82,16 @@ def extract_aspects(text, model):
 def plot_sentiment_distribution(df):
     fig, ax = plt.subplots()
-    # Get counts for all possible ratings
     counts = df['label'].value_counts()
-    # Ensure all rating categories are present (even with 0 counts)
     for rating in RATING_MAP.values():
         if rating not in counts.index:
             counts[rating] = 0
-    # Sort by the predefined rating order
     counts = counts.loc[list(RATING_MAP.values())]
-    # Plot with consistent colors
     counts.plot.pie(
         autopct='%1.1f%%',
-        colors=['#ff9999','#66b3ff','#99ff99'],  # Negative, Neutral, Positive
         ax=ax
     )
     ax.set_ylabel('')
@@ -96,7 +115,7 @@ def main():
     st.title("Restaurant Review Analyzer")
     st.markdown("Using fine-tuned model for sentiment and aspect analysis")
-    sentiment_model, ner_model = load_models()
     st.sidebar.header("Analysis Options")
     analysis_mode = st.sidebar.radio(
@@ -115,7 +134,7 @@ def main():
         if st.button("Analyze"):
             if user_input:
                 with st.spinner("Analyzing..."):
-                    sentiment = analyze_sentiment(user_input, sentiment_model)
                     aspects = extract_aspects(user_input, ner_model)
                     new_entry = pd.DataFrame([{
@@ -159,7 +178,7 @@ def main():
                     results = []
                     for i, row in enumerate(df.itertuples()):
-                        sentiment = analyze_sentiment(row.text, sentiment_model)
                         aspects = extract_aspects(row.text, ner_model)
                         results.append({

 import streamlit as st
+from transformers import pipeline, AutoTokenizer
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 import pandas as pd
         model="AndrewLi403/CustomModel_tripadvisor_finetuned"
     )
     ner_model = pipeline("ner", model="dslim/bert-base-NER")
+    tokenizer = AutoTokenizer.from_pretrained("AndrewLi403/CustomModel_tripadvisor_finetuned")
+    return sentiment_model, ner_model, tokenizer
def analyze_sentiment(text, model, tokenizer, chunk_size=400, max_length=512):
    """Predict the rating of ``text``, chunking it when it is too long.

    Text that fits within the model's sequence limit is classified in one
    call. Longer text is cut into consecutive token chunks, each chunk is
    classified on its own, and the chunk predictions are combined by
    majority vote.

    Args:
        text: Review text to classify.
        model: Callable returning a list whose first item is a dict with a
            ``'label'`` (last ``'_'``-separated field is the integer
            rating) and a ``'score'``.
        tokenizer: Tokenizer matching ``model``; used to measure and split
            the text.
        chunk_size: Maximum number of tokens per chunk for long text.
        max_length: Maximum sequence length accepted by the model, special
            tokens included. Defaults to 512, the limit the previous
            implementation hard-coded.

    Returns:
        dict with ``'rating'`` (int), ``'label'`` (the ``RATING_MAP`` entry
        for that rating) and ``'score'`` (float). For chunked text the
        score is the mean confidence of the chunks that voted for the
        winning label.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    tokens = tokenizer.tokenize(text)
    # tokenize() returns content tokens only; encoding for the model adds
    # special tokens on top, so they must be reserved out of max_length.
    token_budget = max_length - tokenizer.num_special_tokens_to_add(pair=False)

    # Short text: a single prediction is enough.
    if len(tokens) <= token_budget:
        prediction = model(text)[0]
        return _sentiment_result(prediction['label'], prediction['score'])

    # Long text: never let a chunk exceed what the model can take, and keep
    # the step positive even for a degenerate max_length.
    step = max(1, min(chunk_size, token_budget))

    # label -> (number of chunks voting for it, sum of their confidences)
    tally = {}
    for start in range(0, len(tokens), step):
        chunk_text = tokenizer.convert_tokens_to_string(tokens[start:start + step])
        prediction = model(chunk_text)[0]
        votes, score_sum = tally.get(prediction['label'], (0, 0.0))
        tally[prediction['label']] = (votes + 1, score_sum + prediction['score'])

    # Majority vote. Ties on vote count go to the label with the higher
    # total confidence; a full tie goes to the label seen first (dicts keep
    # insertion order), so the outcome is the same on every run.
    winner = max(tally, key=tally.get)
    votes, score_sum = tally[winner]
    return _sentiment_result(winner, score_sum / votes)


def _sentiment_result(label, score):
    """Build the result dict from a raw model label and a confidence score."""
    rating = int(label.split('_')[-1])
    return {
        'rating': rating,
        'label': RATING_MAP[rating],
        'score': score
    }
 def extract_aspects(text, model):
 def plot_sentiment_distribution(df):
     fig, ax = plt.subplots()
     counts = df['label'].value_counts()
     for rating in RATING_MAP.values():
         if rating not in counts.index:
             counts[rating] = 0
     counts = counts.loc[list(RATING_MAP.values())]
     counts.plot.pie(
         autopct='%1.1f%%',
+        colors=['#ff9999','#66b3ff','#99ff99'],
         ax=ax
     )
     ax.set_ylabel('')
     st.title("Restaurant Review Analyzer")
     st.markdown("Using fine-tuned model for sentiment and aspect analysis")
+    sentiment_model, ner_model, tokenizer = load_models()
     st.sidebar.header("Analysis Options")
     analysis_mode = st.sidebar.radio(
         if st.button("Analyze"):
             if user_input:
                 with st.spinner("Analyzing..."):
+                    sentiment = analyze_sentiment(user_input, sentiment_model, tokenizer)
                     aspects = extract_aspects(user_input, ner_model)
                     new_entry = pd.DataFrame([{
                     results = []
                     for i, row in enumerate(df.itertuples()):
+                        sentiment = analyze_sentiment(row.text, sentiment_model, tokenizer)
                         aspects = extract_aspects(row.text, ner_model)
                         results.append({