mxiean committed on
Commit
3515dc3
·
verified ·
1 Parent(s): f130149

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -18
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
5
  import pandas as pd
@@ -19,15 +19,40 @@ def load_models():
19
  model="AndrewLi403/CustomModel_tripadvisor_finetuned"
20
  )
21
  ner_model = pipeline("ner", model="dslim/bert-base-NER")
22
- return sentiment_model, ner_model
 
23
 
24
- def analyze_sentiment(text, model):
25
- result = model(text)[0]
26
- rating = int(result['label'].split('_')[-1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  return {
28
- 'rating': rating,
29
- 'label': RATING_MAP[rating],
30
- 'score': result['score']
31
  }
32
 
33
  def extract_aspects(text, model):
@@ -57,22 +82,16 @@ def extract_aspects(text, model):
57
 
58
  def plot_sentiment_distribution(df):
59
  fig, ax = plt.subplots()
60
-
61
- # Get counts for all possible ratings
62
  counts = df['label'].value_counts()
63
 
64
- # Ensure all rating categories are present (even with 0 counts)
65
  for rating in RATING_MAP.values():
66
  if rating not in counts.index:
67
  counts[rating] = 0
68
 
69
- # Sort by the predefined rating order
70
  counts = counts.loc[list(RATING_MAP.values())]
71
-
72
- # Plot with consistent colors
73
  counts.plot.pie(
74
  autopct='%1.1f%%',
75
- colors=['#ff9999','#66b3ff','#99ff99'], # Negative, Neutral, Positive
76
  ax=ax
77
  )
78
  ax.set_ylabel('')
@@ -96,7 +115,7 @@ def main():
96
  st.title("Restaurant Review Analyzer")
97
  st.markdown("Using fine-tuned model for sentiment and aspect analysis")
98
 
99
- sentiment_model, ner_model = load_models()
100
 
101
  st.sidebar.header("Analysis Options")
102
  analysis_mode = st.sidebar.radio(
@@ -115,7 +134,7 @@ def main():
115
  if st.button("Analyze"):
116
  if user_input:
117
  with st.spinner("Analyzing..."):
118
- sentiment = analyze_sentiment(user_input, sentiment_model)
119
  aspects = extract_aspects(user_input, ner_model)
120
 
121
  new_entry = pd.DataFrame([{
@@ -159,7 +178,7 @@ def main():
159
  results = []
160
 
161
  for i, row in enumerate(df.itertuples()):
162
- sentiment = analyze_sentiment(row.text, sentiment_model)
163
  aspects = extract_aspects(row.text, ner_model)
164
 
165
  results.append({
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
5
  import pandas as pd
 
19
  model="AndrewLi403/CustomModel_tripadvisor_finetuned"
20
  )
21
  ner_model = pipeline("ner", model="dslim/bert-base-NER")
22
+ tokenizer = AutoTokenizer.from_pretrained("AndrewLi403/CustomModel_tripadvisor_finetuned")
23
+ return sentiment_model, ner_model, tokenizer
24
 
25
+ def analyze_sentiment(text, model, tokenizer, chunk_size=400):
26
+ tokens = tokenizer.tokenize(text)
27
+
28
+ # Short text processing
29
+ if len(tokens) <= 512:
30
+ result = model(text)[0]
31
+ rating = int(result['label'].split('_')[-1])
32
+ return {
33
+ 'rating': rating,
34
+ 'label': RATING_MAP[rating],
35
+ 'score': result['score']
36
+ }
37
+
38
+ # Long text chunk processing
39
+ chunks = [tokens[i:i+chunk_size] for i in range(0, len(tokens), chunk_size)]
40
+ results = []
41
+
42
+ for chunk in chunks:
43
+ chunk_text = tokenizer.convert_tokens_to_string(chunk)
44
+ result = model(chunk_text)[0]
45
+ results.append(result)
46
+
47
+ # Aggregate results (majority vote + average confidence)
48
+ final_label = max(set(r['label'] for r in results),
49
+ key=lambda x: sum(1 for r in results if r['label'] == x))
50
+ avg_score = sum(r['score'] for r in results) / len(results)
51
+
52
  return {
53
+ 'rating': int(final_label.split('_')[-1]),
54
+ 'label': RATING_MAP[int(final_label.split('_')[-1])],
55
+ 'score': avg_score
56
  }
57
 
58
  def extract_aspects(text, model):
 
82
 
83
  def plot_sentiment_distribution(df):
84
  fig, ax = plt.subplots()
 
 
85
  counts = df['label'].value_counts()
86
 
 
87
  for rating in RATING_MAP.values():
88
  if rating not in counts.index:
89
  counts[rating] = 0
90
 
 
91
  counts = counts.loc[list(RATING_MAP.values())]
 
 
92
  counts.plot.pie(
93
  autopct='%1.1f%%',
94
+ colors=['#ff9999','#66b3ff','#99ff99'],
95
  ax=ax
96
  )
97
  ax.set_ylabel('')
 
115
  st.title("Restaurant Review Analyzer")
116
  st.markdown("Using fine-tuned model for sentiment and aspect analysis")
117
 
118
+ sentiment_model, ner_model, tokenizer = load_models()
119
 
120
  st.sidebar.header("Analysis Options")
121
  analysis_mode = st.sidebar.radio(
 
134
  if st.button("Analyze"):
135
  if user_input:
136
  with st.spinner("Analyzing..."):
137
+ sentiment = analyze_sentiment(user_input, sentiment_model, tokenizer)
138
  aspects = extract_aspects(user_input, ner_model)
139
 
140
  new_entry = pd.DataFrame([{
 
178
  results = []
179
 
180
  for i, row in enumerate(df.itertuples()):
181
+ sentiment = analyze_sentiment(row.text, sentiment_model, tokenizer)
182
  aspects = extract_aspects(row.text, ner_model)
183
 
184
  results.append({