820nam committed
Commit 80a6db2 · verified · 1 Parent(s): c87048f

Update app.py

Files changed (1)
  1. app.py +73 -37
app.py CHANGED
@@ -5,6 +5,11 @@ import seaborn as sns
 from transformers import pipeline
 import openai
 import os
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+import joblib
 
 # Call the Streamlit page config before anything else
 st.set_page_config(page_title="정치적 관점 분석", page_icon="📰", layout="wide")
@@ -54,10 +59,52 @@ def fetch_naver_news(query, display=5):
         st.error("뉴스 데이터를 불러오는 데 실패했습니다.")
         return []
 
-# Load the political-bias classification model
-def load_sentiment_model():
-    classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
-    return classifier
+# Load and train the machine-learning model
+def train_ml_model():
+    # Trained on sample data here;
+    # training on real data is required in practice.
+    data = [
+        ("진보적인 정부 정책을 강화해야 한다", "LEFT"),
+        ("보수적인 경제 정책이 필요하다", "RIGHT"),
+        ("중립적인 입장에서 상황을 평가한다", "NEUTRAL")
+    ]
+    texts, labels = zip(*data)
+
+    # TF-IDF vectorization
+    vectorizer = TfidfVectorizer(max_features=1000)
+    X = vectorizer.fit_transform(texts)
+    y = labels
+
+    # Split into training and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Train a logistic regression model
+    model = LogisticRegression()
+    model.fit(X_train, y_train)
+
+    # Evaluate model performance
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    st.write(f"모델 정확도: {accuracy:.2f}")
+
+    # Save the model and vectorizer
+    joblib.dump(model, 'political_bias_model.pkl')
+    joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
+
+    return model, vectorizer
+
+# Classify political bias with the loaded machine-learning model
+def analyze_article_sentiment_ml(text, model, vectorizer):
+    X = vectorizer.transform([text])
+    prediction = model.predict(X)[0]
+
+    # Return the label for the predicted bias
+    if prediction == "LEFT":
+        return "진보"
+    elif prediction == "RIGHT":
+        return "보수"
+    else:
+        return "중립"
 
 # Generate an opposite-viewpoint article with GPT-4
 def generate_article_gpt4(prompt):
@@ -76,28 +123,13 @@ def generate_article_gpt4(prompt):
     except Exception as e:
         return f"Error generating text: {e}"
 
-# Political-bias classification
-def analyze_article_sentiment(text, classifier):
-    result = classifier(text[:512])  # truncate overly long text before classifying
-    label = result[0]["label"]
-    score = result[0]["score"]
-
-    # Map the model's labels to "진보", "보수", "중립"
-    if label == "LEFT":
-        return "진보", score
-    elif label == "RIGHT":
-        return "보수", score
-    else:
-        return "중립", score
-
 # Compare political viewpoints and generate the opposite view
-def analyze_news_political_viewpoint(query):
+def analyze_news_political_viewpoint(query, model, vectorizer):
     # Fetch the news data
     news_items = fetch_naver_news(query)
     if not news_items:
         return [], {}
 
-    classifier = load_sentiment_model()
     results = []
     sentiment_counts = {"진보": 0, "보수": 0, "중립": 0}  # initialized to match the mapped labels
 
@@ -107,8 +139,8 @@ def analyze_news_political_viewpoint(query):
         link = item["link"]  # grab the article link
         combined_text = f"{title}. {description}"
 
-        # Classify the article's bias
-        sentiment, score = analyze_article_sentiment(combined_text, classifier)
+        # Classify bias with the machine-learning model
+        sentiment = analyze_article_sentiment_ml(combined_text, model, vectorizer)
        sentiment_counts[sentiment] += 1  # count under the mapped key
 
         # Generate an opposite-viewpoint article
@@ -120,7 +152,6 @@ def analyze_news_political_viewpoint(query):
             "제목": title,
             "원본 기사": description,
             "성향": sentiment,
-            "성향 점수": score,
             "대조 관점 기사": opposite_article,
             "뉴스 링크": link  # include the link
         })
@@ -146,29 +177,34 @@ def visualize_sentiment_distribution(sentiment_counts):
 st.title("📰 정치적 관점 비교 분석 도구")
 st.markdown("뉴스 기사의 정치 성향 분석과 반대 관점 기사를 생성하여 비교합니다.")
 
+# Load the machine-learning model
+if not os.path.exists('political_bias_model.pkl'):
+    model, vectorizer = train_ml_model()
+else:
+    model = joblib.load('political_bias_model.pkl')
+    vectorizer = joblib.load('tfidf_vectorizer.pkl')
+
 # Get the search keyword from the user
 query = st.text_input("검색 키워드를 입력하세요", value="정치")
 
 # Start-analysis button
 if st.button("🔍 분석 시작"):
     with st.spinner("분석 중..."):
-        analysis_results, sentiment_counts = analyze_news_political_viewpoint(query)
+        analysis_results, sentiment_counts = analyze_news_political_viewpoint(query, model, vectorizer)
 
         if analysis_results:
             st.success("뉴스 분석이 완료되었습니다.")
 
-            # Visualize the bias distribution (bar chart)
-            st.subheader("📊 성향 분포 시각화")
-            visualize_sentiment_distribution(sentiment_counts)
-
-            # Print detailed analysis results
-            st.subheader("📝 상세 분석 결과")
+            # Display the list of news articles
             for result in analysis_results:
-                st.write(f"#### {result['제목']}")
-                st.write(f"- **원본 기사**: {result['원본 기사']}")
-                st.write(f"- **성향**: {result['성향']} (점수: {result['성향 점수']:.2f})")
-                st.write(f"- **대조 관점 기사**: {result['대조 관점 기사']}")
-                st.write(f"- **뉴스 링크**: [링크]({result['뉴스 링크']})")
-                st.write("---")
+                st.subheader(result["제목"])
+                st.write(f"성향: {result['성향']}")
+                st.write(f"기사: {result['원본 기사']}")
+                st.write(f"[원본 기사 보기]({result['뉴스 링크']})")
+                st.write(f"대조 관점 기사: {result['대조 관점 기사']}")
+                st.markdown("---")
+
+            # Visualize the bias distribution
+            visualize_sentiment_distribution(sentiment_counts)
         else:
-            st.error("분석된 뉴스 데이터가 없습니다.")
+            st.warning("검색된 뉴스가 없습니다.")
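As a quick way to exercise the classification path this commit adds, the sketch below loads the joblib artifacts written by train_ml_model() and classifies one of the sample training sentences outside Streamlit, using the same LEFT/RIGHT/NEUTRAL to 진보/보수/중립 mapping as analyze_article_sentiment_ml. It is a minimal sketch, not part of the commit, and it assumes app.py has already been run once so that political_bias_model.pkl and tfidf_vectorizer.pkl exist on disk.

import joblib

# Load the artifacts saved by train_ml_model() in app.py
# (assumes the app has been run at least once).
model = joblib.load('political_bias_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Same label mapping used by analyze_article_sentiment_ml
label_map = {"LEFT": "진보", "RIGHT": "보수", "NEUTRAL": "중립"}

# One of the sample sentences from train_ml_model, reused as a smoke test
text = "보수적인 경제 정책이 필요하다"
prediction = model.predict(vectorizer.transform([text]))[0]
print(prediction, "->", label_map.get(prediction, "중립"))

With only the three sample sentences in train_ml_model, the saved classifier is little more than a placeholder; as the commit's own comment notes, training on a real labeled corpus is needed before the predicted 진보/보수/중립 labels carry any weight.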