|
import streamlit as st |
|
import requests |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from transformers import pipeline |
|
import openai |
|
import os |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.metrics import accuracy_score |
|
import joblib |
|
|
|
|
|
# --- Application setup -------------------------------------------------------
# Streamlit page configuration; must run before any other st.* call.
# NOTE(review): the Korean UI strings throughout this file appear
# mojibake-encoded in this copy — confirm the source file encoding.
st.set_page_config(page_title="์ ์น์ ๊ด์ ๋ถ์", page_icon="๐ฐ", layout="wide")


# OpenAI credentials are read from the environment; if unset this is None and
# API calls fail inside generate_article_gpt4's try/except.
openai.api_key = os.getenv("OPENAI_API_KEY")


# Inject a web font for the Streamlit UI (requires unsafe_allow_html).
st.markdown(
    """
    <style>
    body {
        font-family: 'Nanum Gothic', sans-serif;
    }
    </style>
    """,
    unsafe_allow_html=True
)


# Matplotlib font setup so Hangul chart labels render and the minus sign is
# not shown as a missing glyph.
# NOTE(review): this import belongs in the top-of-file import block.
import matplotlib
matplotlib.rcParams['font.family'] = 'NanumGothic'
matplotlib.rcParams['axes.unicode_minus'] = False
|
|
|
|
|
def fetch_naver_news(query, display=5):
    """Fetch recent news articles from the Naver Open API.

    Args:
        query: Search keyword.
        display: Number of articles to request (default 5).

    Returns:
        A list of article dicts (``title``, ``description``, ``link``, ...)
        on success; an empty list on any failure, with the error shown in
        the Streamlit UI instead of raising.
    """
    # SECURITY: these credentials were hard-coded in the original source.
    # Prefer environment variables; the literals remain only as a fallback
    # so existing deployments keep working.  Rotate these keys.
    client_id = os.getenv("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.getenv("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": 1,
        "sort": "date",  # newest first
    }

    try:
        # Timeout keeps the Streamlit app from hanging on a dead network.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
        return []

    if response.status_code == 200:
        # .get guards against a well-formed but item-less payload.
        return response.json().get('items', [])
    st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
    return []
|
|
|
|
|
def train_ml_model():
    """Train and persist a tiny TF-IDF + logistic-regression bias classifier.

    Returns:
        ``(model, vectorizer)`` — both are also saved to disk as
        'political_bias_model.pkl' and 'tfidf_vectorizer.pkl'.
    """
    # Toy labelled corpus, one example per class.
    # NOTE(review): far too small for a meaningful model or accuracy figure.
    data = [
        ("์ง๋ณด์ ์ธ ์ ๋ถ ์ ์ฑ์ ๊ฐํํด์ผ ํ๋ค", "LEFT"),
        ("๋ณด์์ ์ธ ๊ฒฝ์ ์ ์ฑ์ด ํ์ํ๋ค", "RIGHT"),
        ("์ค๋ฆฝ์ ์ธ ์์ฅ์์ ์ํฉ์ ํ๊ฐํ๋ค", "NEUTRAL")
    ]
    texts, labels = zip(*data)

    # Vectorise the corpus into TF-IDF features.
    vectorizer = TfidfVectorizer(max_features=1000)
    features = vectorizer.fit_transform(texts)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # Report hold-out accuracy in the UI.
    accuracy = accuracy_score(y_test, classifier.predict(X_test))
    st.write(f"๋ชจ๋ธ ์ ํ๋: {accuracy:.2f}")

    # Persist both artefacts so later runs can skip retraining.
    joblib.dump(classifier, 'political_bias_model.pkl')
    joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

    return classifier, vectorizer
|
|
|
|
|
def analyze_article_sentiment_ml(text, model, vectorizer):
    """Classify one text's political leaning with the trained model.

    Args:
        text: Raw article text.
        model: Fitted classifier exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        "์ง๋ณด" for a LEFT prediction, "๋ณด์" for RIGHT, and
        "์ค๋ฆฝ" for anything else.
    """
    features = vectorizer.transform([text])
    label = model.predict(features)[0]
    # Dict dispatch replaces the original if/elif chain; any unexpected
    # label falls through to neutral, exactly as before.
    return {"LEFT": "์ง๋ณด", "RIGHT": "๋ณด์"}.get(label, "์ค๋ฆฝ")
|
|
|
|
|
def generate_article_gpt4(prompt):
    """Generate an article with GPT-4 through the OpenAI chat API.

    Args:
        prompt: User prompt describing the article to write.

    Returns:
        The generated article text, or a string of the form
        "Error generating text: ..." if the call fails for any reason.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 openai-python
    API; confirm the pinned library version before upgrading it.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant that generates articles."},
        {"role": "user", "content": prompt}
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=messages,
            max_tokens=1024,
            temperature=0.7
        )
        return completion['choices'][0]['message']['content']
    except Exception as e:
        # Surface the failure as text so the UI can still render something.
        return f"Error generating text: {e}"
|
|
|
|
|
def analyze_news_political_viewpoint(query, model, vectorizer):
    """Fetch news for *query*, classify each article's leaning, and generate
    an opposite-perspective article for it via GPT-4.

    Args:
        query: Search keyword passed to fetch_naver_news.
        model: Fitted classifier for analyze_article_sentiment_ml.
        vectorizer: Fitted vectorizer for analyze_article_sentiment_ml.

    Returns:
        ``(results, sentiment_counts)`` — a list of per-article dicts and a
        mapping of Korean sentiment label to count.  Both are empty when no
        news could be fetched.
    """
    articles = fetch_naver_news(query)
    if not articles:
        return [], {}

    sentiment_counts = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
    results = []

    for article in articles:
        title = article["title"]
        description = article["description"]
        link = article["link"]
        combined_text = f"{title}. {description}"

        # Classify and tally this article's leaning.
        sentiment = analyze_article_sentiment_ml(combined_text, model, vectorizer)
        sentiment_counts[sentiment] += 1

        # NOTE(review): only LEFT maps to a conservative counterpart —
        # neutral articles also get a progressive counter-article.
        opposite_perspective = "๋ณด์์ " if sentiment == "์ง๋ณด" else "์ง๋ณด์ "
        prompt = f"{combined_text}๋ฅผ ๊ธฐ๋ฐ์ผ๋ก {opposite_perspective} ๊ด์ ์ ๊ธฐ์ฌ๋ฅผ ์์ฑํด์ฃผ์ธ์."
        counter_article = generate_article_gpt4(prompt)

        results.append({
            "์ ๋ชฉ": title,
            "์๋ณธ ๊ธฐ์ฌ": description,
            "์ฑํฅ": sentiment,
            "๋์กฐ ๊ด์ ๊ธฐ์ฌ": counter_article,
            "๋ด์ค ๋งํฌ": link
        })

    return results, sentiment_counts
|
|
|
|
|
def visualize_sentiment_distribution(sentiment_counts):
    """Render a bar chart of sentiment label counts in the Streamlit app.

    Args:
        sentiment_counts: Mapping of Korean sentiment label -> article count.
    """
    categories = list(sentiment_counts.keys())
    counts = list(sentiment_counts.values())

    # One pastel colour per bar.
    palette = sns.color_palette("pastel")[0:len(counts)]

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(categories, counts, color=palette)
    ax.set_xlabel('์ฑํฅ', fontsize=14)
    ax.set_ylabel('๊ฑด์', fontsize=14)
    ax.set_title('๋ด์ค ์ฑํฅ ๋ถํฌ', fontsize=16)

    st.pyplot(fig)
|
|
|
|
|
# --- Streamlit UI ------------------------------------------------------------
# Page title and short description of the tool.
st.title("๐ฐ ์ ์น์ ๊ด์ ๋น๊ต ๋ถ์ ๋๊ตฌ")
st.markdown("๋ด์ค ๊ธฐ์ฌ์ ์ ์น ์ฑํฅ ๋ถ์๊ณผ ๋ฐ๋ ๊ด์ ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ฌ ๋น๊ตํฉ๋๋ค.")
|
|
|
|
|
# Load the cached model artefacts when BOTH files exist; otherwise (re)train.
# Bug fix: the original checked only the model file and then loaded the
# vectorizer file unconditionally, so a missing 'tfidf_vectorizer.pkl'
# crashed the app on startup.
if os.path.exists('political_bias_model.pkl') and os.path.exists('tfidf_vectorizer.pkl'):
    model = joblib.load('political_bias_model.pkl')
    vectorizer = joblib.load('tfidf_vectorizer.pkl')
else:
    model, vectorizer = train_ml_model()
|
|
|
|
|
# Search keyword input; defaults to "์ ์น".
# NOTE(review): the original literal was split mid-character by extraction
# garbling and has been rejoined here — confirm against the source file.
query = st.text_input("๊ฒ์ ํค์๋๋ฅผ ์๋ ฅํ์ธ์", value="์ ์น")
|
|
|
|
|
# Main interaction: run the full pipeline when the user clicks the button.
if st.button("๐ ๋ถ์ ์์"):
    with st.spinner("๋ถ์ ์ค..."):
        # fetch -> classify -> generate counter-perspective articles
        analysis_results, sentiment_counts = analyze_news_political_viewpoint(query, model, vectorizer)

    if analysis_results:
        st.success("๋ด์ค ๋ถ์์ด ์๋ฃ๋์์ต๋๋ค.")

        # One section per article: title, sentiment, original text, source
        # link, and the GPT-generated opposite-perspective article.
        for result in analysis_results:
            st.subheader(result["์ ๋ชฉ"])
            st.write(f"์ฑํฅ: {result['์ฑํฅ']}")
            st.write(f"๊ธฐ์ฌ: {result['์๋ณธ ๊ธฐ์ฌ']}")
            st.write(f"[์๋ณธ ๊ธฐ์ฌ ๋ณด๊ธฐ]({result['๋ด์ค ๋งํฌ']})")
            st.write(f"๋์กฐ ๊ด์ ๊ธฐ์ฌ: {result['๋์กฐ ๊ด์ ๊ธฐ์ฌ']}")
            st.markdown("---")

        # Summary bar chart of the sentiment distribution.
        visualize_sentiment_distribution(sentiment_counts)
    else:
        st.warning("๊ฒ์๋ ๋ด์ค๊ฐ ์์ต๋๋ค.")
|
|