import streamlit as st
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline  # imported but not used in this version of the app
import openai
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
# Call the Streamlit page configuration first, before any other Streamlit command
st.set_page_config(page_title="정치적 관점 분석", page_icon="📰", layout="wide")

# Set the OpenAI API key (read from an environment variable)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Korean font setup (injected as CSS so Streamlit applies it)
st.markdown(
    """
    <style>
    body {
        font-family: 'Nanum Gothic', sans-serif;
    }
    </style>
    """,
    unsafe_allow_html=True
)
# matplotlib Korean font setup
import matplotlib
matplotlib.rcParams['font.family'] = 'NanumGothic'  # use a Korean font for plot labels
matplotlib.rcParams['axes.unicode_minus'] = False   # keep the minus sign from rendering as a broken glyph
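# Note (not in the original code): the CSS above and this matplotlib setting only take effect
# if the NanumGothic font is actually installed on the machine running the app; on a
# Debian-based host (e.g. a Hugging Face Space) that typically means installing the
# fonts-nanum system package.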
# ๋„ค์ด๋ฒ„ ๋‰ด์Šค API๋ฅผ ํ†ตํ•ด ์‹ค์ œ ๋‰ด์Šค ๊ธฐ์‚ฌ ๊ฐ€์ ธ์˜ค๊ธฐ
def fetch_naver_news(query, display=5):
client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
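    # Note (not in the original code): the credentials above are hardcoded; for deployment it
    # is usually safer to read them from environment variables, mirroring how the OpenAI key
    # is handled. The variable names below are only illustrative:
    #   client_id = os.getenv("NAVER_CLIENT_ID", client_id)
    #   client_secret = os.getenv("NAVER_CLIENT_SECRET", client_secret)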
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": 1,
        "sort": "date",  # sort by most recent
    }
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        news_data = response.json()
        return news_data['items']  # return the list of news articles
    else:
        st.error("뉴스 데이터를 불러오는 데 실패했습니다.")
        return []
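# Note (not in the original code): the Naver search API typically returns titles and
# descriptions containing HTML markup, such as <b> tags around matched keywords and HTML
# entities; the app passes these strings through as-is, so some tags may appear in the output.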
# ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋กœ๋“œ ๋ฐ ํ•™์Šต
def train_ml_model():
# ์—ฌ๊ธฐ์„œ๋Š” ์ƒ˜ํ”Œ ๋ฐ์ดํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ํ•™์Šต
# ์‹ค์ œ ๋ฐ์ดํ„ฐ๋ฅผ ์ด์šฉํ•œ ํ•™์Šต ๊ณผ์ •์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.
data = [
("์ง„๋ณด์ ์ธ ์ •๋ถ€ ์ •์ฑ…์„ ๊ฐ•ํ™”ํ•ด์•ผ ํ•œ๋‹ค", "LEFT"),
("๋ณด์ˆ˜์ ์ธ ๊ฒฝ์ œ ์ •์ฑ…์ด ํ•„์š”ํ•˜๋‹ค", "RIGHT"),
("์ค‘๋ฆฝ์ ์ธ ์ž…์žฅ์—์„œ ์ƒํ™ฉ์„ ํ‰๊ฐ€ํ•œ๋‹ค", "NEUTRAL")
]
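    # Note (not in the original code): with only three labeled examples, the 80/20 split below
    # leaves a single test sample, so the reported accuracy is not meaningful, and the training
    # split may not even contain all three labels. As the comment above says, a real labeled
    # dataset is required for this classifier to be useful.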
    texts, labels = zip(*data)

    # TF-IDF vectorization
    vectorizer = TfidfVectorizer(max_features=1000)
    X = vectorizer.fit_transform(texts)
    y = labels

    # Split into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a logistic-regression model
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"모델 정확도: {accuracy:.2f}")

    # Save the model and the vectorizer
    joblib.dump(model, 'political_bias_model.pkl')
    joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
    return model, vectorizer
# ๋กœ๋“œ๋œ ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ๋กœ ์„ฑํ–ฅ ๋ถ„์„
def analyze_article_sentiment_ml(text, model, vectorizer):
X = vectorizer.transform([text])
prediction = model.predict(X)[0]
# ์„ฑํ–ฅ์— ๋”ฐ๋ฅธ ๋ ˆ์ด๋ธ” ๋ฐ˜ํ™˜
if prediction == "LEFT":
return "์ง„๋ณด"
elif prediction == "RIGHT":
return "๋ณด์ˆ˜"
else:
return "์ค‘๋ฆฝ"
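# Note (not in the original code): openai.ChatCompletion.create below is the interface of the
# pre-1.0 openai Python SDK; in openai>=1.0 that call was removed in favor of the client-based
# API, so the package version would need to be pinned accordingly (e.g. openai<1.0).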
# Generate an opposing-viewpoint article with GPT-4
def generate_article_gpt4(prompt):
    try:
        # Use the GPT-4 model to write an article from the opposite perspective
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates articles."},
                {"role": "user", "content": prompt}  # prompt built from the news article
            ],
            max_tokens=1024,  # cap the response at 1024 tokens
            temperature=0.7  # sampling temperature (degree of creativity)
        )
        return response['choices'][0]['message']['content']  # return GPT's response text
    except Exception as e:
        return f"Error generating text: {e}"
# ์ •์น˜์  ๊ด€์  ๋น„๊ต ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ์ƒ์„ฑ
def analyze_news_political_viewpoint(query, model, vectorizer):
# ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ
news_items = fetch_naver_news(query)
if not news_items:
return [], {}
results = []
sentiment_counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0} # ๋งคํ•‘๋œ ๋ผ๋ฒจ์— ๋งž๊ฒŒ ์ดˆ๊ธฐํ™”
for item in news_items:
title = item["title"]
description = item["description"]
link = item["link"] # ๋‰ด์Šค ๋งํฌ ๊ฐ€์ ธ์˜ค๊ธฐ
combined_text = f"{title}. {description}"
# ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ์„ ์ด์šฉํ•œ ์„ฑํ–ฅ ๋ถ„์„
sentiment = analyze_article_sentiment_ml(combined_text, model, vectorizer)
sentiment_counts[sentiment] += 1 # ๋งคํ•‘๋œ ํ‚ค๋กœ ์นด์šดํŠธ ์ฆ๊ฐ€
# ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
opposite_perspective = "๋ณด์ˆ˜์ " if sentiment == "์ง„๋ณด" else "์ง„๋ณด์ "
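        # Note (not in the original code): because the ternary above only checks for "진보",
        # articles classified as "중립" (neutral) also fall through to a "진보적" counter-article;
        # a three-way branch would be needed to treat neutral articles separately.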
        prompt = f"{combined_text}를 기반으로 {opposite_perspective} 관점의 기사를 작성해주세요."
        opposite_article = generate_article_gpt4(prompt)

        results.append({
            "제목": title,
            "원본 기사": description,
            "성향": sentiment,
            "대조 관점 기사": opposite_article,
            "뉴스 링크": link  # include the article link
        })
    return results, sentiment_counts
# ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™” (๋ง‰๋Œ€ ๊ทธ๋ž˜ํ”„)
def visualize_sentiment_distribution(sentiment_counts):
fig, ax = plt.subplots(figsize=(8, 5))
labels = list(sentiment_counts.keys())
sizes = list(sentiment_counts.values())
# ์ƒ‰์ƒ ์„ค์ • (๋ถ€๋“œ๋Ÿฌ์šด ํŒ”๋ ˆํŠธ)
color_palette = sns.color_palette("pastel")[0:len(sizes)]
ax.bar(labels, sizes, color=color_palette)
ax.set_xlabel('์„ฑํ–ฅ', fontsize=14)
ax.set_ylabel('๊ฑด์ˆ˜', fontsize=14)
ax.set_title('๋‰ด์Šค ์„ฑํ–ฅ ๋ถ„ํฌ', fontsize=16)
st.pyplot(fig)
# Streamlit application
st.title("📰 정치적 관점 비교 분석 도구")
st.markdown("뉴스 기사의 정치 성향 분석과 반대 관점 기사를 생성하여 비교합니다.")
# ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋กœ๋“œ
if not os.path.exists('political_bias_model.pkl'):
model, vectorizer = train_ml_model()
else:
model = joblib.load('political_bias_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
# ์‚ฌ์šฉ์ž๋กœ๋ถ€ํ„ฐ ๊ฒ€์ƒ‰์–ด ์ž…๋ ฅ ๋ฐ›๊ธฐ
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
# ๋ถ„์„ ์‹œ์ž‘ ๋ฒ„ํŠผ
if st.button("๐Ÿ” ๋ถ„์„ ์‹œ์ž‘"):
with st.spinner("๋ถ„์„ ์ค‘..."):
analysis_results, sentiment_counts = analyze_news_political_viewpoint(query, model, vectorizer)
if analysis_results:
st.success("๋‰ด์Šค ๋ถ„์„์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
# ๋‰ด์Šค ๊ธฐ์‚ฌ ๋ชฉ๋ก ํ‘œ์‹œ
for result in analysis_results:
st.subheader(result["์ œ๋ชฉ"])
st.write(f"์„ฑํ–ฅ: {result['์„ฑํ–ฅ']}")
st.write(f"๊ธฐ์‚ฌ: {result['์›๋ณธ ๊ธฐ์‚ฌ']}")
st.write(f"[์›๋ณธ ๊ธฐ์‚ฌ ๋ณด๊ธฐ]({result['๋‰ด์Šค ๋งํฌ']})")
st.write(f"๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ: {result['๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ']}")
st.markdown("---")
# ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™”
visualize_sentiment_distribution(sentiment_counts)
else:
st.warning("๊ฒ€์ƒ‰๋œ ๋‰ด์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")