import html
import os
import re

import pandas as pd
import requests
import streamlit as st
from transformers import pipeline


def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Query the Naver news search API and return the decoded JSON payload.

    Args:
        query: Search keyword(s), sent as the ``query`` request parameter.
        display: Sent as the ``display`` request parameter (results per call).
        start: Sent as the ``start`` request parameter (first result index).
        sort: Sent as the ``sort`` request parameter.

    Returns:
        The response body parsed from JSON.

    Raises:
        RuntimeError: If the API answers with any status other than 200.
        requests.RequestException: If the request fails or times out.
    """
    # SECURITY: these credentials were committed in source. Environment
    # variables now take precedence; the literals remain only as a fallback so
    # existing deployments keep working. Rotate the key pair and drop the
    # fallbacks as soon as the environment is configured.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code != 200:
        raise RuntimeError(f"Error: {response.status_code}, {response.text}")
    return response.json()


# Cached so the model is built once per server process instead of on every
# button click / script rerun.
@st.cache_resource
def load_translation_model():
    """Return the Korean-to-English translation pipeline.

    Builds a ``transformers`` "translation" pipeline backed by the
    ``Helsinki-NLP/opus-mt-ko-en`` checkpoint.
    """
    return pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")


# Cached so the model is built once per server process instead of on every
# button click / script rerun.
@st.cache_resource
def load_huggingface_model():
    """Return the text-classification pipeline used for scoring articles.

    Builds a ``transformers`` "text-classification" pipeline backed by the
    ``bucketresearch/politicalBiasBERT`` checkpoint.
    """
    return pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )


# Keywords whose presence in the text decides the orientation label.
# Progressive keywords are checked first, so they win when both lists match.
_PROGRESSIVE_KEYWORDS = ("복지", "평등", "민주", "환경", "사회적 책임")
_CONSERVATIVE_KEYWORDS = ("안보", "전통", "경제", "성장", "질서", "국방")


def classify_political_sentiment(text, classifier):
    """Label *text* by keyword and attach a signed model score.

    Args:
        text: Text to analyse. The keyword scan looks at the whole string;
            only the first 512 characters are sent to *classifier*.
        classifier: Callable that takes a string and returns a sequence whose
            first element is a mapping with ``"label"`` and ``"score"`` keys.

    Returns:
        A ``(orientation, sentiment_score)`` tuple. ``orientation`` is "진보"
        if any progressive keyword occurs in *text*, otherwise "보수" if any
        conservative keyword occurs, otherwise "중립". ``sentiment_score`` is
        the classifier score, negated unless the label is ``"POSITIVE"``.
    """
    top_prediction = classifier(text[:512])[0]
    score = top_prediction["score"]

    # NOTE(review): the sign depends on the classifier emitting the label
    # "POSITIVE". Confirm the label set of the model in use; if it never emits
    # that label, every score is negated.
    sentiment_score = score if top_prediction["label"] == "POSITIVE" else -score

    if any(keyword in text for keyword in _PROGRESSIVE_KEYWORDS):
        return "진보", sentiment_score
    if any(keyword in text for keyword in _CONSERVATIVE_KEYWORDS):
        return "보수", sentiment_score
    return "중립", sentiment_score


_HTML_TAG_PATTERN = re.compile(r"<[^>]+>")


def _strip_markup(text):
    """Remove HTML tags and decode HTML entities in an API text field."""
    return html.unescape(_HTML_TAG_PATTERN.sub("", text))


def analyze_news_political_orientation(news_items, classifier, translator):
    """Classify each news item and tally the orientation labels.

    Args:
        news_items: Iterable of mappings with ``"title"``, ``"description"``
            and ``"link"`` keys.
        classifier: Callable taking a string and returning a sequence whose
            first element has ``"label"`` and ``"score"`` keys.
        translator: Callable taking a string and returning a sequence whose
            first element has a ``"translation_text"`` key.

    Returns:
        A ``(results, detailed_results)`` tuple. ``results`` maps each
        orientation label to its count; ``detailed_results`` is a list with
        one dict per item holding the title, description, orientation, score
        and link.
    """

    def classify_translated(source_text):
        # The model receives the translation, capped at 512 characters.
        translated = translator(source_text)[0]["translation_text"]
        return classifier(translated[:512])

    results = {"진보": 0, "보수": 0, "중립": 0}
    detailed_results = []

    for item in news_items:
        # Search results may wrap matched terms in <b> tags and use HTML
        # entities; clean them so they do not leak into the analysis or UI.
        title = _strip_markup(item["title"])
        description = _strip_markup(item["description"])
        combined_text = f"{title}. {description}"

        # classify_political_sentiment() uses one string for both its keyword
        # scan and the model call. The keyword scan needs the untranslated
        # text (previously it ran on the translation and never matched), so
        # pass the original text and translate only inside the model callable.
        orientation, score = classify_political_sentiment(
            combined_text, classify_translated
        )

        # .get() keeps an unexpected label from raising KeyError.
        results[orientation] = results.get(orientation, 0) + 1
        detailed_results.append({
            "제목": title,
            "요약": description,
            "성향": orientation,
            "점수": score,
            "링크": item["link"],
        })

    return results, detailed_results


def _render_results(results, detailed_results):
    """Render the summary counts, distribution chart, detail table and links."""
    st.subheader("분석 결과 요약")
    for label in ("진보", "보수", "중립"):
        st.write(f"{label}: {results.get(label, 0)}건")

    st.subheader("성향 분포 차트")
    st.bar_chart(
        pd.DataFrame.from_dict(results, orient="index", columns=["건수"])
    )

    st.subheader("세부 결과")
    st.dataframe(pd.DataFrame(detailed_results))

    st.subheader("뉴스 링크")
    for row in detailed_results:
        st.write(
            f"- [{row['제목']}]({row['링크']}) "
            f"(성향: {row['성향']}, 점수: {row['점수']:.2f})"
        )


def main():
    """Draw the dashboard and run the analysis when the button is pressed."""
    st.title("정치 성향 분석 대시보드")
    st.markdown(
        "### 네이버 뉴스 데이터를 실시간으로 수집하고 정치 성향을 분석합니다."
    )

    query = st.text_input("검색 키워드를 입력하세요", value="정치")

    if not st.button("분석 시작"):
        return

    with st.spinner("데이터를 분석 중입니다..."):
        # Top-level UI boundary: any failure (network, model loading,
        # inference) is shown to the user instead of crashing the page.
        try:
            news_data = fetch_naver_news(query, display=10)

            # The early exit previously used a module-level `return`, which is
            # a SyntaxError; inside main() it is a valid guard clause.
            if not news_data or not news_data.get("items"):
                st.error("뉴스 데이터를 불러오는 데 실패했습니다.")
                return

            classifier = load_huggingface_model()
            translator = load_translation_model()

            results, detailed_results = analyze_news_political_orientation(
                news_data["items"], classifier, translator
            )
            _render_results(results, detailed_results)
        except Exception as e:
            st.error(f"오류 발생: {e}")


# Called unconditionally, as the original ran its UI code at module level.
main()