"""Streamlit dashboard: fetches Naver news articles for a search keyword and
classifies each article's political orientation (์ง๋ณด/๋ณด์/์ค๋ฆฝ) with politicalBiasBERT."""

import os

import pandas as pd
import requests
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification


@st.cache_resource
def load_model():
    # Cache the classification pipeline so the model is downloaded and loaded only once per session.
    # Note: politicalBiasBERT was fine-tuned on English political news, so scores on Korean
    # headlines should be treated as rough estimates.
    model_name = "bucketresearch/politicalBiasBERT"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def fetch_naver_news(query, display=5):
    """Query the Naver news search API and return the parsed JSON response, or None on error."""
    url = "https://openapi.naver.com/v1/search/news.json"
    # Prefer credentials from environment variables; the hard-coded values remain only as a fallback.
    headers = {
        "X-Naver-Client-Id": os.getenv("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG"),
        "X-Naver-Client-Secret": os.getenv("NAVER_CLIENT_SECRET", "W5oWYlAgur"),
    }

    params = {"query": query, "display": display, "sort": "sim"}
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {e}")  # "Error occurred during API call"
        return None


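# Naver's news search API wraps matched keywords in <b>...</b> tags and HTML-escapes the
# title/description fields, so a small cleanup step is useful before classification and
# display. This helper is a minimal sketch of that step (the name strip_html and the regex
# are this script's own convention, not part of the Naver API).
import html
import re


def strip_html(text):
    """Remove HTML tags and unescape entities in a Naver API text field."""
    return html.unescape(re.sub(r"<[^>]+>", "", text or ""))

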
def classify_sentiment(text, classifier):
    """Classify one article and map the model label to ์ง๋ณด/๋ณด์/์ค๋ฆฝ (progressive/conservative/neutral)."""
    result = classifier(text, truncation=True, max_length=512)
    label = result[0]["label"]
    score = result[0]["score"]
    # politicalBiasBERT is a 3-class model; per its model card the ids map to
    # 0 -> left, 1 -> center, 2 -> right (the pipeline typically reports these as LABEL_0/1/2).
    if label == "LABEL_0":
        return "์ง๋ณด", score  # left / progressive
    elif label == "LABEL_2":
        return "๋ณด์", score  # right / conservative
    else:
        return "์ค๋ฆฝ", score  # center / neutral


def analyze_news(news_items, classifier):
    """Classify each news item and return (orientation counts, per-article details)."""
    results = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        # Strip Naver's <b> highlighting and HTML entities before classifying and displaying.
        title = strip_html(item["title"])
        description = strip_html(item["description"])
        link = item["link"]
        combined_text = f"{title}. {description}"

        orientation, score = classify_sentiment(combined_text, classifier)
        results[orientation] += 1
        detailed_results.append({
            "์ ๋ชฉ": title,        # title
            "์์ฝ": description,  # summary
            "๋งํฌ": link,         # link
            "์ฑํฅ": orientation,  # orientation
            "์ ์": score,         # confidence score
        })

    return results, detailed_results


# Streamlit UI
st.title("์ ์น ์ฑํฅ ๋ถ์ ๋์๋ณด๋")  # "Political orientation analysis dashboard"
st.markdown("### ๋ค์ด๋ฒ ๋ด์ค ๋ฐ์ดํฐ๋ฅผ ์ค์๊ฐ์ผ๋ก ์์งํ๊ณ  ์ ์น ์ฑํฅ์ ๋ถ์ํฉ๋๋ค.")  # "Collects Naver news in real time and analyzes political orientation."

query = st.text_input("๊ฒ์ ํค์๋๋ฅผ ์ ๋ ฅํ์ธ์", value="์ ์น")  # "Enter a search keyword", default: "politics"

if st.button("๋ถ์ ์์"):  # "Start analysis"
    with st.spinner("๋ฐ์ดํฐ๋ฅผ ๋ถ์ ์ค์ ๋๋ค..."):  # "Analyzing data..."
        try:
            # Fetch up to 10 news items for the keyword.
            news_data = fetch_naver_news(query, display=10)
            if news_data is None:
                st.error("๋ด์ค ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")  # "No news data."
            else:
                news_items = news_data["items"]

                classifier = load_model()

                results, detailed_results = analyze_news(news_items, classifier)

                # Summary counts per orientation.
                st.subheader("๋ถ์ ๊ฒฐ๊ณผ ์์ฝ")
                st.write(f"์ง๋ณด: {results['์ง๋ณด']}๊ฑด")
                st.write(f"๋ณด์: {results['๋ณด์']}๊ฑด")
                st.write(f"์ค๋ฆฝ: {results['์ค๋ฆฝ']}๊ฑด")

                # Bar chart of the orientation distribution.
                st.subheader("์ฑํฅ ๋ถํฌ ์ฐจํธ")
                st.bar_chart(pd.DataFrame.from_dict(results, orient="index", columns=["๊ฑด์"]))

                # Full per-article table.
                st.subheader("์ธ๋ถ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # Clickable links with each article's orientation and score.
                st.subheader("๋ด์ค ๋งํฌ")
                for _, row in df.iterrows():
                    st.write(f"- [{row['์ ๋ชฉ']}]({row['๋งํฌ']}) (์ฑํฅ: {row['์ฑํฅ']}, ์ ์: {row['์ ์']:.2f})")
        except Exception as e:
            st.error(f"์ค๋ฅ ๋ฐ์: {e}")  # "Error occurred"
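
# Usage sketch (assumed filename app.py; you need your own Naver API credentials):
#   export NAVER_CLIENT_ID=... NAVER_CLIENT_SECRET=...
#   streamlit run app.py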