# NOTE: extraction artifacts removed here (a file-size banner, git-blame commit
# hashes, and a line-number gutter were prepended to this file; none of it is
# Python source).
import streamlit as st
import pandas as pd
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import os
# Load the deep-learning classification model. @st.cache_resource makes the
# heavyweight download/initialization happen only once per Streamlit session.
@st.cache_resource
def load_model():
    """Build and return a text-classification pipeline for politicalBiasBERT."""
    checkpoint = "bucketresearch/politicalBiasBERT"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    mdl = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return pipeline("text-classification", model=mdl, tokenizer=tok)
# Call the Naver News search API.
def fetch_naver_news(query, display=5):
    """Fetch news articles from the Naver open API.

    Parameters:
        query: search keyword string.
        display: number of articles to request.

    Returns the parsed JSON response dict, or None when the request fails
    (the error is surfaced in the Streamlit UI instead of raised).
    """
    url = "https://openapi.naver.com/v1/search/news.json"
    # BUG FIX: the raw credential strings were previously passed to os.getenv()
    # as *variable names*, so both headers were always None (and the secrets
    # were leaked in source). Read real environment variables instead — set
    # NAVER_CLIENT_ID / NAVER_CLIENT_SECRET in the deployment environment.
    headers = {
        "X-Naver-Client-Id": os.getenv("NAVER_CLIENT_ID"),
        "X-Naver-Client-Secret": os.getenv("NAVER_CLIENT_SECRET"),
    }
    params = {"query": query, "display": display, "sort": "sim"}
    try:
        # timeout added so a hung connection cannot freeze the app forever
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()  # raise on 4xx/5xx HTTP status
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        return None  # signal "no news data" to the caller
# Classify the political orientation of a piece of text.
def classify_sentiment(text, classifier):
    """Return an (orientation, score) tuple for *text*.

    politicalBiasBERT is a 3-way classifier — per its model card:
    LABEL_0 = left, LABEL_1 = center, LABEL_2 = right.

    BUG FIX: the previous mapping tested for a nonexistent LABEL_4, so the
    progressive branch was unreachable, and it lumped LABEL_1 (center) in
    with the conservative bucket.
    """
    result = classifier(text, truncation=True, max_length=512)
    label = result[0]['label']
    score = result[0]['score']
    if label == 'LABEL_0':    # left-leaning
        return "์ง๋ณด", score
    elif label == 'LABEL_2':  # right-leaning
        return "๋ณด์", score
    else:                     # LABEL_1 (center) or anything unexpected
        return "์ค๋ฆฝ", score
# Aggregate classification results over a batch of news items.
def analyze_news(news_items, classifier):
    """Classify each news item; return (per-orientation counts, per-item rows)."""
    tallies = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
    details = []
    for entry in news_items:
        headline = entry["title"]
        summary = entry["description"]
        url = entry["link"]
        # Title plus summary gives the classifier more context than either alone.
        orientation, confidence = classify_sentiment(
            f"{headline}. {summary}", classifier
        )
        tallies[orientation] += 1
        details.append({
            "์ ๋ชฉ": headline,
            "์์ฝ": summary,
            "๋งํฌ": url,
            "์ฑํฅ": orientation,
            "์ ์": confidence,
        })
    return tallies, details
# --- Streamlit app entry point -------------------------------------------
# FIX: several string literals below were split mid-string across physical
# lines in the corrupted source (a SyntaxError); they are rejoined here —
# verify the exact wording against the original file.
st.title("์ ์น ์ฑํฅ ๋ถ์ ๋์๋ณด๋")
st.markdown("### ๋ค์ด๋ฒ ๋ด์ค ๋ฐ์ดํฐ๋ฅผ ์ค์๊ฐ์ผ๋ก ์์งํ๊ณ ์ ์น ์ฑํฅ์ ๋ถ์ํฉ๋๋ค.")

# Search-keyword input.
query = st.text_input("๊ฒ์ ํค์๋๋ฅผ ์๋ ฅํ์ธ์", value="์ ์น")

if st.button("๋ถ์ ์์"):
    with st.spinner("๋ฐ์ดํฐ๋ฅผ ๋ถ์ ์ค์๋๋ค..."):
        try:
            # Collect news data from the Naver API.
            news_data = fetch_naver_news(query, display=10)
            if news_data is None:
                st.error("๋ด์ค ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
            else:
                news_items = news_data["items"]
                # Load the (session-cached) classification model.
                classifier = load_model()
                # Classify every article.
                results, detailed_results = analyze_news(news_items, classifier)

                # Summary counts per orientation.
                st.subheader("๋ถ์ ๊ฒฐ๊ณผ ์์ฝ")
                st.write(f"์ง๋ณด: {results['์ง๋ณด']}๊ฑด")
                st.write(f"๋ณด์: {results['๋ณด์']}๊ฑด")
                st.write(f"์ค๋ฆฝ: {results['์ค๋ฆฝ']}๊ฑด")

                # Bar chart of the orientation distribution.
                st.subheader("์ฑํฅ ๋ถํฌ ์ฐจํธ")
                st.bar_chart(
                    pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์"])
                )

                # Full per-article table.
                st.subheader("์ธ๋ถ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # Clickable markdown links for each article.
                st.subheader("๋ด์ค ๋งํฌ")
                for index, row in df.iterrows():
                    st.write(f"- [{row['์ ๋ชฉ']}]({row['๋งํฌ']}) (์ฑํฅ: {row['์ฑํฅ']}, ์ ์: {row['์ ์']:.2f})")
        except Exception as e:
            # Top-level UI boundary: surface any unexpected error to the user.
            st.error(f"์ค๋ฅ ๋ฐ์: {e}")