# Test / app.py
# 820nam's picture
# Update app.py
# 60f9cc3 verified
# raw
# history blame
# 4.22 kB
import streamlit as st
import pandas as pd
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import os
# Load the political-bias classification model (cached across Streamlit reruns).
@st.cache_resource
def load_model():
    """Build and cache a text-classification pipeline for political bias.

    Uses the ``bucketresearch/politicalBiasBERT`` checkpoint. The
    ``st.cache_resource`` decorator ensures the model and tokenizer are
    downloaded and instantiated only once per server process.

    Returns:
        A transformers ``text-classification`` pipeline.
    """
    checkpoint = "bucketresearch/politicalBiasBERT"
    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
    )
# ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ
def fetch_naver_news(query, display=5):
url = "https://openapi.naver.com/v1/search/news.json"
headers = {
"X-Naver-Client-Id": "I_8koTJh3R5l4wLurQbG", # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ID ์ž…๋ ฅ
"X-Naver-Client-Secret": "W5oWYlAgur", # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ๋น„๋ฐ€ํ‚ค ์ž…๋ ฅ
}
params = {"query": query, "display": display, "sort": "sim"}
try:
response = requests.get(url, headers=headers, params=params)
response.raise_for_status() # HTTP ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ
return response.json()
except requests.exceptions.RequestException as e:
st.error(f"API ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
return None # ๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์œผ๋ฉด None ๋ฐ˜ํ™˜
# Map a model prediction onto a coarse political-orientation bucket.
def classify_sentiment(text, classifier):
    """Classify *text* and map the raw model label to an orientation string.

    Args:
        text: Text to classify (truncated to 512 tokens by the pipeline).
        classifier: A transformers text-classification pipeline, or any
            callable returning ``[{"label": ..., "score": ...}]``.

    Returns:
        A ``(orientation, score)`` tuple.

    NOTE(review): the LABEL_* mapping is inherited as-is (the original
    comments said "adjust per labels"); verify it against the model's
    actual ``id2label`` before trusting these buckets — ``LABEL_4`` may
    never occur for this checkpoint.
    """
    prediction = classifier(text, truncation=True, max_length=512)[0]
    label, score = prediction["label"], prediction["score"]
    if label in ("LABEL_0", "LABEL_1"):
        return "๋ณด์ˆ˜", score
    if label == "LABEL_4":
        return "์ง„๋ณด", score
    return "์ค‘๋ฆฝ", score
# Aggregate per-article classifications into counts plus a detail table.
def analyze_news(news_items, classifier):
    """Classify each news item and tally orientation counts.

    Args:
        news_items: Iterable of dicts with "title", "description", "link"
            keys (the shape returned by the Naver News API's "items").
        classifier: Pipeline forwarded to ``classify_sentiment``.

    Returns:
        ``(counts, details)`` — ``counts`` maps each orientation bucket to
        the number of articles assigned to it; ``details`` is a list of
        per-article dicts suitable for a display table.
    """
    counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    details = []
    for article in news_items:
        # Title and summary combined give the classifier more context.
        text = "{}. {}".format(article["title"], article["description"])
        orientation, score = classify_sentiment(text, classifier)
        counts[orientation] += 1
        details.append({
            "์ œ๋ชฉ": article["title"],
            "์š”์•ฝ": article["description"],
            "๋งํฌ": article["link"],
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
        })
    return counts, details
# ---- Streamlit app entry point ----
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword input (defaults to the Korean word for "politics")
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # Collect news from the Naver API (None means the request failed)
            news_data = fetch_naver_news(query, display=10)
            if news_data is None:
                st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
            else:
                news_items = news_data["items"]
                # Load (cached) classification model
                classifier = load_model()
                # Classify every article and tally orientation counts
                results, detailed_results = analyze_news(news_items, classifier)
                # Summary counts per orientation
                st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
                st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
                st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
                st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")
                # Orientation distribution (NOTE: original comment said "pie
                # chart" but the code renders a bar chart)
                st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
                st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))
                # Per-article detail table
                st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)
                # Clickable markdown links to each article
                st.subheader("๋‰ด์Šค ๋งํฌ")
                for index, row in df.iterrows():
                    st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")
        except Exception as e:
            # Top-level boundary: surface any unexpected failure in the UI
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")