# app.py — Streamlit dashboard: fetches Naver news and analyzes political orientation.
# (Removed non-Python residue from the Hugging Face Spaces file viewer:
#  "820nam's picture / Update app.py / 12cb09e verified / raw / history blame / 5.22 kB")
import html
import os
import re

import pandas as pd
import requests
import streamlit as st
from transformers import pipeline
# Step 1: Fetch news articles from the Naver News search API.
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Query the Naver Open API for news articles matching *query*.

    Args:
        query: Search keyword (Korean allowed).
        display: Number of items to return per call (API caps this at 100).
        start: 1-based index of the first result.
        sort: "date" (newest first) or "sim" (relevance).

    Returns:
        The parsed JSON response dict; articles are under the "items" key.

    Raises:
        Exception: on any non-200 HTTP status (message carries status + body).
    """
    # SECURITY: API credentials should not be committed to source control.
    # Read them from the environment first; fall back to the original
    # literals so existing deployments keep working unchanged.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }
    # An explicit timeout keeps the Streamlit worker from hanging forever
    # on a stalled connection (requests has no default timeout).
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        return response.json()
    raise Exception(f"Error: {response.status_code}, {response.text}")
# Step 2: Load the Hugging Face translation model (Korean -> English).
def load_translation_model():
    """Return a Korean-to-English translation pipeline (Helsinki-NLP/opus-mt-ko-en)."""
    return pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
# Step 3: Load the Hugging Face political-bias classification model.
def load_huggingface_model():
    """Return a text-classification pipeline (bucketresearch/politicalBiasBERT)."""
    return pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
# Step 4: Classify the political orientation of a piece of text.
def classify_political_sentiment(text, classifier):
    """Classify *text* as progressive ("진보"), conservative ("보수") or neutral ("중립").

    Args:
        text: Text to classify (truncated to 512 chars for BERT-style models).
        classifier: A callable like a transformers text-classification pipeline,
            returning [{"label": ..., "score": ...}].

    Returns:
        Tuple of (orientation label in Korean, confidence/sentiment score).
    """
    result = classifier(text[:512])  # truncate: BERT-style models cap input length
    sentiment = result[0]
    label = sentiment["label"].upper()
    score = sentiment["score"]
    # BUGFIX: politicalBiasBERT emits LEFT / CENTER / RIGHT, never "POSITIVE",
    # so the original sign logic always negated the score and the keyword
    # heuristic (Korean keywords vs. translated English text) never matched.
    # Use the model's own label directly when it is one of the known three.
    direct = {"LEFT": "진보", "RIGHT": "보수", "CENTER": "중립"}
    if label in direct:
        return direct[label], score
    # Fallback for sentiment-style labels: keep the original heuristic —
    # signed score plus Korean keyword matching (only useful on Korean input).
    sentiment_score = score if label == "POSITIVE" else -score
    progressive_keywords = ["복지", "평등", "민주", "환경", "사회적 책임"]
    conservative_keywords = ["안보", "전통", "경제", "성장", "질서", "국방"]
    if any(keyword in text for keyword in progressive_keywords):
        return "진보", sentiment_score
    if any(keyword in text for keyword in conservative_keywords):
        return "보수", sentiment_score
    return "중립", sentiment_score
# Step 5: Analyze a batch of news items and aggregate orientation counts.
def _strip_html(text):
    """Remove HTML tags and unescape entities; the Naver API wraps query hits in <b> and uses &quot;-style entities."""
    return html.unescape(re.sub(r"<[^>]+>", "", text))


def analyze_news_political_orientation(news_items, classifier, translator):
    """Translate each news item to English, classify it, and tally results.

    Args:
        news_items: List of Naver API item dicts (keys: "title", "description", "link").
        classifier: Text-classification pipeline (see classify_political_sentiment).
        translator: Korean-to-English translation pipeline.

    Returns:
        Tuple of (counts dict keyed by "진보"/"보수"/"중립",
                  list of per-article detail dicts for display).
    """
    results = {"진보": 0, "보수": 0, "중립": 0}
    detailed_results = []
    for item in news_items:
        # Clean the API's HTML markup so it pollutes neither the translation
        # input nor the displayed results.
        title = _strip_html(item["title"])
        description = _strip_html(item["description"])
        combined_text = f"{title}. {description}"
        # Translate Korean -> English for the English-trained classifier.
        translated_text = translator(combined_text)[0]['translation_text']
        orientation, score = classify_political_sentiment(translated_text, classifier)
        results[orientation] += 1
        detailed_results.append({
            "제목": title,
            "요약": description,
            "성향": orientation,
            "점수": score,
            "링크": item["link"],
        })
    return results, detailed_results
# Streamlit app entry: UI, data fetch, analysis, and visualization.
st.title("정치 성향 분석 대시보드")
st.markdown("### 네이버 뉴스 데이터를 실시간으로 수집하고 정치 성향을 분석합니다.")

# Search keyword input
query = st.text_input("검색 키워드를 입력하세요", value="정치")

# Run the pipeline when the button is clicked
if st.button("분석 시작"):
    with st.spinner("데이터를 분석 중입니다..."):
        try:
            # Collect news data from the Naver API
            news_data = fetch_naver_news(query, display=10)
            if not news_data or not news_data.get("items"):
                st.error("뉴스 데이터를 불러오는 데 실패했습니다.")
                # BUGFIX: the original used a bare `return` here at module
                # level — a SyntaxError that stopped the whole app from
                # loading. Using if/else keeps the control flow legal.
            else:
                news_items = news_data["items"]
                # Load the Hugging Face models (classification + translation)
                classifier = load_huggingface_model()
                translator = load_translation_model()
                # Analyze the collected news items
                results, detailed_results = analyze_news_political_orientation(
                    news_items, classifier, translator
                )
                # Summary of counts per orientation
                st.subheader("분석 결과 요약")
                st.write(f"진보: {results['진보']}건")
                st.write(f"보수: {results['보수']}건")
                st.write(f"중립: {results['중립']}건")
                # Orientation distribution chart
                st.subheader("성향 분포 차트")
                st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["건수"]))
                # Per-article detail table
                st.subheader("세부 결과")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)
                # Clickable article links
                st.subheader("뉴스 링크")
                for index, row in df.iterrows():
                    st.write(f"- [{row['제목']}]({row['링크']}) (성향: {row['성향']}, 점수: {row['점수']:.2f})")
        except Exception as e:
            st.error(f"오류 발생: {e}")