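"""Political-orientation analysis dashboard.

A Streamlit app that collects articles from the Naver News Search API in real time,
classifies each article's political orientation with the Hugging Face model
bucketresearch/politicalBiasBERT, and visualizes the results.

Quick start (the file name is an assumption; adjust it to the actual script name):

    export NAVER_CLIENT_ID=<your Naver Developers client ID>
    export NAVER_CLIENT_SECRET=<your Naver Developers client secret>
    streamlit run app.py
"""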
import streamlit as st
import pandas as pd
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import os
import re

# Load the deep-learning classification model
@st.cache_resource
def load_model():
    model_name = "bucketresearch/politicalBiasBERT"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
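
# Note: st.cache_resource keeps the loaded pipeline cached across Streamlit reruns,
# so the model weights are downloaded and initialized only once per server process.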

# ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ
def fetch_naver_news(query, display=5):
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        # Credentials are read from environment variables; NAVER_CLIENT_ID and
        # NAVER_CLIENT_SECRET are assumed names, so export them before running.
        "X-Naver-Client-Id": os.getenv("NAVER_CLIENT_ID"),
        "X-Naver-Client-Secret": os.getenv("NAVER_CLIENT_SECRET"),
    }
    params = {"query": query, "display": display, "sort": "sim"}
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # raise for HTTP 4xx/5xx responses
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"API ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return None  # no news data available; the caller handles this case
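
# For reference: a successful search response is a JSON object with an "items" list
# whose entries include at least "title", "description", and "link", the fields
# consumed by analyze_news below (see the Naver News Search API docs for the full schema).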

# Classify political orientation
def classify_sentiment(text, classifier):
    result = classifier(text, truncation=True, max_length=512)
    label = result[0]['label']
    score = result[0]['score']
    # politicalBiasBERT is a 3-class (left / center / right) classifier; the mapping
    # below assumes LABEL_0 = left, LABEL_1 = center, LABEL_2 = right and should be
    # verified against the model card.
    if label == 'LABEL_0':
        return "์ง„๋ณด", score  # left -> progressive
    elif label == 'LABEL_2':
        return "๋ณด์ˆ˜", score  # right -> conservative
    else:
        return "์ค‘๋ฆฝ", score  # center (or anything unexpected) -> neutral

# Analyze the collected news items
def analyze_news(news_items, classifier):
    results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        # Naver search results usually embed simple HTML markup (e.g. <b> around
        # matched keywords), so strip tags before classification and display.
        title = re.sub(r"<[^>]+>", "", item["title"])
        description = re.sub(r"<[^>]+>", "", item["description"])
        link = item["link"]
        combined_text = f"{title}. {description}"

        # Classify the political orientation of the combined text
        orientation, score = classify_sentiment(combined_text, classifier)
        results[orientation] += 1
        detailed_results.append({
            "์ œ๋ชฉ": title,
            "์š”์•ฝ": description,
            "๋งํฌ": link,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
        })

    return results, detailed_results

# Streamlit app
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword input
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘
            news_data = fetch_naver_news(query, display=10)
            if news_data is None or not news_data.get("items"):
                st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
            else:
                news_items = news_data["items"]

                # ๋ชจ๋ธ ๋กœ๋“œ
                classifier = load_model()

                # Analyze the news data
                results, detailed_results = analyze_news(news_items, classifier)

                # Summary of the analysis results
                st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
                st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
                st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
                st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")

                # ํŒŒ์ด ์ฐจํŠธ
                st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
                st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))

                # Detailed per-article results
                st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # ๋งํฌ ํฌํ•จํ•œ ๋‰ด์Šค ์ถœ๋ ฅ
                st.subheader("๋‰ด์Šค ๋งํฌ")
                for index, row in df.iterrows():
                    st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")
        except Exception as e:
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")