File size: 4,624 Bytes
384f5e4
bfd4ab7
384f5e4
 
717fe8c
384f5e4
717fe8c
 
 
 
bfd4ab7
 
717fe8c
 
 
 
 
 
 
 
bfd4ab7
60f9cc3
717fe8c
 
b45b655
 
bfd4ab7
02d5bf8
 
717fe8c
0d5a75b
ad7b4a8
465e649
717fe8c
 
0d5a75b
717fe8c
384f5e4
ad7b4a8
 
 
 
b45b655
465e649
0d5a75b
ad7b4a8
 
 
 
 
 
 
 
 
a8c6507
0d5a75b
 
bfd4ab7
 
 
 
 
 
 
717fe8c
8cadfb7
0d5a75b
bfd4ab7
384f5e4
 
 
 
 
db118f0
384f5e4
bfd4ab7
 
 
4e47ea7
384f5e4
 
 
 
74f08a5
384f5e4
 
 
 
 
74f08a5
384f5e4
2dd33a7
02d5bf8
2dd33a7
02d5bf8
 
 
74f08a5
02d5bf8
 
74f08a5
0d5a75b
02d5bf8
74f08a5
02d5bf8
 
 
 
 
74f08a5
02d5bf8
 
 
74f08a5
02d5bf8
 
 
 
74f08a5
02d5bf8
 
 
384f5e4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import requests
from transformers import pipeline
import pandas as pd

# Step 1: Naver News API call function
def fetch_naver_news(query, display=10, start=1, sort="date", timeout=10):
    """Query the Naver news search endpoint and return the decoded JSON body.

    Args:
        query: Search term sent as the ``query`` request parameter.
        display: Value sent as the ``display`` request parameter.
        start: Value sent as the ``start`` request parameter.
        sort: Value sent as the ``sort`` request parameter.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        The parsed JSON payload when the API answers with HTTP 200; otherwise
        ``None``, after the status code and response text have been shown
        through ``st.error``.

    Raises:
        requests.exceptions.RequestException: Network failures and timeouts
            are not caught here and propagate to the caller.
    """
    import os

    # NOTE(review): these credentials are committed to source control. The
    # environment variables take precedence; the literals remain only as a
    # backward-compatible fallback and should be rotated and removed.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        # Surface the failure in the UI and signal it to the caller with None.
        st.error(f"Error: {response.status_code}, {response.text}")
        return None
    return response.json()

# Step 2: Load the Hugging Face political-orientation analysis model
def load_huggingface_model():
    """Build and return the text-classification pipeline used by the app.

    Returns:
        The object produced by ``pipeline`` for the ``text-classification``
        task with the ``bucketresearch/politicalBiasBERT`` model.
    """
    # NOTE(review): every call constructs a new pipeline; consider caching
    # the result if this is invoked on each user interaction.
    return pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )

# Step 3: Political-orientation classification function
def classify_political_sentiment(text, classifier):
    """Label a text by keyword match and attach a signed classifier score.

    Args:
        text: The text to classify.
        classifier: Callable that takes a string and returns a sequence whose
            first element is a mapping with ``"label"`` and ``"score"`` keys.

    Returns:
        A ``(orientation, signed_score)`` tuple. ``orientation`` is the label
        of the first keyword group with a keyword occurring in ``text``, or
        the fallback label when none match. ``signed_score`` is the
        classifier score, negated unless the predicted label is
        ``"POSITIVE"``.
    """
    # Only the first 512 characters are sent to the classifier; keyword
    # matching below still uses the full text.
    top_prediction = classifier(text[:512])[0]
    predicted_label = top_prediction["label"]
    confidence = top_prediction["score"]

    # NOTE(review): the score is negated for every label other than
    # "POSITIVE" -- confirm that the configured model actually emits that label.
    signed_score = confidence if predicted_label == "POSITIVE" else -confidence

    # Ordered keyword groups: the first group with any hit decides the label.
    # NOTE(review): these literals look mis-encoded (UTF-8 text read through a
    # single-byte codepage) -- verify the file's encoding.
    keyword_groups = (
        ("์ง„๋ณด", ("๋ณต์ง€", "ํ‰๋“ฑ", "๋ฏผ์ฃผ", "ํ™˜๊ฒฝ", "์‚ฌํšŒ์  ์ฑ…์ž„")),
        ("๋ณด์ˆ˜", ("์•ˆ๋ณด", "์ „ํ†ต", "๊ฒฝ์ œ", "์„ฑ์žฅ", "์งˆ์„œ", "๊ตญ๋ฐฉ")),
    )
    for orientation, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in text:
                return orientation, signed_score

    # No keyword from either group was found.
    return "์ค‘๋ฆฝ", signed_score

# Step 4: Analyze the news items and output the results
def analyze_news_political_orientation(news_items, classifier):
    """Classify each news item and collect per-label counts plus detail rows.

    Args:
        news_items: Iterable of mappings providing ``"title"``,
            ``"description"`` and ``"link"`` entries.
        classifier: Passed through unchanged to
            ``classify_political_sentiment``.

    Returns:
        A ``(counts, details)`` tuple: ``counts`` maps each of the three
        orientation labels to the number of items assigned to it, and
        ``details`` holds one dict per item with its title, description,
        orientation, score and link.
    """
    # Start every orientation label at zero so all three always appear.
    tally = dict.fromkeys(("์ง„๋ณด", "๋ณด์ˆ˜", "์ค‘๋ฆฝ"), 0)
    rows = []

    for article in news_items:
        headline = article["title"]
        summary = article["description"]

        # Title and description are classified together as one text.
        orientation, score = classify_political_sentiment(
            f"{headline}. {summary}", classifier
        )
        tally[orientation] = tally[orientation] + 1

        row = {
            "์ œ๋ชฉ": headline,
            "์š”์•ฝ": summary,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
            "๋งํฌ": article["link"],
        }
        rows.append(row)

    return tally, rows


# Start of the Streamlit app
# Page header and short description.
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword input, pre-filled with a default value.
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

# The whole pipeline runs only when the button is pressed.
if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # Fetch ten news items for the entered keyword.
            news_data = fetch_naver_news(query, display=10)

            if news_data is not None:
                news_items = news_data["items"]

                # Load the classifier, then analyze every fetched item.
                classifier = load_huggingface_model()
                results, detailed_results = analyze_news_political_orientation(
                    news_items, classifier
                )

                # Summary: one line per orientation label with its count.
                st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
                for orientation_label in ("์ง„๋ณด", "๋ณด์ˆ˜", "์ค‘๋ฆฝ"):
                    st.write(f"{orientation_label}: {results[orientation_label]}๊ฑด")

                # Bar chart of the per-label counts.
                st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
                counts_frame = pd.DataFrame.from_dict(
                    results, orient='index', columns=["๊ฑด์ˆ˜"]
                )
                st.bar_chart(counts_frame)

                # Table with one row per analyzed news item.
                st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # Markdown list linking to each article with its label and score.
                st.subheader("๋‰ด์Šค ๋งํฌ")
                for _, entry in df.iterrows():
                    st.write(f"- [{entry['์ œ๋ชฉ']}]({entry['๋งํฌ']}) (์„ฑํ–ฅ: {entry['์„ฑํ–ฅ']}, ์ ์ˆ˜: {entry['์ ์ˆ˜']:.2f})")
            else:
                # fetch_naver_news returned None, i.e. a non-200 response.
                st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")

        except Exception as exc:
            # Top-level boundary: show any failure in the UI instead of crashing.
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {exc}")