File size: 5,223 Bytes
384f5e4
bfd4ab7
384f5e4
 
717fe8c
384f5e4
717fe8c
 
 
 
bfd4ab7
 
717fe8c
 
 
 
 
 
 
 
bfd4ab7
60f9cc3
717fe8c
 
b45b655
 
bfd4ab7
717fe8c
 
ad7b4a8
 
 
 
 
fd1df6b
ad7b4a8
465e649
717fe8c
 
ad7b4a8
717fe8c
ad7b4a8
384f5e4
ad7b4a8
 
 
 
 
b45b655
465e649
ad7b4a8
 
 
 
 
 
 
 
 
 
bfd4ab7
ad7b4a8
4f28bae
bfd4ab7
 
 
 
 
 
 
717fe8c
4f28bae
 
 
bfd4ab7
4f28bae
bfd4ab7
384f5e4
 
 
 
 
db118f0
384f5e4
bfd4ab7
 
 
384f5e4
 
 
 
 
 
 
12cb09e
40b493d
2dd33a7
384f5e4
 
 
 
 
2dd33a7
 
 
 
12cb09e
 
384f5e4
 
 
ad7b4a8
4f28bae
384f5e4
 
4f28bae
384f5e4
 
 
 
 
 
 
fd1df6b
384f5e4
 
 
 
 
 
 
 
 
 
 
db118f0
384f5e4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import streamlit as st
import requests
from transformers import pipeline
import pandas as pd

# Step 1: Naver News search API call
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Call the Naver News search endpoint and return the decoded JSON body.

    Args:
        query: Search keyword, sent as the ``query`` request parameter.
        display: Value sent as the ``display`` request parameter.
        start: Value sent as the ``start`` request parameter.
        sort: Value sent as the ``sort`` request parameter.

    Returns:
        The parsed JSON body of an HTTP 200 response.

    Raises:
        Exception: If the response status is anything other than 200; the
            message contains the status code and the response text.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout below expires.
    """
    import os

    # SECURITY(review): these credentials are committed in source. They are
    # kept only as a fallback so existing deployments keep working; prefer
    # setting NAVER_CLIENT_ID / NAVER_CLIENT_SECRET and rotate the keys.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code}, {response.text}")
    return response.json()

# Step 2: Hugging Face translation model (Korean -> English)
def load_translation_model():
    """Create the ``Helsinki-NLP/opus-mt-ko-en`` translation pipeline and return it."""
    return pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

# Step 3: Hugging Face political-orientation model
def load_huggingface_model():
    """Create the ``bucketresearch/politicalBiasBERT`` text-classification pipeline and return it."""
    return pipeline("text-classification", model="bucketresearch/politicalBiasBERT")

# Step 4: political orientation classification
def classify_political_sentiment(text, classifier, keyword_text=None):
    """Label a text by keyword match and attach a signed classifier score.

    Args:
        text: Text handed to ``classifier``; only its first 512 characters
            are used.
        classifier: Callable returning a sequence whose first element is a
            mapping with ``"label"`` and ``"score"`` entries.
        keyword_text: Optional text to search for the orientation keywords.
            When ``None`` (the default) ``text`` itself is searched, which is
            the original behaviour.

    Returns:
        A ``(orientation, sentiment_score)`` tuple. ``orientation`` is the
        progressive label if any progressive keyword occurs, otherwise the
        conservative label if any conservative keyword occurs, otherwise the
        neutral label. ``sentiment_score`` is the classifier score, negated
        unless the label is ``"POSITIVE"``.
    """
    # Truncate overly long input before running the classifier.
    top_prediction = classifier(text[:512])[0]
    label = top_prediction["label"]
    score = top_prediction["score"]

    # NOTE(review): the sign is positive only for the literal label
    # "POSITIVE"; confirm the configured model actually emits that label,
    # otherwise every score comes out negative.
    sentiment_score = score if label == "POSITIVE" else -score

    # Keyword-based classification (progressive / conservative)
    progressive_keywords = ["๋ณต์ง€", "ํ‰๋“ฑ", "๋ฏผ์ฃผ", "ํ™˜๊ฒฝ", "์‚ฌํšŒ์  ์ฑ…์ž„"]
    conservative_keywords = ["์•ˆ๋ณด", "์ „ํ†ต", "๊ฒฝ์ œ", "์„ฑ์žฅ", "์งˆ์„œ", "๊ตญ๋ฐฉ"]

    # The keywords are Korean, so they must be matched against Korean text,
    # not against a translation that was produced for the classifier.
    searched = text if keyword_text is None else keyword_text

    if any(keyword in searched for keyword in progressive_keywords):
        return "์ง„๋ณด", sentiment_score
    if any(keyword in searched for keyword in conservative_keywords):
        return "๋ณด์ˆ˜", sentiment_score
    return "์ค‘๋ฆฝ", sentiment_score

# Step 5: analyse the news items and collect the results
def analyze_news_political_orientation(news_items, classifier, translator):
    """Classify every news item and tally the orientations.

    Args:
        news_items: Iterable of mappings with ``"title"``, ``"description"``
            and ``"link"`` entries.
        classifier: Passed through to ``classify_political_sentiment``.
        translator: Callable returning a sequence whose first element is a
            mapping with a ``"translation_text"`` entry.

    Returns:
        A ``(results, detailed_results)`` tuple: ``results`` maps each of the
        three orientation labels to its count, and ``detailed_results`` is a
        list with one dict per item (title, description, orientation, score,
        link) in input order.
    """
    results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        title = item["title"]
        description = item["description"]
        combined_text = f"{title}. {description}"

        # Translation: Korean -> English (the classifier gets the translation)
        translated_text = translator(combined_text)[0]['translation_text']

        # Score the translation, but match the Korean keywords against the
        # untranslated text; matching them against the translation would
        # never hit and every item would be counted as neutral.
        orientation, score = classify_political_sentiment(
            translated_text, classifier, keyword_text=combined_text
        )
        results[orientation] += 1
        detailed_results.append({
            "์ œ๋ชฉ": title,
            "์š”์•ฝ": description,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
            "๋งํฌ": item["link"]
        })

    return results, detailed_results

# Streamlit app entry point: page header, keyword input and the analysis run.
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword input
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

# Run the analysis when the start button is clicked
if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # Collect the Naver news data
            news_data = fetch_naver_news(query, display=10)

            if not news_data or not news_data.get("items"):
                # Nothing to analyse: show the error and skip the rest via
                # the else branch. A `return` here would be a SyntaxError
                # because this code runs at module level, not in a function.
                st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
            else:
                news_items = news_data["items"]

                # Load the Hugging Face models
                classifier = load_huggingface_model()
                translator = load_translation_model()

                # Analyse the news items
                results, detailed_results = analyze_news_political_orientation(
                    news_items, classifier, translator
                )

                # Summary of the analysis
                st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
                st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
                st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
                st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")

                # Orientation distribution chart
                st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
                st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))

                # Detailed per-article results
                st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # Article list with links
                st.subheader("๋‰ด์Šค ๋งํฌ")
                for _, row in df.iterrows():
                    st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")

        except Exception as e:
            # Top-level boundary of the app: surface any failure in the UI.
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")