File size: 4,723 Bytes
384f5e4
bfd4ab7
384f5e4
 
717fe8c
384f5e4
717fe8c
 
 
 
bfd4ab7
 
717fe8c
 
 
 
 
 
 
 
bfd4ab7
60f9cc3
717fe8c
 
2e4d46c
bfd4ab7
717fe8c
 
4f28bae
 
 
 
 
 
384f5e4
 
717fe8c
 
4f28bae
717fe8c
384f5e4
 
717fe8c
 
 
 
 
 
 
 
bb49491
 
717fe8c
 
 
 
 
 
 
bfd4ab7
4f28bae
 
bfd4ab7
 
 
 
 
 
 
717fe8c
4f28bae
 
 
bfd4ab7
4f28bae
bfd4ab7
384f5e4
 
 
 
 
 
bfd4ab7
 
 
384f5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f28bae
384f5e4
 
4f28bae
384f5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
import requests
from transformers import pipeline
import pandas as pd

# Step 1: Naver News API request function
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Query the Naver news search endpoint and return the decoded JSON body.

    Args:
        query: Search keyword, sent as the ``query`` request parameter.
        display: Value sent as the ``display`` request parameter
            (presumably the number of results per call; confirm against
            the Naver API reference).
        start: Value sent as the ``start`` request parameter.
        sort: Value sent as the ``sort`` request parameter.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the API answers with a status code other than 200.
            Connection failures and timeouts surface as ``requests``
            exceptions, which are also ``Exception`` subclasses.
    """
    import os

    # NOTE(review): the fallback credentials below are committed to the source
    # file. Prefer supplying NAVER_CLIENT_ID / NAVER_CLIENT_SECRET through the
    # environment and rotate the hard-coded pair.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code}, {response.text}")
    return response.json()

# Step 2: Load the Hugging Face translation model (Korean -> English)
def load_translation_model():
    """Build and return the Korean-to-English translation pipeline."""
    return pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

# Step 3: Load the Hugging Face sentiment-analysis model
def load_huggingface_model():
    """Build and return the SST-2 DistilBERT sentiment-analysis pipeline."""
    return pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )

# Step 4: Political orientation classification function
def classify_political_sentiment(text, classifier):
    """Label *text* with a political orientation and a signed sentiment score.

    Args:
        text: Text to analyse. Only its first 512 characters are handed to
            ``classifier``; the keyword search runs over the whole string.
        classifier: Callable that takes a string and returns a sequence whose
            first element is a mapping with ``"label"`` and ``"score"`` keys.

    Returns:
        A ``(orientation, sentiment_score)`` tuple. The score is positive when
        the label is ``"POSITIVE"`` and negated otherwise. The orientation is
        the progressive label if any progressive keyword occurs in ``text``,
        else the conservative label if any conservative keyword occurs, else
        the neutral label.
    """
    # Sentiment analysis on the (truncated) input.
    top_prediction = classifier(text[:512])[0]
    predicted_label = top_prediction["label"]
    magnitude = top_prediction["score"]
    if predicted_label == "POSITIVE":
        signed_score = magnitude
    else:
        signed_score = -magnitude

    # Keyword-based classification; groups are checked in order, so the
    # progressive keywords take precedence over the conservative ones.
    keyword_groups = (
        ("์ง„๋ณด", ("๋ณต์ง€", "ํ‰๋“ฑ", "๋ฏผ์ฃผ", "ํ™˜๊ฒฝ", "์‚ฌํšŒ์  ์ฑ…์ž„")),
        ("๋ณด์ˆ˜", ("์•ˆ๋ณด", "์ „ํ†ต", "๊ฒฝ์ œ", "์„ฑ์žฅ", "์งˆ์„œ", "๊ตญ๋ฐฉ")),
    )
    for orientation, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in text:
                return orientation, signed_score

    return "์ค‘๋ฆฝ", signed_score

# Step 5: Analyse the news items and collect the results
def analyze_news_political_orientation(news_items, classifier, translator):
    """Classify every news item and tally the orientations.

    Args:
        news_items: Iterable of mappings with ``"title"`` and
            ``"description"`` string entries.
        classifier: Sentiment callable; given a string it returns a sequence
            whose first element carries ``"label"`` and ``"score"``.
        translator: Translation callable; given a string it returns a
            sequence whose first element carries ``"translation_text"``.

    Returns:
        A ``(results, detailed_results)`` tuple: ``results`` maps each
        orientation label to the number of items that received it, and
        ``detailed_results`` holds one dict per item (title, summary,
        orientation, score) in input order.
    """
    import html
    import re

    def strip_markup(raw):
        # The Naver search API reference describes matched terms as wrapped in
        # <b> tags; remove them and decode entities such as &quot; so neither
        # the translation model nor the UI sees markup.
        return html.unescape(re.sub(r"</?b>", "", raw))

    results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        title = strip_markup(item["title"])
        description = strip_markup(item["description"])
        combined_text = f"{title}. {description}"

        # Translation: Korean -> English (the sentiment model is English-only).
        translated_text = translator(combined_text)[0]['translation_text']

        # Political orientation classification.
        # classify_political_sentiment searches for Korean keywords, so it must
        # receive the original text; handing it the translation meant no
        # keyword could ever match. The adapter keeps the sentiment score
        # computed on the (truncated) English translation, as before.
        orientation, score = classify_political_sentiment(
            combined_text,
            lambda _unused: classifier(translated_text[:512]),
        )
        results[orientation] += 1
        detailed_results.append({
            "์ œ๋ชฉ": title,
            "์š”์•ฝ": description,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
        })

    return results, detailed_results

# Streamlit app entry point
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword input.
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # Collect the Naver news data.
            news_data = fetch_naver_news(query, display=10)
            news_items = news_data["items"]

            # Load the Hugging Face models. Streamlit re-executes this script
            # on every interaction, so cache the pipelines when the installed
            # Streamlit offers cache_resource instead of rebuilding them on
            # each click; older versions fall back to a plain load.
            cache_resource = getattr(st, "cache_resource", None)
            if cache_resource is not None:
                classifier = cache_resource(load_huggingface_model)()
                translator = cache_resource(load_translation_model)()
            else:
                classifier = load_huggingface_model()
                translator = load_translation_model()

            # Analyse the news data.
            results, detailed_results = analyze_news_political_orientation(news_items, classifier, translator)

            # Summarise the analysis results.
            st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
            st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
            st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
            st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")

            # Bar chart of the orientation distribution.
            st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
            st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))

            # Detailed per-article results.
            st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
            df = pd.DataFrame(detailed_results)
            st.dataframe(df)

            # News list with links. detailed_results holds one entry per news
            # item in the same order, so the two sequences can be paired to
            # recover each article's URL (previously no link was emitted).
            st.subheader("๋‰ด์Šค ๋งํฌ")
            for item, row in zip(news_items, detailed_results):
                link = item.get("link") or item.get("originallink")
                if link:
                    heading = f"[{row['์ œ๋ชฉ']}]({link})"
                else:
                    heading = f"[{row['์ œ๋ชฉ']}]"
                st.write(f"- {heading} (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")