File size: 4,222 Bytes
bfd4ab7
 
 
 
2e4d46c
bfd4ab7
 
 
 
2e4d46c
bfd4ab7
 
 
 
 
 
 
 
60f9cc3
 
bfd4ab7
60f9cc3
bfd4ab7
2e4d46c
 
 
 
 
 
71dba8d
bfd4ab7
 
 
 
 
 
2e4d46c
bfd4ab7
2e4d46c
bfd4ab7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e4d46c
71dba8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfd4ab7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
import pandas as pd
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import os

# ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋กœ๋“œ
@st.cache_resource
def load_model():
    model_name = "bucketresearch/politicalBiasBERT"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)

# ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ
def fetch_naver_news(query, display=5):
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": "I_8koTJh3R5l4wLurQbG",  # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ID ์ž…๋ ฅ
        "X-Naver-Client-Secret": "W5oWYlAgur",  # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ๋น„๋ฐ€ํ‚ค ์ž…๋ ฅ
    }

    params = {"query": query, "display": display, "sort": "sim"}
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # HTTP ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"API ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return None  # ๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์œผ๋ฉด None ๋ฐ˜ํ™˜

# ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
def classify_sentiment(text, classifier):
    result = classifier(text, truncation=True, max_length=512)
    label = result[0]['label']
    score = result[0]['score']
    if label in ['LABEL_0', 'LABEL_1']:  # ๋ผ๋ฒจ์— ๋”ฐ๋ผ ์ˆ˜์ • ํ•„์š”
        return "๋ณด์ˆ˜", score
    elif label in ['LABEL_4']:  # ๋ผ๋ฒจ์— ๋”ฐ๋ผ ์ˆ˜์ • ํ•„์š”
        return "์ง„๋ณด", score
    else:
        return "์ค‘๋ฆฝ", score

# ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„
def analyze_news(news_items, classifier):
    results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        title = item["title"]
        description = item["description"]
        link = item["link"]
        combined_text = f"{title}. {description}"

        # ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
        orientation, score = classify_sentiment(combined_text, classifier)
        results[orientation] += 1
        detailed_results.append({
            "์ œ๋ชฉ": title,
            "์š”์•ฝ": description,
            "๋งํฌ": link,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
        })

    return results, detailed_results

# Streamlit ์•ฑ ์‹œ์ž‘
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        try:
            # ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘
            news_data = fetch_naver_news(query, display=10)
            if news_data is None:
                st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
            else:
                news_items = news_data["items"]

                # ๋ชจ๋ธ ๋กœ๋“œ
                classifier = load_model()

                # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„
                results, detailed_results = analyze_news(news_items, classifier)

                # ๋ถ„์„ ๊ฒฐ๊ณผ ์‹œ๊ฐํ™”
                st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
                st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
                st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
                st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")

                # ํŒŒ์ด ์ฐจํŠธ
                st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
                st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))

                # ์„ธ๋ถ€ ๊ฒฐ๊ณผ ์ถœ๋ ฅ
                st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
                df = pd.DataFrame(detailed_results)
                st.dataframe(df)

                # ๋งํฌ ํฌํ•จํ•œ ๋‰ด์Šค ์ถœ๋ ฅ
                st.subheader("๋‰ด์Šค ๋งํฌ")
                for index, row in df.iterrows():
                    st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")
        except Exception as e:
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")