Update app.py
Browse files
app.py
CHANGED
@@ -1,111 +1,108 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
import requests
|
4 |
-
from transformers import pipeline
|
5 |
-
import
|
6 |
-
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
13 |
-
return pipeline("text-classification", model=model, tokenizer=tokenizer)
|
14 |
-
|
15 |
-
# ๋ค์ด๋ฒ ๋ด์ค API ํธ์ถ
|
16 |
-
def fetch_naver_news(query, display=5):
|
17 |
url = "https://openapi.naver.com/v1/search/news.json"
|
18 |
headers = {
|
19 |
-
"X-Naver-Client-Id":
|
20 |
-
"X-Naver-Client-Secret":
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
}
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
response = requests.get(url, headers=headers, params=params)
|
26 |
-
response.raise_for_status() # HTTP ์ค๋ฅ ์ฒ๋ฆฌ
|
27 |
return response.json()
|
28 |
-
except requests.exceptions.RequestException as e:
|
29 |
-
st.error(f"API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {e}")
|
30 |
-
return None # ๋ด์ค ๋ฐ์ดํฐ๊ฐ ์์ผ๋ฉด None ๋ฐํ
|
31 |
-
|
32 |
-
# ์ ์น ์ฑํฅ ๋ถ๋ฅ
|
33 |
-
def classify_sentiment(text, classifier):
|
34 |
-
result = classifier(text, truncation=True, max_length=512)
|
35 |
-
label = result[0]['label']
|
36 |
-
score = result[0]['score']
|
37 |
-
if label in ['LABEL_0', 'LABEL_1']: # ๋ผ๋ฒจ์ ๋ฐ๋ผ ์์ ํ์
|
38 |
-
return "๋ณด์", score
|
39 |
-
elif label in ['LABEL_4']: # ๋ผ๋ฒจ์ ๋ฐ๋ผ ์์ ํ์
|
40 |
-
return "์ง๋ณด", score
|
41 |
else:
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
# ๋ด์ค
|
45 |
-
def
|
46 |
results = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
|
47 |
detailed_results = []
|
48 |
|
49 |
for item in news_items:
|
50 |
title = item["title"]
|
51 |
description = item["description"]
|
52 |
-
link = item["link"]
|
53 |
combined_text = f"{title}. {description}"
|
54 |
|
|
|
|
|
|
|
55 |
# ์ ์น ์ฑํฅ ๋ถ๋ฅ
|
56 |
-
orientation, score =
|
57 |
results[orientation] += 1
|
58 |
-
detailed_results.append(
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
return results, detailed_results
|
67 |
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
# ๊ฒ์ ํค์๋
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
# ๋ถ์ ๊ฒฐ๊ณผ ์๊ฐํ
|
92 |
-
st.subheader("๋ถ์ ๊ฒฐ๊ณผ ์์ฝ")
|
93 |
-
st.write(f"์ง๋ณด: {results['์ง๋ณด']}๊ฑด")
|
94 |
-
st.write(f"๋ณด์: {results['๋ณด์']}๊ฑด")
|
95 |
-
st.write(f"์ค๋ฆฝ: {results['์ค๋ฆฝ']}๊ฑด")
|
96 |
-
|
97 |
-
# ํ์ด ์ฐจํธ
|
98 |
-
st.subheader("์ฑํฅ ๋ถํฌ ์ฐจํธ")
|
99 |
-
st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์"]))
|
100 |
-
|
101 |
-
# ์ธ๋ถ ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
102 |
-
st.subheader("์ธ๋ถ ๊ฒฐ๊ณผ")
|
103 |
-
df = pd.DataFrame(detailed_results)
|
104 |
-
st.dataframe(df)
|
105 |
-
|
106 |
-
# ๋งํฌ ํฌํจํ ๋ด์ค ์ถ๋ ฅ
|
107 |
-
st.subheader("๋ด์ค ๋งํฌ")
|
108 |
-
for index, row in df.iterrows():
|
109 |
-
st.write(f"- [{row['์ ๋ชฉ']}]({row['๋งํฌ']}) (์ฑํฅ: {row['์ฑํฅ']}, ์ ์: {row['์ ์']:.2f})")
|
110 |
-
except Exception as e:
|
111 |
-
st.error(f"์ค๋ฅ ๋ฐ์: {e}")
|
|
|
|
|
|
|
1 |
import os

import requests
from googletrans import Translator  # Google Translate API client
from transformers import pipeline
|
4 |
+
|
5 |
+
# Step 1: Fetch news articles from the Naver News search API.
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Fetch news items for *query* from the Naver News search API.

    Args:
        query: Search keyword (Korean is fine; requests URL-encodes params).
        display: Number of results per call.
        start: 1-based offset of the first result.
        sort: "date" (newest first) or "sim" (relevance).

    Returns:
        The decoded JSON response as a dict; news entries are under "items".

    Raises:
        Exception: If the API responds with a non-200 status code.
    """
    # SECURITY FIX: credentials were hard-coded in source. Read them from the
    # environment; the original literals remain only as a backward-compatible
    # fallback and should be rotated/revoked.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    # FIX: requests has no default timeout — without one a stalled connection
    # hangs the whole script indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error: {response.status_code}, {response.text}")
|
27 |
+
|
28 |
+
# Step 2: Load the Hugging Face political-bias classification model.
def load_huggingface_model():
    """Build and return a text-classification pipeline backed by politicalBiasBERT."""
    return pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
|
32 |
+
|
33 |
+
# Step 3: Translate Korean text to English (Google Translate).
def translate_to_english(text):
    """Translate *text* from Korean to English and return the plain string.

    NOTE(review): constructs a fresh Translator per call, matching the
    original behavior — hoist to module level if call volume grows.
    """
    engine = Translator()
    result = engine.translate(text, src='ko', dest='en')
    return result.text
|
38 |
+
|
39 |
+
# Step 4: Classify the political orientation of an (already translated) text.
def classify_political_sentiment(text, classifier):
    """Classify *text* as progressive / conservative / neutral.

    Args:
        text: English text (callers translate Korean first).
        classifier: A Hugging Face text-classification pipeline, or any
            callable returning ``[{"label": ..., "score": ...}]``.

    Returns:
        ``(orientation, sentiment_score)`` — orientation is one of the Korean
        labels used throughout this file; sentiment_score is the model score,
        negated when the label is not "POSITIVE".
    """
    # Run the model; slice to 512 to stay under typical BERT input limits.
    # NOTE(review): this truncates by characters, not tokens — assumed close
    # enough here; confirm against the tokenizer's actual max length.
    result = classifier(text[:512])
    sentiment = result[0]
    label = sentiment["label"]
    score = sentiment["score"]

    # Signed score: keep the sign for positive sentiment, flip it otherwise.
    sentiment_score = score if label == "POSITIVE" else -score

    # Keyword-based orientation.
    # FIX: the original compared case-sensitively, so capitalized words in
    # headlines ("Welfare", "Security") never matched and everything fell
    # through to neutral. Lowercase once and compare against that.
    progressive_keywords = ["welfare", "equality", "democracy", "environment", "social responsibility"]
    conservative_keywords = ["security", "tradition", "economy", "growth", "order", "defense"]
    lowered = text.lower()

    if any(keyword in lowered for keyword in progressive_keywords):
        return "์ง๋ณด", sentiment_score
    elif any(keyword in lowered for keyword in conservative_keywords):
        return "๋ณด์", sentiment_score
    else:
        return "์ค๋ฆฝ", sentiment_score
|
60 |
|
61 |
+
# Step 5: Analyze every fetched news item and collect the results.
def analyze_news_political_orientation(news_items, classifier):
    """Translate, classify, and tally the orientation of each news item.

    Args:
        news_items: Iterable of Naver news dicts with "title"/"description".
        classifier: Text-classification pipeline passed through to the
            per-item classifier.

    Returns:
        ``(results, detailed_results)`` — a per-orientation count dict and a
        list of ``(title, description, orientation, score)`` tuples.
    """
    tally = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
    details = []

    for article in news_items:
        headline = article["title"]
        summary = article["description"]

        # Translate the combined Korean text so the English-language
        # classifier and keyword lists can operate on it.
        english_text = translate_to_english(f"{headline}. {summary}")

        # Classify and record this item.
        orientation, score = classify_political_sentiment(english_text, classifier)
        tally[orientation] += 1
        details.append((headline, summary, orientation, score))

        # Per-item progress report on stdout.
        print(f"Title: {headline}")
        print(f"Description: {summary}")
        print(f"Orientation: {orientation}, Score: {score}")
        print("-" * 80)

    return tally, details
|
86 |
|
87 |
+
# Step 6: End-to-end pipeline — fetch, translate, classify, report.
if __name__ == "__main__":
    try:
        # Pull the latest headlines for the search keyword.
        search_term = "์ ์น"  # search keyword
        news_payload = fetch_naver_news(search_term, display=5)

        # Load the Hugging Face classifier.
        model = load_huggingface_model()

        # Classify each fetched item and tally orientations.
        articles = news_payload["items"]
        tally, _details = analyze_news_political_orientation(articles, model)

        # Summary report.
        print("\n์ ์น ์ฑํฅ ๋ถ์ ๊ฒฐ๊ณผ")
        print(f"์ง๋ณด: {tally['์ง๋ณด']}๊ฑด")
        print(f"๋ณด์: {tally['๋ณด์']}๊ฑด")
        print(f"์ค๋ฆฝ: {tally['์ค๋ฆฝ']}๊ฑด")

    except Exception as e:
        # Top-level boundary: surface any failure instead of a raw traceback.
        print(f"์ค๋ฅ ๋ฐ์: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|