820nam committed on
Commit
717fe8c
ยท
verified ยท
1 Parent(s): 60f9cc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -90
app.py CHANGED
@@ -1,111 +1,108 @@
1
- import streamlit as st
2
- import pandas as pd
3
  import requests
4
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
5
- import os
6
-
7
- # ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋กœ๋“œ
8
- @st.cache_resource
9
- def load_model():
10
- model_name = "bucketresearch/politicalBiasBERT"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
13
- return pipeline("text-classification", model=model, tokenizer=tokenizer)
14
-
15
- # ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ
16
- def fetch_naver_news(query, display=5):
17
  url = "https://openapi.naver.com/v1/search/news.json"
18
  headers = {
19
- "X-Naver-Client-Id": "I_8koTJh3R5l4wLurQbG", # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ID ์ž…๋ ฅ
20
- "X-Naver-Client-Secret": "W5oWYlAgur", # ์—ฌ๊ธฐ์— ์ง์ ‘ API ํด๋ผ์ด์–ธํŠธ ๋น„๋ฐ€ํ‚ค ์ž…๋ ฅ
 
 
 
 
 
 
21
  }
22
 
23
- params = {"query": query, "display": display, "sort": "sim"}
24
- try:
25
- response = requests.get(url, headers=headers, params=params)
26
- response.raise_for_status() # HTTP ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ
27
  return response.json()
28
- except requests.exceptions.RequestException as e:
29
- st.error(f"API ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
30
- return None # ๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์œผ๋ฉด None ๋ฐ˜ํ™˜
31
-
32
- # ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
33
- def classify_sentiment(text, classifier):
34
- result = classifier(text, truncation=True, max_length=512)
35
- label = result[0]['label']
36
- score = result[0]['score']
37
- if label in ['LABEL_0', 'LABEL_1']: # ๋ผ๋ฒจ์— ๋”ฐ๋ผ ์ˆ˜์ • ํ•„์š”
38
- return "๋ณด์ˆ˜", score
39
- elif label in ['LABEL_4']: # ๋ผ๋ฒจ์— ๋”ฐ๋ผ ์ˆ˜์ • ํ•„์š”
40
- return "์ง„๋ณด", score
41
  else:
42
- return "์ค‘๋ฆฝ", score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„
45
- def analyze_news(news_items, classifier):
46
  results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
47
  detailed_results = []
48
 
49
  for item in news_items:
50
  title = item["title"]
51
  description = item["description"]
52
- link = item["link"]
53
  combined_text = f"{title}. {description}"
54
 
 
 
 
55
  # ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
56
- orientation, score = classify_sentiment(combined_text, classifier)
57
  results[orientation] += 1
58
- detailed_results.append({
59
- "์ œ๋ชฉ": title,
60
- "์š”์•ฝ": description,
61
- "๋งํฌ": link,
62
- "์„ฑํ–ฅ": orientation,
63
- "์ ์ˆ˜": score,
64
- })
65
 
66
  return results, detailed_results
67
 
68
- # Streamlit ์•ฑ ์‹œ์ž‘
69
- st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
70
- st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")
71
-
72
- # ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ
73
- query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
74
-
75
- if st.button("๋ถ„์„ ์‹œ์ž‘"):
76
- with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
77
- try:
78
- # ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘
79
- news_data = fetch_naver_news(query, display=10)
80
- if news_data is None:
81
- st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
82
- else:
83
- news_items = news_data["items"]
84
-
85
- # ๋ชจ๋ธ ๋กœ๋“œ
86
- classifier = load_model()
87
-
88
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„
89
- results, detailed_results = analyze_news(news_items, classifier)
90
-
91
- # ๋ถ„์„ ๊ฒฐ๊ณผ ์‹œ๊ฐํ™”
92
- st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
93
- st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
94
- st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
95
- st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")
96
-
97
- # ํŒŒ์ด ์ฐจํŠธ
98
- st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
99
- st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))
100
-
101
- # ์„ธ๋ถ€ ๊ฒฐ๊ณผ ์ถœ๋ ฅ
102
- st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
103
- df = pd.DataFrame(detailed_results)
104
- st.dataframe(df)
105
-
106
- # ๋งํฌ ํฌํ•จํ•œ ๋‰ด์Šค ์ถœ๋ ฅ
107
- st.subheader("๋‰ด์Šค ๋งํฌ")
108
- for index, row in df.iterrows():
109
- st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")
110
- except Exception as e:
111
- st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
 
 
 
1
  import requests
2
+ from transformers import pipeline
3
+ from googletrans import Translator # ๊ตฌ๊ธ€ ๋ฒˆ์—ญ API ์‚ฌ์šฉ
4
+
5
# Step 1: fetch news articles from the Naver News Search API.
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Return the JSON payload of a Naver news search.

    Parameters
    ----------
    query : str
        Search keyword (Korean is fine; sent as UTF-8).
    display : int
        Number of items to return per request.
    start : int
        1-based offset of the first result.
    sort : str
        "date" (newest first) or "sim" (relevance).

    Raises
    ------
    Exception
        If the API responds with a non-200 status code.
    """
    import os

    # SECURITY: these credentials were hard-coded and committed to version
    # control. Prefer environment variables; the literals remain only as a
    # backward-compatible fallback and should be rotated.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error: {response.status_code}, {response.text}")
27
+
28
# Step 2: load the Hugging Face text-classification model.
def load_huggingface_model():
    """Build and return the politicalBiasBERT classification pipeline."""
    return pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )
32
+
33
# Step 3: translate Korean text to English (Google Translate).
def translate_to_english(text):
    """Translate *text* from Korean to English and return the plain string.

    The googletrans ``Translator`` is created lazily once and reused across
    calls; the original built a fresh client on every call, which added
    avoidable per-item overhead in the analysis loop.

    NOTE(review): googletrans talks to an unofficial web endpoint and can be
    flaky or rate-limited — confirm this is acceptable for this workload.
    """
    translator = getattr(translate_to_english, "_translator", None)
    if translator is None:
        translator = Translator()
        translate_to_english._translator = translator
    translated = translator.translate(text, src='ko', dest='en')
    return translated.text
38
+
39
# Step 4: classify the (translated) text as progressive / conservative / neutral.
def classify_political_sentiment(text, classifier):
    """Return ``(orientation, signed_score)`` for *text*.

    Parameters
    ----------
    text : str
        English text to classify (callers translate Korean first).
    classifier : callable
        A Hugging Face-style pipeline returning
        ``[{"label": ..., "score": ...}]``.

    Returns
    -------
    tuple[str, float]
        Orientation is "진보" (progressive), "보수" (conservative) or
        "중립" (neutral); the score carries the classifier confidence.
    """
    # Run the model on at most 512 characters (rough model-input guard).
    result = classifier(text[:512])
    sentiment = result[0]
    label = sentiment["label"]
    score = sentiment["score"]

    # NOTE(review): politicalBiasBERT does not emit a "POSITIVE" label
    # (its labels are left/center/right style), so this branch negates the
    # score on every real model output — confirm the intended mapping.
    sentiment_score = score if label == "POSITIVE" else -score

    # Keyword heuristic for orientation. Match on a lowercased copy so
    # capitalized headline words ("Welfare ...") are not silently missed —
    # the original compared case-sensitively and dropped those hits.
    progressive_keywords = ["welfare", "equality", "democracy", "environment", "social responsibility"]
    conservative_keywords = ["security", "tradition", "economy", "growth", "order", "defense"]

    lowered = text.lower()
    if any(keyword in lowered for keyword in progressive_keywords):
        return "진보", sentiment_score
    elif any(keyword in lowered for keyword in conservative_keywords):
        return "보수", sentiment_score
    else:
        return "중립", sentiment_score
60
 
61
# Step 5: analyze every news item and report per-item results.
def analyze_news_political_orientation(news_items, classifier):
    """Tally political orientations over *news_items*.

    Prints a per-item progress report to stdout as it goes, then returns a
    ``(counts, details)`` pair: ``counts`` maps orientation name to the
    number of matching items, and ``details`` is a list of
    ``(title, description, orientation, score)`` tuples.
    """
    tally = {"진보": 0, "보수": 0, "중립": 0}
    details = []

    for article in news_items:
        headline = article["title"]
        summary = article["description"]
        merged = f"{headline}. {summary}"

        # The classifier expects English, so translate the Korean text first.
        english_text = translate_to_english(merged)
        orientation, score = classify_political_sentiment(english_text, classifier)

        tally[orientation] += 1
        details.append((headline, summary, orientation, score))

        # Per-item progress report.
        print(f"Title: {headline}")
        print(f"Description: {summary}")
        print(f"Orientation: {orientation}, Score: {score}")
        print("-" * 80)

    return tally, details
86
 
87
# Step 6: end-to-end pipeline (fetch -> load model -> analyze -> summary).
def _main():
    """Run the full analysis pipeline and print a summary, trapping errors."""
    try:
        search_term = "정치"  # search keyword
        news_data = fetch_naver_news(search_term, display=5)
        classifier = load_huggingface_model()

        counts, _details = analyze_news_political_orientation(
            news_data["items"], classifier
        )

        # Aggregate summary.
        print("\n정치 성향 분석 결과")
        for side in ("진보", "보수", "중립"):
            print(f"{side}: {counts[side]}건")
    except Exception as e:
        print(f"오류 발생: {e}")


if __name__ == "__main__":
    _main()