820nam committed on
Commit
384f5e4
·
verified ·
1 Parent(s): bb49491

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -56
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import requests
2
- from transformers import pipeline, MarianMTModel, MarianTokenizer
 
3
 
4
- # Step 1: ๋„ค์ด๋ฒ„ ๋‰ด์Šค API๋กœ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ
5
  def fetch_naver_news(query, display=10, start=1, sort="date"):
6
  client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
7
  client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
@@ -24,27 +26,15 @@ def fetch_naver_news(query, display=10, start=1, sort="date"):
24
  else:
25
  raise Exception(f"Error: {response.status_code}, {response.text}")
26
 
27
- # Step 2: ํ•œ๊ตญ์–ด -> ์˜์–ด ๋ฒˆ์—ญ ๋ชจ๋ธ ๋กœ๋“œ
28
- def load_translation_model():
29
- model_name = "Helsinki-NLP/opus-mt-ko-en" # ํ•œ๊ตญ์–ด -> ์˜์–ด ๋ฒˆ์—ญ ๋ชจ๋ธ
30
- model = MarianMTModel.from_pretrained(model_name)
31
- tokenizer = MarianTokenizer.from_pretrained(model_name)
32
- return model, tokenizer
33
-
34
- # ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋ฅผ ์˜์–ด๋กœ ๋ฒˆ์—ญํ•˜๋Š” ํ•จ์ˆ˜
35
- def translate_to_english(text, model, tokenizer):
36
- translated = tokenizer.encode(text, return_tensors="pt", padding=True)
37
- translated_text = model.generate(translated, max_length=512)
38
- return tokenizer.decode(translated_text[0], skip_special_tokens=True)
39
-
40
- # Step 3: ์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋ชจ๋ธ ๋กœ๋“œ (PoliticalBiasBERT)
41
- def load_political_bias_model():
42
- classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
43
  return classifier
44
 
45
- # Step 4: ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
46
  def classify_political_sentiment(text, classifier):
47
- result = classifier(text[:512])
 
48
  sentiment = result[0]
49
  label = sentiment["label"]
50
  score = sentiment["score"]
@@ -63,52 +53,67 @@ def classify_political_sentiment(text, classifier):
63
  else:
64
  return "์ค‘๋ฆฝ", sentiment_score
65
 
66
- # Step 5: ์ „์ฒด ๋‰ด์Šค ๋ถ„์„ ๋ฐ ๊ฒฐ๊ณผ ์ถœ๋ ฅ
67
- def analyze_news_political_orientation(news_items, classifier, translation_model, tokenizer):
68
  results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
69
  detailed_results = []
70
 
71
  for item in news_items:
72
  title = item["title"]
73
  description = item["description"]
74
-
75
- # ํ•œ๊ตญ์–ด ๊ธฐ์‚ฌ ํ…์ŠคํŠธ๋ฅผ ์˜์–ด๋กœ ๋ฒˆ์—ญ
76
  combined_text = f"{title}. {description}"
77
- translated_text = translate_to_english(combined_text, translation_model, tokenizer)
78
 
79
  # ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
80
- orientation, score = classify_political_sentiment(translated_text, classifier)
81
  results[orientation] += 1
82
- detailed_results.append((title, description, orientation, score))
83
-
84
- # ์ถœ๋ ฅ
85
- print(f"Title: {title}")
86
- print(f"Description: {description}")
87
- print(f"Orientation: {orientation}, Score: {score}")
88
- print("-" * 80)
89
 
90
  return results, detailed_results
91
 
92
- # Step 6: ์‹คํ–‰ ํŒŒ์ดํ”„๋ผ์ธ
93
- if __name__ == "__main__":
94
- try:
95
- # ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ (์—ฌ๊ธฐ์„œ๋Š” ๋ฏธ๋ฆฌ ๊ฐ€์ ธ์˜จ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”)
96
- query = "์ •์น˜" # ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ
97
- news_data = fetch_naver_news(query, display=5)
98
-
99
- # ๋ฒˆ์—ญ ๋ชจ๋ธ๊ณผ ์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋ชจ๋ธ ๋กœ๋“œ
100
- translation_model, tokenizer = load_translation_model()
101
- classifier = load_political_bias_model()
102
-
103
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๊ฐ์„ฑ ๋ถ„์„ ๋ฐ ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
104
- news_items = news_data["items"]
105
- results, detailed_results = analyze_news_political_orientation(news_items, classifier, translation_model, tokenizer)
106
-
107
- # ์ „์ฒด ๊ฒฐ๊ณผ ์ถœ๋ ฅ
108
- print("\n์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๊ฒฐ๊ณผ")
109
- print(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
110
- print(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
111
- print(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")
112
-
113
- except Exception as e:
114
- print(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import requests
3
+ from transformers import pipeline
4
+ import pandas as pd
5
 
6
+ # Step 1: 네이버 뉴스 API 호출 함수
7
  def fetch_naver_news(query, display=10, start=1, sort="date"):
8
  client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
9
  client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
 
26
  else:
27
  raise Exception(f"Error: {response.status_code}, {response.text}")
28
 
29
+ # Step 2: Hugging Face 감성 분석 모델 로드
30
@st.cache_resource
def load_huggingface_model():
    """Build the Hugging Face sentiment-analysis pipeline, once per process.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be re-instantiated on each button
    click. ``st.cache_resource`` keeps a single shared pipeline object alive
    across reruns and sessions.

    Returns:
        The ``transformers`` pipeline created for the "sentiment-analysis"
        task with the ``distilbert-base-uncased-finetuned-sst-2-english``
        checkpoint.

    NOTE(review): the checkpoint name indicates an English SST-2 sentiment
    model, while callers in this file pass Naver news titles/descriptions
    (presumably Korean) and map the result to a political orientation --
    confirm this model is appropriate for that purpose.
    """
    return pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
33
 
34
+ # Step 3: 정치 성향 분류 함수
35
  def classify_political_sentiment(text, classifier):
36
+ # ๊ฐ์„ฑ ๋ถ„์„ ์‹คํ–‰
37
+ result = classifier(text[:512]) # ์ž…๋ ฅ์ด ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ์„œ ๋ถ„์„
38
  sentiment = result[0]
39
  label = sentiment["label"]
40
  score = sentiment["score"]
 
53
  else:
54
  return "์ค‘๋ฆฝ", sentiment_score
55
 
56
+ # Step 4: 뉴스 분석 및 결과 출력
57
def analyze_news_political_orientation(news_items, classifier):
    """Classify every news item and tally the orientations.

    Args:
        news_items: Iterable of mappings, each with a "title" and a
            "description" entry (the ``items`` list of the Naver response).
        classifier: Model object forwarded unchanged to
            ``classify_political_sentiment``.

    Returns:
        A ``(results, detailed_results)`` tuple. ``results`` maps each of
        the three orientation labels to the number of items given that
        label. ``detailed_results`` is a list with one dict per item holding
        the cleaned title, the cleaned description, the orientation label
        and its score.

    Raises:
        KeyError: If an item lacks "title"/"description", or if the
            classifier helper returns a label outside the three tallied
            ones.
    """
    import html
    import re

    def _plain_text(raw):
        # Naver search results wrap matched keywords in <b>...</b> and
        # HTML-escape special characters. Drop the highlight tags first,
        # then decode entities, so the classifier and the result table see
        # plain text instead of markup.
        return html.unescape(re.sub(r"</?b>", "", raw or ""))

    results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
    detailed_results = []

    for item in news_items:
        title = _plain_text(item["title"])
        description = _plain_text(item["description"])
        combined_text = f"{title}. {description}"

        # Classify the combined headline + summary text.
        orientation, score = classify_political_sentiment(combined_text, classifier)
        results[orientation] += 1
        detailed_results.append({
            "์ œ๋ชฉ": title,
            "์š”์•ฝ": description,
            "์„ฑํ–ฅ": orientation,
            "์ ์ˆ˜": score,
        })

    return results, detailed_results
77
 
78
+ # Streamlit 앱 시작
79
# Page header.
st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")

# Search keyword entered by the user (pre-filled with a default).
query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")

if st.button("๋ถ„์„ ์‹œ์ž‘"):
    with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        # Top-level boundary: any failure while fetching, classifying or
        # rendering is reported in the page instead of crashing the app.
        try:
            # Fetch the ten most recent matching articles.
            news_data = fetch_naver_news(query, display=10)
            news_items = news_data["items"]

            # Load the model and classify every article.
            classifier = load_huggingface_model()
            results, detailed_results = analyze_news_political_orientation(
                news_items, classifier
            )

            # Per-orientation counts, one line per label.
            st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
            for label in ("์ง„๋ณด", "๋ณด์ˆ˜", "์ค‘๋ฆฝ"):
                st.write(f"{label}: {results[label]}๊ฑด")

            # Bar chart of the same counts.
            st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
            counts_frame = pd.DataFrame.from_dict(
                results, orient='index', columns=["๊ฑด์ˆ˜"]
            )
            st.bar_chart(counts_frame)

            # Full per-article table.
            st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
            df = pd.DataFrame(detailed_results)
            st.dataframe(df)

            # One bullet per article with its label and score.
            st.subheader("๋‰ด์Šค ๋งํฌ")
            for _, row in df.iterrows():
                st.write(f"- [{row['์ œ๋ชฉ']}] (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")

        except Exception as e:
            st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")