820nam committed on
Commit
ebf358a
·
verified ·
1 Parent(s): f53a226

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -129
app.py CHANGED
@@ -1,152 +1,111 @@
1
  import streamlit as st
2
  import requests
 
3
  from transformers import pipeline
 
4
  import pandas as pd
5
 
6
- # Step 1: ๋„ค์ด๋ฒ„ ๋‰ด์Šค API ํ˜ธ์ถœ ํ•จ์ˆ˜
7
- def fetch_naver_news(query, display=10, start=1, sort="date"):
8
- client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
9
- client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
10
 
 
 
 
 
11
  url = "https://openapi.naver.com/v1/search/news.json"
12
- headers = {
13
- "X-Naver-Client-Id": client_id,
14
- "X-Naver-Client-Secret": client_secret,
15
- }
16
- params = {
17
- "query": query,
18
- "display": display,
19
- "start": start,
20
- "sort": sort,
21
- }
22
-
23
  response = requests.get(url, headers=headers, params=params)
24
- if response.status_code == 200:
25
- news_data = response.json()
26
- return news_data
27
- else:
28
- st.error(f"Error: {response.status_code}, {response.text}")
29
- return None
30
-
31
- # Step 2: GPT ๋ชจ๋ธ ๋กœ๋“œ (์ง„๋ณด์ , ๋ณด์ˆ˜์  ๊ธฐ์‚ฌ ์ƒ์„ฑ)
32
- def load_gpt_model():
33
- try:
34
- gpt_model = pipeline("text-generation", model="gpt2") # Hugging Face์—์„œ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋Š” GPT ๋ชจ๋ธ
35
- st.write("GPT model loaded successfully.")
36
- return gpt_model
37
- except Exception as e:
38
- st.error(f"Error loading GPT model: {e}")
39
- return None
40
 
41
- # Step 3: Hugging Face ์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋ชจ๋ธ ๋กœ๋“œ
42
- def load_huggingface_model():
 
 
 
 
43
  try:
44
- classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
45
- st.write("Political bias model loaded successfully.")
46
- return classifier
 
 
 
 
47
  except Exception as e:
48
- st.error(f"Error loading political bias model: {e}")
49
- return None
50
-
51
- # Step 4: ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜ ํ•จ์ˆ˜
52
- def classify_political_sentiment(text, classifier):
53
- result = classifier(text[:512]) # ์ž…๋ ฅ์ด ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ์„œ ๋ถ„์„
54
- sentiment = result[0]
55
- label = sentiment["label"]
56
- score = sentiment["score"]
57
-
58
- sentiment_score = score if label == "POSITIVE" else -score
59
-
60
- progressive_keywords = ["๋ณต์ง€", "ํ‰๋“ฑ", "๋ฏผ์ฃผ", "ํ™˜๊ฒฝ", "์‚ฌํšŒ์  ์ฑ…์ž„"]
61
- conservative_keywords = ["์•ˆ๋ณด", "์ „ํ†ต", "๊ฒฝ์ œ", "์„ฑ์žฅ", "์งˆ์„œ", "๊ตญ๋ฐฉ"]
62
-
63
- if any(keyword in text for keyword in progressive_keywords):
64
- return "์ง„๋ณด", sentiment_score
65
- elif any(keyword in text for keyword in conservative_keywords):
66
- return "๋ณด์ˆ˜", sentiment_score
67
- else:
68
- return "์ค‘๋ฆฝ", sentiment_score
69
-
70
- # Step 5: ๋‰ด์Šค ๋ถ„์„ ๋ฐ ๊ฒฐ๊ณผ ์ถœ๋ ฅ
71
- def analyze_news_political_orientation(news_items, classifier, gpt_model):
72
- results = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
73
- detailed_results = []
74
-
75
- for item in news_items:
76
  title = item["title"]
77
  description = item["description"]
78
  combined_text = f"{title}. {description}"
79
 
80
- # ์ •์น˜ ์„ฑํ–ฅ ๋ถ„๋ฅ˜
81
- orientation, score = classify_political_sentiment(combined_text, classifier)
82
- results[orientation] += 1
83
-
84
- # ์ง„๋ณด์ /๋ณด์ˆ˜์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
85
- prompt = f"์ง„๋ณด์  ๊ด€์ ์—์„œ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”: {combined_text}"
86
- progressive_article = gpt_model(prompt, max_length=512, num_return_sequences=1)[0]['generated_text']
87
-
88
- prompt = f"๋ณด์ˆ˜์  ๊ด€์ ์—์„œ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”: {combined_text}"
89
- conservative_article = gpt_model(prompt, max_length=512, num_return_sequences=1)[0]['generated_text']
90
-
91
- detailed_results.append({
 
 
 
 
 
 
92
  "์ œ๋ชฉ": title,
93
- "์š”์•ฝ": description,
94
- "์„ฑํ–ฅ": orientation,
95
- "์ ์ˆ˜": score,
96
- "๋งํฌ": item["link"],
97
- "์ง„๋ณด์  ๊ธฐ์‚ฌ": progressive_article,
98
- "๋ณด์ˆ˜์  ๊ธฐ์‚ฌ": conservative_article
99
  })
100
 
101
- return results, detailed_results
102
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Step 6: Streamlit ์•ฑ ์‹œ์ž‘
105
- st.title("์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ")
106
- st.markdown("### ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ˆ˜์ง‘ํ•˜๊ณ  ์ •์น˜ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")
107
 
108
- # ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ
109
  query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
110
-
111
  if st.button("๋ถ„์„ ์‹œ์ž‘"):
112
- with st.spinner("๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
113
- try:
114
- # ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘
115
- news_data = fetch_naver_news(query, display=10)
116
-
117
- if news_data is None:
118
- st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
119
- else:
120
- news_items = news_data["items"]
121
-
122
- # Hugging Face ๋ชจ๋ธ ๋กœ๋“œ
123
- classifier = load_huggingface_model()
124
- gpt_model = load_gpt_model()
125
-
126
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„
127
- results, detailed_results = analyze_news_political_orientation(news_items, classifier, gpt_model)
128
-
129
- # ๋ถ„์„ ๊ฒฐ๊ณผ ์‹œ๊ฐํ™”
130
- st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ")
131
- st.write(f"์ง„๋ณด: {results['์ง„๋ณด']}๊ฑด")
132
- st.write(f"๋ณด์ˆ˜: {results['๋ณด์ˆ˜']}๊ฑด")
133
- st.write(f"์ค‘๋ฆฝ: {results['์ค‘๋ฆฝ']}๊ฑด")
134
-
135
- # ์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ (๋ง‰๋Œ€ ์ฐจํŠธ)
136
- st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์ฐจํŠธ")
137
- st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์ˆ˜"]))
138
-
139
- # ์„ธ๋ถ€ ๊ฒฐ๊ณผ ์ถœ๋ ฅ
140
- st.subheader("์„ธ๋ถ€ ๊ฒฐ๊ณผ")
141
- df = pd.DataFrame(detailed_results)
142
- st.dataframe(df)
143
-
144
- # ๋งํฌ ํฌํ•จํ•œ ๋‰ด์Šค ์ถœ๋ ฅ
145
- st.subheader("๋‰ด์Šค ๋งํฌ")
146
- for index, row in df.iterrows():
147
- st.write(f"- [{row['์ œ๋ชฉ']}]({row['๋งํฌ']}) (์„ฑํ–ฅ: {row['์„ฑํ–ฅ']}, ์ ์ˆ˜: {row['์ ์ˆ˜']:.2f})")
148
- st.write(f"**์ง„๋ณด์  ๊ธฐ์‚ฌ**: {row['์ง„๋ณด์  ๊ธฐ์‚ฌ']}")
149
- st.write(f"**๋ณด์ˆ˜์  ๊ธฐ์‚ฌ**: {row['๋ณด์ˆ˜์  ๊ธฐ์‚ฌ']}")
150
-
151
- except Exception as e:
152
- st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
 
1
import os

import matplotlib.pyplot as plt
import openai
import pandas as pd
import requests
import streamlit as st
from transformers import pipeline
7
 
8
# OpenAI API key: read from the OPENAI_API_KEY environment variable so the
# secret does not have to live in the source file. The original placeholder is
# kept as the fallback, so behaviour is unchanged when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
 
 
10
 
11
# Step 1: Fetch news articles from the Naver search API
def fetch_news(query, display=5):
    """Return the most recent Naver news items matching *query*.

    Args:
        query: Search keyword sent as the ``query`` request parameter.
        display: Number of items requested from the API.

    Returns:
        The list stored under the response's ``"items"`` key, or an empty
        list when the request fails, the status code is not 200, or the body
        is not a JSON object.
    """
    # Credentials are taken from the environment when available; the original
    # placeholders remain as the fallback so existing setups keep working.
    client_id = os.environ.get("NAVER_CLIENT_ID", "YOUR_NAVER_CLIENT_ID")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "YOUR_NAVER_CLIENT_SECRET")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {"X-Naver-Client-Id": client_id, "X-Naver-Client-Secret": client_secret}
    params = {"query": query, "display": display, "start": 1, "sort": "date"}

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        # Network failures follow the same "no data" path as a non-200 reply.
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON.
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("items", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
# Step 2: Load the political-orientation classification model
def load_sentiment_model():
    """Build the text-classification pipeline used to score articles.

    Returns:
        The object returned by ``pipeline`` for the
        ``bucketresearch/politicalBiasBERT`` checkpoint.
    """
    classifier = pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )
    return classifier
24
+
25
# Step 3: Generate the opposing-viewpoint article with GPT-4
def generate_article_gpt4(prompt):
    """Send *prompt* to GPT-4 and return the generated text.

    Args:
        prompt: Instruction text, sent as a single ``user`` message.

    Returns:
        The message content of the first returned choice. If anything raises
        along the way, a string starting with ``"Error generating text: "``
        followed by the exception text is returned instead.
    """
    try:
        chat_messages = [{"role": "user", "content": prompt}]
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=chat_messages,
            max_tokens=512,
            temperature=0.7,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception as exc:
        # Best effort: failures are reported in-band as text, never raised.
        return f"Error generating text: {exc}"
37
+
38
# Step 4: Analyse the fetched news and build the results

# Maps classifier labels to the orientation names used as count keys
# ("진보" = progressive, "보수" = conservative). Labels not listed here
# (e.g. centre) are treated as neutral and skipped.
# NOTE(review): the exact label strings depend on the model's config
# (named LEFT/RIGHT vs. generic LABEL_0/LABEL_2) — confirm against the
# model card before relying on this table.
_ORIENTATION_BY_LABEL = {
    "진보": "진보",
    "보수": "보수",
    "LEFT": "진보",
    "RIGHT": "보수",
    "LABEL_0": "진보",
    "LABEL_2": "보수",
}


def _orientation_from_label(raw_label):
    """Return "진보"/"보수" for a classifier label, or None if neutral/unknown."""
    return _ORIENTATION_BY_LABEL.get(str(raw_label).strip().upper())


def analyze_news_political_viewpoint(query, sentiment_model):
    """Classify news for *query* and generate an opposing-view article for each.

    Args:
        query: Search keyword passed to ``fetch_news``.
        sentiment_model: Callable classifier; called with the article text and
            ``truncation=True``, and expected to return a sequence whose first
            element is a mapping with ``"label"`` and ``"score"``.

    Returns:
        A ``(status_message, results, sentiment_counts)`` tuple. When no news
        could be fetched, ``results`` and ``sentiment_counts`` are ``None``.
        Otherwise ``results`` is a list of dicts (title, original text,
        orientation, score, generated article) and ``sentiment_counts`` maps
        "진보"/"보수" to the number of articles with that orientation.
    """
    news_data = fetch_news(query)
    if not news_data:
        return "뉴스 데이터를 불러오는 데 실패했습니다.", None, None

    results = []
    sentiment_counts = {"진보": 0, "보수": 0}

    for item in news_data:
        title = item["title"]
        description = item["description"]
        combined_text = f"{title}. {description}"

        # Orientation analysis. The character slice keeps the input small;
        # truncation=True additionally caps the token count, which a
        # character limit alone does not guarantee.
        sentiment = sentiment_model(combined_text[:512], truncation=True)[0]
        sentiment_score = sentiment["score"]
        sentiment_label = _orientation_from_label(sentiment["label"])

        # Ask for the article rewritten from the opposite viewpoint.
        if sentiment_label == "진보":
            prompt = f"다음 기사를 보수적 관점에서 작성해주세요:\n{combined_text}"
        elif sentiment_label == "보수":
            prompt = f"다음 기사를 진보적 관점에서 작성해주세요:\n{combined_text}"
        else:
            continue  # neutral articles are excluded

        generated_article = generate_article_gpt4(prompt)
        sentiment_counts[sentiment_label] += 1

        # Store the result
        results.append({
            "제목": title,
            "원본 기사": description,
            "성향": sentiment_label,
            "성향 점수": sentiment_score,
            "대조 관점 기사": generated_article,
        })

    return "뉴스 분석이 완료되었습니다.", results, sentiment_counts


# Step 5: Visualisation
def visualize_sentiment_distribution(sentiment_counts):
    """Render a bar chart of article counts per orientation in Streamlit.

    Args:
        sentiment_counts: Mapping of orientation name to article count; keys
            become the bar labels and values the bar heights.
    """
    labels = list(sentiment_counts.keys())
    values = list(sentiment_counts.values())
    fig, ax = plt.subplots()
    ax.bar(labels, values, color=['blue', 'red'])
    # NOTE(review): Hangul labels presumably need a matplotlib font with
    # Korean glyphs — verify in the deployment environment.
    ax.set_title("진보 vs 보수 기사 수")
    ax.set_ylabel("기사 수")
    st.pyplot(fig)


# Step 6: Streamlit UI
st.title("정치적 관점 비교 분석 도구")
st.markdown("### 뉴스 기사의 정치 성향 분석과 반대 관점 기사 생성")

query = st.text_input("검색 키워드를 입력하세요", value="정치")

if st.button("분석 시작"):
    with st.spinner("분석 중입니다..."):
        sentiment_model = load_sentiment_model()
        status_message, analysis_results, sentiment_counts = (
            analyze_news_political_viewpoint(query, sentiment_model)
        )

    # Show the results once the spinner has finished.
    st.subheader(status_message)
    if analysis_results:
        st.write("### 성향 분포 시각화")
        visualize_sentiment_distribution(sentiment_counts)

        st.write("### 분석 결과")
        for result in analysis_results:
            st.write(f"#### 제목: {result['제목']}")
            st.write(f"- **원본 기사**: {result['원본 기사']}")
            st.write(f"- **성향**: {result['성향']} (점수: {result['성향 점수']:.2f})")
            st.write(f"- **대조 관점 기사**: {result['대조 관점 기사']}")
            st.write("---")