820nam committed on
Commit
0a4103c
·
verified ·
1 Parent(s): 8b3d9e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -64
app.py CHANGED
@@ -1,111 +1,131 @@
1
  import streamlit as st
2
  import requests
3
- import openai
4
- from transformers import pipeline
5
  import matplotlib.pyplot as plt
 
 
6
  import pandas as pd
7
 
8
# Configure the OpenAI API key.
# SECURITY: the key is read from the OPENAI_API_KEY environment variable so
# that no secret is stored in source control. Any key that was previously
# hard-coded on this line must be treated as leaked and revoked.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
10
 
11
# Step 1: collect news data from the Naver search API
def fetch_news(query, display=5):
    """Return the newest Naver news search results for ``query``.

    Args:
        query: Search keyword sent to the Naver news search endpoint.
        display: Maximum number of articles to request.

    Returns:
        The list stored under ``"items"`` in the JSON response. An empty list
        is returned when the credentials are not configured, the request
        fails, the status code is not 200, or the body is not valid JSON.
    """
    import os

    # SECURITY: credentials are taken from the environment instead of being
    # hard-coded; previously committed values should be revoked.
    client_id = os.environ.get("NAVER_CLIENT_ID")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET")
    if not client_id or not client_secret:
        return []

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {"X-Naver-Client-Id": client_id, "X-Naver-Client-Secret": client_secret}
    params = {"query": query, "display": display, "start": 1, "sort": "date"}

    try:
        # A timeout keeps the Streamlit script from hanging on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        return []

    if response.status_code != 200:
        return []

    try:
        return response.json().get("items", [])
    except ValueError:
        # The body was not valid JSON.
        return []
 
 
 
 
 
 
20
 
21
# Step 2: load the political-leaning classification model
def load_sentiment_model():
    """Build the text-classification pipeline backed by politicalBiasBERT."""
    model_name = "bucketresearch/politicalBiasBERT"
    classifier = pipeline("text-classification", model=model_name)
    return classifier
 
24
 
25
# Step 3: generate a contrasting-viewpoint article with GPT-4
def generate_article_gpt4(prompt):
    """Send ``prompt`` to the GPT-4 chat endpoint and return the reply text.

    Any exception raised while calling the API or reading the response is
    caught, and a string of the form ``"Error generating text: <error>"`` is
    returned instead.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
            temperature=0.7,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception as err:
        return f"Error generating text: {err}"
37
-
38
# Step 4: analyse the fetched news and build the result set
def analyze_news_political_viewpoint(query, sentiment_model):
    """Classify each fetched article and generate an opposing-view rewrite.

    Args:
        query: Search keyword forwarded to ``fetch_news``.
        sentiment_model: Callable that takes a text and returns a sequence
            whose first element is a mapping with ``"label"`` and ``"score"``.

    Returns:
        A ``(status_message, results, sentiment_counts)`` tuple. When no news
        could be fetched, ``results`` and ``sentiment_counts`` are ``None``.
        Articles whose label is neither of the two counted labels are skipped.
    """
    articles = fetch_news(query)
    if not articles:
        return "๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.", None, None

    sentiment_counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0}
    results = []

    for article in articles:
        title = article["title"]
        description = article["description"]
        combined_text = f"{title}. {description}"

        # Classify at most the first 512 characters of the combined text.
        prediction = sentiment_model(combined_text[:512])[0]
        sentiment_label = prediction["label"]
        sentiment_score = prediction["score"]

        # Ask for a rewrite from the opposite side of the detected label.
        if sentiment_label == "์ง„๋ณด":
            prompt = f"๋‹ค์Œ ๊ธฐ์‚ฌ๋ฅผ ๋ณด์ˆ˜์  ๊ด€์ ์—์„œ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”:\n{combined_text}"
        elif sentiment_label == "๋ณด์ˆ˜":
            prompt = f"๋‹ค์Œ ๊ธฐ์‚ฌ๋ฅผ ์ง„๋ณด์  ๊ด€์ ์—์„œ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”:\n{combined_text}"
        else:
            # Any other label is left out of the results.
            continue

        generated_article = generate_article_gpt4(prompt)
        sentiment_counts[sentiment_label] += 1

        results.append({
            "์ œ๋ชฉ": title,
            "์›๋ณธ ๊ธฐ์‚ฌ": description,
            "์„ฑํ–ฅ": sentiment_label,
            "์„ฑํ–ฅ ์ ์ˆ˜": sentiment_score,
            "๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ": generated_article,
        })

    return "๋‰ด์Šค ๋ถ„์„์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", results, sentiment_counts
78
 
79
# Step 5: visualisation helper
def visualize_sentiment_distribution(sentiment_counts):
    """Render a bar chart of the per-label article counts in Streamlit.

    Args:
        sentiment_counts: Mapping of label to article count; keys become the
            bar labels and values the bar heights.
    """
    fig, ax = plt.subplots()
    bar_labels = list(sentiment_counts.keys())
    bar_heights = list(sentiment_counts.values())
    ax.bar(bar_labels, bar_heights, color=['blue', 'red'])
    ax.set_title("์ง„๋ณด vs ๋ณด์ˆ˜ ๊ธฐ์‚ฌ ์ˆ˜")
    ax.set_ylabel("๊ธฐ์‚ฌ ์ˆ˜")
    st.pyplot(fig)
88
 
89
# Step 6: Streamlit UI
st.title("์ •์น˜์  ๊ด€์  ๋น„๊ต ๋ถ„์„ ๋„๊ตฌ")
st.markdown("### ๋‰ด์Šค ๊ธฐ์‚ฌ์˜ ์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„๊ณผ ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ")

query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
if st.button("๋ถ„์„ ์‹œ์ž‘"):
    # NOTE(review): the original nesting under the spinner was not recoverable;
    # here the spinner covers model loading and analysis only - confirm.
    with st.spinner("๋ถ„์„ ์ค‘์ž…๋‹ˆ๋‹ค..."):
        sentiment_model = load_sentiment_model()
        status_message, analysis_results, sentiment_counts = (
            analyze_news_political_viewpoint(query, sentiment_model)
        )

    # Show the outcome of the run.
    st.subheader(status_message)
    if analysis_results:
        st.write("### ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™”")
        visualize_sentiment_distribution(sentiment_counts)

        st.write("### ๋ถ„์„ ๊ฒฐ๊ณผ")
        for result in analysis_results:
            # One markdown section per analysed article.
            st.write(f"#### ์ œ๋ชฉ: {result['์ œ๋ชฉ']}")
            st.write(f"- **์›๋ณธ ๊ธฐ์‚ฌ**: {result['์›๋ณธ ๊ธฐ์‚ฌ']}")
            st.write(f"- **์„ฑํ–ฅ**: {result['์„ฑํ–ฅ']} (์ ์ˆ˜: {result['์„ฑํ–ฅ ์ ์ˆ˜']:.2f})")
            st.write(f"- **๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ**: {result['๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ']}")
            st.write("---")
 
 
 
1
  import streamlit as st
2
  import requests
 
 
3
  import matplotlib.pyplot as plt
4
+ from transformers import pipeline
5
+ import openai
6
  import pandas as pd
7
 
8
# Configure the OpenAI API key.
# SECURITY: the key is read from the OPENAI_API_KEY environment variable so
# that no secret is stored in source control. Any key that was previously
# hard-coded on this line must be treated as leaked and revoked.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
10
 
11
# Fetch real news articles through the Naver news search API
def fetch_naver_news(query, display=5):
    """Return the newest Naver news search results for ``query``.

    Args:
        query: Search keyword sent to the Naver news search endpoint.
        display: Maximum number of articles to request.

    Returns:
        The list stored under ``"items"`` in the JSON response. On any
        failure (missing credentials, network error, non-200 status, invalid
        JSON) an error is shown through ``st.error`` and ``[]`` is returned.
    """
    import os

    # SECURITY: credentials are taken from the environment instead of being
    # hard-coded; previously committed values should be revoked.
    client_id = os.environ.get("NAVER_CLIENT_ID")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET")
    if not client_id or not client_secret:
        st.error("NAVER_CLIENT_ID / NAVER_CLIENT_SECRET are not configured.")
        return []

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": 1,
        "sort": "date",  # newest first
    }

    try:
        # A timeout keeps the Streamlit script from hanging on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
        return []

    if response.status_code != 200:
        st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
        return []

    try:
        return response.json().get("items", [])
    except ValueError:
        # The body was not valid JSON.
        st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
        return []
35
+
36
# Load the political-leaning classification model
def load_sentiment_model():
    """Build the text-classification pipeline backed by politicalBiasBERT."""
    return pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )
40
 
41
# Generate an opposing-viewpoint article with GPT-4
def generate_article_gpt4(prompt):
    """Send ``prompt`` to the GPT-4 chat endpoint and return the reply text.

    Args:
        prompt: Full instruction text sent as a single user message.

    Returns:
        The stripped text of the first choice. If the API call or response
        handling raises, the error is shown through ``st.error`` and a fixed
        failure string is returned instead.
    """
    try:
        # GPT-4 is served through the chat endpoint. The earlier Completion
        # call named the engine "text-davinci-004", which is not an available
        # model, so every request ended in the error branch below.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        # Deliberate best-effort: surface the error in the UI and keep going.
        st.error(f"Error generating text: {e}")
        return "GPT-4 ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ"
54
+
55
# Classify the leaning of one article
def analyze_article_sentiment(text, classifier):
    """Return ``(label, score)`` for ``text`` as reported by ``classifier``.

    Only the first 512 characters of ``text`` are passed to the classifier,
    and the label and score are read from the first element of its result.
    """
    truncated = text[:512]
    top_prediction = classifier(truncated)[0]
    return top_prediction["label"], top_prediction["score"]
61
+
62
# Compare political viewpoints and generate the opposing view
def analyze_news_political_viewpoint(query):
    """Classify each fetched article and generate an opposing-view rewrite.

    Args:
        query: Search keyword forwarded to ``fetch_naver_news``.

    Returns:
        A ``(results, sentiment_counts)`` tuple. ``results`` is a list of
        per-article dicts (title, original description, label, score and the
        generated article); ``sentiment_counts`` maps each label to the number
        of articles that received it. ``([], {})`` is returned when no news
        could be fetched.
    """
    news_items = fetch_naver_news(query)
    if not news_items:
        return [], {}

    classifier = load_sentiment_model()
    results = []
    sentiment_counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}

    for item in news_items:
        title = item["title"]
        description = item["description"]
        combined_text = f"{title}. {description}"

        sentiment, score = analyze_article_sentiment(combined_text, classifier)
        # The classifier may emit a label that is not pre-seeded above; count
        # it under its own key instead of raising KeyError.
        # NOTE(review): confirm which label strings the model actually returns.
        sentiment_counts[sentiment] = sentiment_counts.get(sentiment, 0) + 1

        # Request a rewrite from the other side of the detected label.
        opposite_perspective = "๋ณด์ˆ˜์ " if sentiment == "์ง„๋ณด" else "์ง„๋ณด์ "
        prompt = f"{combined_text}๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ {opposite_perspective} ๊ด€์ ์˜ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."
        opposite_article = generate_article_gpt4(prompt)

        results.append({
            "์ œ๋ชฉ": title,
            "์›๋ณธ ๊ธฐ์‚ฌ": description,
            "์„ฑํ–ฅ": sentiment,
            "์„ฑํ–ฅ ์ ์ˆ˜": score,
            "๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ": opposite_article,
        })

    return results, sentiment_counts
96
 
97
# Visualise the label distribution
def visualize_sentiment_distribution(sentiment_counts):
    """Render a pie chart of the per-label article counts in Streamlit.

    Args:
        sentiment_counts: Mapping of label to article count; keys become the
            wedge labels and values the wedge sizes.
    """
    wedge_labels = list(sentiment_counts.keys())
    wedge_sizes = list(sentiment_counts.values())

    fig, ax = plt.subplots()
    ax.pie(
        wedge_sizes,
        labels=wedge_labels,
        autopct='%1.1f%%',
        startangle=90,
        colors=["blue", "red", "gray"],
    )
    # Equal aspect ratio so the pie is drawn as a circle.
    ax.axis("equal")
    st.pyplot(fig)
105
 
106
# Streamlit application
st.title("์ •์น˜์  ๊ด€์  ๋น„๊ต ๋ถ„์„ ๋„๊ตฌ")
st.markdown("๋‰ด์Šค ๊ธฐ์‚ฌ์˜ ์ •์น˜ ์„ฑํ–ฅ ๋ถ„์„๊ณผ ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ๋ฅผ ์ƒ์„ฑํ•˜์—ฌ ๋น„๊ตํ•ฉ๋‹ˆ๋‹ค.")

query = st.text_input("๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
if st.button("๋ถ„์„ ์‹œ์ž‘"):
    # NOTE(review): the original nesting under the spinner was not recoverable;
    # here the spinner covers the analysis call only - confirm.
    with st.spinner("๋ถ„์„ ์ค‘..."):
        analysis_results, sentiment_counts = analyze_news_political_viewpoint(query)

    if not analysis_results:
        st.error("๋ถ„์„๋œ ๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
    else:
        st.success("๋‰ด์Šค ๋ถ„์„์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

        # Label distribution chart
        st.subheader("์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™”")
        visualize_sentiment_distribution(sentiment_counts)

        # Per-article details
        st.subheader("์ƒ์„ธ ๋ถ„์„ ๊ฒฐ๊ณผ")
        for result in analysis_results:
            st.write(f"#### ์ œ๋ชฉ: {result['์ œ๋ชฉ']}")
            st.write(f"- **์›๋ณธ ๊ธฐ์‚ฌ**: {result['์›๋ณธ ๊ธฐ์‚ฌ']}")
            st.write(f"- **์„ฑํ–ฅ**: {result['์„ฑํ–ฅ']} (์ ์ˆ˜: {result['์„ฑํ–ฅ ์ ์ˆ˜']:.2f})")
            st.write(f"- **๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ**: {result['๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ']}")
            st.write("---")