Update app.py
Browse files
app.py
CHANGED
@@ -1,152 +1,111 @@
|
|
1 |
import streamlit as st
|
2 |
import requests
|
|
|
3 |
from transformers import pipeline
|
|
|
4 |
import pandas as pd
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
-
client_id = "I_8koTJh3R5l4wLurQbG" # ๋ค์ด๋ฒ ๊ฐ๋ฐ์ ์ผํฐ์์ ๋ฐ๊ธ๋ฐ์ Client ID
|
9 |
-
client_secret = "W5oWYlAgur" # ๋ค์ด๋ฒ ๊ฐ๋ฐ์ ์ผํฐ์์ ๋ฐ๊ธ๋ฐ์ Client Secret
|
10 |
|
|
|
|
|
|
|
|
|
11 |
url = "https://openapi.naver.com/v1/search/news.json"
|
12 |
-
headers = {
|
13 |
-
|
14 |
-
"X-Naver-Client-Secret": client_secret,
|
15 |
-
}
|
16 |
-
params = {
|
17 |
-
"query": query,
|
18 |
-
"display": display,
|
19 |
-
"start": start,
|
20 |
-
"sort": sort,
|
21 |
-
}
|
22 |
-
|
23 |
response = requests.get(url, headers=headers, params=params)
|
24 |
-
if response.status_code == 200
|
25 |
-
news_data = response.json()
|
26 |
-
return news_data
|
27 |
-
else:
|
28 |
-
st.error(f"Error: {response.status_code}, {response.text}")
|
29 |
-
return None
|
30 |
-
|
31 |
-
# Step 2: GPT ๋ชจ๋ธ ๋ก๋ (์ง๋ณด์ , ๋ณด์์ ๊ธฐ์ฌ ์์ฑ)
|
32 |
-
def load_gpt_model():
|
33 |
-
try:
|
34 |
-
gpt_model = pipeline("text-generation", model="gpt2") # Hugging Face์์ ์ฌ์ฉํ ์ ์๋ GPT ๋ชจ๋ธ
|
35 |
-
st.write("GPT model loaded successfully.")
|
36 |
-
return gpt_model
|
37 |
-
except Exception as e:
|
38 |
-
st.error(f"Error loading GPT model: {e}")
|
39 |
-
return None
|
40 |
|
41 |
-
# Step
|
42 |
-
def
|
|
|
|
|
|
|
|
|
43 |
try:
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
47 |
except Exception as e:
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
progressive_keywords = ["๋ณต์ง", "ํ๋ฑ", "๋ฏผ์ฃผ", "ํ๊ฒฝ", "์ฌํ์ ์ฑ
์"]
|
61 |
-
conservative_keywords = ["์๋ณด", "์ ํต", "๊ฒฝ์ ", "์ฑ์ฅ", "์ง์", "๊ตญ๋ฐฉ"]
|
62 |
-
|
63 |
-
if any(keyword in text for keyword in progressive_keywords):
|
64 |
-
return "์ง๋ณด", sentiment_score
|
65 |
-
elif any(keyword in text for keyword in conservative_keywords):
|
66 |
-
return "๋ณด์", sentiment_score
|
67 |
-
else:
|
68 |
-
return "์ค๋ฆฝ", sentiment_score
|
69 |
-
|
70 |
-
# Step 5: ๋ด์ค ๋ถ์ ๋ฐ ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
71 |
-
def analyze_news_political_orientation(news_items, classifier, gpt_model):
|
72 |
-
results = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
|
73 |
-
detailed_results = []
|
74 |
-
|
75 |
-
for item in news_items:
|
76 |
title = item["title"]
|
77 |
description = item["description"]
|
78 |
combined_text = f"{title}. {description}"
|
79 |
|
80 |
-
#
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
"์ ๋ชฉ": title,
|
93 |
-
"
|
94 |
-
"์ฑํฅ":
|
95 |
-
"์ ์":
|
96 |
-
"
|
97 |
-
"์ง๋ณด์ ๊ธฐ์ฌ": progressive_article,
|
98 |
-
"๋ณด์์ ๊ธฐ์ฌ": conservative_article
|
99 |
})
|
100 |
|
101 |
-
return results,
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
-
# Step 6: Streamlit
|
105 |
-
st.title("
|
106 |
-
st.markdown("###
|
107 |
|
108 |
-
# ๊ฒ์ ํค์๋ ์
๋ ฅ
|
109 |
query = st.text_input("๊ฒ์ ํค์๋๋ฅผ ์
๋ ฅํ์ธ์", value="์ ์น")
|
110 |
-
|
111 |
if st.button("๋ถ์ ์์"):
|
112 |
-
with st.spinner("
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
# ๋ถ์ ๊ฒฐ๊ณผ ์๊ฐํ
|
130 |
-
st.subheader("๋ถ์ ๊ฒฐ๊ณผ ์์ฝ")
|
131 |
-
st.write(f"์ง๋ณด: {results['์ง๋ณด']}๊ฑด")
|
132 |
-
st.write(f"๋ณด์: {results['๋ณด์']}๊ฑด")
|
133 |
-
st.write(f"์ค๋ฆฝ: {results['์ค๋ฆฝ']}๊ฑด")
|
134 |
-
|
135 |
-
# ์ฑํฅ ๋ถํฌ ์ฐจํธ (๋ง๋ ์ฐจํธ)
|
136 |
-
st.subheader("์ฑํฅ ๋ถํฌ ์ฐจํธ")
|
137 |
-
st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์"]))
|
138 |
-
|
139 |
-
# ์ธ๋ถ ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
140 |
-
st.subheader("์ธ๋ถ ๊ฒฐ๊ณผ")
|
141 |
-
df = pd.DataFrame(detailed_results)
|
142 |
-
st.dataframe(df)
|
143 |
-
|
144 |
-
# ๋งํฌ ํฌํจํ ๋ด์ค ์ถ๋ ฅ
|
145 |
-
st.subheader("๋ด์ค ๋งํฌ")
|
146 |
-
for index, row in df.iterrows():
|
147 |
-
st.write(f"- [{row['์ ๋ชฉ']}]({row['๋งํฌ']}) (์ฑํฅ: {row['์ฑํฅ']}, ์ ์: {row['์ ์']:.2f})")
|
148 |
-
st.write(f"**์ง๋ณด์ ๊ธฐ์ฌ**: {row['์ง๋ณด์ ๊ธฐ์ฌ']}")
|
149 |
-
st.write(f"**๋ณด์์ ๊ธฐ์ฌ**: {row['๋ณด์์ ๊ธฐ์ฌ']}")
|
150 |
-
|
151 |
-
except Exception as e:
|
152 |
-
st.error(f"์ค๋ฅ ๋ฐ์: {e}")
|
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
+
import openai
|
4 |
from transformers import pipeline
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
import pandas as pd
|
7 |
|
8 |
+
# OpenAI API key setup.
# NOTE(review): hard-coded placeholder credential — load this from an
# environment variable or Streamlit secrets instead of committing a real
# key to source control.
openai.api_key = "YOUR_OPENAI_API_KEY"
|
|
|
|
|
10 |
|
11 |
+
# Step 1: Collect news data from the Naver News search API.
def fetch_news(query, display=5):
    """Return up to *display* news items for *query* from Naver's news API.

    Returns an empty list on any network failure, non-200 response, or
    unexpected payload, so the Streamlit UI degrades gracefully instead of
    crashing (the original already returned [] for non-200 responses).
    """
    # NOTE(review): hard-coded placeholder credentials — move to environment
    # variables or Streamlit secrets before deploying.
    client_id = "YOUR_NAVER_CLIENT_ID"
    client_secret = "YOUR_NAVER_CLIENT_SECRET"
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {"X-Naver-Client-Id": client_id, "X-Naver-Client-Secret": client_secret}
    params = {"query": query, "display": display, "start": 1, "sort": "date"}
    try:
        # Without a timeout, a stalled connection would hang the Streamlit
        # worker indefinitely; 10 seconds is generous for this API.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    # .get() guards against a well-formed JSON body that lacks "items".
    return response.json().get("items", [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
+
# Step 2: Load the political-bias text-classification model.
def load_sentiment_model():
    """Build and return a Hugging Face text-classification pipeline.

    Uses the bucketresearch/politicalBiasBERT checkpoint; the first call
    downloads the weights, which requires network access.
    """
    classifier = pipeline(
        "text-classification",
        model="bucketresearch/politicalBiasBERT",
    )
    return classifier
|
24 |
+
|
25 |
+
# Step 3: Generate a counter-perspective article with GPT-4.
def generate_article_gpt4(prompt):
    """Request a GPT-4 completion for *prompt*; return the generated text.

    Any failure (network, auth, malformed response) is reported as a plain
    string rather than raised, so callers can render it directly in the UI.
    NOTE(review): this uses the pre-1.0 openai SDK interface
    (openai.ChatCompletion) — confirm the installed openai version.
    """
    request_kwargs = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 512,
        "temperature": 0.7,
    }
    try:
        completion = openai.ChatCompletion.create(**request_kwargs)
        return completion['choices'][0]['message']['content']
    except Exception as e:
        return f"Error generating text: {e}"
|
37 |
+
|
38 |
+
# Step 4: Analyze news items and generate opposing-view articles.
def analyze_news_political_viewpoint(query, sentiment_model):
    """Classify each fetched article's political leaning and draft a
    counter-perspective article for every non-neutral item.

    Returns a (status_message, results, sentiment_counts) tuple; results and
    sentiment_counts are None when no news could be fetched.
    """
    news_data = fetch_news(query)
    if not news_data:
        return "๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.", None, None

    results = []
    sentiment_counts = {"์ง๋ณด": 0, "๋ณด์": 0}

    # FIX: politicalBiasBERT emits English labels (e.g. "LEFT"/"CENTER"/"RIGHT"),
    # so the original Korean-only comparisons never matched and every article
    # was skipped, leaving the results empty. Normalize English labels to the
    # Korean labels used downstream; Korean labels pass through unchanged.
    # TODO confirm exact label names against the model card.
    label_map = {"LEFT": "์ง๋ณด", "RIGHT": "๋ณด์"}

    for item in news_data:
        title = item["title"]
        description = item["description"]
        combined_text = f"{title}. {description}"

        # Bias classification; input capped at 512 characters for the model.
        sentiment = sentiment_model(combined_text[:512])[0]
        sentiment_label = label_map.get(str(sentiment["label"]).upper(), sentiment["label"])
        sentiment_score = sentiment["score"]

        # Build a prompt asking GPT-4 for the opposite political perspective.
        if sentiment_label == "์ง๋ณด":
            prompt = f"๋ค์ ๊ธฐ์ฌ๋ฅผ ๋ณด์์ ๊ด์ ์์ ์์ฑํด์ฃผ์ธ์:\n{combined_text}"
        elif sentiment_label == "๋ณด์":
            prompt = f"๋ค์ ๊ธฐ์ฌ๋ฅผ ์ง๋ณด์ ๊ด์ ์์ ์์ฑํด์ฃผ์ธ์:\n{combined_text}"
        else:
            continue  # neutral/unknown items are excluded

        generated_article = generate_article_gpt4(prompt)
        sentiment_counts[sentiment_label] += 1

        # Accumulate the per-article record consumed by the Streamlit UI.
        results.append({
            "์ ๋ชฉ": title,
            "์๋ณธ ๊ธฐ์ฌ": description,
            "์ฑํฅ": sentiment_label,
            "์ฑํฅ ์ ์": sentiment_score,
            "๋์กฐ ๊ด์ ๊ธฐ์ฌ": generated_article,
        })

    return "๋ด์ค ๋ถ์์ด ์๋ฃ๋์์ต๋๋ค.", results, sentiment_counts
|
78 |
|
79 |
+
# Step 5: Visualization helper.
def visualize_sentiment_distribution(sentiment_counts):
    """Render a bar chart of article counts per political leaning.

    *sentiment_counts* maps a leaning label to its article count; the chart
    is drawn with matplotlib and handed to Streamlit for display.
    """
    categories = list(sentiment_counts)
    counts = [sentiment_counts[category] for category in categories]
    figure, axis = plt.subplots()
    axis.bar(categories, counts, color=['blue', 'red'])
    axis.set_title("์ง๋ณด vs ๋ณด์ ๊ธฐ์ฌ ์")
    axis.set_ylabel("๊ธฐ์ฌ ์")
    st.pyplot(figure)
|
88 |
|
89 |
+
# Step 6: Streamlit UI — page chrome, search input, and results rendering.
st.title("์ ์น์ ๊ด์ ๋น๊ต ๋ถ์ ๋๊ตฌ")
st.markdown("### ๋ด์ค ๊ธฐ์ฌ์ ์ ์น ์ฑํฅ ๋ถ์๊ณผ ๋ฐ๋ ๊ด์ ๊ธฐ์ฌ ์์ฑ")

# Search keyword input (defaults to a politics query).
query = st.text_input("๊ฒ์ ํค์๋๋ฅผ ์๋ ฅํ์ธ์", value="์ ์น")
if st.button("๋ถ์ ์์"):
    with st.spinner("๋ถ์ ์ค์๋๋ค..."):
        # Load the classifier and run the end-to-end analysis pipeline.
        sentiment_model = load_sentiment_model()
        status_message, analysis_results, sentiment_counts = analyze_news_political_viewpoint(query, sentiment_model)

        # Render results: status line, distribution chart, then per-article detail.
        st.subheader(status_message)
        if analysis_results:
            st.write("### ์ฑํฅ ๋ถํฌ ์๊ฐํ")
            visualize_sentiment_distribution(sentiment_counts)

            st.write("### ๋ถ์ ๊ฒฐ๊ณผ")
            # Each result dict is produced by analyze_news_political_viewpoint;
            # the keys below must stay in sync with the ones it emits.
            for result in analysis_results:
                st.write(f"#### ์ ๋ชฉ: {result['์ ๋ชฉ']}")
                st.write(f"- **์๋ณธ ๊ธฐ์ฌ**: {result['์๋ณธ ๊ธฐ์ฌ']}")
                st.write(f"- **์ฑํฅ**: {result['์ฑํฅ']} (์ ์: {result['์ฑํฅ ์ ์']:.2f})")
                st.write(f"- **๋์กฐ ๊ด์ ๊ธฐ์ฌ**: {result['๋์กฐ ๊ด์ ๊ธฐ์ฌ']}")
                st.write("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|