Update app.py
app.py CHANGED
@@ -23,6 +23,7 @@ def fetch_naver_news(query, display=10, start=1, sort="date"):
     response = requests.get(url, headers=headers, params=params)
     if response.status_code == 200:
         news_data = response.json()
+        st.write("Fetched News Data:", news_data)  # Inspect the fetched Naver news data
         return news_data
     else:
         st.error(f"Error: {response.status_code}, {response.text}")
@@ -31,16 +32,17 @@ def fetch_naver_news(query, display=10, start=1, sort="date"):
 # Step 2: Load the Hugging Face translation model (Korean -> English)
 def load_translation_model():
     translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
+    st.write("Translation model loaded.")  # Confirm the translation model loaded
     return translator
 
 # Step 3: Load the Hugging Face political bias analysis model
 def load_huggingface_model():
     classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
+    st.write("Political Bias model loaded.")  # Confirm the political bias model loaded
     return classifier
 
 # Step 4: Political orientation classification function
 def classify_political_sentiment(text, classifier):
-    # Run sentiment analysis
     result = classifier(text[:512])  # Truncate overly long input before analysis
     sentiment = result[0]
     label = sentiment["label"]
@@ -49,7 +51,6 @@ def classify_political_sentiment(text, classifier):
     # Scoring
     sentiment_score = score if label == "POSITIVE" else -score
 
-    # Keyword-based classification (progressive/conservative)
     progressive_keywords = ["welfare", "equality", "democracy", "environment", "social responsibility"]
     conservative_keywords = ["security", "tradition", "economy", "growth", "order", "national defense"]
 
@@ -59,7 +60,7 @@ def classify_political_sentiment(text, classifier):
         return "Conservative", sentiment_score
     else:
         return "Neutral", sentiment_score
-
+
 # Step 5: Analyze the news and print the results
 def analyze_news_political_orientation(news_items, classifier, translator):
     results = {"Progressive": 0, "Conservative": 0, "Neutral": 0}
@@ -70,10 +71,9 @@ def analyze_news_political_orientation(news_items, classifier, translator):
         description = item["description"]
         combined_text = f"{title}. {description}"
 
-        # Translate: Korean -> English
         translated_text = translator(combined_text)[0]['translation_text']
+        st.write("Translated Text:", translated_text)  # Inspect the translated text
 
-        # Classify political orientation
         orientation, score = classify_political_sentiment(translated_text, classifier)
         results[orientation] += 1
         detailed_results.append({
@@ -91,13 +91,11 @@ def analyze_news_political_orientation(news_items, classifier, translator):
 st.title("Political Orientation Analysis Dashboard")
 st.markdown("### Collects Naver news data in real time and analyzes its political orientation.")
 
-# Search keyword input
 query = st.text_input("Enter a search keyword", value="politics")
 
 if st.button("Start Analysis"):
     with st.spinner("Analyzing data..."):
         try:
-            # Collect Naver news data
             news_data = fetch_naver_news(query, display=10)
 
             if news_data is None:
@@ -105,29 +103,23 @@ if st.button("Start Analysis"):
             else:
                 news_items = news_data["items"]
 
-                # Load the Hugging Face models
                 classifier = load_huggingface_model()
                 translator = load_translation_model()
 
-                # Analyze the news data
                 results, detailed_results = analyze_news_political_orientation(news_items, classifier, translator)
 
-                # Visualize the analysis results
                 st.subheader("Analysis Summary")
                 st.write(f"Progressive: {results['Progressive']} articles")
                 st.write(f"Conservative: {results['Conservative']} articles")
                 st.write(f"Neutral: {results['Neutral']} articles")
 
-                # Orientation distribution chart
                 st.subheader("Orientation Distribution Chart")
                 st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["Count"]))
 
-                # Print detailed results
                 st.subheader("Detailed Results")
                 df = pd.DataFrame(detailed_results)
                 st.dataframe(df)
 
-                # Print the news with links
                 st.subheader("News Links")
                 for index, row in df.iterrows():
                     st.write(f"- [{row['Title']}]({row['Link']}) (Orientation: {row['Orientation']}, Score: {row['Score']:.2f})")
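For quick verification outside Streamlit, the translate-then-classify chain that the new st.write() debug lines expose can also be exercised in a plain Python session. The sketch below is not part of the commit: it only reuses the two model names already in app.py, the sample Korean headline is invented for illustration, and the raw classifier output is printed instead of being mapped to an orientation.

# Minimal sketch of the same translate-then-classify flow; assumes transformers (and a backend such as torch) is installed.
from transformers import pipeline

translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")

sample_text = "정부가 복지 예산을 확대하기로 했다."  # invented example headline, not from the app's data
translated = translator(sample_text)[0]["translation_text"]
print("Translated Text:", translated)

# Truncate to 512 characters, mirroring classify_political_sentiment() in app.py.
result = classifier(translated[:512])
print("Raw classifier output:", result)  # a list like [{"label": ..., "score": ...}]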