Update app.py
Browse files
app.py
CHANGED
@@ -1,111 +1,108 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
import requests
|
4 |
-
from transformers import pipeline
|
5 |
-
import
|
6 |
-
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
13 |
-
return pipeline("text-classification", model=model, tokenizer=tokenizer)
|
14 |
-
|
15 |
-
# ๋ค์ด๋ฒ ๋ด์ค API ํธ์ถ
|
16 |
-
def fetch_naver_news(query, display=5):
|
17 |
url = "https://openapi.naver.com/v1/search/news.json"
|
18 |
headers = {
|
19 |
-
"X-Naver-Client-Id":
|
20 |
-
"X-Naver-Client-Secret":
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
}
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
response = requests.get(url, headers=headers, params=params)
|
26 |
-
response.raise_for_status() # HTTP ์ค๋ฅ ์ฒ๋ฆฌ
|
27 |
return response.json()
|
28 |
-
except requests.exceptions.RequestException as e:
|
29 |
-
st.error(f"API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {e}")
|
30 |
-
return None # ๋ด์ค ๋ฐ์ดํฐ๊ฐ ์์ผ๋ฉด None ๋ฐํ
|
31 |
-
|
32 |
-
# ์ ์น ์ฑํฅ ๋ถ๋ฅ
|
33 |
-
def classify_sentiment(text, classifier):
|
34 |
-
result = classifier(text, truncation=True, max_length=512)
|
35 |
-
label = result[0]['label']
|
36 |
-
score = result[0]['score']
|
37 |
-
if label in ['LABEL_0', 'LABEL_1']: # ๋ผ๋ฒจ์ ๋ฐ๋ผ ์์ ํ์
|
38 |
-
return "๋ณด์", score
|
39 |
-
elif label in ['LABEL_4']: # ๋ผ๋ฒจ์ ๋ฐ๋ผ ์์ ํ์
|
40 |
-
return "์ง๋ณด", score
|
41 |
else:
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
# ๋ด์ค
|
45 |
-
def
|
46 |
results = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
|
47 |
detailed_results = []
|
48 |
|
49 |
for item in news_items:
|
50 |
title = item["title"]
|
51 |
description = item["description"]
|
52 |
-
link = item["link"]
|
53 |
combined_text = f"{title}. {description}"
|
54 |
|
|
|
|
|
|
|
55 |
# ์ ์น ์ฑํฅ ๋ถ๋ฅ
|
56 |
-
orientation, score =
|
57 |
results[orientation] += 1
|
58 |
-
detailed_results.append(
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
return results, detailed_results
|
67 |
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
# ๊ฒ์ ํค์๋
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
# ๋ถ์ ๊ฒฐ๊ณผ ์๊ฐํ
|
92 |
-
st.subheader("๋ถ์ ๊ฒฐ๊ณผ ์์ฝ")
|
93 |
-
st.write(f"์ง๋ณด: {results['์ง๋ณด']}๊ฑด")
|
94 |
-
st.write(f"๋ณด์: {results['๋ณด์']}๊ฑด")
|
95 |
-
st.write(f"์ค๋ฆฝ: {results['์ค๋ฆฝ']}๊ฑด")
|
96 |
-
|
97 |
-
# ํ์ด ์ฐจํธ
|
98 |
-
st.subheader("์ฑํฅ ๋ถํฌ ์ฐจํธ")
|
99 |
-
st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["๊ฑด์"]))
|
100 |
-
|
101 |
-
# ์ธ๋ถ ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
102 |
-
st.subheader("์ธ๋ถ ๊ฒฐ๊ณผ")
|
103 |
-
df = pd.DataFrame(detailed_results)
|
104 |
-
st.dataframe(df)
|
105 |
-
|
106 |
-
# ๋งํฌ ํฌํจํ ๋ด์ค ์ถ๋ ฅ
|
107 |
-
st.subheader("๋ด์ค ๋งํฌ")
|
108 |
-
for index, row in df.iterrows():
|
109 |
-
st.write(f"- [{row['์ ๋ชฉ']}]({row['๋งํฌ']}) (์ฑํฅ: {row['์ฑํฅ']}, ์ ์: {row['์ ์']:.2f})")
|
110 |
-
except Exception as e:
|
111 |
-
st.error(f"์ค๋ฅ ๋ฐ์: {e}")
|
|
|
|
|
|
|
1 |
import os

import requests
from googletrans import Translator  # Google Translate API client
from transformers import pipeline
|
4 |
+
|
5 |
+
# Step 1: Fetch news articles from the Naver News search API.
def fetch_naver_news(query, display=10, start=1, sort="date"):
    """Fetch news items for *query* from the Naver News search API.

    Args:
        query: Search keyword (Korean is fine; requests URL-encodes params).
        display: Number of results per call.
        start: 1-based offset of the first result.
        sort: "date" (newest first) or "sim" (relevance).

    Returns:
        The decoded JSON response as a dict; news entries are under "items".

    Raises:
        Exception: If the API responds with a non-200 status code.
    """
    # SECURITY FIX: credentials were hard-coded in source. Read them from the
    # environment; the original literals remain only as a backward-compatible
    # fallback and should be rotated/revoked.
    client_id = os.environ.get("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": sort,
    }

    # FIX: requests has no default timeout — without one a stalled connection
    # hangs the whole script indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error: {response.status_code}, {response.text}")
|
27 |
+
|
28 |
+
# Step 2: Load the Hugging Face political-bias classification model.
def load_huggingface_model():
    """Build and return a text-classification pipeline backed by politicalBiasBERT."""
    return pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
|
32 |
+
|
33 |
+
# Step 3: Translate Korean text to English (Google Translate).
def translate_to_english(text):
    """Translate *text* from Korean to English and return the plain string.

    NOTE(review): constructs a fresh Translator per call, matching the
    original behavior — hoist to module level if call volume grows.
    """
    engine = Translator()
    result = engine.translate(text, src='ko', dest='en')
    return result.text
|
38 |
+
|
39 |
+
# Step 4: Classify the political orientation of an (already translated) text.
def classify_political_sentiment(text, classifier):
    """Classify *text* as progressive / conservative / neutral.

    Args:
        text: English text (callers translate Korean first).
        classifier: A Hugging Face text-classification pipeline, or any
            callable returning ``[{"label": ..., "score": ...}]``.

    Returns:
        ``(orientation, sentiment_score)`` — orientation is one of the Korean
        labels used throughout this file; sentiment_score is the model score,
        negated when the label is not "POSITIVE".
    """
    # Run the model; slice to 512 to stay under typical BERT input limits.
    # NOTE(review): this truncates by characters, not tokens — assumed close
    # enough here; confirm against the tokenizer's actual max length.
    result = classifier(text[:512])
    sentiment = result[0]
    label = sentiment["label"]
    score = sentiment["score"]

    # Signed score: keep the sign for positive sentiment, flip it otherwise.
    sentiment_score = score if label == "POSITIVE" else -score

    # Keyword-based orientation.
    # FIX: the original compared case-sensitively, so capitalized words in
    # headlines ("Welfare", "Security") never matched and everything fell
    # through to neutral. Lowercase once and compare against that.
    progressive_keywords = ["welfare", "equality", "democracy", "environment", "social responsibility"]
    conservative_keywords = ["security", "tradition", "economy", "growth", "order", "defense"]
    lowered = text.lower()

    if any(keyword in lowered for keyword in progressive_keywords):
        return "์ง๋ณด", sentiment_score
    elif any(keyword in lowered for keyword in conservative_keywords):
        return "๋ณด์", sentiment_score
    else:
        return "์ค๋ฆฝ", sentiment_score
|
60 |
|
61 |
+
# Step 5: Analyze every fetched news item and collect the results.
def analyze_news_political_orientation(news_items, classifier):
    """Translate, classify, and tally the orientation of each news item.

    Args:
        news_items: Iterable of Naver news dicts with "title"/"description".
        classifier: Text-classification pipeline passed through to the
            per-item classifier.

    Returns:
        ``(results, detailed_results)`` — a per-orientation count dict and a
        list of ``(title, description, orientation, score)`` tuples.
    """
    tally = {"์ง๋ณด": 0, "๋ณด์": 0, "์ค๋ฆฝ": 0}
    details = []

    for article in news_items:
        headline = article["title"]
        summary = article["description"]

        # Translate the combined Korean text so the English-language
        # classifier and keyword lists can operate on it.
        english_text = translate_to_english(f"{headline}. {summary}")

        # Classify and record this item.
        orientation, score = classify_political_sentiment(english_text, classifier)
        tally[orientation] += 1
        details.append((headline, summary, orientation, score))

        # Per-item progress report on stdout.
        print(f"Title: {headline}")
        print(f"Description: {summary}")
        print(f"Orientation: {orientation}, Score: {score}")
        print("-" * 80)

    return tally, details
|
86 |
|
87 |
+
# Step 6: End-to-end pipeline — fetch, translate, classify, report.
if __name__ == "__main__":
    try:
        # Pull the latest headlines for the search keyword.
        search_term = "์ ์น"  # search keyword
        news_payload = fetch_naver_news(search_term, display=5)

        # Load the Hugging Face classifier.
        model = load_huggingface_model()

        # Classify each fetched item and tally orientations.
        articles = news_payload["items"]
        tally, _details = analyze_news_political_orientation(articles, model)

        # Summary report.
        print("\n์ ์น ์ฑํฅ ๋ถ์ ๊ฒฐ๊ณผ")
        print(f"์ง๋ณด: {tally['์ง๋ณด']}๊ฑด")
        print(f"๋ณด์: {tally['๋ณด์']}๊ฑด")
        print(f"์ค๋ฆฝ: {tally['์ค๋ฆฝ']}๊ฑด")

    except Exception as e:
        # Top-level boundary: surface any failure instead of a raw traceback.
        print(f"์ค๋ฅ ๋ฐ์: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|