import streamlit as st
import tensorflow as tf
import numpy as np
import json
import requests
from bs4 import BeautifulSoup
from transformers import BertTokenizer, TFAutoModel
# Path to the locally saved fine-tuned BERT checkpoint and the fixed input length
PATH = './checkpoint-7500/'
SEQ_LEN = 128

# Multilingual cased BERT tokenizer, matching the base model the checkpoint was fine-tuned from
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
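# Illustrative sketch of what encode() produces with these arguments (exact IDs
# depend on the vocabulary): a list of SEQ_LEN token IDs that starts with the
# [CLS] id, ends the real tokens with [SEP], and is zero-padded to max_length.
#   ids = tokenizer.encode("hello", max_length=SEQ_LEN, truncation=True, padding='max_length')
#   len(ids)  # -> SEQ_LEN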
def create_sentiment_bert():
    # Load the pretrained BERT model from the local checkpoint
    model = TFAutoModel.from_pretrained(PATH, local_files_only=True)
    # Define the token, mask, and segment inputs
    token_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
    mask_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_masks')
    segment_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_segment')
    # Define a model whose inputs are [tokens, masks, segments]
    bert_outputs = model([token_inputs, mask_inputs, segment_inputs])
    bert_outputs = bert_outputs[1]  # pooled [CLS] output
    sentiment_first = tf.keras.layers.Dense(
        1, activation='sigmoid',
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))(bert_outputs)
    sentiment_model = tf.keras.Model([token_inputs, mask_inputs, segment_inputs], sentiment_first)
    sentiment_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    return sentiment_model
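# Minimal sanity-check sketch (assumes the checkpoint exists at PATH): the model
# maps three (batch, SEQ_LEN) int32 arrays to a (batch, 1) sigmoid score.
#   m = create_sentiment_bert()
#   dummy = [np.zeros((1, SEQ_LEN), dtype=np.int32)] * 3
#   m.predict(dummy).shape  # -> (1, 1)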
def sentence_convert_data(data):
    global tokenizer
    tokens, masks, segments = [], [], []
    # Encode the sentence to token IDs, padded/truncated to SEQ_LEN
    token = tokenizer.encode(data, max_length=SEQ_LEN, truncation=True, padding='max_length')
    # Attention mask: 1 for real tokens, 0 for padding
    num_zeros = token.count(0)
    mask = [1] * (SEQ_LEN - num_zeros) + [0] * num_zeros
    # Single-sentence input, so all segment IDs are 0
    segment = [0] * SEQ_LEN
    tokens.append(token)
    segments.append(segment)
    masks.append(mask)
    tokens = np.array(tokens)
    masks = np.array(masks)
    segments = np.array(segments)
    return [tokens, masks, segments]
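# Usage sketch (shapes only; the input sentence is illustrative):
#   tokens, masks, segments = sentence_convert_data("Great article")
#   tokens.shape == masks.shape == segments.shape == (1, SEQ_LEN)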
def movie_evaluation_predict(sentence):
    # sentiment_model is a module-level global set up in main()
    data_x = sentence_convert_data(sentence)
    predict = sentiment_model.predict(data_x)
    predict_value = np.ravel(predict)
    # Round the sigmoid score: 0 = negative, 1 = positive
    predict_answer = np.round(predict_value, 0).item()
    return predict_answer
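# Illustrative only: np.round maps a score of e.g. 0.83 to 1.0 (positive) and
# 0.12 to 0.0 (negative); an explicit 0.5 threshold would behave the same way:
#   predict_answer = int(predict_value.item() > 0.5)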
def get_comments(news_url):
    # Extract the oid and aid article identifiers from the URL
    parts = news_url.split("/")
    oid = parts[-2]
    aid = parts[-1]
    if len(aid) > 10:
        # Keep only the 10-digit article id (drops any trailing query string)
        aid = aid[:10]
    # Build the comment API request
    api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
    params = {
        "ticket": "news",
        "templateId": "default_society",
        "pool": "cbox5",
        "lang": "ko",
        "country": "KR",
        "objectId": f"news{oid},{aid}",
        "pageSize": 100,
        "indexSize": 10,
        "page": 1,
        "sort": "FAVORITE"  # 'NEW' (newest) or 'FAVORITE' (most liked)
    }
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": news_url
    }
    # Call the API and strip the JSONP wrapper to get plain JSON
    response = requests.get(api_url, params=params, headers=headers)
    content = response.text.replace("_callback(", "").replace(");", "")
    json_data = json.loads(content)
    response = requests.get(news_url)
    article_soup = BeautifulSoup(response.text, "html.parser")
    # Extract the title, falling back to the older page layout
    title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
    if title is None:
        title = article_soup.select_one("#content > div.end_ct > div > h2")
    # Extract the article body, falling back to the older page layout
    article = article_soup.select_one("#dic_area")
    if article is None:
        article = article_soup.select_one("#articeBody")
    return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
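# Example with a hypothetical article URL:
#   https://n.news.naver.com/mnews/article/001/0014000000
# splits to oid = "001" and aid = "0014000000", so the comment API is queried
# with objectId = "news001,0014000000".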
def processing_data(comments):
    # Collect comment bodies, dropping entries whose contents are empty
    comment_list = []
    for comment in comments:
        comment_list.append(comment['contents'])
    comment_listR = [x for x in comment_list if x]
    return comment_listR
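# Example (payload shape assumed from the code above):
#   processing_data([{'contents': 'nice'}, {'contents': ''}])  # -> ['nice']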
def main():
    global sentiment_model
    sentiment_model = create_sentiment_bert()
    st.title("Comment Filtering Service")
    # Read the article URL from the user
    url = st.text_input("Enter a URL")
    if st.button("Start scraping"):
        if url:
            title, content, comments = get_comments(url)
            # Display the results
            st.subheader("Article Title")
            st.write(title)
            st.subheader("Article Body")
            st.write(content)
            st.subheader("Comments")
            # Show only the comments the model classifies as positive (1)
            for comment in comments:
                if movie_evaluation_predict(comment) == 1:
                    st.write(comment)
    return 0

if __name__ == "__main__":
    main()
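# To launch the app (assuming this file is saved as app.py and the dependencies
# plus the ./checkpoint-7500/ checkpoint are available):
#   streamlit run app.py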