Spaces:
Sleeping
Sleeping
File size: 2,594 Bytes
22acb53 2130106 24d4881 22acb53 24d4881 aaef8fd fdb1d41 aaef8fd 879bc79 24d4881 879bc79 2130106 879bc79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import streamlit as st
import tensorflow as tf
import numpy as np
import pandas as pd
from transformers import *
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from tensorflow.python.client import device_lib
import safetensor
# Local directory passed to from_pretrained() in create_sentiment_bert().
PATH = './checkpoint-7500/'
# Fixed sequence length: sentences are truncated/padded to this many token ids
# and the Keras input layers are declared with this width.
SEQ_LEN = 128
# Module-level tokenizer read by sentence_convert_data().
# NOTE(review): the tokenizer comes from the hub model name while the encoder
# is loaded from PATH -- presumably the checkpoint was trained with this same
# vocabulary; confirm.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
def create_sentiment_bert():
    """Build the Keras sentiment classifier on top of the local BERT checkpoint.

    The encoder is loaded from ``PATH`` and wrapped in a functional Keras
    model that takes three int32 inputs of width ``SEQ_LEN`` (token ids,
    attention mask, segment ids) and produces a single sigmoid score.

    Returns:
        tf.keras.Model: compiled model whose inputs are
        ``[input_word_ids, input_masks, input_segment]``.
    """
    # Keras symbolic tensors can only be fed to the TensorFlow model classes;
    # plain ``AutoModel`` resolves to the PyTorch implementation, which cannot
    # be called on ``tf.keras.layers.Input`` tensors.
    from transformers import TFAutoModel

    # Load the pretrained BERT encoder from the local checkpoint directory.
    # NOTE(review): if the directory only holds PyTorch ``.bin`` weights,
    # ``from_pt=True`` is additionally required -- confirm against the
    # checkpoint contents.
    model = TFAutoModel.from_pretrained(PATH, local_files_only=True)

    # Define the token, mask and segment inputs.
    token_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
    mask_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_masks')
    segment_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_segment')

    # Run the encoder on [tokens, masks, segments]; element 1 of a BERT
    # model's output is the pooled representation used for classification.
    bert_outputs = model([token_inputs, mask_inputs, segment_inputs])
    bert_outputs = bert_outputs[1]

    # NOTE(review): this Dense head is freshly initialised and no trained
    # weights are loaded for it in this function -- confirm that is intended.
    sentiment_first = tf.keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )(bert_outputs)
    sentiment_model = tf.keras.Model([token_inputs, mask_inputs, segment_inputs], sentiment_first)
    sentiment_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    return sentiment_model
def sentence_convert_data(data):
    """Encode one sentence into the three input arrays the model expects.

    Args:
        data: sentence text handed to ``tokenizer.encode``.

    Returns:
        list: ``[tokens, masks, segments]``, each a NumPy array holding a
        single row of ``SEQ_LEN`` integers: token ids, attention mask
        (1 for real tokens, 0 for padding) and all-zero segment ids.
    """
    token = tokenizer.encode(data, max_length=SEQ_LEN, truncation=True, padding='max_length')

    # Mark padding by comparing against the tokenizer's own pad id instead of
    # counting zeros: this does not assume the pad id is 0 or that padding is
    # appended on the right.
    pad_id = tokenizer.pad_token_id
    mask = [0 if token_id == pad_id else 1 for token_id in token]

    # A single sentence belongs entirely to segment 0.
    segment = [0] * SEQ_LEN

    # Wrap each sequence in a batch dimension of size one.
    return [np.array([token]), np.array([mask]), np.array([segment])]
def movie_evaluation_predict(sentence):
    """Classify one review sentence and write the verdict to the Streamlit page.

    The sentence is encoded with ``sentence_convert_data``, scored by the
    module-level ``sentiment_model`` and the rounded score selects the
    negative (0) or positive (1) message.

    Args:
        sentence: review text to classify.
    """
    data_x = sentence_convert_data(sentence)
    predict = sentiment_model.predict(data_x)
    predict_value = np.ravel(predict)
    predict_answer = np.round(predict_value, 0).item()
    print(predict_value)

    # Format plain Python floats rather than 1-element arrays: implicit
    # array-to-scalar conversion inside ``%`` formatting is deprecated in NumPy.
    if predict_answer == 0:
        negative_prob = (1.0 - predict_value).item()
        st.write("(부정 확률 : %.2f) 부정적인 영화 평가입니다." % negative_prob)
    elif predict_answer == 1:
        positive_prob = predict_value.item()
        st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다." % positive_prob)
# Build the classifier once at script start-up; movie_evaluation_predict()
# reads this module-level name.
sentiment_model = create_sentiment_bert()
# Run the classifier on a fixed sample review (text restored from a
# mis-encoded literal).
movie_evaluation_predict("보던거라 계속보고있는데 전개도 느리고 주인공인 은희는 한두컷 나오면서 소극적인모습에 ")
|