"""Streamlit demo: binary sentiment prediction for a movie review with BERT.

At import time the script loads a tokenizer and a fine-tuned encoder from
``PATH``, builds a Keras model with a single sigmoid output on top of it, and
writes the prediction for one hard-coded sample review to the Streamlit page.
"""

import json
import os

# The star import is kept ahead of the explicit imports so that every name
# bound explicitly below always wins over anything the star import brings in.
from transformers import *  # noqa: F401,F403 - BertTokenizer, TFAutoModel
import numpy as np
import pandas as pd
# NOTE(review): the Hugging Face package is published as `safetensors`;
# confirm that a module named `safetensor` is really installed/intended.
import safetensor
import streamlit as st
import tensorflow as tf
from tensorflow.python.client import device_lib
from tqdm import tqdm

PATH = './checkpoint-7500/'
SEQ_LEN = 128

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')


def create_sentiment_bert():
    """Build and compile the Keras sentiment classifier.

    The encoder is loaded from ``PATH`` (local files only) and wrapped in a
    functional Keras model taking three ``(SEQ_LEN,)`` int32 inputs: token
    ids, attention mask and segment ids. The second output of the encoder is
    fed to a single-unit sigmoid ``Dense`` layer.

    Returns:
        A compiled ``tf.keras.Model`` whose output is one sigmoid value per
        input row.
    """
    # Load the pretrained encoder. The TensorFlow auto class is required
    # here: the model is called on symbolic Keras tensors below, which the
    # PyTorch ``AutoModel`` class cannot accept.
    # NOTE(review): if the checkpoint only holds PyTorch ``.bin`` weights,
    # ``from_pt=True`` may be needed - confirm against the checkpoint files.
    model = TFAutoModel.from_pretrained(PATH, local_files_only=True)

    # Define the token, mask and segment inputs.
    token_inputs = tf.keras.layers.Input(
        (SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
    mask_inputs = tf.keras.layers.Input(
        (SEQ_LEN,), dtype=tf.int32, name='input_masks')
    segment_inputs = tf.keras.layers.Input(
        (SEQ_LEN,), dtype=tf.int32, name='input_segment')

    # The encoder is called with the inputs as [tokens, masks, segments].
    bert_outputs = model([token_inputs, mask_inputs, segment_inputs])
    # Index 1 of the encoder output is used as the sentence representation.
    bert_outputs = bert_outputs[1]

    # NOTE(review): this head is freshly initialised here and no weights are
    # loaded into it in this file - confirm where its trained weights come
    # from before trusting the predictions.
    sentiment_first = tf.keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )(bert_outputs)

    sentiment_model = tf.keras.Model(
        [token_inputs, mask_inputs, segment_inputs], sentiment_first)
    sentiment_model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    return sentiment_model


def sentence_convert_data(data):
    """Convert one sentence into the three model input arrays.

    Args:
        data: The sentence to encode.

    Returns:
        A list ``[tokens, masks, segments]`` of int32 arrays, each of shape
        ``(1, SEQ_LEN)``.
    """
    encoded = tokenizer(
        data, max_length=SEQ_LEN, truncation=True, padding='max_length')

    tokens = np.array([encoded['input_ids']], dtype=np.int32)
    # Use the tokenizer's own mask instead of counting token id 0, which
    # silently assumes both the pad id and that 0 never occurs as real text.
    masks = np.array([encoded['attention_mask']], dtype=np.int32)
    # Only a single sentence is encoded, so every position is segment 0.
    segments = np.zeros_like(tokens)
    return [tokens, masks, segments]


def movie_evaluation_predict(sentence):
    """Predict the sentiment of ``sentence`` and write it to the page.

    Uses the module-level ``sentiment_model``. The raw prediction is printed
    to stdout and a message with the probability of the predicted class is
    written with ``st.write``.

    Args:
        sentence: The review text to classify.
    """
    data_x = sentence_convert_data(sentence)
    predict = sentiment_model.predict(data_x)
    predict_value = np.ravel(predict)
    print(predict_value)

    # Pull out a plain Python float: formatting a 1-element ndarray with
    # "%.2f" relies on array-to-scalar conversion that NumPy has deprecated.
    positive_prob = float(predict_value[0])
    predict_answer = np.round(positive_prob, 0)

    if predict_answer == 0:
        st.write("(부정 확률 : %.2f) 부정적인 영화 평가입니다."
                 % (1.0 - positive_prob))
    elif predict_answer == 1:
        st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다."
                 % positive_prob)


sentiment_model = create_sentiment_bert()

movie_evaluation_predict(
    "보던거라 계속보고있는데 전개도 느리고 주인공인 은희는 한두컷 나오면서 소극적인모습에 ")