Spaces:

Yuchan5386
/

KeraLux-API

Running

File size: 6,760 Bytes

a19f837

import json      
import numpy as np      
import tensorflow as tf      
from tensorflow.keras import layers      
import gradio as gr      
import re    
import requests    
import math    
import sentencepiece as spm

# Load SentencePiece (the tokenizer and the special-token IDs are set up the
# same way — presumably the same as when the model was trained).
sp = spm.SentencePieceProcessor()
sp.load("ko_unigram3.model")


def _special_token_id(piece, fallback):
    """Return the id of ``piece``, or ``fallback`` when the lookup gives -1."""
    found = sp.piece_to_id(piece)
    return fallback if found == -1 else found


# NOTE(review): confirm that piece_to_id really reports a missing piece as -1;
# if it reports the unknown-token id instead, these fallbacks never apply.
# NOTE(review): "< end >" is spelled with inner spaces, unlike the other
# pieces — confirm it matches the piece stored in the tokenizer model.
pad_id = _special_token_id("<pad>", 0)
start_id = _special_token_id("<start>", 1)
end_id = _special_token_id("< end >", 2)
unk_id = _special_token_id("<unk>", 3)

# Vocabulary size comes from the tokenizer; max_len is the sequence length the
# model below is built with.
vocab_size = sp.get_piece_size()
max_len = 128

def text_to_ids(text):
    """Encode ``text`` with the module-level SentencePiece processor.

    Returns whatever ``sp.encode`` produces when asked for integer output
    (``out_type=int``).
    """
    token_ids = sp.encode(text, out_type=int)
    return token_ids

def ids_to_text(ids):
    """Decode token ``ids`` back to text with the module-level SentencePiece processor."""
    decoded = sp.decode(ids)
    return decoded

# GEGLU layer
class GEGLU(tf.keras.layers.Layer):
    """Gated feed-forward layer.

    The input is projected to ``d_ff * 2`` features, split in half along the
    last axis into a value part and a gate part, combined as
    ``value * gelu(gate)``, and projected to ``d_model`` features.
    """

    def __init__(self, d_model, d_ff):
        super().__init__()
        # One Dense layer produces both halves, hence the doubled width.
        self.proj = layers.Dense(units=d_ff * 2)
        self.out = layers.Dense(units=d_model)

    def call(self, x):
        """Apply the gated projection to ``x`` and return the result."""
        value, gate = tf.split(self.proj(x), num_or_size_splits=2, axis=-1)
        gated = value * tf.nn.gelu(gate)
        return self.out(gated)

# GPT block
class GPTBlock(tf.keras.layers.Layer):
    """One transformer block: causal self-attention, then a GEGLU feed-forward.

    Each sub-layer is preceded by its own layer normalization and followed by
    dropout, and its output is added back onto the sub-layer's input.
    """

    def __init__(self, d_model, d_ff, num_heads=16, dropout_rate=0.1):
        super().__init__()
        per_head_dim = d_model // num_heads
        self.ln1 = layers.LayerNormalization(epsilon=1e-5)
        self.attn = layers.MultiHeadAttention(num_heads=num_heads, key_dim=per_head_dim)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.ln2 = layers.LayerNormalization(epsilon=1e-5)
        self.ffn = GEGLU(d_model, d_ff)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, x, training=False):
        """Run the block on ``x``; ``training`` is forwarded to attention and dropout."""
        # Self-attention: query, key and value are all the normalized input,
        # restricted by a causal mask.
        normed = self.ln1(x)
        attended = self.attn(
            query=normed,
            value=normed,
            key=normed,
            use_causal_mask=True,
            training=training,
        )
        after_attn = x + self.dropout1(attended, training=training)

        # Feed-forward sub-layer on the normalized attention result.
        fed_forward = self.ffn(self.ln2(after_attn))
        return after_attn + self.dropout2(fed_forward, training=training)

# GPT model
class GPT(tf.keras.Model):
    """Decoder-only language model built from a stack of ``GPTBlock`` layers.

    Token embeddings are summed with a learned positional table of shape
    ``[seq_len, d_model]``, passed through ``n_layers`` blocks and a final
    layer normalization. Output logits are produced by multiplying with the
    transposed token-embedding matrix, so no separate output layer exists.
    """

    def __init__(self, vocab_size, seq_len, d_model, d_ff, n_layers, num_heads=16, dropout_rate=0.1):
        super().__init__()
        self.token_embedding = layers.Embedding(vocab_size, d_model)
        # Learned positional table: one d_model-sized row per position.
        self.pos_embedding = self.add_weight(
            name="pos_embedding",
            shape=[seq_len, d_model],
            initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        )
        self.blocks = [
            GPTBlock(d_model=d_model, d_ff=d_ff, num_heads=num_heads, dropout_rate=dropout_rate)
            for _ in range(n_layers)
        ]
        self.ln_f = layers.LayerNormalization(epsilon=1e-5)

    def call(self, x, training=False):
        """Return logits for the token ids in ``x``.

        Axis 1 of ``x`` is treated as the sequence axis; only that many rows
        of the positional table are used.
        """
        length = tf.shape(x)[1]
        token_vectors = self.token_embedding(x)
        position_vectors = self.pos_embedding[tf.newaxis, :length, :]
        hidden = token_vectors + position_vectors

        for block in self.blocks:
            hidden = block(hidden, training=training)

        hidden = self.ln_f(hidden)
        # Reuse the embedding matrix (transposed) as the output projection.
        embedding_matrix = self.token_embedding.embeddings
        return tf.matmul(hidden, embedding_matrix, transpose_b=True)

# Create the model and load its weights.
# The forward pass on a dummy batch comes first so that the model is built
# before the weights file is read.
model = GPT(vocab_size=vocab_size, seq_len=max_len, d_model=128, d_ff=512, n_layers=6)
dummy_input = tf.zeros((1, max_len), dtype=tf.int32)  # batch of 1, sequence length max_len
_ = model(dummy_input)  # this call builds the model
model.load_weights("KeraLux3.weights.h5")
print("모델 가중치 로드 완료!")

def decode_sp_tokens(tokens):
    """Join token pieces into display text.

    The pieces are concatenated, every '▁' character (presumably the
    SentencePiece word-boundary marker) becomes a space, and surrounding
    whitespace is stripped.
    """
    merged = ''.join(tokens)
    spaced = ' '.join(merged.split('▁'))
    return spaced.strip()

def generate_text_topkp_stream(model, prompt, max_len=100, max_gen=98, p=0.9, k=50, temperature=0.8, min_len=20):
    """Stream a sampled continuation of ``prompt``, one token per step.

    Each step right-pads the running token sequence to ``max_len`` with
    ``pad_id``, runs ``model`` on it, and samples the next token from the
    temperature-scaled distribution after top-k and then top-p truncation.

    Args:
        model: Callable invoked as ``model(tensor, training=False)`` whose
            result is indexed as ``logits[0, position]``. If it has a
            ``pos_embedding`` attribute, the first dimension of that table is
            used as the longest sequence the model can accept.
        prompt: User text; ``"<start> "`` is prepended before encoding.
        max_len: The encoded prompt is truncated to this many tokens, and
            shorter sequences are padded up to this length for each model call.
        max_gen: Maximum number of sampling steps.
        p: Cumulative-probability threshold for the top-p cut.
        k: Number of most probable tokens kept by the top-k cut. Values larger
            than the vocabulary size are clamped to the vocabulary size.
        temperature: Divisor applied to the logits before the softmax.
        min_len: Total sequence length (prompt plus generated tokens) from
            which the stop conditions and the end-token penalty apply.

    Yields:
        The text decoded from every piece generated so far. The chunk that
        completes a sentence (ends with '.', '!' or '?') is yielded before
        generation stops; an end token that stops generation is not yielded.
    """
    generated = list(text_to_ids(f"<start> {prompt}")[:max_len])
    text_so_far = []

    # The sequence keeps growing past max_len once padding reaches zero, so
    # bound it by the model's positional table when that table is available;
    # feeding a longer sequence would fail inside the model.
    pos_table = getattr(model, "pos_embedding", None)
    context_limit = int(pos_table.shape[0]) if pos_table is not None else None

    for _ in range(max_gen):
        if context_limit is not None and len(generated) > context_limit:
            break

        pad_length = max(0, max_len - len(generated))
        input_padded = np.pad(generated, (0, pad_length), constant_values=pad_id)
        input_tensor = tf.convert_to_tensor([input_padded])
        logits = model(input_tensor, training=False)
        # Logits at the last real (non-padding) position predict the next token.
        next_token_logits = logits[0, len(generated) - 1].numpy()

        # NOTE(review): the end-token penalty is applied only once min_len has
        # been reached, which makes stopping less likely after the minimum
        # length rather than before it — confirm this is intended.
        if len(generated) >= min_len:
            next_token_logits[end_id] -= 5.0
        next_token_logits[pad_id] -= 10.0

        # Apply temperature, then turn logits into probabilities.
        logits_temp = next_token_logits / temperature
        probs = tf.nn.softmax(logits_temp).numpy()

        # Top-k filtering. argpartition rejects a k beyond the vocabulary
        # size, so clamp it.
        top_k = min(k, probs.shape[0])
        top_k_indices = np.argpartition(probs, -top_k)[-top_k:]
        top_k_probs = probs[top_k_indices]

        # Top-p filtering: sort descending so the cumulative sum can be cut.
        sorted_idx = np.argsort(top_k_probs)[::-1]
        top_k_indices = top_k_indices[sorted_idx]
        top_k_probs = top_k_probs[sorted_idx]
        cumulative_probs = np.cumsum(top_k_probs)

        # Keep tokens up to and including the one that crosses p.
        cutoff = np.searchsorted(cumulative_probs, p, side='right') + 1
        filtered_indices = top_k_indices[:cutoff]
        filtered_probs = top_k_probs[:cutoff]

        # Renormalize the surviving probabilities and sample.
        filtered_probs /= filtered_probs.sum()
        next_token_id = int(np.random.choice(filtered_indices, p=filtered_probs))

        generated.append(next_token_id)
        text_so_far.append(sp.id_to_piece(next_token_id))
        decoded_text = decode_sp_tokens(text_so_far)

        reached_min_len = len(generated) >= min_len

        # An end token past min_len stops generation; its piece is not shown.
        if reached_min_len and next_token_id == end_id:
            break

        yield decoded_text

        # Stop after a completed sentence — checked after the yield so the
        # caller receives the chunk that contains the closing punctuation.
        if reached_min_len and decoded_text.endswith(('.', '!', '?')):
            break

def chat(user_input, history):
    """Stream the reply to ``user_input`` as successive chat states.

    For every partial response from ``generate_text_topkp_stream`` this
    yields a pair of lists, each equal to ``history`` followed by the
    ``(user_input, partial_response)`` turn. A ``None`` history is treated
    as empty.
    """
    past_turns = [] if history is None else history

    for partial_response in generate_text_topkp_stream(model, user_input, p=0.9):
        current_turn = (user_input, partial_response)
        yield past_turns + [current_turn], past_turns + [current_turn]

# Build the Gradio interface: a title, a separator, the chat log and a text
# box for the user's message.
with gr.Blocks(title="KeraLux Chat") as demo:
    gr.Markdown(
        """      
        # 💡 KeraLux와 대화해보세요!      
        대화를 입력하면 KeraLux가 똑똑하게 대답해줄 거예요.      
        """,
        elem_id="title",
    )
    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=1):
            chatbot = gr.Chatbot(label="KeraLux 채팅창", bubble_full_width=False)
        with gr.Column(scale=0):
            msg = gr.Textbox(
                label="당신의 질문을 입력하세요!",
                placeholder="ex) 나 좀 도와줄 수 있니?",
                lines=1,
            )
            # Conversation history handed to `chat` and updated from its output.
            state = gr.State([])

    # On submit, `chat` receives the message and the state, and its yielded
    # pairs update both the chat log and the state.
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state])
    msg.submit(lambda: "", None, msg)  # clear the input box

# Start the app (runs at import time; share=True is passed to Gradio's launch).
demo.launch(share=True)