Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -142,7 +142,7 @@ def decode_sp_tokens(tokens):
|
|
142 |
text = ''.join(tokens).replace('โ', ' ').strip()
|
143 |
return text
|
144 |
|
145 |
-
def
|
146 |
model_input = text_to_ids(f"<start> {prompt}")
|
147 |
model_input = model_input[:max_len]
|
148 |
generated = list(model_input)
|
@@ -155,29 +155,37 @@ def generate_text_topz_stream(model, prompt, max_len=100, max_gen=98, alpha=1.5,
|
|
155 |
logits = model(input_tensor, training=False)
|
156 |
next_token_logits = logits[0, len(generated) - 1].numpy()
|
157 |
|
|
|
158 |
if len(generated) >= min_len:
|
159 |
next_token_logits[end_id] -= 5.0
|
160 |
next_token_logits[pad_id] -= 10.0
|
161 |
|
162 |
# ์จ๋ ์ ์ฉ
|
163 |
logits_temp = next_token_logits / temperature
|
164 |
-
|
165 |
-
# ํ๋ฅ ๊ณ์ฐ
|
166 |
probs = tf.nn.softmax(logits_temp).numpy()
|
167 |
|
168 |
-
#
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
# ์ํ๋ง
|
173 |
-
next_token_id = np.random.choice(
|
174 |
|
|
|
175 |
generated.append(int(next_token_id))
|
176 |
next_word = sp.id_to_piece(int(next_token_id))
|
177 |
text_so_far.append(next_word)
|
178 |
|
179 |
decoded_text = decode_sp_tokens(text_so_far)
|
180 |
|
|
|
181 |
if len(generated) >= min_len and next_token_id == end_id:
|
182 |
break
|
183 |
if len(generated) >= min_len and decoded_text.endswith(('.', '!', '?')):
|
@@ -187,7 +195,7 @@ def generate_text_topz_stream(model, prompt, max_len=100, max_gen=98, alpha=1.5,
|
|
187 |
|
188 |
def chat_stream(user_input, history_text):
|
189 |
partial_text = ""
|
190 |
-
for partial_response in
|
191 |
partial_text = partial_response
|
192 |
yield history_text + f"์ฌ์ฉ์: {user_input}\nKeraLux: {partial_text}\n", \
|
193 |
history_text + f"์ฌ์ฉ์: {user_input}\nKeraLux: {partial_text}\n"
|
|
|
142 |
text = ''.join(tokens).replace('โ', ' ').strip()
|
143 |
return text
|
144 |
|
145 |
+
def generate_text_topp_stream(model, prompt, max_len=100, max_gen=98, p=0.9, temperature=0.8, min_len=20):
|
146 |
model_input = text_to_ids(f"<start> {prompt}")
|
147 |
model_input = model_input[:max_len]
|
148 |
generated = list(model_input)
|
|
|
155 |
logits = model(input_tensor, training=False)
|
156 |
next_token_logits = logits[0, len(generated) - 1].numpy()
|
157 |
|
158 |
+
# ํน์ ํ ํฐ๋ค ํ๋ฅ ๋ฎ์ถค
|
159 |
if len(generated) >= min_len:
|
160 |
next_token_logits[end_id] -= 5.0
|
161 |
next_token_logits[pad_id] -= 10.0
|
162 |
|
163 |
# ์จ๋ ์ ์ฉ
|
164 |
logits_temp = next_token_logits / temperature
|
|
|
|
|
165 |
probs = tf.nn.softmax(logits_temp).numpy()
|
166 |
|
167 |
+
# ํ๋ฅ ๋ด๋ฆผ์ฐจ์ ์ ๋ ฌ
|
168 |
+
sorted_idx = np.argsort(probs)[::-1]
|
169 |
+
sorted_probs = probs[sorted_idx]
|
170 |
+
cumulative_probs = np.cumsum(sorted_probs)
|
171 |
+
|
172 |
+
# ๋์ ํฉ์ด p ๋๋ ์์น๊น์ง๋ง ์ ํ
|
173 |
+
cutoff = np.searchsorted(cumulative_probs, p, side='right') + 1
|
174 |
+
filtered_indices = sorted_idx[:cutoff]
|
175 |
+
filtered_probs = sorted_probs[:cutoff]
|
176 |
+
filtered_probs /= filtered_probs.sum()
|
177 |
|
178 |
# ์ํ๋ง
|
179 |
+
next_token_id = np.random.choice(filtered_indices, p=filtered_probs)
|
180 |
|
181 |
+
# ๊ฒฐ๊ณผ ๋์
|
182 |
generated.append(int(next_token_id))
|
183 |
next_word = sp.id_to_piece(int(next_token_id))
|
184 |
text_so_far.append(next_word)
|
185 |
|
186 |
decoded_text = decode_sp_tokens(text_so_far)
|
187 |
|
188 |
+
# ์ ์ง ์กฐ๊ฑด
|
189 |
if len(generated) >= min_len and next_token_id == end_id:
|
190 |
break
|
191 |
if len(generated) >= min_len and decoded_text.endswith(('.', '!', '?')):
|
|
|
195 |
|
196 |
def chat_stream(user_input, history_text):
    """Stream a chat turn to the UI.

    Drives the top-p token generator for *user_input* and, for every
    partial response it emits, yields a pair of identical transcript
    strings (the Gradio pattern of updating two outputs at once):
    the prior *history_text* plus the current user/bot exchange.
    """
    latest = ""  # most recent partial response from the generator
    for latest in generate_text_topp_stream(model, user_input):
        # Rebuild the running transcript with the newest partial text.
        transcript = history_text + f"์ฌ์ฉ์: {user_input}\nKeraLux: {latest}\n"
        yield transcript, transcript
|