Yuchan5386 committed on
Commit
5480bd4
·
verified ·
1 Parent(s): b2b2dad

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +82 -49
api.py CHANGED
@@ -126,55 +126,88 @@ print("모델 가중치 로드 완료!")
126
 
127
  repetition_penalty = 1.2
128
 
129
async def generate_text_stream(prompt: str):
    """Stream generated text for *prompt*, yielding one decoded token at a time.

    Encodes the prompt as ``<start> {prompt} <sep>``, then repeatedly samples the
    next token (top-k with k=100, repetition-penalized), yielding each non-special
    token's decoded text until ``<end>`` is sampled or the context window fills.

    Relies on module-level globals: ``text_to_ids``, ``model``, ``sp``,
    ``max_len``, ``pad_id``, ``end_id``, ``repetition_penalty``.

    Yields:
        str: the decoded text of each newly sampled (non-special) token.
    """
    model_input = text_to_ids(f"<start> {prompt} <sep>")
    model_input = model_input[:max_len]
    generated = list(model_input)

    # Fix: the original `while True` let `generated` grow past `max_len`, at
    # which point `logits[0, len(generated) - 1]` indexes beyond the model's
    # output sequence. Bound the loop at the context-window size instead.
    while len(generated) < max_len:
        pad_length = max(0, max_len - len(generated))
        input_padded = np.pad(generated, (0, pad_length), constant_values=pad_id)
        input_tensor = tf.convert_to_tensor([input_padded])
        logits = model(input_tensor, training=False)
        # Logits at the position of the last real (non-pad) token.
        next_token_logits = logits[0, len(generated) - 1].numpy()

        # Repetition penalty: divide each already-seen token's logit by
        # penalty ** (times seen), discouraging loops.
        token_counts = {}
        for t in generated:
            token_counts[t] = token_counts.get(t, 0) + 1
        for token_id, count in token_counts.items():
            next_token_logits[token_id] /= (repetition_penalty ** count)

        # NOTE(review): this *suppresses* <end>/<pad> once 20 tokens exist,
        # i.e. it pushes generations to run long — confirm that is intended.
        if len(generated) >= 20:
            next_token_logits[end_id] -= 5.0
            next_token_logits[pad_id] -= 10.0
        next_token_logits = next_token_logits / 1.0  # temperature fixed at 1.0

        # Numerically stable softmax.
        logits_stable = next_token_logits - np.max(next_token_logits)
        probs = np.exp(logits_stable)
        probs /= probs.sum()

        # Top-k sampling: keep the 100 most probable tokens, renormalize.
        sorted_indices = np.argsort(-probs)
        top_indices = sorted_indices[:100]
        top_probs = probs[top_indices]
        top_probs /= top_probs.sum()

        sampled_index = np.random.choice(top_indices, p=top_probs)
        generated.append(int(sampled_index))

        new_token_text = sp.decode([int(sampled_index)])

        # Special tokens are never yielded; <end> terminates the stream.
        if any(tok in new_token_text for tok in ["<start>", "<sep>", "<end>", "<pad>"]):
            if sampled_index == end_id:
                break
            continue

        yield new_token_text
        # Throttle the stream so clients receive tokens gradually.
        await asyncio.sleep(0.1)
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  @app.get("/generate")
179
  async def generate(request: Request):
180
  prompt = request.query_params.get("prompt", "안녕하세요")
 
126
 
127
  repetition_penalty = 1.2
128
 
129
async def generate_text_mirostat_top_p(model, prompt, max_len=100, max_gen=98,
                                       temperature=1.0, min_len=20,
                                       repetition_penalty=1.2, eta=0.1, m=100, p=0.9):
    """Generate text with top-m + top-p (nucleus) sampling, streaming snapshots.

    Encodes the prompt as ``<start> {prompt} <sep>`` and samples up to
    ``max_gen`` tokens. Each step applies a repetition penalty, temperature
    scaling, restricts to the ``m`` most probable tokens, then nucleus-filters
    to cumulative probability ``p`` before sampling. After every non-special
    token (and on ``<end>``), yields the full decoded text so far with special
    tokens stripped.

    Relies on module-level globals: ``text_to_ids``, ``sp``, ``pad_id``,
    ``start_id``, ``end_id``.

    Args:
        model: callable returning logits of shape (1, max_len, vocab).
        prompt: user prompt string.
        max_len: model context-window length (input is padded/truncated to it).
        max_gen: maximum number of tokens to sample.
        temperature: logit temperature divisor.
        min_len: length after which <end>/<pad> logits are adjusted.
        repetition_penalty: per-occurrence logit divisor for seen tokens.
        eta: mirostat learning rate for the tau update.
        m: top-m candidate-pool size.
        p: nucleus (top-p) cumulative-probability cutoff.

    Yields:
        str: cumulative decoded text (special tokens removed, stripped).
    """
    model_input = text_to_ids(f"<start> {prompt} <sep>")
    model_input = model_input[:max_len]
    generated = list(model_input)

    tau = 5.0  # initial target surprise (mirostat)

    for step in range(max_gen):
        # Fix: stop once the context window is full. Without this guard,
        # `generated` can exceed `max_len` (the prompt alone may be up to
        # `max_len` tokens), making `logits[0, len(generated) - 1]` index
        # past the model's output sequence.
        if len(generated) >= max_len:
            break

        pad_length = max(0, max_len - len(generated))
        input_padded = np.pad(generated, (0, pad_length), constant_values=pad_id)
        input_tensor = tf.convert_to_tensor([input_padded])
        logits = model(input_tensor, training=False)
        # Logits at the position of the last real (non-pad) token.
        next_token_logits = logits[0, len(generated) - 1].numpy()

        # Repetition penalty: divide each already-seen token's logit by
        # penalty ** (times seen), discouraging loops.
        token_counts = {}
        for t in generated:
            token_counts[t] = token_counts.get(t, 0) + 1
        for token_id, count in token_counts.items():
            next_token_logits[token_id] /= (repetition_penalty ** count)

        # NOTE(review): this *suppresses* <end>/<pad> once min_len is reached,
        # which pushes generations to run long — confirm that is the intent
        # (the comment in the original implied the opposite).
        if len(generated) >= min_len:
            next_token_logits[end_id] -= 5.0
            next_token_logits[pad_id] -= 10.0

        # Temperature scaling.
        next_token_logits = next_token_logits / temperature

        # --- mirostat + top-p sampling ---
        # Numerically stable softmax.
        logits_stable = next_token_logits - np.max(next_token_logits)
        probs = np.exp(logits_stable)
        probs /= probs.sum()

        # 1. Restrict to the top-m candidates and renormalize.
        sorted_indices = np.argsort(-probs)
        top_indices = sorted_indices[:m]
        top_probs = probs[top_indices]
        top_probs /= top_probs.sum()

        # 2. Mirostat surprise tracking.
        # NOTE(review): `tau` is updated here but never fed back into the
        # sampling step, so the mirostat loop currently has no effect —
        # confirm whether tau was meant to control the candidate pool.
        sampled_index = np.random.choice(top_indices, p=top_probs)
        sampled_prob = probs[sampled_index]
        observed_surprise = -np.log(sampled_prob + 1e-9)
        tau += eta * (observed_surprise - tau)

        # 3. Top-p (nucleus) filtering over the top-m pool.
        sorted_top_indices = top_indices[np.argsort(-top_probs)]
        sorted_top_probs = np.sort(top_probs)[::-1]
        cumulative_probs = np.cumsum(sorted_top_probs)
        cutoff = np.searchsorted(cumulative_probs, p, side='left') + 1
        filtered_indices = sorted_top_indices[:cutoff]
        filtered_probs = sorted_top_probs[:cutoff]
        filtered_probs /= filtered_probs.sum()

        # 4. Final token draw from the nucleus.
        final_token = np.random.choice(filtered_indices, p=filtered_probs)
        generated.append(int(final_token))

        # <end>: emit the final cleaned snapshot and stop.
        if final_token == end_id:
            decoded_text = sp.decode(generated)
            for token in ["<start>", "<sep>", "<end>"]:
                decoded_text = decoded_text.replace(token, "")
            decoded_text = decoded_text.strip()
            yield decoded_text
            break

        # Other special tokens are kept in `generated` but never displayed.
        # Fix: cast to int — SentencePiece's id_to_piece expects a plain
        # Python int, not a NumPy integer.
        if final_token in [start_id, pad_id] or sp.id_to_piece(int(final_token)) == "<sep>":
            continue

        # Ordinary token: emit the cumulative cleaned snapshot.
        decoded_text = sp.decode(generated)
        for token in ["<start>", "<sep>", "<end>"]:
            decoded_text = decoded_text.replace(token, "")
        decoded_text = decoded_text.strip()
        yield decoded_text
211
  @app.get("/generate")
212
  async def generate(request: Request):
213
  prompt = request.query_params.get("prompt", "안녕하세요")