Yuchan5386 committed on
Commit
257ebb3
·
verified ·
1 Parent(s): 9b8d3c9

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +47 -80
api.py CHANGED
@@ -166,88 +166,55 @@ def is_greedy_response_acceptable(text):
166
 
167
  return True
168
 
169
def generate_text_flex(model, prompt, max_len=100, max_gen=98,
                       repetition_penalty=1.2, temperature=0.55,
                       top_k=50, top_p=0.70, typical_p=None,
                       min_len=12):
    """Sample a chat response token-by-token with repetition penalty,
    temperature, top-k / top-p (and optional typical-p) filtering.

    Args:
        model: Keras-style language model; called as ``model(batch, training=False)``
            and expected to return per-position logits. (Assumes the model accepts
            a fixed-length padded window of ``max_len`` ids — confirm against training code.)
        prompt: User text; wrapped as ``<start> {prompt} <sep>`` before encoding.
        max_len: Model context window (ids); also caps the prompt length.
        max_gen: Maximum number of tokens to sample.
        repetition_penalty: >1 discourages tokens already generated.
        temperature: Softmax temperature (<1 sharpens the distribution).
        top_k / top_p / typical_p: Standard sampling filters; ``None``/0 disables.
        min_len: Minimum sequence length before an early stop is allowed.

    Returns:
        Decoded response text with special tokens removed.
    """
    model_input = text_to_ids(f"<start> {prompt} <sep>")
    model_input = model_input[:max_len]
    generated = list(model_input)

    for _ in range(max_gen):
        # Guard: once the context window is full, position len(generated)-1
        # would fall outside the padded input, so stop generating.
        if len(generated) >= max_len:
            break

        pad_len = max(0, max_len - len(generated))
        input_padded = np.pad(generated, (0, pad_len), constant_values=pad_id)
        input_tensor = tf.convert_to_tensor([input_padded])
        logits = model(input_tensor, training=False)
        next_logits = logits[0, len(generated) - 1].numpy()

        # Repetition penalty — sign-aware (CTRL paper / HF
        # RepetitionPenaltyLogitsProcessor): dividing a *negative* logit by a
        # penalty > 1 would move it toward 0 and RAISE its probability, so
        # negative logits must be multiplied instead.
        for t in set(generated):
            count = generated.count(t)
            factor = repetition_penalty ** count
            if next_logits[t] > 0:
                next_logits[t] /= factor
            else:
                next_logits[t] *= factor

        # Temperature scaling.
        next_logits = next_logits / temperature

        # Numerically stable softmax.
        probs = np.exp(next_logits - np.max(next_logits))
        probs = probs / probs.sum()

        # Top-K filtering: zero out everything below the k-th largest prob.
        if top_k is not None and top_k > 0:
            indices_to_remove = probs < np.sort(probs)[-top_k]
            probs[indices_to_remove] = 0
            probs /= probs.sum()

        # Top-P (nucleus) filtering: keep the smallest prefix of the sorted
        # distribution whose cumulative mass reaches top_p.
        if top_p is not None and 0 < top_p < 1:
            sorted_indices = np.argsort(probs)[::-1]
            sorted_probs = probs[sorted_indices]
            cumulative_probs = np.cumsum(sorted_probs)
            cutoff_index = np.searchsorted(cumulative_probs, top_p, side='right')
            keep_indices = sorted_indices[:cutoff_index + 1]

            filtered_probs = np.zeros_like(probs)
            filtered_probs[keep_indices] = probs[keep_indices]
            filtered_probs /= filtered_probs.sum()
            probs = filtered_probs

        # Typical-p filtering: keep tokens whose information content is
        # closest to the distribution's mean, up to cumulative mass typical_p.
        if typical_p is not None and 0 < typical_p < 1:
            log_probs = -np.log(probs + 1e-10)
            mean_info = np.mean(log_probs)
            deviation = np.abs(log_probs - mean_info)
            sorted_indices = np.argsort(deviation)

            filtered_indices = []
            cumulative_prob = 0.0
            for idx in sorted_indices:
                cumulative_prob += probs[idx]
                filtered_indices.append(idx)
                if cumulative_prob >= typical_p:
                    break

            filtered_probs = np.zeros_like(probs)
            filtered_probs[filtered_indices] = probs[filtered_indices]
            filtered_probs /= filtered_probs.sum()
            probs = filtered_probs

        # Sample the next token from the filtered distribution.
        next_token = np.random.choice(len(probs), p=probs)
        generated.append(int(next_token))

        decoded = sp.decode(generated)
        for t in ["<start>", "<sep>", "<end>"]:
            decoded = decoded.replace(t, "")
        decoded = decoded.strip()

        # Early stop once the text is long enough and ends on EOS or a
        # sentence-final Korean ending / punctuation mark.
        if len(generated) >= min_len and (next_token == end_id or decoded.endswith(('요', '다', '.', '!', '?'))):
            if is_greedy_response_acceptable(decoded):
                return decoded
            else:
                continue

    # Fallback: strip special tokens here too — the original returned the raw
    # decode, inconsistently leaking <start>/<sep>/<end> into the response.
    decoded = sp.decode(generated)
    for t in ["<start>", "<sep>", "<end>"]:
        decoded = decoded.replace(t, "")
    return decoded.strip()
 
 
 
251
 
252
  def mismatch_tone(input_text, output_text):
253
  if "ㅋㅋ" in input_text and not re.search(r'ㅋㅋ|ㅎ|재밌|놀|만나|맛집|여행', output_text):
@@ -372,9 +339,9 @@ def respond(input_text):
372
  return f"{summary}\n다른 궁금한 점 있으신가요?"
373
 
374
  # 일상 대화: 샘플링 + fallback
375
- response = generate_text_flex(model, input_text)
376
  if not is_valid_response(response) or mismatch_tone(input_text, response):
377
- response = generate_text_flex(model, input_text)
378
  return response
379
 
380
  @app.get("/generate", response_class=PlainTextResponse)
 
166
 
167
  return True
168
 
169
def generate_text_beam(model, prompt, max_len=100, beam_width=4, length_penalty=0.7):
    """Decode a chat response with beam search.

    Args:
        model: Keras-style language model; called as ``model(batch, training=False)``
            returning per-position logits. (Assumes a fixed padded window of
            ``max_len`` ids — confirm against training code.)
        prompt: User text; wrapped as ``<start> {prompt} <sep>`` before encoding.
        max_len: Context window size in ids; also bounds generation steps.
        beam_width: Number of hypotheses kept per step.
        length_penalty: Exponent for length normalization when ranking beams
            (Wu et al. / GNMT style); applied at ranking time only.

    Returns:
        Decoded best-beam text with special tokens removed.
    """
    model_input = text_to_ids(f"<start> {prompt} <sep>")
    model_input = model_input[:max_len]

    # Each beam keeps the raw (unnormalized) cumulative log-probability.
    beams = [{
        "sequence": list(model_input),
        "score": 0.0
    }]

    def _clean(token_ids):
        # Decode and strip special tokens, matching the sampling decoder.
        text = sp.decode(token_ids)
        for t in ["<start>", "<sep>", "<end>"]:
            text = text.replace(t, "")
        return text.strip()

    for _ in range(max_len):
        # Guard: all beams share the same length; once the window is full,
        # position len(seq)-1 would index past the padded input.
        if len(beams[0]["sequence"]) >= max_len:
            break

        all_candidates = []

        for beam in beams:
            seq = beam["sequence"]
            pad_len = max(0, max_len - len(seq))
            input_padded = np.pad(seq, (0, pad_len), constant_values=pad_id)
            input_tensor = tf.convert_to_tensor([input_padded])
            logits = model(input_tensor, training=False)[0, len(seq) - 1].numpy()

            # Numerically stable softmax.
            probs = np.exp(logits - np.max(logits))
            probs = probs / probs.sum()

            # Expand each beam by its beam_width most likely next tokens.
            top_indices = probs.argsort()[-beam_width:][::-1]

            for idx in top_indices:
                new_seq = seq + [int(idx)]
                # Epsilon avoids log(0) = -inf poisoning the beam score.
                new_score = beam["score"] + np.log(probs[idx] + 1e-12)
                all_candidates.append({
                    "sequence": new_seq,
                    "score": new_score
                })

        # Rank by length-normalized score WITHOUT mutating the stored raw
        # log-probability: the previous version divided cand["score"] in
        # place every step, so the penalty compounded each iteration.
        beams = sorted(
            all_candidates,
            key=lambda c: c["score"] / (len(c["sequence"]) ** length_penalty),
            reverse=True,
        )[:beam_width]

        # Early exit: a beam that produced EOS and passes the quality gate
        # is returned immediately (special tokens stripped).
        for b in beams:
            if end_id in b["sequence"]:
                decoded = _clean(b["sequence"])
                if is_greedy_response_acceptable(decoded):
                    return decoded

    # No acceptable early exit — return the highest-scoring hypothesis,
    # also with special tokens stripped.
    return _clean(beams[0]["sequence"])
218
 
219
  def mismatch_tone(input_text, output_text):
220
  if "ㅋㅋ" in input_text and not re.search(r'ㅋㅋ|ㅎ|재밌|놀|만나|맛집|여행', output_text):
 
339
  return f"{summary}\n다른 궁금한 점 있으신가요?"
340
 
341
  # 일상 대화: 샘플링 + fallback
342
+ response = generate_text_beam(model, input_text)
343
  if not is_valid_response(response) or mismatch_tone(input_text, response):
344
+ response = generate_text_beam(model, input_text)
345
  return response
346
 
347
  @app.get("/generate", response_class=PlainTextResponse)