dev-mode-orpheus

Paused

App Files Files Community

Tomtom84 committed on Apr 21

Commit

7d18470

verified ·

1 Parent(s): bb5c241

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -15

app.py CHANGED Viewed

@@ -41,18 +41,19 @@ class AudioMask(LogitsProcessor):
         end_token = start_token + 4096
         allowed_audio = torch.arange(start_token, end_token, device=self.allow.device)
-        allowed = torch.cat([
-            torch.tensor([NEW_BLOCK], device=self.allow.device),
-            allowed_audio
-        ])
-        # Penalize NEW_BLOCK if buffer is not empty
-        if self.buffer_pos > 0:
-            logits[:, NEW_BLOCK] = float("-inf") # Apply a large negative penalty
         if self.sent_blocks:                        # ab 1. Block EOS zulassen
             allowed = torch.cat([allowed, self.eos])
         mask = logits.new_full(logits.shape, float("-inf"))
         mask[:, allowed] = 0
         return logits + mask
@@ -161,13 +162,18 @@ async def tts(ws: WebSocket):
                 if t == NEW_BLOCK:
                     buf.clear()
                     continue
-                buf.append(t - AUDIO_BASE)
-                # masker.buffer_pos += 1 # Removed increment here
-                if len(buf) == 7:
-                    await ws.send_bytes(decode_block(buf))
-                    buf.clear()
-                    masker.sent_blocks = 1      # ab jetzt EOS zulässig
-                    # masker.buffer_pos = 0 # Removed reset here
     except (StopIteration, WebSocketDisconnect):
         pass

         end_token = start_token + 4096
         allowed_audio = torch.arange(start_token, end_token, device=self.allow.device)
+        # Only allow NEW_BLOCK if buffer is full, otherwise only allow audio tokens
+        if self.buffer_pos == 7:
+            allowed = torch.cat([
+                torch.tensor([NEW_BLOCK], device=self.allow.device),
+                allowed_audio
+            ])
+        else:
+            allowed = allowed_audio # Only allow audio tokens
         if self.sent_blocks:                        # ab 1. Block EOS zulassen
             allowed = torch.cat([allowed, self.eos])
+        mask = logits.new_full(logits.shape, float("-inf"))
         mask = logits.new_full(logits.shape, float("-inf"))
         mask[:, allowed] = 0
         return logits + mask
                 if t == NEW_BLOCK:
                     buf.clear()
                     continue
+                # Only append if it's an audio token
+                if t >= AUDIO_BASE and t < AUDIO_BASE + AUDIO_SPAN:
+                    buf.append(t - AUDIO_BASE)
+                    # masker.buffer_pos += 1 # Removed increment here
+                    if len(buf) == 7:
+                        await ws.send_bytes(decode_block(buf))
+                        buf.clear()
+                        masker.sent_blocks = 1      # ab jetzt EOS zulässig
+                        # masker.buffer_pos = 0 # Removed reset here
+                else:
+                    # Optional: Log unexpected tokens
+                    print(f"DEBUG: Skipping non-audio token: {t}", flush=True)
     except (StopIteration, WebSocketDisconnect):
         pass