dev-mode-orpheus

Paused

Tomtom84 committed on Apr 21

Commit

0ca2533

verified ·

1 Parent(s): e28caba

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -94,11 +94,11 @@ def build_prompt(text: str, voice: str):
 def decode_block(block7: list[int]) -> bytes:
     l1,l2,l3=[],[],[]
-    l1.append(block7[0]) # Use value directly
-    l2.append(block7[1]) # Removed offset subtraction
-    l3 += [block7[2], block7[3]] # Removed offset subtractions
-    l2.append(block7[4]) # Removed offset subtraction
-    l3 += [block7[5], block7[6]] # Removed offset subtractions
     with torch.no_grad():
         codes = [torch.tensor(x, device=device).unsqueeze(0)
@@ -163,8 +163,9 @@ async def tts(ws: WebSocket):
                     buf.clear()
                     continue
                 # Only append if it's an audio token
                 if t >= AUDIO_BASE and t < AUDIO_BASE + AUDIO_SPAN:
-                    buf.append(t - AUDIO_BASE)
                     # masker.buffer_pos += 1 # Removed increment here
                     if len(buf) == 7:
                         await ws.send_bytes(decode_block(buf))

 def decode_block(block7: list[int]) -> bytes:
     l1,l2,l3=[],[],[]
+    l1.append(block7[0] - AUDIO_BASE) # Subtract AUDIO_BASE and position offset
+    l2.append(block7[1] - AUDIO_BASE - 4096) # Subtract AUDIO_BASE and position offset
+    l3 += [block7[2] - AUDIO_BASE - 8192, block7[3] - AUDIO_BASE - 12288] # Subtract AUDIO_BASE and position offsets
+    l2.append(block7[4] - AUDIO_BASE - 16384) # Subtract AUDIO_BASE and position offset
+    l3 += [block7[5] - AUDIO_BASE - 20480, block7[6] - AUDIO_BASE - 24576] # Subtract AUDIO_BASE and position offsets
     with torch.no_grad():
         codes = [torch.tensor(x, device=device).unsqueeze(0)
                     buf.clear()
                     continue
                 # Only append if it's an audio token
+                # Only append if it's an audio token
                 if t >= AUDIO_BASE and t < AUDIO_BASE + AUDIO_SPAN:
+                    buf.append(t) # Append original token
                     # masker.buffer_pos += 1 # Removed increment here
                     if len(buf) == 7:
                         await ws.send_bytes(decode_block(buf))