Tomtom84 committed on
Commit
7f32a0e
·
verified ·
1 Parent(s): 96246a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -94,11 +94,11 @@ def build_prompt(text: str, voice: str):
94
 
95
  def decode_block(block7: list[int]) -> bytes:
96
  l1,l2,l3=[],[],[]
97
- l1.append((block7[0] - AUDIO_BASE)) # Subtract AUDIO_BASE and position 0 offset
98
- l2.append((block7[1] - AUDIO_BASE)) # Subtract AUDIO_BASE and position 1 offset
99
- l3 += [(block7[2] - AUDIO_BASE), (block7[3] - AUDIO_BASE)] # Subtract AUDIO_BASE and position offsets
100
- l2.append((block7[4] - AUDIO_BASE)) # Subtract AUDIO_BASE and position 4 offset
101
- l3 += [(block7[5] - AUDIO_BASE), (block7[6] - AUDIO_BASE)] # Subtract AUDIO_BASE and position offsets
102
 
103
  with torch.no_grad():
104
  codes = [torch.tensor(x, device=device).unsqueeze(0)
@@ -165,7 +165,7 @@ async def tts(ws: WebSocket):
165
  # Only append if it's an audio token
166
  # Only append if it's an audio token
167
  if t >= AUDIO_BASE and t < AUDIO_BASE + AUDIO_SPAN:
168
- buf.append(t) # Append original token
169
  # masker.buffer_pos += 1 # Removed increment here
170
  if len(buf) == 7:
171
  await ws.send_bytes(decode_block(buf))
 
94
 
95
  def decode_block(block7: list[int]) -> bytes:
96
  l1,l2,l3=[],[],[]
97
+ l1.append(block7[0] - (AUDIO_BASE + 0 * 4096)) # Subtract AUDIO_BASE + position 0 offset
98
+ l2.append(block7[1] - (AUDIO_BASE + 1 * 4096)) # Subtract AUDIO_BASE + position 1 offset
99
+ l3 += [block7[2] - (AUDIO_BASE + 2 * 4096), block7[3] - (AUDIO_BASE + 3 * 4096)] # Subtract AUDIO_BASE + position offsets
100
+ l2.append(block7[4] - (AUDIO_BASE + 4 * 4096)) # Subtract AUDIO_BASE + position 4 offset
101
+ l3 += [block7[5] - (AUDIO_BASE + 5 * 4096), block7[6] - (AUDIO_BASE + 6 * 4096)] # Subtract AUDIO_BASE + position offsets
102
 
103
  with torch.no_grad():
104
  codes = [torch.tensor(x, device=device).unsqueeze(0)
 
165
  # Only append if it's an audio token
166
  # Only append if it's an audio token
167
  if t >= AUDIO_BASE and t < AUDIO_BASE + AUDIO_SPAN:
168
+ buf.append(t - AUDIO_BASE) # Append token relative to AUDIO_BASE
169
  # masker.buffer_pos += 1 # Removed increment here
170
  if len(buf) == 7:
171
  await ws.send_bytes(decode_block(buf))