Spaces:
Running
on
Zero
Running
on
Zero
Harry Coultas Blum
committed on
Commit
·
c976192
1
Parent(s):
dbb5fad
Trying to fix kvcache
Browse files - vui/inference.py +9 -15
vui/inference.py
CHANGED
@@ -10,7 +10,6 @@ from torch.nn.attention import SDPBackend, sdpa_kernel
|
|
10 |
|
11 |
from vui.model import Vui
|
12 |
from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
|
13 |
-
from vui.utils import timer
|
14 |
from vui.vad import detect_voice_activity as vad
|
15 |
|
16 |
|
@@ -155,7 +154,6 @@ def generate(
|
|
155 |
with (
|
156 |
torch.autocast("cuda", torch.bfloat16, True),
|
157 |
sdpa_kernel([SDPBackend.MATH]),
|
158 |
-
timer("generate"),
|
159 |
):
|
160 |
t1 = time.perf_counter()
|
161 |
batch_size = 1
|
@@ -362,19 +360,15 @@ def render(
|
|
362 |
|
363 |
try:
|
364 |
print("rendering", current_text)
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
top_k=top_k,
|
375 |
-
top_p=top_p,
|
376 |
-
max_gen_len=maxlen,
|
377 |
-
)
|
378 |
|
379 |
codes = codes[..., :-10]
|
380 |
audio = self.codec.from_indices(codes)
|
|
|
10 |
|
11 |
from vui.model import Vui
|
12 |
from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
|
|
|
13 |
from vui.vad import detect_voice_activity as vad
|
14 |
|
15 |
|
|
|
154 |
with (
|
155 |
torch.autocast("cuda", torch.bfloat16, True),
|
156 |
sdpa_kernel([SDPBackend.MATH]),
|
|
|
157 |
):
|
158 |
t1 = time.perf_counter()
|
159 |
batch_size = 1
|
|
|
360 |
|
361 |
try:
|
362 |
print("rendering", current_text)
|
363 |
+
codes = generate(
|
364 |
+
self,
|
365 |
+
current_text,
|
366 |
+
prompt_codes=prev_codes,
|
367 |
+
temperature=temperature,
|
368 |
+
top_k=top_k,
|
369 |
+
top_p=top_p,
|
370 |
+
max_gen_len=maxlen,
|
371 |
+
)
|
|
|
|
|
|
|
|
|
372 |
|
373 |
codes = codes[..., :-10]
|
374 |
audio = self.codec.from_indices(codes)
|