Spaces: fluxions/vui-space

Running on Zero

Harry Coultas Blum committed 7 days ago

Commit

c976192

1 Parent(s): dbb5fad

Trying to fix kvcache

Files changed (1) hide show

vui/inference.py CHANGED Viewed

@@ -10,7 +10,6 @@ from torch.nn.attention import SDPBackend, sdpa_kernel
 from vui.model import Vui
 from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
-from vui.utils import timer
 from vui.vad import detect_voice_activity as vad
@@ -155,7 +154,6 @@ def generate(
     with (
         torch.autocast("cuda", torch.bfloat16, True),
         sdpa_kernel([SDPBackend.MATH]),
-        timer("generate"),
     ):
         t1 = time.perf_counter()
         batch_size = 1
@@ -362,19 +360,15 @@ def render(
             try:
                 print("rendering", current_text)
-                with (
-                    torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH),
-                    torch.autocast("cuda", dtype=torch.bfloat16, enabled=True),
-                ):
-                    codes = generate(
-                        self,
-                        current_text,
-                        prompt_codes=prev_codes,
-                        temperature=temperature,
-                        top_k=top_k,
-                        top_p=top_p,
-                        max_gen_len=maxlen,
-                    )
                 codes = codes[..., :-10]
                 audio = self.codec.from_indices(codes)

 from vui.model import Vui
 from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
 from vui.vad import detect_voice_activity as vad
     with (
         torch.autocast("cuda", torch.bfloat16, True),
         sdpa_kernel([SDPBackend.MATH]),
     ):
         t1 = time.perf_counter()
         batch_size = 1
             try:
                 print("rendering", current_text)
+                codes = generate(
+                    self,
+                    current_text,
+                    prompt_codes=prev_codes,
+                    temperature=temperature,
+                    top_k=top_k,
+                    top_p=top_p,
+                    max_gen_len=maxlen,
+                )
                 codes = codes[..., :-10]
                 audio = self.codec.from_indices(codes)