Harry Coultas Blum committed on
Commit
c976192
·
1 Parent(s): dbb5fad

Trying to fix kvcache

Browse files
Files changed (1) hide show
  1. vui/inference.py +9 -15
vui/inference.py CHANGED
@@ -10,7 +10,6 @@ from torch.nn.attention import SDPBackend, sdpa_kernel
10
 
11
  from vui.model import Vui
12
  from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
13
- from vui.utils import timer
14
  from vui.vad import detect_voice_activity as vad
15
 
16
 
@@ -155,7 +154,6 @@ def generate(
155
  with (
156
  torch.autocast("cuda", torch.bfloat16, True),
157
  sdpa_kernel([SDPBackend.MATH]),
158
- timer("generate"),
159
  ):
160
  t1 = time.perf_counter()
161
  batch_size = 1
@@ -362,19 +360,15 @@ def render(
362
 
363
  try:
364
  print("rendering", current_text)
365
- with (
366
- torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH),
367
- torch.autocast("cuda", dtype=torch.bfloat16, enabled=True),
368
- ):
369
- codes = generate(
370
- self,
371
- current_text,
372
- prompt_codes=prev_codes,
373
- temperature=temperature,
374
- top_k=top_k,
375
- top_p=top_p,
376
- max_gen_len=maxlen,
377
- )
378
 
379
  codes = codes[..., :-10]
380
  audio = self.codec.from_indices(codes)
 
10
 
11
  from vui.model import Vui
12
  from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
 
13
  from vui.vad import detect_voice_activity as vad
14
 
15
 
 
154
  with (
155
  torch.autocast("cuda", torch.bfloat16, True),
156
  sdpa_kernel([SDPBackend.MATH]),
 
157
  ):
158
  t1 = time.perf_counter()
159
  batch_size = 1
 
360
 
361
  try:
362
  print("rendering", current_text)
363
+ codes = generate(
364
+ self,
365
+ current_text,
366
+ prompt_codes=prev_codes,
367
+ temperature=temperature,
368
+ top_k=top_k,
369
+ top_p=top_p,
370
+ max_gen_len=maxlen,
371
+ )
 
 
 
 
372
 
373
  codes = codes[..., :-10]
374
  audio = self.codec.from_indices(codes)