Tomtom84 committed on
Commit
b32ff77
·
verified ·
1 Parent(s): a4cfefc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -41,10 +41,12 @@ async def load_models():
41
  REPO,
42
  device_map={"": 0} if device=="cuda" else None,
43
  torch_dtype=torch.bfloat16 if device=="cuda" else None,
44
- low_cpu_mem_usage=True,
45
- return_legacy_cache=True # für compatibility mit past_key_values als Tuple
46
  ).to(device)
47
  model.config.pad_token_id = model.config.eos_token_id
 
 
48
 
49
  # --- Logit‑Masking vorbereiten ---
50
  # reine Audio‑Tokens laufen von 128266 bis 128266+4096-1
 
41
  REPO,
42
  device_map={"": 0} if device=="cuda" else None,
43
  torch_dtype=torch.bfloat16 if device=="cuda" else None,
44
+ low_cpu_mem_usage=True
45
+ #return_legacy_cache=True # für compatibility mit past_key_values als Tuple
46
  ).to(device)
47
  model.config.pad_token_id = model.config.eos_token_id
48
+ # optional, aber explizit:
49
+ model.config.use_cache = True
50
 
51
  # --- Logit‑Masking vorbereiten ---
52
  # reine Audio‑Tokens laufen von 128266 bis 128266+4096-1