Tomtom84 committed on
Commit 4520cbe · verified · 1 Parent(s): 479f253

Update app.py

Files changed (1):
  1. app.py (+3 -2)
app.py CHANGED
@@ -2,8 +2,8 @@
 import os, json, asyncio, torch, logging
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from huggingface_hub import login
-from transformers import (AutoTokenizer, AutoModelForCausalLM,
-                          LogitsProcessor, generation_utils)
+from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor
+from transformers.generation.utils import Cache
 from snac import SNAC
 
 # ── 0. Auth & Device ────────────────────────────────────────────────
@@ -58,6 +58,7 @@ async def load_models():
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_REPO,
         low_cpu_mem_usage=True,
+        return_legacy_cache=True,
         device_map={"": 0} if device == "cuda" else None,
         torch_dtype=torch.bfloat16 if device == "cuda" else None,
     )
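
For readability, here is the post-commit state of the lines touched above, consolidated from both hunks. MODEL_REPO and device are defined elsewhere in app.py, and the indentation inside load_models() is approximate:

    import os, json, asyncio, torch, logging
    from fastapi import FastAPI, WebSocket, WebSocketDisconnect
    from huggingface_hub import login
    from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor
    from transformers.generation.utils import Cache
    from snac import SNAC

    # ... later, inside load_models() ...
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO,
        low_cpu_mem_usage=True,
        return_legacy_cache=True,
        device_map={"": 0} if device == "cuda" else None,
        torch_dtype=torch.bfloat16 if device == "cuda" else None,
    )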