Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
import os, json, asyncio, torch, logging
|
3 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
4 |
from huggingface_hub import login
|
5 |
-
from transformers import
|
6 |
-
|
7 |
from snac import SNAC
|
8 |
|
9 |
# ── 0. Auth & Device ────────────────────────────────────────────────
|
@@ -58,6 +58,7 @@ async def load_models():
|
|
58 |
model = AutoModelForCausalLM.from_pretrained(
|
59 |
MODEL_REPO,
|
60 |
low_cpu_mem_usage=True,
|
|
|
61 |
device_map={"": 0} if device == "cuda" else None,
|
62 |
torch_dtype=torch.bfloat16 if device == "cuda" else None,
|
63 |
)
|
|
|
2 |
import os, json, asyncio, torch, logging
|
3 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
4 |
from huggingface_hub import login
|
5 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor
|
6 |
+
from transformers.generation.utils import Cache
|
7 |
from snac import SNAC
|
8 |
|
9 |
# ── 0. Auth & Device ────────────────────────────────────────────────
|
|
|
58 |
model = AutoModelForCausalLM.from_pretrained(
|
59 |
MODEL_REPO,
|
60 |
low_cpu_mem_usage=True,
|
61 |
+
return_legacy_cache=True,
|
62 |
device_map={"": 0} if device == "cuda" else None,
|
63 |
torch_dtype=torch.bfloat16 if device == "cuda" else None,
|
64 |
)
|