dasomaru committed on
Commit
a91f908
·
verified ·
1 Parent(s): 2a32abb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -6,6 +6,16 @@ from retriever.vectordb import search_documents # 🧠 RAG 검색기 불러오
6
 
7
  model_name = "dasomaru/gemma-3-4bit-it-demo"
8
 
 
 
 
 
 
 
 
 
 
 
9
  @spaces.GPU(duration=300)
10
  def generate_response(query):
11
  # 🚀 generate_response 함수 안에서 매번 로드
 
6
 
7
  model_name = "dasomaru/gemma-3-4bit-it-demo"
8
 
9
+
10
+ # ๐Ÿš€ tokenizer๋Š” CPU์—์„œ๋„ ๋ฏธ๋ฆฌ ๋ถˆ๋Ÿฌ์˜ฌ ์ˆ˜ ์žˆ์Œ
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
12
+ # ๐Ÿš€ model์€ CPU๋กœ๋งŒ ๋จผ์ € ์˜ฌ๋ฆผ (GPU ์•„์ง ์—†์Œ)
13
+ model = AutoModelForCausalLM.from_pretrained(
14
+ model_name,
15
+ torch_dtype=torch.float16, # 4bit model์ด๋‹ˆ๊นŒ
16
+ trust_remote_code=True,
17
+ )
18
+
19
  @spaces.GPU(duration=300)
20
  def generate_response(query):
21
  # 🚀 generate_response 함수 안에서 매번 로드