Rausda6 committed on
Commit 83b101f · verified · 1 Parent(s): 33c27dc

Update app.py

Files changed (1): app.py (+17, -18)
app.py CHANGED
@@ -14,8 +14,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 import PyPDF2
 import traceback
 import os
-import spaces
-from spaces import GPU
+
 
 import shutil
 from pathlib import Path
@@ -52,11 +51,11 @@ model = None
 tokenizer = None
 generation_config = None
 
-@spaces.GPU
+
 def test_llm_generation():
     try:
         test_prompt = "Hello, how are you today?"
-        inputs = tokenizer(test_prompt, return_tensors="pt")#.to(model.device)
+        inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -70,7 +69,7 @@ def test_llm_generation():
     except Exception as e:
         add_log(f"❌ LLM quick test failed: {e}")
 
-@spaces.GPU
+
 def initialize_model():
     global model, tokenizer, generation_config
 
@@ -90,20 +89,20 @@ def initialize_model():
         add_log("✅ Set pad_token to eos_token")
 
     # Force GPU settings
-    # model = AutoModelForCausalLM.from_pretrained(
-    #     MODEL_ID,
-    #     torch_dtype=torch.float16,
-    #     cache_dir="/data/models",
-    #     trust_remote_code=True,
-    #     token=glotoken,
-    #     device_map={"": 0},  # <- force GPU:0
-    #     low_cpu_mem_usage=True
-    # )
     model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
+        MODEL_ID,
+        torch_dtype=torch.float16,
         cache_dir="/data/models",
-        trust_remote_code=True
+        trust_remote_code=True,
+        token=glotoken,
+        device_map={"": 0},  # <- force GPU:0
+        low_cpu_mem_usage=True
     )
+    # model = AutoModelForCausalLM.from_pretrained(
+    #     MODEL_ID,
+    #     cache_dir="/data/models",
+    #     trust_remote_code=True
+    # )
     model.eval()
 
     generation_config = GenerationConfig(
@@ -198,8 +197,8 @@ Now format the following:
             truncation=True,
             max_length=2048
         )
-        #inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
-        inputs = {k: v for k, v in inputs.items()}
+        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+        #inputs = {k: v for k, v in inputs.items()}
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
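
For reference, a minimal sketch of the device-placement pattern the new code adopts: the model is pinned to GPU 0 in float16 at load time, and the tokenized inputs are moved to model.device before generate(). The model ID and generation settings below are placeholders rather than the Space's actual values, and device_map requires the accelerate package to be installed.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-org/your-model"  # placeholder, not the Space's real model

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,   # half precision to reduce GPU memory use
    device_map={"": 0},          # place the whole model on GPU 0
    low_cpu_mem_usage=True,      # avoid materializing a full copy in CPU RAM
)
model.eval()

prompt = "Hello, how are you today?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)  # keep input tensors on the model's device
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Unlike the removed @spaces.GPU decorators, which let ZeroGPU attach a GPU for the duration of each call, pinning with device_map assumes the Space runs on persistent GPU hardware.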