Metal3d committed
Commit 633edd7 · unverified · 1 Parent(s): 4b73699

Moving spaces.GPU

Files changed (1)
  1. main.py +23 -24
main.py CHANGED
@@ -57,12 +57,6 @@ def reformat_math(text):
     return text


-@spaces.GPU
-def generate(model, **kwargs):
-    """Geneerate text using the model."""
-    model.generate(**kwargs)
-
-
 async def chat(prompt, history):
     """Respond to a chat prompt."""
     message = {
@@ -71,25 +65,30 @@ async def chat(prompt, history):
     }

     history = [] if history is None else history
-    text = tokenizer.apply_chat_template(
-        history + [message],
-        tokenize=False,
-        add_generation_prompt=True,
-    )

-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    streamer = AsyncTextIteratorStreamer(tokenizer, skip_special_tokens=True)
-
-    task = asyncio.get_running_loop().run_in_executor(
-        None,
-        functools.partial(
-            generate,
-            model,
-            **model_inputs,
-            max_new_tokens=1024 * 128,
-            streamer=streamer,
-        ),
-    )
+    @spaces.GPU
+    def _generate():
+        text = tokenizer.apply_chat_template(
+            history + [message],
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+
+        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+        streamer = AsyncTextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+        task = asyncio.get_running_loop().run_in_executor(
+            None,
+            functools.partial(
+                model.generate,
+                max_new_tokens=1024 * 128,
+                streamer=streamer,
+                **model_inputs,
+            ),
+        )
+        return task, streamer
+
+    task, streamer = _generate()

     buffer = ""
     reasoning = ""