90 committed on
Commit
b3fa588
·
1 Parent(s): 9e47a22

Revert "Update app.py"

Browse files

This reverts commit 9e47a22e6791fbd7332a48a35ea72f1d33a26b35.

Files changed (1) hide show
  1. app.py +7 -14
app.py CHANGED
@@ -12,7 +12,7 @@ model = AutoModelForCausalLM.from_pretrained(
12
  device_map="auto",
13
  trust_remote_code=True
14
  )
15
- @spaces.GPU(duration=120)
16
  def predict(message, history):
17
  # 构建输入
18
  history_text = ""
@@ -22,25 +22,18 @@ def predict(message, history):
22
 
23
  # 生成回复
24
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
25
-
26
- # 使用流式生成
27
- streamer = tokenizer.decode
28
- response = ""
29
-
30
- for outputs in model.generate(
31
  **inputs,
32
  max_new_tokens=10000,
33
  do_sample=True,
34
  temperature=0.7,
35
  top_p=0.9,
36
  repetition_penalty=1.1,
37
- pad_token_id=tokenizer.eos_token_id,
38
- stream_output=True
39
- ):
40
- next_token = outputs[0][inputs.input_ids.shape[1]:]
41
- next_token_text = streamer(next_token, skip_special_tokens=True)
42
- response += next_token_text
43
- yield response.strip()
44
 
45
  # 创建Gradio界面
46
  demo = gr.ChatInterface(
 
12
  device_map="auto",
13
  trust_remote_code=True
14
  )
15
+ @spaces.GPU(duration=60)
16
  def predict(message, history):
17
  # 构建输入
18
  history_text = ""
 
22
 
23
  # 生成回复
24
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
25
+ outputs = model.generate(
 
 
 
 
 
26
  **inputs,
27
  max_new_tokens=10000,
28
  do_sample=True,
29
  temperature=0.7,
30
  top_p=0.9,
31
  repetition_penalty=1.1,
32
+ pad_token_id=tokenizer.eos_token_id
33
+ )
34
+ response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
35
+
36
+ return response.strip()
 
 
37
 
38
  # 创建Gradio界面
39
  demo = gr.ChatInterface(