Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -102,7 +102,7 @@ def model_inference(input_dict, history):
|
|
102 |
padding=True,
|
103 |
).to("cuda")
|
104 |
# Set up streaming generation.
|
105 |
-
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
106 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
107 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
108 |
thread.start()
|
@@ -144,7 +144,7 @@ def model_inference(input_dict, history):
|
|
144 |
return_tensors="pt",
|
145 |
padding=True,
|
146 |
).to("cuda")
|
147 |
-
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
148 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
149 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
150 |
thread.start()
|
|
|
102 |
padding=True,
|
103 |
).to("cuda")
|
104 |
# Set up streaming generation.
|
105 |
+
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
106 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
107 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
108 |
thread.start()
|
|
|
144 |
return_tensors="pt",
|
145 |
padding=True,
|
146 |
).to("cuda")
|
147 |
+
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
148 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
149 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
150 |
thread.start()
|