KraTUZen committed
Commit c4c9524 · 1 Parent(s): 7c31d85
Files changed (1):
  1. app.py +7 -9
app.py CHANGED
```diff
@@ -22,8 +22,7 @@ model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0
 # Using CUDA for an optimal experience
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model = model.to(device).eval()
-
+model = model.to(device)
 
 # Defining a custom stopping criteria class for the model's text generation
 
```
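This hunk drops the `.eval()` chained onto the device move (the final hunk removes the stray trailing `model.eval()` as well). For context, a minimal sketch of the load-and-place step as it stands after the commit; the tokenizer line is an assumption, since the diff only shows the `from_pretrained` call in the hunk header and the device logic. `eval()` just disables dropout-style training behavior, so `generate()` works either way.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint named in the hunk header.
name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(name)  # assumed; not shown in this diff
model = AutoModelForCausalLM.from_pretrained(name)

# Prefer the GPU when one is available, exactly as in the diff.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)  # pre-commit this line also chained .eval()
```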
 
```diff
@@ -52,11 +51,11 @@ def generate_response(user_input, history):
     generate_kwargs = dict(
         **model_inputs,
         streamer=streamer,
-        max_new_tokens=256,
-        do_sample=False,
-        # top_p=0.95,
-        # top_k=50,
-        # temperature=0.7,
+        max_new_tokens=1024,
+        do_sample=True,
+        top_p=0.95,
+        top_k=50,
+        temperature=0.7,
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
     )
```
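This is the substantive change: greedy decoding (`do_sample=False`, with the sampling knobs commented out) becomes stochastic sampling with nucleus and top-k filtering, plus a four-fold larger token budget. Below is a sketch of how these kwargs are typically consumed; it assumes `streamer` is a `TextIteratorStreamer` (the diff shows only `streamer=streamer`), and the `StopOnEos` class is a stand-in for the app's custom stopping criteria.

```python
from threading import Thread

import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          StoppingCriteria, StoppingCriteriaList,
                          TextIteratorStreamer)

name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Stand-in for the app's custom criteria: stop once EOS is emitted.
class StopOnEos(StoppingCriteria):
    def __call__(self, input_ids, scores, **kwargs):
        return input_ids[0, -1].item() == tokenizer.eos_token_id

stop = StopOnEos()

messages = [{"role": "user", "content": "Explain top-p sampling in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False,
                                       add_generation_prompt=True)
model_inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Yields decoded text chunks as generate() produces them.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
                                skip_special_tokens=True)

generate_kwargs = dict(
    **model_inputs,
    streamer=streamer,
    max_new_tokens=1024,  # raised from 256: room for longer replies
    do_sample=True,       # sample instead of picking the argmax token
    top_p=0.95,           # nucleus sampling: keep the top 95% probability mass
    top_k=50,             # and at most the 50 likeliest tokens
    temperature=0.7,      # <1.0 sharpens the distribution before sampling
    num_beams=1,
    stopping_criteria=StoppingCriteriaList([stop]),
)

# generate() blocks, so run it on a worker thread and consume the stream.
Thread(target=model.generate, kwargs=generate_kwargs).start()
text = ""
for chunk in streamer:
    text += chunk
print(text)
```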
```diff
@@ -389,5 +388,4 @@ with gr.Blocks(css=css, fill_width=True, title="LogicLinkV5") as demo:
         queue=False
     )
 
-demo.queue().launch(share=True, debug=True)
-model.eval()
+demo.queue().launch(share=True, debug=True)
```
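The last hunk deletes the duplicate `model.eval()` that trailed the `launch()` call, dead code in practice, since `launch()` blocks and the line would only execute after the server stopped. What remains is the standard Gradio exit; a minimal stand-in sketch follows (the real app also passes `css=...` and `fill_width=True` and wires up more components).

```python
import gradio as gr

# Stand-in UI: the real app's css, fill_width and components are omitted.
with gr.Blocks(title="LogicLinkV5") as demo:
    chatbot = gr.Chatbot()
    box = gr.Textbox()
    # Echo handler just to make the sketch self-contained.
    box.submit(lambda msg, hist: ("", hist + [(msg, "(reply)")]),
               [box, chatbot], [box, chatbot], queue=False)

# queue() enables the request queue that streaming callbacks rely on;
# share=True opens a public *.gradio.live tunnel, and debug=True keeps
# the script attached and surfaces server errors in the console.
demo.queue().launch(share=True, debug=True)
```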
 