dharmendra committed on
Commit
0cb7726
·
1 Parent(s): 89183a0

Implement streaming responses for LLM API

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -48,7 +48,7 @@ llm = HuggingFacePipeline(pipeline=pipeline(
48
  tokenizer=tokenizer,
49
  max_new_tokens=512, # Adjust as needed for desired response length
50
  return_full_text=False, # Crucial for getting only the AI's response, esp when ans is small
51
- temperature=0.5, # Controls randomness (0.0 for deterministic, 1.0 for very creative)
52
  do_sample=True # Enable sampling for more varied outputs
53
  ))
54
 
 
48
  tokenizer=tokenizer,
49
  max_new_tokens=512, # Adjust as needed for desired response length
50
  return_full_text=False, # Crucial for getting only the AI's response, esp when ans is small
51
+ temperature=0.3, # Controls randomness (0.0 for deterministic, 1.0 for very creative)
52
  do_sample=True # Enable sampling for more varied outputs
53
  ))
54