Spaces:
Paused
Paused
dharmendra
committed on
Commit
·
0cb7726
1
Parent(s):
89183a0
Implement streaming responses for LLM API
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ llm = HuggingFacePipeline(pipeline=pipeline(
|
|
48 |
tokenizer=tokenizer,
|
49 |
max_new_tokens=512, # Adjust as needed for desired response length
|
50 |
return_full_text=False, # Crucial for getting only the AI's response, esp when ans is small
|
51 |
-
temperature=0.
|
52 |
do_sample=True # Enable sampling for more varied outputs
|
53 |
))
|
54 |
|
|
|
48 |
tokenizer=tokenizer,
|
49 |
max_new_tokens=512, # Adjust as needed for desired response length
|
50 |
return_full_text=False, # Crucial for getting only the AI's response, esp when ans is small
|
51 |
+
temperature=0.3, # Controls randomness (0.0 for deterministic, 1.0 for very creative)
|
52 |
do_sample=True # Enable sampling for more varied outputs
|
53 |
))
|
54 |
|