dharmendra committed · Commit 20960a5 · 1 Parent(s): 9f54674

Implement streaming responses for LLM API
app.py
CHANGED
@@ -75,7 +75,11 @@ async def generate_text(request: QuestionRequest):
                 # For more robust streaming, consider Server-Sent Events (SSE) format:
                 # yield f"data: {json.dumps({'token': chunk.content})}\n\n"
                 # For simplicity, we'll just yield the content directly for now.
-
+                if 'response' in chunk:
+                    yield chunk['response']
+                else:
+                    yield str(chunk)
+
                 await asyncio.sleep(0.01)  # Small delay to allow client to process chunks
 
         except Exception as e:
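
For orientation, below is a minimal sketch of the kind of FastAPI endpoint this hunk plausibly sits inside. Only the if/else chunk handling (and the comments around it) comes from the diff above; the app wiring, the QuestionRequest model, the fake_llm_stream stand-in, and the /generate route name are assumptions made for illustration, not code from the commit.

# A hypothetical reconstruction -- only the chunk-handling if/else is from the commit.
import asyncio

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QuestionRequest(BaseModel):
    question: str

async def fake_llm_stream(prompt: str):
    # Stand-in for the real LLM client: yields Ollama-style streaming
    # chunks of the form {'response': '<token>'}.
    for token in ["Streaming ", "responses ", "work."]:
        yield {"response": token}

@app.post("/generate")
async def generate_text(request: QuestionRequest):
    async def stream():
        try:
            async for chunk in fake_llm_stream(request.question):
                # For more robust streaming, consider Server-Sent Events (SSE) format:
                # yield f"data: {json.dumps({'token': chunk.content})}\n\n"
                # For simplicity, we'll just yield the content directly for now.
                if 'response' in chunk:
                    yield chunk['response']
                else:
                    yield str(chunk)

                await asyncio.sleep(0.01)  # Small delay to allow client to process chunks

        except Exception as e:
            yield f"\n[stream error: {e}]"  # surface errors in-band (an assumption)

    return StreamingResponse(stream(), media_type="text/plain")

On the client side, the plain-text chunks arrive incrementally. For example, with httpx (any HTTP client that supports response streaming would work equally well):

import httpx

with httpx.stream("POST", "http://localhost:8000/generate",
                  json={"question": "What is streaming?"}) as response:
    for text in response.iter_text():
        print(text, end="", flush=True)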