dharmendra committed on
Commit 20960a5
1 Parent(s): 9f54674

Implement streaming responses for LLM API

Files changed (1)
  1. app.py +5 -1
app.py CHANGED
@@ -75,7 +75,11 @@ async def generate_text(request: QuestionRequest):
             # For more robust streaming, consider Server-Sent Events (SSE) format:
             # yield f"data: {json.dumps({'token': chunk.content})}\n\n"
             # For simplicity, we'll just yield the content directly for now.
-            yield chunk['content']
+            if 'response' in chunk:
+                yield chunk['response']
+            else:
+                yield str(chunk)
+
             await asyncio.sleep(0.01)  # Small delay to allow client to process chunks
 
     except Exception as e:
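
For context, a minimal sketch of how a chunk-yielding generator like the one above is typically wrapped in a FastAPI StreamingResponse. The route path, the QuestionRequest field, and the hard-coded sample chunks below are assumptions for illustration only, not taken from app.py.

# Sketch only: route path, request field, and sample chunks are assumed, not from app.py.
import asyncio
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QuestionRequest(BaseModel):
    question: str  # assumed field name

@app.post("/generate")  # assumed route
async def generate_text(request: QuestionRequest):
    async def token_stream():
        # Stand-in chunks; in app.py these would come from the LLM client.
        for chunk in ({"response": "Hello"}, {"response": " world"}):
            if "response" in chunk:
                yield chunk["response"]
            else:
                yield str(chunk)
            await asyncio.sleep(0.01)  # small pause so the client can process chunks
    return StreamingResponse(token_stream(), media_type="text/plain")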