dharmendra committed on
Commit 20960a5
1 Parent(s): 9f54674

Implement streaming responses for LLM API

Files changed (1)
  1. app.py +5 -1
app.py CHANGED
@@ -75,7 +75,11 @@ async def generate_text(request: QuestionRequest):
             # For more robust streaming, consider Server-Sent Events (SSE) format:
             # yield f"data: {json.dumps({'token': chunk.content})}\n\n"
             # For simplicity, we'll just yield the content directly for now.
-            yield chunk['content']
+            if 'response' in chunk:
+                yield chunk['response']
+            else:
+                yield str(chunk)
+
             await asyncio.sleep(0.01)  # Small delay to allow client to process chunks
 
     except Exception as e:
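
For context, a minimal sketch of how a chunk-yielding generator like the one above is typically wrapped in a FastAPI StreamingResponse. The route path, the QuestionRequest field, and the hard-coded sample chunks below are assumptions for illustration only, not taken from app.py.

# Sketch only: route path, request field, and sample chunks are assumed, not from app.py.
import asyncio
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QuestionRequest(BaseModel):
    question: str  # assumed field name

@app.post("/generate")  # assumed route
async def generate_text(request: QuestionRequest):
    async def token_stream():
        # Stand-in chunks; in app.py these would come from the LLM client.
        for chunk in ({"response": "Hello"}, {"response": " world"}):
            if "response" in chunk:
                yield chunk["response"]
            else:
                yield str(chunk)
            await asyncio.sleep(0.01)  # small pause so the client can process chunks
    return StreamingResponse(token_stream(), media_type="text/plain")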