dharmendra committed on
Commit
a05ac69
·
1 Parent(s): 58966a1

Implement streaming responses for LLM API

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -60,13 +60,33 @@ class ChatResponse(BaseModel):
60
 
61
  @app.post("/api/generate")
62
  async def generate_text(request: QuestionRequest):
63
- try:
64
- response = conversation.predict(input=request.question)
65
- return {"response": response}
66
- except Exception as e:
67
- print("Error during generation:")
68
- traceback.print_exc()
69
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  # below when not using langchain fully
 
60
 
61
  @app.post("/api/generate")
62
  async def generate_text(request: QuestionRequest):
63
+ async def generate_stream():
64
+ try:
65
+ # Use LangChain's .stream() method for token-by-token generation
66
+ # This will yield chunks of the response as they are produced
67
+ response_stream = conversation.stream({"input": request.question})
68
+
69
+ for chunk in response_stream:
70
+ # Each chunk is typically a dictionary with a 'content' key
71
+ # We want to send just the new token/text back.
72
+ # Ensure the chunk is stringified and followed by a newline for client parsing.
73
+ # For more robust streaming, consider Server-Sent Events (SSE) format:
74
+ # yield f"data: {json.dumps({'token': chunk.content})}\n\n"
75
+ # For simplicity, we'll just yield the content directly for now.
76
+ yield chunk.content
77
+ await asyncio.sleep(0.01) # Small delay to allow client to process chunks
78
+
79
+ except Exception as e:
80
+ print("Error during streaming generation:")
81
+ traceback.print_exc()
82
+ # You might want to yield an error message to the client here
83
+ yield f"ERROR: {str(e)}\n"
84
+
85
+ # Return a StreamingResponse, which will send chunks as they are yielded by generate_stream()
86
+ # media_type can be "text/event-stream" for SSE, or "text/plain" for simple newline-delimited text.
87
+ # For simplicity, we'll start with "text/plain" for easier initial client parsing.
88
+ return StreamingResponse(generate_stream(), media_type="text/plain")
89
+
90
 
91
 
92
  # below when not using langchain fully