import asyncio
import json

import ollama

async def generate_stream(query: str):
    """Streams an Ollama (llama3.2) reply as Server-Sent Events with JSON payloads."""
    try:
        # AsyncClient keeps the call non-blocking inside this async generator.
        stream = await ollama.AsyncClient().chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True
        )

        # Each chunk carries an incremental piece of the reply in message.content.
        async for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"  # SSE format

    except Exception as e:
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"

async def generate_response(query: str):
    """Returns the full, non-streamed response as a dict with "content" or "error"."""
    try:
        # Await the complete reply in one call via the async client.
        response = await ollama.AsyncClient().chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}]
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}