from llama_cpp import Llama

# Load quantized TinyLlama
llm = Llama(model_path="models/tinyllama-1.1b-chat.gguf", n_ctx=2048)

def tinyllama_chat(prompt: str) -> str:
    try:
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"⚠️ TinyLlama failed: {str(e)}"
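
As a quick sanity check, the helper can be called directly once the GGUF file is in place. This is a minimal sketch: the model path above and the prompt below are placeholders, not fixed values.

# Example usage (assumes models/tinyllama-1.1b-chat.gguf exists; prompt is illustrative)
if __name__ == "__main__":
    answer = tinyllama_chat("Explain quantization in one sentence.")
    print(answer)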