Spaces:
Runtime error
Runtime error
import gradio as gr | |
import spaces | |
from transformers import pipeline | |
import torch | |
# Initialize model with explicit dtype | |
model = pipeline( | |
"text-generation", | |
model="unsloth/DeepSeek-R1-Distill-Llama-8B", | |
device_map="auto", | |
torch_dtype=torch.float16, # Now recognizes 'torch' | |
model_kwargs={"load_in_8bit": True} | |
) | |
# Increased to 5 minutes | |
def chat_response(message, history): | |
# Add explicit initialization check | |
if not hasattr(chat_response, "pipe"): | |
chat_response.pipe = pipeline(...) | |
# Add timeout handling | |
try: | |
response = chat_response.pipe(...) | |
return response[0]['generated_text'][-1]["content"] | |
except RuntimeError as e: | |
return f"GPU timeout: {str(e)}" | |
demo = gr.ChatInterface( | |
chat_response, | |
chatbot=gr.Chatbot(height=500, type="messages"), # Explicit type | |
textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7), | |
title="DeepSeek-Llama-8B Chat", | |
examples=[["What is AI?"]], | |
retry_btn=None, | |
undo_btn=None | |
) | |
demo.launch() | |