Spaces:

legaltextai
/

test_model

Runtime error

File size: 1,095 Bytes

21b2e67
c0eecae
 
a65b66d
c0eecae
a65b66d
d46b7dc
 
 
5b130c8
a65b66d
 
d46b7dc
 
5b130c8
d46b7dc
5b130c8
 
 
d46b7dc
5b130c8
 
 
 
 
 
 
c0eecae
d46b7dc
 
5b130c8
d46b7dc
5b130c8
 
 
 
21b2e67
5b130c8
c0eecae

import gradio as gr
import spaces
from transformers import pipeline
import torch

# Initialize model with explicit dtype
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    device_map="auto",
    torch_dtype=torch.float16,  # Now recognizes 'torch'
    model_kwargs={"load_in_8bit": True}
)

@spaces.GPU(duration=300)  # Increased to 5 minutes
def chat_response(message, history):
    # Add explicit initialization check
    if not hasattr(chat_response, "pipe"):
        chat_response.pipe = pipeline(...)
    
    # Add timeout handling
    try:
        response = chat_response.pipe(...)
        return response[0]['generated_text'][-1]["content"]
    except RuntimeError as e:
        return f"GPU timeout: {str(e)}"
        

demo = gr.ChatInterface(
    chat_response,
    chatbot=gr.Chatbot(height=500, type="messages"),  # Explicit type
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
    retry_btn=None,
    undo_btn=None
)

demo.launch()