import os

import gradio as gr
from huggingface_hub import InferenceClient


class CodingAssistant:
    def __init__(self):
        self.client = InferenceClient(
            model="codellama/CodeLlama-7b-Instruct-hf",  # Swap in your custom model here
            token=os.getenv("HF_TOKEN"),
        )
        self.chat_history = []

    def respond(self, message, history):
        # Build a Llama-style instruct prompt with recent conversation context
        prompt = f"""[INST] <<SYS>>
You are an expert Python programmer. Provide safe, efficient code solutions.
Recent conversation history: {self.chat_history[-3:] if self.chat_history else 'None'}
<</SYS>>

{message} [/INST]"""

        # Generate the response
        response = self.client.text_generation(
            prompt=prompt,
            max_new_tokens=1024,
            temperature=0.2,
            repetition_penalty=1.1,
        )

        # Update history
        self.chat_history.append((message, response))
        return response


# Initialize the assistant
assistant = CodingAssistant()

# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=assistant.respond,
    examples=[
        "How to implement a neural network in PyTorch?",
        "Write a Python decorator for rate limiting",
        "Optimize this pandas code: ...",
    ],
    title="Code Expert Assistant",
    description="Ask me complex Python programming questions",
    theme="soft",
    retry_btn=None,
    undo_btn=None,
)

if __name__ == "__main__":
    demo.launch()
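
# --- Optional streaming variant (a sketch, not part of the original script) ---
# InferenceClient.text_generation accepts stream=True, which yields text chunks
# as they are generated, and gr.ChatInterface renders partial output when the
# callback is a generator. The method below is a hypothetical drop-in
# replacement for CodingAssistant.respond that streams tokens to the UI.
def respond_stream(self, message, history):
    prompt = f"""[INST] <<SYS>>
You are an expert Python programmer. Provide safe, efficient code solutions.
Recent conversation history: {self.chat_history[-3:] if self.chat_history else 'None'}
<</SYS>>

{message} [/INST]"""

    partial = ""
    # With stream=True (and no details requested), text_generation yields strings.
    for chunk in self.client.text_generation(
        prompt=prompt,
        max_new_tokens=1024,
        temperature=0.2,
        repetition_penalty=1.1,
        stream=True,
    ):
        partial += chunk
        yield partial  # ChatInterface displays the growing response

    # Record the full exchange once generation finishes
    self.chat_history.append((message, partial))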