import os

import gradio as gr
from huggingface_hub import InferenceClient


class CodingAssistant:
    def __init__(self):
        self.client = InferenceClient(
            model="codellama/CodeLlama-7b-Instruct-hf",  # Swap in your custom model here
            token=os.getenv("HF_TOKEN"),
        )
        self.chat_history = []

    def respond(self, message, history):
        # Build a Llama-style instruct prompt with recent conversation context
        prompt = f"""[INST] <<SYS>>
You are an expert Python programmer. Provide safe, efficient code solutions.
Recent conversation history: {self.chat_history[-3:] if self.chat_history else 'None'}
<</SYS>>

{message} [/INST]"""

        # Generate the response
        response = self.client.text_generation(
            prompt=prompt,
            max_new_tokens=1024,
            temperature=0.2,
            repetition_penalty=1.1,
        )

        # Update history
        self.chat_history.append((message, response))
        return response


# Initialize the assistant
assistant = CodingAssistant()

# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=assistant.respond,
    examples=[
        "How to implement a neural network in PyTorch?",
        "Write a Python decorator for rate limiting",
        "Optimize this pandas code: ...",
    ],
    title="Code Expert Assistant",
    description="Ask me complex Python programming questions",
    theme="soft",
    retry_btn=None,
    undo_btn=None,
)

if __name__ == "__main__":
    demo.launch()
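
# --- Optional streaming variant (a sketch, not part of the original script) ---
# InferenceClient.text_generation accepts stream=True, which yields text chunks
# as they are generated, and gr.ChatInterface renders partial output when the
# callback is a generator. The method below is a hypothetical drop-in
# replacement for CodingAssistant.respond that streams tokens to the UI.
def respond_stream(self, message, history):
    prompt = f"""[INST] <<SYS>>
You are an expert Python programmer. Provide safe, efficient code solutions.
Recent conversation history: {self.chat_history[-3:] if self.chat_history else 'None'}
<</SYS>>

{message} [/INST]"""

    partial = ""
    # With stream=True (and no details requested), text_generation yields strings.
    for chunk in self.client.text_generation(
        prompt=prompt,
        max_new_tokens=1024,
        temperature=0.2,
        repetition_penalty=1.1,
        stream=True,
    ):
        partial += chunk
        yield partial  # ChatInterface displays the growing response

    # Record the full exchange once generation finishes
    self.chat_history.append((message, partial))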