import os

import gradio as gr
from openai import OpenAI

# Load the API key from the environment (e.g. Hugging Face Space secrets).
api_key = os.getenv("NV_API_KEY")
if not api_key:
    raise ValueError("Please set the NV_API_KEY environment variable in your Hugging Face Space.")

# NVIDIA's "integrate" endpoint speaks the OpenAI wire protocol, so the
# standard OpenAI client works with only a base_url override.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

# System message prepended to every request.
system_prompt = {
    "role": "system",
    "content": "You are a helpful assistant to answer user queries.",
}


def get_text_response(user_message, history):
    """Stream a chat completion for *user_message* with conversation memory.

    Parameters
    ----------
    user_message : str
        The latest message typed by the user.
    history : list[list[str]]
        Gradio tuple-format history: a list of ``[user, assistant]`` pairs.
        Entries may be ``None`` while a reply is still pending.

    Yields
    ------
    str
        The progressively accumulated assistant reply, so Gradio renders
        tokens as they arrive instead of waiting for the full answer.
    """
    # Convert Gradio pair-format history to OpenAI message dicts.
    # Iterating the pairs directly (instead of the original sum(history, []))
    # is linear rather than quadratic, and lets us skip None placeholders,
    # which the API would reject as invalid message content.
    formatted_history = []
    for user_turn, assistant_turn in history:
        if user_turn is not None:
            formatted_history.append({"role": "user", "content": user_turn})
        if assistant_turn is not None:
            formatted_history.append({"role": "assistant", "content": assistant_turn})

    # Combine system prompt, history, and the current user input.
    messages = [system_prompt] + formatted_history + [
        {"role": "user", "content": user_message}
    ]

    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=100,
        stream=True,
    )

    # BUG FIX: the original accumulated the stream but only *returned* the
    # final string, so the UI never saw partial output despite stream=True.
    # Yielding the running text makes Gradio display tokens incrementally
    # (ChatInterface supports generator callbacks; demo.queue() is enabled).
    response = ""
    for chunk in completion:
        delta = chunk.choices[0].delta
        if delta and delta.content:
            response += delta.content
            yield response


# Gradio chat UI; queue() is required for generator (streaming) handlers.
demo = gr.ChatInterface(
    fn=get_text_response,
    theme="soft",
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False),
    examples=[
        "How are you doing?",
        "What are your interests?",
        "Which places do you like to visit?",
    ],
)

if __name__ == "__main__":
    demo.queue().launch(share=True)