import os

import gradio as gr
import requests

# NVIDIA completions endpoint used by this demo.
API_URL = "https://api.nvcf.nvidia.com/v2/completions"

# SECURITY: never hardcode API keys in source. Supply the key via the
# NVIDIA_API_KEY environment variable instead (the previous committed key
# should be revoked).
API_KEY = os.environ.get("NVIDIA_API_KEY", "")


def talk_to_llama(prompt):
    """Send *prompt* to the NVIDIA LLaMA 4 Scout completions API.

    Args:
        prompt: The user's message text.

    Returns:
        The model's reply string on success, or a human-readable error
        message containing the raw server response on failure.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    data = {"messages": [{"role": "user", "content": prompt}]}
    # timeout prevents the UI from hanging forever on an unresponsive server.
    response = requests.post(API_URL, headers=headers, json=data, timeout=30)
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Narrow except: only the errors raised by a non-JSON body or an
        # unexpected response shape — never swallow KeyboardInterrupt etc.
        return "Something went wrong. Here's what the server said: " + str(response.text)


# Build the chatbot interface.
chat = gr.Interface(
    fn=talk_to_llama,
    inputs="text",
    outputs="text",
    title="Chat with LLaMA 4 Scout",
    description="Ask anything! This chatbot uses NVIDIA’s 3.5M token LLaMA 4 Scout model.",
)

# Guard so importing this module doesn't start the web server.
if __name__ == "__main__":
    chat.launch()