# Hugging Face Space: chat UI for NVIDIA's hosted LLaMA 4 Maverick model.
import os

import gradio as gr
import requests
def talk_to_llama(prompt):
    """Send *prompt* to NVIDIA's chat-completions API and return the reply text.

    The API key is read from the ``NVIDIA_API_KEY`` environment variable —
    the original hard-coded a live bearer token in source, which leaks the
    credential to anyone who can see the file.

    Parameters:
        prompt: User message forwarded as a single-turn chat request.

    Returns:
        The assistant's message content on success, or a human-readable
        ``"Error:\\n..."`` string on failure (Gradio just renders whatever
        string comes back, so errors are surfaced in the UI).
    """
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    headers = {
        # Set NVIDIA_API_KEY in the Space/host environment; never commit keys.
        "Authorization": f"Bearer {os.environ.get('NVIDIA_API_KEY', '')}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "meta/llama-4-maverick-17b-128e-instruct",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": 512,
        "temperature": 1.0,
        "top_p": 1.0,
        "stream": False,
    }
    try:
        # timeout keeps a stalled request from hanging the Gradio worker forever
        response = requests.post(url, headers=headers, json=data, timeout=60)
        return response.json()["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        # Network-level failure (DNS, connect, timeout) — no response object.
        return f"Error:\n{exc}"
    except (KeyError, IndexError, ValueError):
        # API answered, but not with the expected JSON shape — show the raw body.
        return f"Error:\n{response.text}"
# Build the web UI: one text box in, the model's reply text out.
chat = gr.Interface(
    fn=talk_to_llama,
    inputs="text",
    outputs="text",
    title="Chat with LLaMA 4 Maverick",
    description="Ask anything! This chatbot uses NVIDIA’s 3.5M token LLaMA 4 Maverick 17B model.",
)

# Guard the launch so importing this module (e.g. from tests or another
# entry point) doesn't start the web server as a side effect.
if __name__ == "__main__":
    chat.launch()