File size: 1,037 Bytes
eb3516b
 
a887949
0bb6457
6a92c4a
eb3516b
6a92c4a
0bb6457
eb3516b
0bb6457
6a92c4a
 
 
 
 
 
 
 
0bb6457
6a92c4a
0bb6457
6a92c4a
257fbd4
 
6a92c4a
 
eb3516b
257fbd4
 
 
 
d2d0180
6a92c4a
257fbd4
eb3516b
0bb6457
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os

import gradio as gr
import requests

def talk_to_llama(prompt):
    """Send a single-turn prompt to NVIDIA's hosted LLaMA 4 Maverick model.

    Args:
        prompt: The user's message, sent as one ``user`` role chat message.

    Returns:
        The model's reply text, or an ``"Error:\\n..."`` string containing the
        raw HTTP response body when the response cannot be parsed.
    """
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    # SECURITY: the API key must never be hardcoded in source — read it from
    # the environment instead. Set NVIDIA_API_KEY before launching the app.
    # (The previously committed key should be revoked/regenerated.)
    api_key = os.environ.get("NVIDIA_API_KEY", "")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "meta/llama-4-maverick-17b-128e-instruct",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": 512,
        "temperature": 1.0,
        "top_p": 1.0,
        "stream": False,
    }

    # timeout prevents the Gradio worker from hanging forever on a stalled
    # connection; 60s comfortably covers a 512-token completion.
    response = requests.post(url, headers=headers, json=data, timeout=60)

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Non-JSON body (e.g. HTML error page) or unexpected schema:
        # surface the raw response so the user can see what went wrong.
        return f"Error:\n{response.text}"

# Wire the model call into a minimal Gradio UI: one text input box mapped
# straight to talk_to_llama, one text output for the reply.
chat = gr.Interface(
    fn=talk_to_llama,
    inputs="text",
    outputs="text",
    title="Chat with LLaMA 4 Maverick",
    # NOTE(review): "3.5M token" looks inaccurate for this model's context
    # window — confirm against NVIDIA's model card before shipping.
    description="Ask anything! This chatbot uses NVIDIA’s 3.5M token LLaMA 4 Maverick 17B model."
)

# Starts a local web server (blocking call); runs at import time since there
# is no `if __name__ == "__main__":` guard.
chat.launch()