import os

import gradio as gr
from openai import OpenAI

# NVIDIA's API catalog exposes an OpenAI-compatible endpoint. Read the key
# from the environment instead of hardcoding it (never commit API keys);
# NVIDIA_API_KEY is a placeholder variable name -- set it to your nvapi-... key.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ["NVIDIA_API_KEY"],
)

# System message prepended to every conversation.
system_prompt = {
    "role": "system",
    "content": "You are a helpful assistant that answers user queries.",
}


def get_text_response(user_message, history):
    # With type="messages", Gradio passes history as a list of
    # {"role", "content"} dicts; keep only those two keys in case extra
    # fields (e.g. metadata) are present, before forwarding to the API.
    past_messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages = [system_prompt] + past_messages + [{"role": "user", "content": user_message}]

    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=1024,
        stream=True,
    )

    # Yield the growing response as chunks arrive so Gradio streams the
    # reply token by token instead of waiting for the full completion.
    response = ""
    for chunk in completion:
        delta = chunk.choices[0].delta
        if delta and delta.content:
            response += delta.content
            yield response
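

# For reference, a minimal non-streaming variant of the same call: a sketch
# only, not wired into the UI below. The helper name get_text_response_once
# is hypothetical.
def get_text_response_once(user_message, history):
    past = [{"role": m["role"], "content": m["content"]} for m in history]
    messages = [system_prompt] + past + [{"role": "user", "content": user_message}]
    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        max_tokens=1024,
    )
    # Without stream=True the full reply arrives in one response object.
    return completion.choices[0].message.content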


# Gradio's high-level chat UI; type="messages" makes the chatbot and the
# history use openai-style role/content dicts, matching the function above.
demo = gr.ChatInterface(
    fn=get_text_response,
    title="🧠 Nemotron 70B Assistant",
    theme="soft",
    chatbot=gr.Chatbot(height=400, type="messages"),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False),
    examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"],
)

if __name__ == "__main__":
    # queue() enables streaming and request queuing; share=True serves a
    # temporary public link in addition to the local one.
    demo.queue().launch(share=True, debug=True)