import os

import gradio as gr
from openai import OpenAI

# Read the API key from the environment (e.g. `export NVIDIA_API_KEY=...`)
# rather than hardcoding a secret in the source.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ["NVIDIA_API_KEY"],
)
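
# Note: the integrate.api.nvidia.com endpoint is OpenAI-compatible, so the
# standard openai client works unchanged; only base_url and api_key differ
# from a stock OpenAI setup.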

system_prompt = {"role": "system", "content": "You are a helpful assistant that answers user queries."}


def get_text_response(user_message, history):
    # gr.ChatInterface passes history as a list of (user, assistant) pairs in
    # its default tuple format; rebuild the full message list from it.
    messages = [system_prompt]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_message})

    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=100,
        stream=True,
    )

    # Yield the growing reply so gr.ChatInterface streams it into the chat
    # window. The interface manages history itself, so the handler should
    # yield only the assistant's text, not an updated history.
    response = ""
    for chunk in completion:
        delta = chunk.choices[0].delta
        if delta and delta.content:
            response += delta.content
            yield response

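
# Hypothetical usage sketch (kept as a comment so it does not run at import
# time), assuming NVIDIA_API_KEY is set: call the handler directly and print
# the reply as it streams.
#
#   for partial in get_text_response("Hello!", history=[]):
#       print(partial)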

demo = gr.ChatInterface(
    fn=get_text_response,
    title="🧠 Nemotron 70B Assistant",
    theme="soft",
    examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"],
)
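
# queue() turns on Gradio's event queue, which is what lets the generator's
# incremental yields stream into the UI; share=True prints a temporary public link.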
if __name__ == "__main__":
    demo.queue().launch(share=True, debug=True)