|
import os |
|
import gradio as gr |
|
from openai import OpenAI |
|
|
|
|
|
# Credentials: the NVIDIA API key must come from the environment; fail fast
# at startup rather than on the first request.
api_key = os.environ.get("NV_API_KEY")
if not api_key:
    raise ValueError("Please set the NV_API_KEY environment variable in your Hugging Face Space.")

# OpenAI-compatible client pointed at NVIDIA's hosted inference endpoint.
client = OpenAI(
    api_key=api_key,
    base_url="https://integrate.api.nvidia.com/v1",
)
|
|
|
|
|
# Standing instruction prepended to every conversation sent to the model.
system_prompt = dict(
    role="system",
    content="You are a helpful assistant to answer user queries.",
)
|
|
|
|
|
def get_text_response(user_message, history):
    """Generate the assistant's reply to *user_message* given the chat history.

    Parameters
    ----------
    user_message : str
        The latest message typed by the user.
    history : list
        Gradio tuple-style history: a list of ``[user, assistant]`` pairs.
        Individual slots may be ``None`` (e.g. a pending assistant turn) —
        presumably Gradio's placeholder convention; such slots are skipped.

    Returns
    -------
    str
        The full assistant reply, accumulated from the streamed completion.
    """
    # Flatten the [user, assistant] pairs into alternating role dicts.
    # A nested comprehension replaces the quadratic `sum(history, [])` idiom.
    # Roles are assigned by position in the flattened sequence (even = user,
    # odd = assistant); the None filter runs after enumerate, so positions —
    # and therefore roles — stay aligned with the original pair layout.
    formatted_history = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": msg}
        for i, msg in enumerate(m for pair in history for m in pair)
        if msg is not None  # drop empty placeholder slots; the API rejects null content
    ]

    messages = [system_prompt] + formatted_history + [{"role": "user", "content": user_message}]

    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=100,
        stream=True,
    )

    # Accumulate the streamed deltas into one string; the UI receives the
    # finished reply in a single return rather than token-by-token.
    response = ""
    for chunk in completion:
        delta = chunk.choices[0].delta
        if delta and delta.content:
            response += delta.content

    return response
|
|
|
|
|
# Chat UI wiring: a soft-themed ChatInterface backed by get_text_response.
input_box = gr.Textbox(placeholder="Ask me anything...", container=False)
sample_questions = [
    "How are you doing?",
    "What are your interests?",
    "Which places do you like to visit?",
]

demo = gr.ChatInterface(
    fn=get_text_response,
    theme="soft",
    textbox=input_box,
    examples=sample_questions,
)

if __name__ == "__main__":
    # queue() enables request queuing so concurrent users are handled in order.
    demo.queue().launch(share=True)
|
|