Update app.py
app.py CHANGED

@@ -1,47 +1,50 @@
 import gradio as gr
-from 
-from langchain import LLMChain, PromptTemplate
-from langchain.memory import ConversationBufferMemory
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from openai import OpenAI
 
-#
-
-
-
-
-# Create pipeline
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1000, do_sample=True)
-
-# Wrap with HuggingFacePipeline
-llm = HuggingFacePipeline(pipeline=pipe)
-
-template = """You are a helpful assistant to answer user queries.
-{chat_history}
-User: {user_message}
-Chatbot:"""
-
-prompt = PromptTemplate(
-    input_variables=["chat_history", "user_message"], template=template
+# NVIDIA-compatible OpenAI client
+client = OpenAI(
+    base_url="https://integrate.api.nvidia.com/v1",
+    api_key="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"
 )
 
-
-
-
-
-
-    verbose=True,
-    memory=memory,
-)
-
+# System message
+system_prompt = {
+    "role": "system",
+    "content": "You are a helpful assistant to answer user queries."
+}
+
+# Main chat function with memory from Gradio (OpenAI-style history)
 def get_text_response(user_message, history):
-
+    # Convert Gradio message history (OpenAI format) + new user message
+    messages = [system_prompt] + history + [{"role": "user", "content": user_message}]
+
+    # Stream response
+    response = ""
+    completion = client.chat.completions.create(
+        model="nvidia/llama-3.1-nemotron-70b-instruct",
+        messages=messages,
+        temperature=0.5,
+        top_p=1,
+        max_tokens=1024,
+        stream=True
+    )
+
+    for chunk in completion:
+        delta = chunk.choices[0].delta
+        if delta and delta.content:
+            response += delta.content
+
     return response
 
+# Gradio Chat UI
 demo = gr.ChatInterface(
-    get_text_response,
+    fn=get_text_response,
+    title="🧠 Nemotron 70B Assistant",
+    theme="soft",
+    chatbot=gr.Chatbot(height=400, type="messages"),  # <-- important: type="messages"
+    textbox=gr.Textbox(placeholder="Ask me anything...", container=False),
     examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"]
 )
 
-
 if __name__ == "__main__":
     demo.queue().launch(share=True, debug=True)
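Since the new handler already requests the completion with stream=True, it could yield the partial reply instead of returning only the final string; gr.ChatInterface renders generator output incrementally. Below is a minimal sketch of that variant, not part of this commit. It also assumes the NVIDIA key is supplied through an environment variable (the name NVIDIA_API_KEY is an assumption) rather than hardcoded in app.py; the endpoint, model name, and sampling parameters are copied from the committed code.

import os

import gradio as gr
from openai import OpenAI

# Sketch only: same NVIDIA endpoint as app.py, but the key comes from the
# environment (variable name NVIDIA_API_KEY is an assumption).
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY"),
)

system_prompt = {
    "role": "system",
    "content": "You are a helpful assistant to answer user queries."
}

def get_text_response(user_message, history):
    # Gradio supplies history in OpenAI message format because the Chatbot
    # below uses type="messages"; prepend the system prompt and append the
    # new user turn.
    messages = [system_prompt] + history + [{"role": "user", "content": user_message}]
    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=1024,
        stream=True
    )
    # Yield the accumulated text after each chunk so gr.ChatInterface shows
    # the reply as it is generated instead of waiting for the full answer.
    response = ""
    for chunk in completion:
        delta = chunk.choices[0].delta
        if delta and delta.content:
            response += delta.content
            yield response

demo = gr.ChatInterface(
    fn=get_text_response,
    chatbot=gr.Chatbot(height=400, type="messages"),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False),
    examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"]
)

if __name__ == "__main__":
    demo.queue().launch(share=True, debug=True)

The only behavioral change in this sketch is the yield inside the chunk loop; returning the final string once at the end, as the committed version does, also works if incremental display is not needed.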