manoj555 committed on
Commit ee6c602 · verified · 1 Parent(s): 8a773b5

Update app.py

Files changed (1)
  1. app.py +36 -33
app.py CHANGED
@@ -1,47 +1,50 @@
  import gradio as gr
- from langchain.llms import HuggingFacePipeline
- from langchain import LLMChain, PromptTemplate
- from langchain.memory import ConversationBufferMemory
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

- # Load a free model from Hugging Face
- model_name = "microsoft/DialoGPT-medium" # Or try "tiiuae/falcon-rw-1b" or "gpt2"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name)
-
- # Create pipeline
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1000, do_sample=True)
-
- # Wrap with HuggingFacePipeline
- llm = HuggingFacePipeline(pipeline=pipe)
-
- template = """You are a helpful assistant to answer user queries.
- {chat_history}
- User: {user_message}
- Chatbot:"""
-
- prompt = PromptTemplate(
-     input_variables=["chat_history", "user_message"], template=template
  )

- memory = ConversationBufferMemory(memory_key="chat_history")
-
- llm_chain = LLMChain(
-     llm=llm,
-     prompt=prompt,
-     verbose=True,
-     memory=memory,
- )

  def get_text_response(user_message, history):
-     response = llm_chain.predict(user_message=user_message)
      return response

  demo = gr.ChatInterface(
-     get_text_response,
      examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"]
  )

-
  if __name__ == "__main__":
      demo.queue().launch(share=True, debug=True)
 
  import gradio as gr
+ from openai import OpenAI

+ # NVIDIA-compatible OpenAI client
+ client = OpenAI(
+     base_url="https://integrate.api.nvidia.com/v1",
+     api_key="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"
  )

+ # System message
+ system_prompt = {
+     "role": "system",
+     "content": "You are a helpful assistant to answer user queries."
+ }

+ # Main chat function with memory from Gradio (OpenAI-style history)
  def get_text_response(user_message, history):
+     # Combine the system prompt, Gradio's message history (OpenAI format), and the new user message
+     messages = [system_prompt] + history + [{"role": "user", "content": user_message}]
+
+     # Stream response
+     response = ""
+     completion = client.chat.completions.create(
+         model="nvidia/llama-3.1-nemotron-70b-instruct",
+         messages=messages,
+         temperature=0.5,
+         top_p=1,
+         max_tokens=1024,
+         stream=True
+     )
+
+     for chunk in completion:
+         delta = chunk.choices[0].delta
+         if delta and delta.content:
+             response += delta.content
+
      return response

+ # Gradio Chat UI
  demo = gr.ChatInterface(
+     fn=get_text_response,
+     title="🧠 Nemotron 70B Assistant",
+     theme="soft",
+     chatbot=gr.Chatbot(height=400, type="messages"), # <-- important: type="messages"
+     textbox=gr.Textbox(placeholder="Ask me anything...", container=False),
      examples=["How are you doing?", "What are your interests?", "Which places do you like to visit?"]
  )

  if __name__ == "__main__":
      demo.queue().launch(share=True, debug=True)
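
A note on the new client setup: this commit checks a live `nvapi-` API key into a public file, where anyone browsing the repo can copy it. A minimal sketch of the usual alternative, reading the key from an environment variable; the name `NVIDIA_API_KEY` is an assumption here, not something the commit defines (on a Hugging Face Space it would be set as a repository secret):

    import os
    from openai import OpenAI

    # NVIDIA_API_KEY is a hypothetical variable name; set it as a Space
    # secret instead of committing the key. os.environ raises KeyError
    # if the variable is unset, so a missing secret fails fast at startup.
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=os.environ["NVIDIA_API_KEY"],
    )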
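
Also worth noting: the new get_text_response consumes the stream but only returns the finished string, so the browser still waits for the whole completion before showing anything. gr.ChatInterface also accepts generator functions, so yielding the accumulated text would surface tokens as they arrive. A sketch under the same client, model, and system_prompt as the committed code:

    def get_text_response(user_message, history):
        messages = [system_prompt] + history + [{"role": "user", "content": user_message}]
        completion = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            messages=messages,
            temperature=0.5,
            top_p=1,
            max_tokens=1024,
            stream=True,
        )
        response = ""
        for chunk in completion:
            delta = chunk.choices[0].delta
            if delta and delta.content:
                response += delta.content
                yield response  # ChatInterface re-renders the partial reply on each yield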