LeviLima committed
Commit d10eead · verified · 1 Parent(s): 6a77b1e

Update app.py

Files changed (1)
  1. app.py +46 -64
app.py CHANGED
@@ -1,64 +1,46 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
 
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+ tokenizer = AutoTokenizer.from_pretrained("MadeAgents/Hammer2.1-3b")
+ model = AutoModelForCausalLM.from_pretrained("MadeAgents/Hammer2.1-3b", torch_dtype=torch.bfloat16, device_map="auto")
+
+ # Example conversation
+ messages = [
+     {"role": "user", "content": "What's the weather like in New York?"},
+     {"role": "assistant", "content": '```\n{"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "celsius"}}\n```'},
+     {"role": "tool", "name": "get_weather", "content": '{"temperature": 72, "description": "Partly cloudy"}'},
+     {"role": "user", "content": "Now, search for the weather in San Francisco."}
+ ]
+
+ # Example function definition (optional)
+ tools = [
+     {
+         "name": "get_weather",
+         "description": "Get the current weather for a location",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
+                 "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit of temperature to return"}
+             },
+             "required": ["location"]
+         }
+     },
+     {
+         "name": "respond",
+         "description": "When you are ready to respond, use this function. This function allows the assistant to formulate and deliver appropriate replies based on the input message and the context of the conversation. Generate a concise response for simple questions, and a more detailed response for complex questions.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "message": {"type": "string", "description": "The content of the message to respond to."}
+             },
+             "required": ["message"]
+         }
+     }
+ ]
+
+ inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
+ out = model.generate(**inputs, max_new_tokens=128)
+ print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):], skip_special_tokens=True))
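
The updated app.py only prints the decoded generation, which for this kind of function-calling model is typically a ```-fenced JSON tool call. As a sketch of the next step, the snippet below shows one way the printed text could be parsed into a function name and arguments; the parse_tool_call helper and its regex are illustrative assumptions, not part of this commit.

import json
import re


def parse_tool_call(generated_text: str):
    """Best-effort parse of a fenced JSON tool call emitted by the model.

    Returns (name, arguments) or None if the text does not look like a tool call.
    Illustrative only; the exact output format depends on the model and prompt.
    """
    # Strip an optional ``` fence around the payload.
    match = re.search(r"```(?:json)?\s*(.*?)\s*```", generated_text, re.DOTALL)
    payload = match.group(1) if match else generated_text.strip()
    try:
        call = json.loads(payload)
    except json.JSONDecodeError:
        return None
    if isinstance(call, dict) and "name" in call:
        return call["name"], call.get("arguments", {})
    return None


# Example: feed in the decoded text printed at the end of app.py.
decoded = '```\n{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "celsius"}}\n```'
print(parse_tool_call(decoded))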