thanglekdi committed
Commit 3afc718 · 1 Parent(s): a793a92

test deepseek

Files changed (2)
  1. phoGPT.py +62 -62
  2. test.py +192 -192
phoGPT.py CHANGED
@@ -1,74 +1,74 @@
- # app.py
- import gradio as gr # type: ignore
- import torch # type: ignore
- from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

- # 1️⃣ Configure and load the model + tokenizer
- model_path = "vinai/PhoGPT-4B-Chat"

- config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
- config.init_device = "cpu"

- model = AutoModelForCausalLM.from_pretrained("vinai/PhoGPT-4B-Chat", trust_remote_code=True)
- model.eval()
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-     # 2.1 — Collect the system message and the history into the messages list
-     messages = [{"role": "system", "content": system_message}]
-     for u, b in history:
-         if u:
-             messages.append({"role": "user", "content": u})
-         if b:
-             messages.append({"role": "assistant", "content": b})
-     messages.append({"role": "user", "content": message})

-     # 2.2 — Build the chat prompt
-     input_prompt = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )

-     # 2.3 — Tokenize and move to the device
-     # inputs = tokenizer(input_prompt, return_tensors="pt")
-     input_ids = tokenizer(input_prompt, return_tensors="pt")
-     # inputs = {k: v.to(model.device) for k, v in inputs.items()}

-     # 2.4 — Generate text
-     outputs = model.generate(
-         inputs=input_ids["input_ids"],
-         max_new_tokens=max_tokens,
-         temperature=temperature,
-         top_p=top_p,
-         do_sample=True,
-         eos_token_id=tokenizer.eos_token_id,
-         pad_token_id=tokenizer.pad_token_id,
-     )
-     # print('!!!! OUTPUTS 1: ',outputs)
-     # 2.5 — Decode and extract the assistant's reply
-     response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-     print('!! OUTPUTS 2: ',response)

-     response = response.split("### Trả lời:")[1]
-     print('!!!! OUTPUTS 3: ',response)
-     return response

-     # 2.6 — Update the history and return it
-     # history.append((message, response))
-     # return history

- # 3️⃣ Gradio interface
- demo = gr.ChatInterface(
-     respond, # the reply function
-     additional_inputs=[
-         gr.Textbox("Bạn là một chatbot tiếng Việt thân thiện.", label="System message"),
-         gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-     ],
- )

- if __name__ == "__main__":
-     demo.launch()
 
+ # # app.py
+ # import gradio as gr # type: ignore
+ # import torch # type: ignore
+ # from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

+ # # 1️⃣ Configure and load the model + tokenizer
+ # model_path = "vinai/PhoGPT-4B-Chat"

+ # config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+ # config.init_device = "cpu"

+ # model = AutoModelForCausalLM.from_pretrained("vinai/PhoGPT-4B-Chat", trust_remote_code=True)
+ # model.eval()
+ # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


+ # def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+ # # 2.1 — Collect the system message and the history into the messages list
+ # messages = [{"role": "system", "content": system_message}]
+ # for u, b in history:
+ # if u:
+ # messages.append({"role": "user", "content": u})
+ # if b:
+ # messages.append({"role": "assistant", "content": b})
+ # messages.append({"role": "user", "content": message})

+ # # 2.2 — Build the chat prompt
+ # input_prompt = tokenizer.apply_chat_template(
+ # messages,
+ # tokenize=False,
+ # add_generation_prompt=True
+ # )

+ # # 2.3 — Tokenize and move to the device
+ # # inputs = tokenizer(input_prompt, return_tensors="pt")
+ # input_ids = tokenizer(input_prompt, return_tensors="pt")
+ # # inputs = {k: v.to(model.device) for k, v in inputs.items()}

+ # # 2.4 — Generate text
+ # outputs = model.generate(
+ # inputs=input_ids["input_ids"],
+ # max_new_tokens=max_tokens,
+ # temperature=temperature,
+ # top_p=top_p,
+ # do_sample=True,
+ # eos_token_id=tokenizer.eos_token_id,
+ # pad_token_id=tokenizer.pad_token_id,
+ # )
+ # # print('!!!! OUTPUTS 1: ',outputs)
+ # # 2.5 — Decode and extract the assistant's reply
+ # response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+ # print('!! OUTPUTS 2: ',response)

+ # response = response.split("### Trả lời:")[1]
+ # print('!!!! OUTPUTS 3: ',response)
+ # return response

+ # # 2.6 — Update the history and return it
+ # # history.append((message, response))
+ # # return history

+ # # 3️⃣ Gradio interface
+ # demo = gr.ChatInterface(
+ # respond, # the reply function
+ # additional_inputs=[
+ # gr.Textbox("Bạn là một chatbot tiếng Việt thân thiện.", label="System message"),
+ # gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
+ # gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
+ # gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+ # ],
+ # )

+ # if __name__ == "__main__":
+ # demo.launch()
test.py CHANGED
@@ -1,205 +1,205 @@
- import gradio as gr # type: ignore
- from huggingface_hub import InferenceClient # type: ignore
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
-
-
-
- ###########################
-
- # app.py
- import gradio as gr # type: ignore
- import os
-
- # import openai # type: ignore
- # # openai.api_key = os.getenv("OPENAI_API_KEY")
- # client = openai.OpenAI()
  # def respond(
- # message,
- # history: list[tuple[str, str]],
- # system_message,
- # max_tokens,
- # temperature,
- # top_p,
- # image_uploaded,
- # file_uploaded
- # ):
-
- # #read system message
  # messages = [{"role": "system", "content": system_message}]
-
- # #read history
  # for val in history:
  # if val[0]:
  # messages.append({"role": "user", "content": val[0]})
  # if val[1]:
  # messages.append({"role": "assistant", "content": val[1]})
-
- # #read output
  # messages.append({"role": "user", "content": message})
- # print("## Messages: \n", messages) #debug output

- # #create output
- # response = client.responses.create(
- # model="gpt-4.1-nano",
- # input=messages,
  # temperature=temperature,
  # top_p=top_p,
- # max_output_tokens=max_tokens
- # )
-
- # #read output
- # response = response.output_text
- # print("## Response: ", response) #debug output
- # print("\n")
- # yield response #chat reply
-
- # import torch
- # from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
- # model_name = "deepseek-ai/deepseek-math-7b-base"
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
- # model = AutoModelForCausalLM.from_pretrained(model_name)
- # # model.generation_config = GenerationConfig.from_pretrained(model_name)
- # # model.generation_config.pad_token_id = model.generation_config.eos_token_id
- # def deepseek(
- # message,
- # history: list[tuple[str, str]],
- # system_message,
- # max_tokens,
- # temperature,
- # top_p):


- # # messages = [
- # # {"role": "user", "content": "what is the integral of x^2 from 0 to 2?\nPlease reason step by step, and put your final answer within \\boxed{}."}
- # # ]
- # messages = [
- # {"role": "user", "content": message}
- # ]
-
- # input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
- # outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
- # print(outputs)
- # print("\n")
- # result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
- # print(result)
- # return result
-
- # import replicate
- # def deepseek_api_replicate(
- # user_message,
- # history: list[tuple[str, str]],
- # system_message,
- # max_new_tokens,
- # temperature,
- # top_p):
- # """
- # Call DeepSeek Math on Replicate and return the result right away.
-
- # Returns:
- # str or [bytes]: the output generated by the model
- # """
- # # 1. Initialize the client and authenticate
- # # token = os.getenv("REPLICATE_API_TOKEN")
- # # if not token:
- # # raise RuntimeError("Missing REPLICATE_API_TOKEN") # keep the token in an environment variable for security
- # client = replicate.Client(api_token="REPLICATE_API_TOKEN")
-
- # # 2. Call the model
- # output = client.run(
- # "deepseek-ai/deepseek-math-7b-base:61f572dae0985541cdaeb4a114fd5d2d16cb40dac3894da10558992fc60547c7",
- # input={
- # "system_prompt": system_message,
- # "user_prompt": user_message,
- # "max_new_tokens": max_new_tokens,
- # "temperature": temperature,
- # "top_p": top_p
- # }
- # )
-
- # # 3. Return the result
- # return output
-
- import call_api
-
-
- chat = gr.ChatInterface(
-     call_api.respond, #chat
-     title="Trợ lý Học Tập AI",
-     description="Nhập câu hỏi của bạn về Toán, Lý, Hóa, Văn… và nhận giải đáp chi tiết ngay lập tức!",
-     additional_inputs=[
-         gr.Textbox("Bạn là một chatbot tiếng Việt thân thiện.", label="System message"),
-         gr.Slider(1, 2048, value=200, step=1, label="Max new tokens"),
-         gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-         # gr.Image(type="pil", label="Attach an image (optional)"),
-         # gr.File(label="Upload a file (optional)"),
-     ],
-     examples=[
-         # Each item: [message, system_message, max_tokens, temperature, top_p]
-         ["tích phân của x^2 từ 0 đến 2 là gì? vui lòng lập luận từng bước, và đặt kết quả cuối cùng trong \boxed{}", "bạn là nhà toán học", 100, 0.7, 0.95],
-     ],
- )
-
- if __name__ == "__main__":
-     chat.launch()
 
+ # import gradio as gr # type: ignore
+ # from huggingface_hub import InferenceClient # type: ignore
+
+ # """
+ # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ # """
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+
  # def respond(
+ # message,
+ # history: list[tuple[str, str]],
+ # system_message,
+ # max_tokens,
+ # temperature,
+ # top_p,
+ # ):
  # messages = [{"role": "system", "content": system_message}]
+
  # for val in history:
  # if val[0]:
  # messages.append({"role": "user", "content": val[0]})
  # if val[1]:
  # messages.append({"role": "assistant", "content": val[1]})
+
  # messages.append({"role": "user", "content": message})

+ # response = ""
+
+ # for message in client.chat_completion(
+ # messages,
+ # max_tokens=max_tokens,
+ # stream=True,
  # temperature=temperature,
  # top_p=top_p,
+ # ):
+ # token = message.choices[0].delta.content
+
+ # response += token
+ # yield response
+
+
+ # """
+ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ # """
+ # demo = gr.ChatInterface(
+ # respond,
+ # additional_inputs=[
+ # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+ # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+ # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+ # gr.Slider(
+ # minimum=0.1,
+ # maximum=1.0,
+ # value=0.95,
+ # step=0.05,
+ # label="Top-p (nucleus sampling)",
+ # ),
+ # ],
+ # )
+
+
+ # if __name__ == "__main__":
+ # demo.launch()
+
+
+
+ # ###########################
+
+ # # app.py
+ # import gradio as gr # type: ignore
+ # import os
+
+ # # import openai # type: ignore
+ # # # openai.api_key = os.getenv("OPENAI_API_KEY")
+ # # client = openai.OpenAI()
+ # # def respond(
+ # # message,
+ # # history: list[tuple[str, str]],
+ # # system_message,
+ # # max_tokens,
+ # # temperature,
+ # # top_p,
+ # # image_uploaded,
+ # # file_uploaded
+ # # ):
+
+ # # #read system message
+ # # messages = [{"role": "system", "content": system_message}]
+
+ # # #read history
+ # # for val in history:
+ # # if val[0]:
+ # # messages.append({"role": "user", "content": val[0]})
+ # # if val[1]:
+ # # messages.append({"role": "assistant", "content": val[1]})
+
+ # # #read output
+ # # messages.append({"role": "user", "content": message})
+ # # print("## Messages: \n", messages) #debug output
+
+ # # #create output
+ # # response = client.responses.create(
+ # # model="gpt-4.1-nano",
+ # # input=messages,
+ # # temperature=temperature,
+ # # top_p=top_p,
+ # # max_output_tokens=max_tokens
+ # # )
+
+ # # #read output
+ # # response = response.output_text
+ # # print("## Response: ", response) #debug output
+ # # print("\n")
+ # # yield response #chat reply
+
+ # # import torch
+ # # from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+ # # model_name = "deepseek-ai/deepseek-math-7b-base"
+ # # tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # # model = AutoModelForCausalLM.from_pretrained(model_name)
+ # # # model.generation_config = GenerationConfig.from_pretrained(model_name)
+ # # # model.generation_config.pad_token_id = model.generation_config.eos_token_id
+ # # def deepseek(
+ # # message,
+ # # history: list[tuple[str, str]],
+ # # system_message,
+ # # max_tokens,
+ # # temperature,
+ # # top_p):


+ # # # messages = [
+ # # # {"role": "user", "content": "what is the integral of x^2 from 0 to 2?\nPlease reason step by step, and put your final answer within \\boxed{}."}
+ # # # ]
+ # # messages = [
+ # # {"role": "user", "content": message}
+ # # ]
+
+ # # input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+ # # outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
+ # # print(outputs)
+ # # print("\n")
+ # # result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
+ # # print(result)
+ # # return result
+
+ # # import replicate
+ # # def deepseek_api_replicate(
+ # # user_message,
+ # # history: list[tuple[str, str]],
+ # # system_message,
+ # # max_new_tokens,
+ # # temperature,
+ # # top_p):
+ # # """
+ # # Call DeepSeek Math on Replicate and return the result right away.
+
+ # # Returns:
+ # # str or [bytes]: the output generated by the model
+ # # """
+ # # # 1. Initialize the client and authenticate
+ # # # token = os.getenv("REPLICATE_API_TOKEN")
+ # # # if not token:
+ # # # raise RuntimeError("Missing REPLICATE_API_TOKEN") # keep the token in an environment variable for security
+ # # client = replicate.Client(api_token="REPLICATE_API_TOKEN")
+
+ # # # 2. Call the model
+ # # output = client.run(
+ # # "deepseek-ai/deepseek-math-7b-base:61f572dae0985541cdaeb4a114fd5d2d16cb40dac3894da10558992fc60547c7",
+ # # input={
+ # # "system_prompt": system_message,
+ # # "user_prompt": user_message,
+ # # "max_new_tokens": max_new_tokens,
+ # # "temperature": temperature,
+ # # "top_p": top_p
+ # # }
+ # # )
+
+ # # # 3. Return the result
+ # # return output
+
+ # import call_api
+
+
+ # chat = gr.ChatInterface(
+ # call_api.respond, #chat
+ # title="Trợ lý Học Tập AI",
+ # description="Nhập câu hỏi của bạn về Toán, Lý, Hóa, Văn… và nhận giải đáp chi tiết ngay lập tức!",
+ # additional_inputs=[
+ # gr.Textbox("Bạn là một chatbot tiếng Việt thân thiện.", label="System message"),
+ # gr.Slider(1, 2048, value=200, step=1, label="Max new tokens"),
+ # gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
+ # gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+ # # gr.Image(type="pil", label="Attach an image (optional)"),
+ # # gr.File(label="Upload a file (optional)"),
+ # ],
+ # examples=[
+ # # Each item: [message, system_message, max_tokens, temperature, top_p]
+ # ["tích phân của x^2 từ 0 đến 2 là gì? vui lòng lập luận từng bước, và đặt kết quả cuối cùng trong \boxed{}", "bạn là nhà toán học", 100, 0.7, 0.95],
+ # ],
+ # )
+
+ # if __name__ == "__main__":
+ # chat.launch()