thanglekdi committed
Commit 7fe0df9 · Parent: e4b4bcf
Files changed (1):
  1. app.py +59 -49
app.py CHANGED
@@ -1,69 +1,79 @@
+# app.py
+import torch
 import gradio as gr
-# from huggingface_hub import InferenceClient
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
-# """
-# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-# """
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# 1️⃣ Configure and load the model + tokenizer
+model_path = "vinai/PhoGPT-4B-Chat"
+
+config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+config.init_device = "cuda" if torch.cuda.is_available() else "cpu"
+# If FlashAttention is available, also enable:
+# config.attn_config['attn_impl'] = 'flash'
 
-# Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-tokenizer = AutoTokenizer.from_pretrained("vinai/PhoGPT-4B-Chat", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("vinai/PhoGPT-4B-Chat", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    config=config,
+    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+    trust_remote_code=True,
+)
+model.eval()
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+# 2️⃣ Chat template "### Câu hỏi / ### Trả lời" (reference only; the prompt is built with apply_chat_template below)
+PROMPT_TEMPLATE = "### Câu hỏi: {instruction}\n### Trả lời:"
 
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # 2.1: Collect the system message and chat history into a messages list
+    messages = [{"role": "system", "content": system_message}]
+    for u, b in history:
+        if u:
+            messages.append({"role": "user", "content": u})
+        if b:
+            messages.append({"role": "assistant", "content": b})
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in model.chat_completion(
+    # 2.2: Build the prompt with the model's chat template
+    prompt = tokenizer.apply_chat_template(
         messages,
-        max_tokens=max_tokens,
-        stream=True,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # 2.3: Tokenize and move tensors to the model's device
+    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+    # 2.4: Generate
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+        do_sample=True,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id,
+    )
 
-        response += token
-        yield response
+    # 2.5: Decode and keep only the assistant's reply
+    full = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    answer = full.replace(prompt, "").strip()
 
+    # 2.6: Return only the new reply;
+    # gr.ChatInterface appends it to the chat history itself
+    return answer
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+# 3️⃣ Gradio UI
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
-        gr.Textbox(value=" ", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Textbox("Bạn là một chatbot tiếng Việt thân thiện.", label="System message"),
+        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
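
Note on a behavioral difference: the removed respond() streamed partial replies to the UI via yield, while the rewritten one returns only the finished answer. If streaming is wanted back, a minimal sketch (assuming the same model and tokenizer globals as in the new app.py; respond_stream is a hypothetical name) can run generate() in a background thread with transformers' TextIteratorStreamer and yield the growing reply:

from threading import Thread

from transformers import TextIteratorStreamer

def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
    # Same prompt construction as respond() above
    messages = [{"role": "system", "content": system_message}]
    for u, b in history:
        if u:
            messages.append({"role": "user", "content": u})
        if b:
            messages.append({"role": "assistant", "content": b})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # skip_prompt=True drops the echoed prompt, so only newly generated text is yielded
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            streamer=streamer,
        ),
    ).start()

    # Yield the accumulated reply as chunks arrive from the generation thread
    response = ""
    for new_text in streamer:
        response += new_text
        yield response

gr.ChatInterface accepts a generator function directly, so passing fn=respond_stream would display the reply as it is generated.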