hieu-nguyen2208 committed on
Commit
7d0be36
1 Parent(s): bf62ebb
Files changed (2)
  1. app.py +14 -0
  2. src/generation/llm.py +21 -22
app.py CHANGED
@@ -1,5 +1,19 @@
 import gradio as gr
 from src.chatbot import RestaurantChatbot
+import subprocess
+
+command = [
+    "pip",
+    "install",
+    "git+https://github.com/huggingface/transformers.git@096f25ae1f501a084d8ff2dcaf25fbc2bd60eba4"
+]
+
+# Run the command and print stdout/stderr if needed
+result = subprocess.run(command, capture_output=True, text=True)
+
+# Print the results
+print("STDOUT:", result.stdout)
+print("STDERR:", result.stderr)
 
 chatbot = RestaurantChatbot()
 chat_history = []
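Editorial note on the change above: the pip install runs at import time and its outcome is only printed, so a broken install would only surface later as a model-loading failure. A minimal sketch of a fail-fast variant (an illustration, not part of the commit; it also calls pip through sys.executable so the install targets the running interpreter):

```python
import subprocess
import sys

# Same pinned transformers revision as app.py, installed into the current interpreter
command = [
    sys.executable, "-m", "pip", "install",
    "git+https://github.com/huggingface/transformers.git@096f25ae1f501a084d8ff2dcaf25fbc2bd60eba4",
]

result = subprocess.run(command, capture_output=True, text=True)
print("STDOUT:", result.stdout)
print("STDERR:", result.stderr)
if result.returncode != 0:
    # Fail fast instead of letting the missing dependency break model loading later
    raise RuntimeError("transformers install from the pinned revision failed")
```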
src/generation/llm.py CHANGED
@@ -2,9 +2,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from langchain_core.prompts import PromptTemplate
 import os
 from typing import List
+import torch
 
 class LLM:
-    def __init__(self, model_repo: str = "Qwen/Qwen2-1.5B-Instruct",
+    def __init__(self, model_repo: str = "microsoft/bitnet-b1.58-2B-4T",
                  local_path: str = "models"):
         """
         Initialize the LLM with Qwen2-1.5B-Instruct using Hugging Face Transformers.
@@ -17,19 +18,14 @@ class LLM:
 
         try:
             # Load the model
-            self.llm = AutoModelForCausalLM.from_pretrained(
-                model_repo,
-                device_map="auto",  # Automatically map to CPU
-                cache_dir=local_path,
-                trust_remote_code=True
-            )
-
-            # Load the tokenizer
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                model_repo,
-                cache_dir=local_path,
-                trust_remote_code=True
+            model_id = "microsoft/bitnet-b1.58-2B-4T"
+            self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                torch_dtype=torch.bfloat16
             )
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            self.model.to(self.device)
             print(f"Model successfully loaded from {model_repo}")
         except Exception as e:
             raise RuntimeError(
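The pinned transformers revision installed in app.py and the new loading path above go together: microsoft/bitnet-b1.58-2B-4T is a recent architecture that an older released transformers build may not recognise. A hedged, standalone sketch of a preflight check (not part of the commit) that reports a clear error before LLM() attempts the full bfloat16 load:

```python
# Preflight sketch: only the small config file is fetched, so this is cheap.
import transformers
from transformers import AutoConfig

model_id = "microsoft/bitnet-b1.58-2B-4T"
try:
    AutoConfig.from_pretrained(model_id)
    print(f"transformers {transformers.__version__} recognises {model_id}")
except (ValueError, KeyError) as err:
    # Typically raised when the installed build does not know this model type
    raise RuntimeError(
        f"transformers {transformers.__version__} cannot resolve {model_id}; "
        "install the git revision pinned in app.py"
    ) from err
```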
 
 
@@ -88,19 +84,22 @@ class LLM:
             messages, tokenize=False, add_generation_prompt=True
         )
         # Tokenize input prompt
-            inputs = self.tokenizer(prompt_with_template, return_tensors="pt").to(self.llm.device)
+            inputs = self.tokenizer(prompt_with_template, return_tensors="pt").to(self.device)
             # Generate text
-            outputs = self.llm.generate(
-                **inputs,
-                max_new_tokens=max_length,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id,
-            )
+            outputs = self.model.generate(
+                **inputs,
+                max_new_tokens=max_length,
+                temperature=0.7,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id,
+            )
             # Decode the generated tokens
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             print("Response generated successfully!")
-            return response.split('assistant')[2]
+            # Adjust response parsing to handle the output structure
+            if "assistant" in response:
+                return response.split("assistant")[-1].strip()
+            return response
         except Exception as e:
             raise RuntimeError(f"Failed to generate response: {str(e)}")
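The new parsing branch is safer than the old `return response.split('assistant')[2]`, which raises IndexError when the decoded text contains fewer than two occurrences of "assistant", but splitting on that word is still fragile because it can appear in the user's message or in the reply itself. A hedged alternative, sketched as a hypothetical standalone helper (names and signature are illustrative, not from the repo), is to decode only the tokens generated after the prompt:

```python
from typing import List

import torch
from transformers import PreTrainedModel, PreTrainedTokenizer


def generate_reply(model: PreTrainedModel, tokenizer: PreTrainedTokenizer,
                   messages: List[dict], max_new_tokens: int = 256) -> str:
    """Return only the newly generated text, with no string splitting."""
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt tokens and decode only what the model produced
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
```

The same slice-then-decode approach would drop into the class's generation method using self.model, self.tokenizer and self.device.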