DragonProgrammer committed on
Commit
619ec30
·
verified ·
1 Parent(s): 451d4d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -20
app.py CHANGED
@@ -66,8 +66,7 @@ class LangChainAgentWrapper:
66
  def __init__(self):
67
  print("Initializing LangChainAgentWrapper...")
68
 
69
- # We will keep using the gemma-2b-it model, but load it in 4-bit
70
- model_id = "google/gemma-2b-it"
71
 
72
  try:
73
  hf_auth_token = os.getenv("HF_TOKEN")
@@ -76,34 +75,48 @@ class LangChainAgentWrapper:
76
  else:
77
  print("HF_TOKEN secret found.")
78
 
79
- # --- NEW: 4-Bit Quantization Configuration ---
80
- # Create a configuration for loading the model in 4-bit precision.
81
- # This makes the model faster and use less memory.
82
  print("Creating 4-bit quantization config...")
83
  quantization_config = transformers.BitsAndBytesConfig(
84
  load_in_4bit=True,
85
  bnb_4bit_quant_type="nf4",
86
- bnb_4bit_compute_dtype="bfloat16" # Use bfloat16 for faster computation
87
  )
88
  print("Quantization config created.")
89
- # --- END NEW ---
90
 
91
- # Create the Hugging Face pipeline
92
- print(f"Loading model pipeline for: {model_id} with quantization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  llm_pipeline = transformers.pipeline(
94
  "text-generation",
95
- model=model_id,
96
- model_kwargs={"torch_dtype": "auto"},
97
- device_map="auto",
98
- token=hf_auth_token,
99
- quantization_config=quantization_config # <<< --- PASS THE NEW CONFIG HERE
100
  )
101
- print("Model pipeline loaded successfully.")
 
 
102
 
103
  # Wrap the pipeline in a LangChain LLM object
104
  self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
105
 
106
- # Define the list of LangChain tools (this part is unchanged)
107
  self.tools = [
108
  Tool(
109
  name="get_current_time_in_timezone",
@@ -119,7 +132,7 @@ class LangChainAgentWrapper:
119
  ]
120
  print(f"Tools prepared for agent: {[tool.name for tool in self.tools]}")
121
 
122
- # Create the ReAct agent prompt from a template (this part is unchanged)
123
  react_prompt = PromptTemplate.from_template(
124
  """
125
  You are a helpful assistant. Answer the following questions as best you can.
@@ -145,10 +158,8 @@ class LangChainAgentWrapper:
145
  """
146
  )
147
 
148
- # Create the agent (this part is unchanged)
149
  agent = create_react_agent(self.llm, self.tools, react_prompt)
150
-
151
- # Create the agent executor (this part is unchanged)
152
  self.agent_executor = AgentExecutor(agent=agent, tools=self.tools, verbose=True, handle_parsing_errors=True)
153
  print("LangChain agent created successfully.")
154
 
 
66
  def __init__(self):
67
  print("Initializing LangChainAgentWrapper...")
68
 
69
+ model_id = "google/gemma-2b-it"
 
70
 
71
  try:
72
  hf_auth_token = os.getenv("HF_TOKEN")
 
75
  else:
76
  print("HF_TOKEN secret found.")
77
 
78
+ # --- CORRECTED MODEL LOADING ---
79
+
80
+ # 1. Create the 4-bit quantization configuration
81
  print("Creating 4-bit quantization config...")
82
  quantization_config = transformers.BitsAndBytesConfig(
83
  load_in_4bit=True,
84
  bnb_4bit_quant_type="nf4",
85
+ bnb_4bit_compute_dtype="bfloat16"
86
  )
87
  print("Quantization config created.")
 
88
 
89
+ # 2. Load the tokenizer
90
+ print(f"Loading tokenizer for: {model_id}")
91
+ tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
92
+ print("Tokenizer loaded successfully.")
93
+
94
+ # 3. Load the model with the quantization config
95
+ print(f"Loading model '{model_id}' with quantization...")
96
+ model = transformers.AutoModelForCausalLM.from_pretrained(
97
+ model_id,
98
+ quantization_config=quantization_config,
99
+ device_map="auto", # Automatically maps model to available hardware (CPU/GPU)
100
+ token=hf_auth_token
101
+ )
102
+ print("Model loaded successfully.")
103
+
104
+ # 4. Create the Hugging Face pipeline with the pre-loaded model and tokenizer
105
+ print("Creating text-generation pipeline...")
106
  llm_pipeline = transformers.pipeline(
107
  "text-generation",
108
+ model=model,
109
+ tokenizer=tokenizer,
110
+ # No need to pass quantization_config here anymore
 
 
111
  )
112
+ print("Model pipeline created successfully.")
113
+
114
+ # --- END CORRECTION ---
115
 
116
  # Wrap the pipeline in a LangChain LLM object
117
  self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
118
 
119
+ # Define the list of LangChain tools (this part is unchanged and correct)
120
  self.tools = [
121
  Tool(
122
  name="get_current_time_in_timezone",
 
132
  ]
133
  print(f"Tools prepared for agent: {[tool.name for tool in self.tools]}")
134
 
135
+ # Create the ReAct agent prompt (this part is unchanged and correct)
136
  react_prompt = PromptTemplate.from_template(
137
  """
138
  You are a helpful assistant. Answer the following questions as best you can.
 
158
  """
159
  )
160
 
161
+ # Create the agent and executor (this part is unchanged and correct)
162
  agent = create_react_agent(self.llm, self.tools, react_prompt)
 
 
163
  self.agent_executor = AgentExecutor(agent=agent, tools=self.tools, verbose=True, handle_parsing_errors=True)
164
  print("LangChain agent created successfully.")
165