zmeeks committed
Commit a43a8a2 · verified · 1 Parent(s): 8b4bcba

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -30,7 +30,7 @@ Question: {question}
 
 To answer this question:
 1. If you need current information or facts you're unsure about, use the search tool
-2. If you need to transcribe audio, import Transcriber from smolagents and instantiate 'transcriber = Transcriber()'
+2. If you need to transcribe audio, import smolagents and instantiate 'transcriber = smolagents.Transcriber()'
 3. Write Python code to solve the problem
 4. Make sure your final answer is clear and direct
 
@@ -69,7 +69,7 @@ class BasicAgent:
         # Initialize the model
         print("🤖 Initializing TransformersModel...")
         self.model = TransformersModel(
-            model_id="google/gemma-3-4b-it",
+            model_id="google/gemma-3-12b-it",
             torch_dtype=torch.bfloat16,
             device_map="auto",
         )
@@ -81,7 +81,7 @@ class BasicAgent:
         if self.model.tokenizer.pad_token is None:
             self.model.tokenizer.pad_token = self.model.tokenizer.eos_token
         # Set default padding behavior for FlashAttention
-        self.model.tokenizer.pad_to_multiple_of = 8
+        self.model.tokenizer.pad_to_multiple_of = 64
         print("✅ Applied tokenizer padding fix for FlashAttention alignment")
 
         # If the model has a processor with tokenizer, fix that too
@@ -89,7 +89,7 @@ class BasicAgent:
             self.model.processor.tokenizer.padding_side = "left"
             if self.model.processor.tokenizer.pad_token is None:
                 self.model.processor.tokenizer.pad_token = self.model.processor.tokenizer.eos_token
-            self.model.processor.tokenizer.pad_to_multiple_of = 64
+            self.model.processor.tokenizer.pad_to_multiple_of = 64
             print("✅ Applied processor tokenizer padding fix")
 
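For context on the pad_to_multiple_of bump from 8 to 64: the setting only changes how batches are padded. Below is a minimal, illustrative sketch of that effect, assuming the Hugging Face transformers AutoTokenizer API and the google/gemma-3-12b-it checkpoint named in the diff (the value is passed per call here rather than set as a tokenizer attribute, and the snippet is not part of the commit):

# Illustrative sketch only, not part of this commit.
# Padding every batch to a multiple of a fixed value keeps sequence lengths
# aligned, which is the property the FlashAttention "padding fix" relies on.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("google/gemma-3-12b-it")  # model_id from the diff
tok.padding_side = "left"              # decoder-only models pad on the left
if tok.pad_token is None:
    tok.pad_token = tok.eos_token      # mirror the commit's fallback

batch = tok(
    ["short prompt", "a somewhat longer prompt that forces extra padding"],
    padding=True,
    pad_to_multiple_of=64,             # the alignment value the commit sets
    return_tensors="pt",
)
print(batch["input_ids"].shape)        # padded length is a multiple of 64

A larger multiple wastes a few pad tokens per batch but gives the attention kernels more uniformly shaped inputs, which is presumably why the value was raised from 8 to 64.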