
Ruurd committed · Commit 47aa004 · verified · 1 parent: b1cf46e

Update LoRA layer size

Files changed (1)
  1. llama_diffusion_model.py +2 -2
llama_diffusion_model.py CHANGED
@@ -28,7 +28,7 @@ class CustomTransformerModel(PreTrainedModel):
 
     def __init__(self, config):
        super().__init__(config)
-        self.llama = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B", torch_dtype=torch.float16, device_map="auto", token=hf_token)
+        self.llama = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.float16, device_map="auto", token=hf_token)
         self.llama.resize_token_embeddings(config.vocab_size)
 
         for param in self.llama.parameters():
@@ -37,7 +37,7 @@ class CustomTransformerModel(PreTrainedModel):
             param.requires_grad = True
 
         lora_config = LoraConfig(
-            r=512, lora_alpha=512, lora_dropout=0.0,
+            r=64, lora_alpha=64, lora_dropout=0.0,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
             bias="none", task_type=None
         )
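
For context: the commit swaps the base checkpoint to the Instruct variant and drops the LoRA rank and alpha from 512 to 64, which shrinks the trainable adapter matrices on the attention projections by a factor of 8. Below is a minimal sketch of how that configuration could be attached with PEFT's get_peft_model; the diff does not show that call or the surrounding glue, so the imports, the hf_token placeholder, and the freezing loop shown here are assumptions for illustration only.

# Minimal sketch, assuming the adapter is attached via peft.get_peft_model;
# the hf_token value is a hypothetical placeholder.
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

hf_token = "hf_..."  # placeholder; the Space supplies its own token

# Load the base model referenced by the updated line and freeze its weights.
llama = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_token,
)
for param in llama.parameters():
    param.requires_grad = False

# The updated adapter config from the diff: rank and alpha reduced to 64.
lora_config = LoraConfig(
    r=64, lora_alpha=64, lora_dropout=0.0,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    bias="none", task_type=None,
)
llama = get_peft_model(llama, lora_config)
llama.print_trainable_parameters()  # 1/8 the adapter parameters of r=512

Since LoRA parameter count scales linearly with the rank, r=64 trains exactly one eighth as many adapter weights as r=512 for the same target modules.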