Update LoRA layer size
llama_diffusion_model.py +2 -2
llama_diffusion_model.py CHANGED
@@ -28,7 +28,7 @@ class CustomTransformerModel(PreTrainedModel):
 
     def __init__(self, config):
         super().__init__(config)
-        self.llama = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B", torch_dtype=torch.float16, device_map="auto", token=hf_token)
+        self.llama = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.float16, device_map="auto", token=hf_token)
         self.llama.resize_token_embeddings(config.vocab_size)
 
         for param in self.llama.parameters():
@@ -37,7 +37,7 @@ class CustomTransformerModel(PreTrainedModel):
             param.requires_grad = True
 
         lora_config = LoraConfig(
-            r=
+            r=64, lora_alpha=64, lora_dropout=0.0,
             target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
             bias="none", task_type=None
         )
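For context, a minimal sketch of how the updated LoraConfig would typically be attached to the base model with peft's get_peft_model. The wrapping call, the token handling, and the environment variable name are assumptions based on common practice; the diff itself only shows the model name and the config line changing.

import os
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Assumption: the token comes from the environment; the Space supplies
# hf_token its own way, which the diff does not show.
hf_token = os.environ.get("HF_TOKEN")

llama = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_token,
)

# The new configuration from this commit: rank-64 adapters on all four
# attention projections, with no dropout on the adapter path.
lora_config = LoraConfig(
    r=64, lora_alpha=64, lora_dropout=0.0,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    bias="none", task_type=None,
)

# Assumed wrapping step (not shown in the diff): peft replaces each targeted
# projection with a LoRA-augmented layer and freezes the base weights.
llama = get_peft_model(llama, lora_config)
llama.print_trainable_parameters()  # only the adapter weights are trainable

One note on the chosen values: standard LoRA scales the adapter update by lora_alpha / r, so with r=64 and lora_alpha=64 the factor is 1.0 and the adapter output is added to the frozen projection output unscaled; the rank change affects adapter capacity rather than the magnitude of its contribution.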