defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  max_seq_length: 2048  # Auto supports RoPE Scaling internally
  dtype: null  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
  load_in_4bit: true  # Use 4bit quantization to reduce memory usage

# PEFT configuration
peft:
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true
  loftq_config: null
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Dataset configuration
dataset:
  validation_split: 0.1  # 10% of data for validation
  seed: 3407  # Random seed for dataset splitting

# Training configuration
training:
  args:
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16
    warmup_steps: 100
    max_steps: 120
    learning_rate: 5e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  sft:
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false
    pad_to_multiple_of: 8

# Output configuration
output:
  dir: "final_model"

# Training control
train: false

# Testing configuration
test: true  # Whether to run testing after training
test_dataset:
  name: "gaia-benchmark/GAIA"
  config: "2023_level1"  # Use level 1 questions for testing
  split: "test"  # Use test split for testing
  max_samples: 10  # Number of samples to test on
  max_length: 2048  # Maximum sequence length for testing
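
# ---------------------------------------------------------------------------
# Usage sketch, kept as comments so this file stays valid YAML. The
# `defaults: - _self_` entry suggests a Hydra config, so this sketch assumes
# the file lives at `conf/config.yaml`; the script name and variable names
# below are hypothetical. It shows how the `model` and `peft` sections above
# would typically map onto Unsloth calls:
#
#   import hydra
#   from omegaconf import DictConfig, OmegaConf
#   from unsloth import FastLanguageModel
#
#   @hydra.main(config_path="conf", config_name="config", version_base=None)
#   def main(cfg: DictConfig) -> None:
#       # `model` section -> 4-bit base model + tokenizer
#       model, tokenizer = FastLanguageModel.from_pretrained(
#           model_name=cfg.model.name,
#           max_seq_length=cfg.model.max_seq_length,
#           dtype=cfg.model.dtype,               # null -> auto-detect
#           load_in_4bit=cfg.model.load_in_4bit,
#       )
#       # `peft` section -> LoRA adapters; its keys match the keyword
#       # arguments of get_peft_model one-to-one, so it can be splatted
#       model = FastLanguageModel.get_peft_model(
#           model,
#           **OmegaConf.to_container(cfg.peft, resolve=True),
#       )
#
#   if __name__ == "__main__":
#       main()
# ---------------------------------------------------------------------------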
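
# ---------------------------------------------------------------------------
# Continuing the sketch: how the `training` section would feed TRL. The
# dataset variables are hypothetical (e.g. produced by splitting the training
# data with `dataset.validation_split` and `dataset.seed`); note that newer
# TRL versions take `processing_class=` instead of `tokenizer=`:
#
#   from transformers import DataCollatorForLanguageModeling
#   from trl import SFTConfig, SFTTrainer
#
#   training_args = SFTConfig(
#       **OmegaConf.to_container(cfg.training.args, resolve=True),
#       dataset_num_proc=cfg.training.sft.dataset_num_proc,
#       packing=cfg.training.sft.packing,
#   )
#   collator = DataCollatorForLanguageModeling(
#       tokenizer=tokenizer,
#       mlm=cfg.training.data_collator.mlm,  # false: causal LM objective
#       pad_to_multiple_of=cfg.training.data_collator.pad_to_multiple_of,
#   )
#   trainer = SFTTrainer(
#       model=model,
#       tokenizer=tokenizer,
#       train_dataset=train_ds,   # hypothetical train/validation split
#       eval_dataset=eval_ds,
#       data_collator=collator,
#       args=training_args,
#   )
#   if cfg.train:
#       trainer.train()
#       trainer.save_model(cfg.output.dir)
# ---------------------------------------------------------------------------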