from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType


@dataclass
class HRAConfig(PeftConfig):
""" |
|
This is the configuration class to store the configuration of a [`HRAModel`]. |
|
|
|
Args: |
|
r (`int`): |
|
The rank of HRA across different layers. It is best to set 'r' to an even number; otherwise, the default |
|
initialization method will not work. |
|
apply_GS (`bool`): |
|
Whether to apply Gram-Schmidt orthogonalization. |
|
target_modules (`Optional[Union[List[str], str]]`): |
|
The names of the modules to apply the adapter to. If this is specified, only the modules with the specified |
|
names will be replaced. When passing a string, a regex match will be performed. When passing a list of |
|
strings, either an exact match will be performed or it is checked if the name of the module ends with any |
|
of the passed strings. If this is specified as 'all-linear', then all linear modules are chosen, excluding |
|
the output layer. If this is not specified, modules will be chosen according to the model architecture. If |
|
the architecture is not known, an error will be raised -- in this case, you should specify the target |
|
modules manually. |
|
exclude_modules (`Optional[Union[List[str], str]]`): |
|
The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. |
|
When passing a list of strings, either an exact match will be performed or it is checked if the name of the |
|
module ends with any of the passed strings. |
|
init_weights (`bool`): |
|
Whether to perform initialization of HRA weights. |
|
layers_to_transform (`Union[List[int], int]`): |
|
The layer indices to transform. If a list of ints is passed, it will apply the adapter to the layer indices |
|
that are specified in this list. If a single integer is passed, it will apply the transformations on the |
|
layer at this index. |
|
layers_pattern (`Optional[Union[List[str], str]]`): |
|
The layer pattern name, used only if `layers_to_transform` is different from `None`. This should target the |
|
`nn.ModuleList` of the model, which is often called `'layers'` or `'h'`. |
|
rank_pattern (`dict`): |
|
The mapping from layer names or regexp expression to ranks which are different from the default rank |
|
specified by `r`. |
|
modules_to_save (`List[str]`): |
|
List of modules apart from adapter layers to be set as trainable and saved in the final checkpoint. |
|
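    Example (a minimal usage sketch; the checkpoint and target module names below are illustrative, assuming a
    causal LM whose attention uses `q_proj`/`v_proj` linear layers):

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import HRAConfig, get_peft_model

        >>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
        >>> config = HRAConfig(r=8, target_modules=["q_proj", "v_proj"])
        >>> peft_model = get_peft_model(base_model, config)
        >>> peft_model.print_trainable_parameters()
        ```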
""" |
|
|
|
    r: int = field(
        default=8,
        metadata={
            "help": "The rank of HRA across different layers.",
            "note": "It is best to set 'r' to an even number; otherwise, the default initialization method will not work.",
        },
    )
    apply_GS: bool = field(
        default=False,
        metadata={"help": "Whether to apply Gram-Schmidt orthogonalization or not."},
    )
    target_modules: Optional[Union[list[str], str]] = field(
        default=None,
        metadata={
            "help": "List of module names or regex expression of the module names to replace with HRA.",
            "example": "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'",
        },
    )
    exclude_modules: Optional[Union[list[str], str]] = field(
        default=None,
        metadata={"help": "List of module names or regex expression of the module names to exclude from HRA."},
    )
    init_weights: bool = field(
        default=True,
        metadata={
            "help": (
                "Whether to initialize the weights of the HRA layers with their default initialization. Don't change "
                "this setting, except if you know exactly what you're doing."
            ),
        },
    )
    layers_to_transform: Optional[Union[list[int], int]] = field(
        default=None,
        metadata={
            "help": "The layer indexes to transform. If this argument is specified, PEFT will transform only the layer indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index."
        },
    )
    layers_pattern: Optional[Union[list[str], str]] = field(
        default=None,
        metadata={
            "help": "The layer pattern name, used only if `layers_to_transform` is different from `None` and if the layer pattern is not in the common layers pattern. "
            "This should target the `nn.ModuleList` of the model, which is often called `'layers'` or `'h'`."
        },
    )
    bias: str = field(default="none", metadata={"help": "Bias type for HRA. Can be 'none', 'all' or 'hra_only'."})
    modules_to_save: Optional[list[str]] = field(
        default=None,
        metadata={
            "help": "List of modules apart from HRA layers to be set as trainable and saved in the final checkpoint. "
            "For example, in Sequence Classification or Token Classification tasks, "
            "the final layer `classifier/score` is randomly initialized and as such needs to be trainable and saved."
        },
    )

    def __post_init__(self):
        super().__post_init__()
        self.peft_type = PeftType.HRA
        # `target_modules` and `exclude_modules` may be passed as a list of module
        # names or as a regex string; lists are normalized to sets for membership checks.
        self.target_modules = (
            set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
        )
        self.exclude_modules = (
            set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
        )

        # Layer-index filtering is only supported when `target_modules` is a list,
        # not a regex string.
        if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
            raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")

        if isinstance(self.target_modules, str) and self.layers_pattern is not None:
            raise ValueError("`layers_pattern` cannot be used when `target_modules` is a str.")

        # `layers_pattern` only narrows down where `layers_to_transform` applies,
        # so it is meaningless on its own.
        if self.layers_pattern and not self.layers_to_transform:
            raise ValueError("When `layers_pattern` is specified, `layers_to_transform` must also be specified.")
|
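
if __name__ == "__main__":
    # Illustrative sanity sketch (hypothetical usage, not part of the original module):
    # `__post_init__` rejects combining a regex `target_modules` with `layers_to_transform`.
    try:
        HRAConfig(target_modules=r".*attention.*", layers_to_transform=[0, 1])
    except ValueError as err:
        print(f"Raised as expected: {err}")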