commit d8c2f8c (1 parent: 6e08b13)
1 file changed: examples/puzzletron/configs/llama-3_1-8B_pruneffn_memory/pruning/attn_pruning.yaml
@@ -1,8 +1,15 @@
 defaults:
   - pruning_defaults
 
+hook_class: ${get_object:modelopt.torch.prune.importance_hooks.base_hooks.IndependentKvHeadContributionHook}
+
 activations_log_dir: ${puzzle_dir}/pruning/pruning_scores/attn_${pruning.activation_hooks_kwargs.method}/${pruning.experiment_id}
 
+pruning_mixin:
+  _target_: modelopt.torch.puzzletron.pruning.kv_heads_pruning_mixin.KVHeadsPruningMixIn
+  layer_descriptor:
+    _target_: modelopt.torch.puzzletron.anymodel.models.llama.llama_model_descriptor.LlamaKVHeadsLayerDescriptor
 
 activation_hooks_kwargs:
   method: independent_kv_head_contribution
   optimize_for: memory # IndependentKvHeadContributionHook implementation that consumes less memory
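For context, a minimal sketch of how a config like this is typically consumed, assuming the standard Hydra/OmegaConf conventions that the "_target_" and "${get_object:...}" syntax suggests; the actual entry point in modelopt's puzzletron pruning pipeline may differ, and the resolver registration shown here is an assumption rather than the project's own code.

from hydra.utils import get_object, instantiate
from omegaconf import OmegaConf

# Assumption: the project registers a "get_object" resolver so that
# "${get_object:some.dotted.path}" resolves to the named Python object.
OmegaConf.register_new_resolver("get_object", get_object, replace=True)

# Inline copy of the keys added by this commit, so the sketch is self-contained.
cfg = OmegaConf.create(
    """\
hook_class: ${get_object:modelopt.torch.prune.importance_hooks.base_hooks.IndependentKvHeadContributionHook}
pruning_mixin:
  _target_: modelopt.torch.puzzletron.pruning.kv_heads_pruning_mixin.KVHeadsPruningMixIn
  layer_descriptor:
    _target_: modelopt.torch.puzzletron.anymodel.models.llama.llama_model_descriptor.LlamaKVHeadsLayerDescriptor
"""
)

hook_cls = cfg.hook_class                       # resolves to the hook class object itself, not an instance
pruning_mixin = instantiate(cfg.pruning_mixin)  # builds KVHeadsPruningMixIn with its Llama KV-heads layer descriptor

With this layout, swapping the importance hook or the layer descriptor is a pure config change: only the dotted paths in attn_pruning.yaml need to be edited.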