File tree Expand file tree Collapse file tree
modelopt_recipes/models/Nemotron-3-Super-120B-A12B Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -42,14 +42,14 @@ quantize:
4242 enable : false
4343
4444 # MoE routed experts -> NVFP4 W4A4, block_size 16, e4m3 scale.
45- # Weight uses static block scales (chosen by MSE); activations stay dynamic.
45+ # Max/amax calibration uses dynamic block scales for both weights and activations.
4646 # HF/export names: backbone.layers.*.mixer.experts.*.{up,down}_proj.
4747 - quantizer_name : ' *mixer.experts.*weight_quantizer'
4848 enable : true
4949 cfg :
5050 block_sizes :
5151 -1 : 16
52- type : static
52+ type : dynamic
5353 scale_bits : e4m3
5454 num_bits : e2m1
5555 - quantizer_name : ' *mixer.experts.*input_quantizer'
@@ -66,7 +66,7 @@ quantize:
6666 cfg :
6767 block_sizes :
6868 -1 : 16
69- type : static
69+ type : dynamic
7070 scale_bits : e4m3
7171 num_bits : e2m1
7272 - quantizer_name : ' *mlp.experts*input_quantizer'
You can’t perform that action at this time.
0 commit comments