cpatonn committed on
Commit
dda0c4f
·
verified ·
1 Parent(s): 548664e

Delete files recipe.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. recipe.yaml +0 -46
recipe.yaml DELETED
@@ -1,46 +0,0 @@
1
- default_stage:
2
- default_modifiers:
3
- AWQModifier:
4
- config_groups:
5
- group_0:
6
- targets: [Linear]
7
- weights:
8
- num_bits: 8
9
- type: int
10
- symmetric: true
11
- group_size: 32
12
- strategy: group
13
- block_structure: null
14
- dynamic: false
15
- actorder: null
16
- scale_dtype: null
17
- zp_dtype: null
18
- observer: mse
19
- observer_kwargs: {}
20
- input_activations: null
21
- output_activations: null
22
- format: null
23
- targets: [Linear]
24
- ignore: [lm_head, model.embed_tokens, 're:.*shared_experts.*', 're:model[.]layers[.]0[.].*',
25
- 're:.*mlp[.]gate$']
26
- bypass_divisibility_checks: false
27
- mappings:
28
- - smooth_layer: re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*input_layernorm$
29
- balance_layers: ['re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*q_a_proj$', 're:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*kv_a_proj_with_mqa$']
30
- activation_hook_target: null
31
- - smooth_layer: re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*q_a_layernorm$
32
- balance_layers: ['re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*q_b_proj$']
33
- activation_hook_target: null
34
- - smooth_layer: re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*kv_a_layernorm$
35
- balance_layers: ['re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*kv_b_proj$']
36
- activation_hook_target: null
37
- - smooth_layer: re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*post_attention_layernorm$
38
- balance_layers: ['re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*gate_proj$', 're:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*up_proj$',
39
- 're:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*mlp[.]gate$']
40
- activation_hook_target: null
41
- - smooth_layer: re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*mlp[.]experts.*up_proj$
42
- balance_layers: ['re:.*layers[.]((?:[1-9]|[1-3][0-9]|4[0-6]))[.].*mlp[.]experts.*down_proj$']
43
- activation_hook_target: null
44
- offload_device: !!python/object/apply:torch.device [cuda]
45
- duo_scaling: true
46
- n_grid: 20