{
  "vocab_size": 32128,
  "hidden_dim": 4096,
  "num_layers": 24,
  "n_head": 64,
  "kv_heads": 64,
  "head_dim": 64,
  "ff_dim": 10240,
  "dropout": 0.1,
  "eps": 1e-6,
  "emb_weight_dtype": "bfloat16",
  "linear_weight_dtype": "bfloat16",
  "norm_weight_dtype": "bfloat16",
  "ffn_type": "GEGLU",
  "gelu_approx": "tanh",
  "attn_impl": "sdpa",
  "flex_kernel_options": {},
  "relative_attention_num_buckets": 32,
  "relative_attention_max_distance": 128,
  "scale_qk": false,
  "pad_token_id": 0,
  "decoder_start_token_id": 0,
  "label_ignore_index": -100,
  "pos_emb_per_layer": false,
  "elementwise_affine": true
}