sumitdotml commited on
Commit
c190823
·
verified ·
1 Parent(s): 13bb335

Upload no-lb-ablation/final-model.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. no-lb-ablation/final-model.json +41 -0
no-lb-ablation/final-model.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "step": 500,
4
+ "preset": "no-lb",
5
+ "mode": "moe",
6
+ "config": {
7
+ "preset": "no-lb",
8
+ "mode": "moe",
9
+ "run_name": "no-lb-ablation",
10
+ "seed": 42,
11
+ "max_steps": 2000,
12
+ "batch_size": 2,
13
+ "grad_accum_steps": 4,
14
+ "effective_batch_size": 8,
15
+ "block_size": 512,
16
+ "learning_rate": 5e-05,
17
+ "weight_decay": 0.01,
18
+ "warmup_fraction": 0.1,
19
+ "max_grad_norm": 1.0,
20
+ "lb_coef": 0.0,
21
+ "z_coef": 0.001,
22
+ "n_experts": 8,
23
+ "topk": 1,
24
+ "noise_std": 0.0,
25
+ "moe_layers": [
26
+ 8,
27
+ 9,
28
+ 10,
29
+ 11
30
+ ],
31
+ "size_mb": 10.0,
32
+ "balance_tokens": true,
33
+ "eval_every": 100,
34
+ "save_every": 100,
35
+ "collapse_early_stop": true
36
+ },
37
+ "metrics_summary": {
38
+ "train_loss": 2.845479725241661,
39
+ "lm_loss": 2.84168404340744
40
+ }
41
+ }