shorecode commited on
Commit
f704be0
·
verified ·
1 Parent(s): 0786448

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795ea54c34c88b85a3fb35a48aa425148ae783dfc82bb534fc7c366e4eb1ab26
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e66d01dfa2825d177f148be01e123916ae8ad822352fa3991ddadbd4df846c
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945fb964074ba34d7300992c6c559faceb117e18c0aaf4c9961ee8e7f009ba2f
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2bbd20b1ee747110998970c84ba11f5687a29984dd589bbdc3cd412bf3cf865
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3791abf31ec6e82ba3d4fb71351beccb33024121dff3a0af48258eb8e3dcf267
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47f551f352c3582aab145c6f54f30ffbeee0b864e47c66aae4b8bf65aa86b3ab
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8b56065bfa25797c49f5e0d00ff60a8f7f9ff8c3dc27a413f7721c344a954c2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c0caf2011a7cb0034062f49b1c20f2067d88b910ef9cf32d24c7e9ddd08314
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2165674066053059,
6
  "eval_steps": 500,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -96,6 +96,50 @@
96
  "eval_samples_per_second": 112.517,
97
  "eval_steps_per_second": 7.037,
98
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
  "logging_steps": 250,
@@ -115,7 +159,7 @@
115
  "attributes": {}
116
  }
117
  },
118
- "total_flos": 722108153856000.0,
119
  "train_batch_size": 16,
120
  "trial_name": null,
121
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.32485110990795885,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
96
  "eval_samples_per_second": 112.517,
97
  "eval_steps_per_second": 7.037,
98
  "step": 2000
99
+ },
100
+ {
101
+ "epoch": 0.24363833243096913,
102
+ "grad_norm": 1.0419113636016846,
103
+ "learning_rate": 9.188233170907778e-05,
104
+ "loss": 3.8416,
105
+ "step": 2250
106
+ },
107
+ {
108
+ "epoch": 0.2707092582566324,
109
+ "grad_norm": 0.9652225375175476,
110
+ "learning_rate": 9.0979967514889e-05,
111
+ "loss": 3.807,
112
+ "step": 2500
113
+ },
114
+ {
115
+ "epoch": 0.2707092582566324,
116
+ "eval_loss": 3.119335174560547,
117
+ "eval_runtime": 87.4769,
118
+ "eval_samples_per_second": 112.601,
119
+ "eval_steps_per_second": 7.042,
120
+ "step": 2500
121
+ },
122
+ {
123
+ "epoch": 0.2977801840822956,
124
+ "grad_norm": 0.8792561888694763,
125
+ "learning_rate": 9.007760332070024e-05,
126
+ "loss": 3.7754,
127
+ "step": 2750
128
+ },
129
+ {
130
+ "epoch": 0.32485110990795885,
131
+ "grad_norm": 0.9625837206840515,
132
+ "learning_rate": 8.917523912651147e-05,
133
+ "loss": 3.7471,
134
+ "step": 3000
135
+ },
136
+ {
137
+ "epoch": 0.32485110990795885,
138
+ "eval_loss": 3.0782463550567627,
139
+ "eval_runtime": 87.7273,
140
+ "eval_samples_per_second": 112.28,
141
+ "eval_steps_per_second": 7.022,
142
+ "step": 3000
143
  }
144
  ],
145
  "logging_steps": 250,
 
159
  "attributes": {}
160
  }
161
  },
162
+ "total_flos": 1083162230784000.0,
163
  "train_batch_size": 16,
164
  "trial_name": null,
165
  "trial_params": null