shorecode committed on
Commit
07fdd73
·
verified ·
1 Parent(s): 099a830

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85e66d01dfa2825d177f148be01e123916ae8ad822352fa3991ddadbd4df846c
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd6622eb6d2f06c8805c5d1b53d336aea5992511f00713a9075b698b54fdcece
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2bbd20b1ee747110998970c84ba11f5687a29984dd589bbdc3cd412bf3cf865
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb0ac521b6f564d97622ade5cc991f93211f59608f70262ff6d645930ba004a
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47f551f352c3582aab145c6f54f30ffbeee0b864e47c66aae4b8bf65aa86b3ab
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26cadf98409e0764d6e8fd5269af09ca75aa8d4f36721d4b135f9710aa32d59e
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c0caf2011a7cb0034062f49b1c20f2067d88b910ef9cf32d24c7e9ddd08314
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0463f6582fe857e6419932817a025a3e7560bae906a02d87b6dfe3b560ecd651
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.32485110990795885,
6
  "eval_steps": 500,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -140,6 +140,50 @@
140
  "eval_samples_per_second": 112.28,
141
  "eval_steps_per_second": 7.022,
142
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
  ],
145
  "logging_steps": 250,
@@ -159,7 +203,7 @@
159
  "attributes": {}
160
  }
161
  },
162
- "total_flos": 1083162230784000.0,
163
  "train_batch_size": 16,
164
  "trial_name": null,
165
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4331348132106118,
6
  "eval_steps": 500,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
140
  "eval_samples_per_second": 112.28,
141
  "eval_steps_per_second": 7.022,
142
  "step": 3000
143
+ },
144
+ {
145
+ "epoch": 0.3519220357336221,
146
+ "grad_norm": 1.0289523601531982,
147
+ "learning_rate": 8.827287493232269e-05,
148
+ "loss": 3.7326,
149
+ "step": 3250
150
+ },
151
+ {
152
+ "epoch": 0.3789929615592853,
153
+ "grad_norm": 0.9764179587364197,
154
+ "learning_rate": 8.737051073813391e-05,
155
+ "loss": 3.6939,
156
+ "step": 3500
157
+ },
158
+ {
159
+ "epoch": 0.3789929615592853,
160
+ "eval_loss": 3.052320718765259,
161
+ "eval_runtime": 87.6344,
162
+ "eval_samples_per_second": 112.399,
163
+ "eval_steps_per_second": 7.029,
164
+ "step": 3500
165
+ },
166
+ {
167
+ "epoch": 0.4060638873849486,
168
+ "grad_norm": 0.9247903227806091,
169
+ "learning_rate": 8.646814654394514e-05,
170
+ "loss": 3.6782,
171
+ "step": 3750
172
+ },
173
+ {
174
+ "epoch": 0.4331348132106118,
175
+ "grad_norm": 0.9769233465194702,
176
+ "learning_rate": 8.556578234975636e-05,
177
+ "loss": 3.6654,
178
+ "step": 4000
179
+ },
180
+ {
181
+ "epoch": 0.4331348132106118,
182
+ "eval_loss": 3.0321156978607178,
183
+ "eval_runtime": 87.6462,
184
+ "eval_samples_per_second": 112.384,
185
+ "eval_steps_per_second": 7.028,
186
+ "step": 4000
187
  }
188
  ],
189
  "logging_steps": 250,
 
203
  "attributes": {}
204
  }
205
  },
206
+ "total_flos": 1444216307712000.0,
207
  "train_batch_size": 16,
208
  "trial_name": null,
209
  "trial_params": null