ToastyPigeon commited on
Commit
7d36ef5
·
verified ·
1 Parent(s): d67a1fa

Training in progress, step 984, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1089357c04ec4a0de85e536d52bb4c8df60d290b4d9d5b00a873e9fd046dbbc
3
  size 456206152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba653b21fbd27e90fa714e31d72bee8efe48fa6d68df3f7881b0f0894aab893
3
  size 456206152
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:341005da48ef83ba8e839e0b70ed4e82e9000785e704bde8bfccb97361384f99
3
  size 912763251
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3676ffa9d2a4a6b45ff054c1c5e6d9f894f3b60e7b6788564173fff0bcaa9893
3
  size 912763251
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26402a2eca103da6a9d310b909392899395babe69e239568a171a2b21830103e
3
  size 456340209
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56b3eee5fbed0d56604e1bc7489601d03cdfcc77e7ec58abb77fccc0880116b1
3
  size 456340209
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdab9c82a05ed01f13b244c083ffefdc46b875ecbe29601f180ef3e698088da
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b69aed956fe05a1bdbff0aa9f2338ca77aa7f5e0f752b5b5b9d31579d891cfac
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53d0d0d70f1e731a3047262bd6862bc5a552fb1c97f56fe3ab8a8bfb39f818e9
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b176d2ab9e2baaafc3d2752100872fba127bb76b8c8299ae0e041821c6ac5a
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92949f20b07ea4400476cbbf4d64075409dbdf1f6201cbb60ef6c1f93ae34bd6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bbdcb7c5a15d09694d6927296d9c5c9ee11f0dbd3ee3bb9a8d6bf66b0db98eb
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9308943089430894,
6
  "eval_steps": 50,
7
- "global_step": 950,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6818,6 +6818,244 @@
6818
  "eval_samples_per_second": 0.257,
6819
  "eval_steps_per_second": 0.134,
6820
  "step": 950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6821
  }
6822
  ],
6823
  "logging_steps": 1,
@@ -6832,12 +7070,12 @@
6832
  "should_evaluate": false,
6833
  "should_log": false,
6834
  "should_save": true,
6835
- "should_training_stop": false
6836
  },
6837
  "attributes": {}
6838
  }
6839
  },
6840
- "total_flos": 9.195368100613063e+18,
6841
  "train_batch_size": 1,
6842
  "trial_name": null,
6843
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
  "eval_steps": 50,
7
+ "global_step": 984,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6818
  "eval_samples_per_second": 0.257,
6819
  "eval_steps_per_second": 0.134,
6820
  "step": 950
6821
+ },
6822
+ {
6823
+ "epoch": 1.9329268292682928,
6824
+ "grad_norm": 0.30121544003486633,
6825
+ "learning_rate": 1.5620972104934408e-08,
6826
+ "loss": 2.6291,
6827
+ "step": 951
6828
+ },
6829
+ {
6830
+ "epoch": 1.934959349593496,
6831
+ "grad_norm": 0.48420703411102295,
6832
+ "learning_rate": 1.4716493680226596e-08,
6833
+ "loss": 2.4335,
6834
+ "step": 952
6835
+ },
6836
+ {
6837
+ "epoch": 1.9369918699186992,
6838
+ "grad_norm": 0.3064318299293518,
6839
+ "learning_rate": 1.3838910069708944e-08,
6840
+ "loss": 2.4035,
6841
+ "step": 953
6842
+ },
6843
+ {
6844
+ "epoch": 1.9390243902439024,
6845
+ "grad_norm": 0.42194581031799316,
6846
+ "learning_rate": 1.2988230770265287e-08,
6847
+ "loss": 2.7248,
6848
+ "step": 954
6849
+ },
6850
+ {
6851
+ "epoch": 1.9410569105691056,
6852
+ "grad_norm": 0.3294110894203186,
6853
+ "learning_rate": 1.2164464987630131e-08,
6854
+ "loss": 2.2102,
6855
+ "step": 955
6856
+ },
6857
+ {
6858
+ "epoch": 1.943089430894309,
6859
+ "grad_norm": 0.37185436487197876,
6860
+ "learning_rate": 1.1367621636291237e-08,
6861
+ "loss": 2.8291,
6862
+ "step": 956
6863
+ },
6864
+ {
6865
+ "epoch": 1.9451219512195121,
6866
+ "grad_norm": 0.4209384024143219,
6867
+ "learning_rate": 1.0597709339390806e-08,
6868
+ "loss": 2.2557,
6869
+ "step": 957
6870
+ },
6871
+ {
6872
+ "epoch": 1.9471544715447155,
6873
+ "grad_norm": 0.30791327357292175,
6874
+ "learning_rate": 9.854736428633605e-09,
6875
+ "loss": 2.594,
6876
+ "step": 958
6877
+ },
6878
+ {
6879
+ "epoch": 1.9491869918699187,
6880
+ "grad_norm": 0.4715903401374817,
6881
+ "learning_rate": 9.138710944195938e-09,
6882
+ "loss": 2.3257,
6883
+ "step": 959
6884
+ },
6885
+ {
6886
+ "epoch": 1.951219512195122,
6887
+ "grad_norm": 0.3530280590057373,
6888
+ "learning_rate": 8.449640634639878e-09,
6889
+ "loss": 2.4252,
6890
+ "step": 960
6891
+ },
6892
+ {
6893
+ "epoch": 1.953252032520325,
6894
+ "grad_norm": 0.3186476528644562,
6895
+ "learning_rate": 7.787532956828048e-09,
6896
+ "loss": 2.548,
6897
+ "step": 961
6898
+ },
6899
+ {
6900
+ "epoch": 1.9552845528455285,
6901
+ "grad_norm": 0.29438072443008423,
6902
+ "learning_rate": 7.152395075843421e-09,
6903
+ "loss": 2.4232,
6904
+ "step": 962
6905
+ },
6906
+ {
6907
+ "epoch": 1.9573170731707317,
6908
+ "grad_norm": 0.3598599135875702,
6909
+ "learning_rate": 6.544233864911875e-09,
6910
+ "loss": 2.1371,
6911
+ "step": 963
6912
+ },
6913
+ {
6914
+ "epoch": 1.959349593495935,
6915
+ "grad_norm": 0.43737784028053284,
6916
+ "learning_rate": 5.963055905328363e-09,
6917
+ "loss": 2.3519,
6918
+ "step": 964
6919
+ },
6920
+ {
6921
+ "epoch": 1.9613821138211383,
6922
+ "grad_norm": 0.40080031752586365,
6923
+ "learning_rate": 5.408867486384473e-09,
6924
+ "loss": 2.7944,
6925
+ "step": 965
6926
+ },
6927
+ {
6928
+ "epoch": 1.9634146341463414,
6929
+ "grad_norm": 0.35485896468162537,
6930
+ "learning_rate": 4.881674605301534e-09,
6931
+ "loss": 2.5407,
6932
+ "step": 966
6933
+ },
6934
+ {
6935
+ "epoch": 1.9654471544715446,
6936
+ "grad_norm": 0.2693948745727539,
6937
+ "learning_rate": 4.381482967164285e-09,
6938
+ "loss": 2.5014,
6939
+ "step": 967
6940
+ },
6941
+ {
6942
+ "epoch": 1.967479674796748,
6943
+ "grad_norm": 0.36196112632751465,
6944
+ "learning_rate": 3.908297984861198e-09,
6945
+ "loss": 2.3405,
6946
+ "step": 968
6947
+ },
6948
+ {
6949
+ "epoch": 1.9695121951219512,
6950
+ "grad_norm": 0.3092941343784332,
6951
+ "learning_rate": 3.4621247790245227e-09,
6952
+ "loss": 2.4542,
6953
+ "step": 969
6954
+ },
6955
+ {
6956
+ "epoch": 1.9715447154471546,
6957
+ "grad_norm": 0.3844071328639984,
6958
+ "learning_rate": 3.0429681779739485e-09,
6959
+ "loss": 2.4375,
6960
+ "step": 970
6961
+ },
6962
+ {
6963
+ "epoch": 1.9735772357723578,
6964
+ "grad_norm": 0.34891366958618164,
6965
+ "learning_rate": 2.6508327176671953e-09,
6966
+ "loss": 2.5139,
6967
+ "step": 971
6968
+ },
6969
+ {
6970
+ "epoch": 1.975609756097561,
6971
+ "grad_norm": 0.6044580340385437,
6972
+ "learning_rate": 2.285722641647836e-09,
6973
+ "loss": 2.5668,
6974
+ "step": 972
6975
+ },
6976
+ {
6977
+ "epoch": 1.9776422764227641,
6978
+ "grad_norm": 0.389663964509964,
6979
+ "learning_rate": 1.947641901001995e-09,
6980
+ "loss": 2.4824,
6981
+ "step": 973
6982
+ },
6983
+ {
6984
+ "epoch": 1.9796747967479673,
6985
+ "grad_norm": 0.34469953179359436,
6986
+ "learning_rate": 1.6365941543131093e-09,
6987
+ "loss": 2.5478,
6988
+ "step": 974
6989
+ },
6990
+ {
6991
+ "epoch": 1.9817073170731707,
6992
+ "grad_norm": 0.34834548830986023,
6993
+ "learning_rate": 1.3525827676247327e-09,
6994
+ "loss": 2.6576,
6995
+ "step": 975
6996
+ },
6997
+ {
6998
+ "epoch": 1.9837398373983741,
6999
+ "grad_norm": 0.39558902382850647,
7000
+ "learning_rate": 1.0956108144025145e-09,
7001
+ "loss": 2.3903,
7002
+ "step": 976
7003
+ },
7004
+ {
7005
+ "epoch": 1.9857723577235773,
7006
+ "grad_norm": 0.38120120763778687,
7007
+ "learning_rate": 8.656810755008904e-10,
7008
+ "loss": 2.7058,
7009
+ "step": 977
7010
+ },
7011
+ {
7012
+ "epoch": 1.9878048780487805,
7013
+ "grad_norm": 0.37062206864356995,
7014
+ "learning_rate": 6.62796039134772e-10,
7015
+ "loss": 2.5938,
7016
+ "step": 978
7017
+ },
7018
+ {
7019
+ "epoch": 1.9898373983739837,
7020
+ "grad_norm": 0.3332228362560272,
7021
+ "learning_rate": 4.869579008498493e-10,
7022
+ "loss": 2.4417,
7023
+ "step": 979
7024
+ },
7025
+ {
7026
+ "epoch": 1.9918699186991868,
7027
+ "grad_norm": 0.37683814764022827,
7028
+ "learning_rate": 3.3816856350177284e-10,
7029
+ "loss": 2.5171,
7030
+ "step": 980
7031
+ },
7032
+ {
7033
+ "epoch": 1.9939024390243902,
7034
+ "grad_norm": 0.5217941999435425,
7035
+ "learning_rate": 2.1642963723284006e-10,
7036
+ "loss": 3.2324,
7037
+ "step": 981
7038
+ },
7039
+ {
7040
+ "epoch": 1.9959349593495936,
7041
+ "grad_norm": 0.35729387402534485,
7042
+ "learning_rate": 1.2174243945672905e-10,
7043
+ "loss": 2.2729,
7044
+ "step": 982
7045
+ },
7046
+ {
7047
+ "epoch": 1.9979674796747968,
7048
+ "grad_norm": 0.3736533522605896,
7049
+ "learning_rate": 5.410799484323326e-11,
7050
+ "loss": 2.3411,
7051
+ "step": 983
7052
+ },
7053
+ {
7054
+ "epoch": 2.0,
7055
+ "grad_norm": 0.780532717704773,
7056
+ "learning_rate": 1.3527035306881708e-11,
7057
+ "loss": 2.3983,
7058
+ "step": 984
7059
  }
7060
  ],
7061
  "logging_steps": 1,
 
7070
  "should_evaluate": false,
7071
  "should_log": false,
7072
  "should_save": true,
7073
+ "should_training_stop": true
7074
  },
7075
  "attributes": {}
7076
  }
7077
  },
7078
+ "total_flos": 9.518718229460025e+18,
7079
  "train_batch_size": 1,
7080
  "trial_name": null,
7081
  "trial_params": null