Training in progress, step 984, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456206152
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aba653b21fbd27e90fa714e31d72bee8efe48fa6d68df3f7881b0f0894aab893
|
| 3 |
size 456206152
|
last-checkpoint/optimizer.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 912763251
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3676ffa9d2a4a6b45ff054c1c5e6d9f894f3b60e7b6788564173fff0bcaa9893
|
| 3 |
size 912763251
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456340209
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56b3eee5fbed0d56604e1bc7489601d03cdfcc77e7ec58abb77fccc0880116b1
|
| 3 |
size 456340209
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b69aed956fe05a1bdbff0aa9f2338ca77aa7f5e0f752b5b5b9d31579d891cfac
|
| 3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23b176d2ab9e2baaafc3d2752100872fba127bb76b8c8299ae0e041821c6ac5a
|
| 3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bbdcb7c5a15d09694d6927296d9c5c9ee11f0dbd3ee3bb9a8d6bf66b0db98eb
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6818,6 +6818,244 @@
|
|
| 6818 |
"eval_samples_per_second": 0.257,
|
| 6819 |
"eval_steps_per_second": 0.134,
|
| 6820 |
"step": 950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6821 |
}
|
| 6822 |
],
|
| 6823 |
"logging_steps": 1,
|
|
@@ -6832,12 +7070,12 @@
|
|
| 6832 |
"should_evaluate": false,
|
| 6833 |
"should_log": false,
|
| 6834 |
"should_save": true,
|
| 6835 |
-
"should_training_stop":
|
| 6836 |
},
|
| 6837 |
"attributes": {}
|
| 6838 |
}
|
| 6839 |
},
|
| 6840 |
-
"total_flos": 9.
|
| 6841 |
"train_batch_size": 1,
|
| 6842 |
"trial_name": null,
|
| 6843 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 984,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6818 |
"eval_samples_per_second": 0.257,
|
| 6819 |
"eval_steps_per_second": 0.134,
|
| 6820 |
"step": 950
|
| 6821 |
+
},
|
| 6822 |
+
{
|
| 6823 |
+
"epoch": 1.9329268292682928,
|
| 6824 |
+
"grad_norm": 0.30121544003486633,
|
| 6825 |
+
"learning_rate": 1.5620972104934408e-08,
|
| 6826 |
+
"loss": 2.6291,
|
| 6827 |
+
"step": 951
|
| 6828 |
+
},
|
| 6829 |
+
{
|
| 6830 |
+
"epoch": 1.934959349593496,
|
| 6831 |
+
"grad_norm": 0.48420703411102295,
|
| 6832 |
+
"learning_rate": 1.4716493680226596e-08,
|
| 6833 |
+
"loss": 2.4335,
|
| 6834 |
+
"step": 952
|
| 6835 |
+
},
|
| 6836 |
+
{
|
| 6837 |
+
"epoch": 1.9369918699186992,
|
| 6838 |
+
"grad_norm": 0.3064318299293518,
|
| 6839 |
+
"learning_rate": 1.3838910069708944e-08,
|
| 6840 |
+
"loss": 2.4035,
|
| 6841 |
+
"step": 953
|
| 6842 |
+
},
|
| 6843 |
+
{
|
| 6844 |
+
"epoch": 1.9390243902439024,
|
| 6845 |
+
"grad_norm": 0.42194581031799316,
|
| 6846 |
+
"learning_rate": 1.2988230770265287e-08,
|
| 6847 |
+
"loss": 2.7248,
|
| 6848 |
+
"step": 954
|
| 6849 |
+
},
|
| 6850 |
+
{
|
| 6851 |
+
"epoch": 1.9410569105691056,
|
| 6852 |
+
"grad_norm": 0.3294110894203186,
|
| 6853 |
+
"learning_rate": 1.2164464987630131e-08,
|
| 6854 |
+
"loss": 2.2102,
|
| 6855 |
+
"step": 955
|
| 6856 |
+
},
|
| 6857 |
+
{
|
| 6858 |
+
"epoch": 1.943089430894309,
|
| 6859 |
+
"grad_norm": 0.37185436487197876,
|
| 6860 |
+
"learning_rate": 1.1367621636291237e-08,
|
| 6861 |
+
"loss": 2.8291,
|
| 6862 |
+
"step": 956
|
| 6863 |
+
},
|
| 6864 |
+
{
|
| 6865 |
+
"epoch": 1.9451219512195121,
|
| 6866 |
+
"grad_norm": 0.4209384024143219,
|
| 6867 |
+
"learning_rate": 1.0597709339390806e-08,
|
| 6868 |
+
"loss": 2.2557,
|
| 6869 |
+
"step": 957
|
| 6870 |
+
},
|
| 6871 |
+
{
|
| 6872 |
+
"epoch": 1.9471544715447155,
|
| 6873 |
+
"grad_norm": 0.30791327357292175,
|
| 6874 |
+
"learning_rate": 9.854736428633605e-09,
|
| 6875 |
+
"loss": 2.594,
|
| 6876 |
+
"step": 958
|
| 6877 |
+
},
|
| 6878 |
+
{
|
| 6879 |
+
"epoch": 1.9491869918699187,
|
| 6880 |
+
"grad_norm": 0.4715903401374817,
|
| 6881 |
+
"learning_rate": 9.138710944195938e-09,
|
| 6882 |
+
"loss": 2.3257,
|
| 6883 |
+
"step": 959
|
| 6884 |
+
},
|
| 6885 |
+
{
|
| 6886 |
+
"epoch": 1.951219512195122,
|
| 6887 |
+
"grad_norm": 0.3530280590057373,
|
| 6888 |
+
"learning_rate": 8.449640634639878e-09,
|
| 6889 |
+
"loss": 2.4252,
|
| 6890 |
+
"step": 960
|
| 6891 |
+
},
|
| 6892 |
+
{
|
| 6893 |
+
"epoch": 1.953252032520325,
|
| 6894 |
+
"grad_norm": 0.3186476528644562,
|
| 6895 |
+
"learning_rate": 7.787532956828048e-09,
|
| 6896 |
+
"loss": 2.548,
|
| 6897 |
+
"step": 961
|
| 6898 |
+
},
|
| 6899 |
+
{
|
| 6900 |
+
"epoch": 1.9552845528455285,
|
| 6901 |
+
"grad_norm": 0.29438072443008423,
|
| 6902 |
+
"learning_rate": 7.152395075843421e-09,
|
| 6903 |
+
"loss": 2.4232,
|
| 6904 |
+
"step": 962
|
| 6905 |
+
},
|
| 6906 |
+
{
|
| 6907 |
+
"epoch": 1.9573170731707317,
|
| 6908 |
+
"grad_norm": 0.3598599135875702,
|
| 6909 |
+
"learning_rate": 6.544233864911875e-09,
|
| 6910 |
+
"loss": 2.1371,
|
| 6911 |
+
"step": 963
|
| 6912 |
+
},
|
| 6913 |
+
{
|
| 6914 |
+
"epoch": 1.959349593495935,
|
| 6915 |
+
"grad_norm": 0.43737784028053284,
|
| 6916 |
+
"learning_rate": 5.963055905328363e-09,
|
| 6917 |
+
"loss": 2.3519,
|
| 6918 |
+
"step": 964
|
| 6919 |
+
},
|
| 6920 |
+
{
|
| 6921 |
+
"epoch": 1.9613821138211383,
|
| 6922 |
+
"grad_norm": 0.40080031752586365,
|
| 6923 |
+
"learning_rate": 5.408867486384473e-09,
|
| 6924 |
+
"loss": 2.7944,
|
| 6925 |
+
"step": 965
|
| 6926 |
+
},
|
| 6927 |
+
{
|
| 6928 |
+
"epoch": 1.9634146341463414,
|
| 6929 |
+
"grad_norm": 0.35485896468162537,
|
| 6930 |
+
"learning_rate": 4.881674605301534e-09,
|
| 6931 |
+
"loss": 2.5407,
|
| 6932 |
+
"step": 966
|
| 6933 |
+
},
|
| 6934 |
+
{
|
| 6935 |
+
"epoch": 1.9654471544715446,
|
| 6936 |
+
"grad_norm": 0.2693948745727539,
|
| 6937 |
+
"learning_rate": 4.381482967164285e-09,
|
| 6938 |
+
"loss": 2.5014,
|
| 6939 |
+
"step": 967
|
| 6940 |
+
},
|
| 6941 |
+
{
|
| 6942 |
+
"epoch": 1.967479674796748,
|
| 6943 |
+
"grad_norm": 0.36196112632751465,
|
| 6944 |
+
"learning_rate": 3.908297984861198e-09,
|
| 6945 |
+
"loss": 2.3405,
|
| 6946 |
+
"step": 968
|
| 6947 |
+
},
|
| 6948 |
+
{
|
| 6949 |
+
"epoch": 1.9695121951219512,
|
| 6950 |
+
"grad_norm": 0.3092941343784332,
|
| 6951 |
+
"learning_rate": 3.4621247790245227e-09,
|
| 6952 |
+
"loss": 2.4542,
|
| 6953 |
+
"step": 969
|
| 6954 |
+
},
|
| 6955 |
+
{
|
| 6956 |
+
"epoch": 1.9715447154471546,
|
| 6957 |
+
"grad_norm": 0.3844071328639984,
|
| 6958 |
+
"learning_rate": 3.0429681779739485e-09,
|
| 6959 |
+
"loss": 2.4375,
|
| 6960 |
+
"step": 970
|
| 6961 |
+
},
|
| 6962 |
+
{
|
| 6963 |
+
"epoch": 1.9735772357723578,
|
| 6964 |
+
"grad_norm": 0.34891366958618164,
|
| 6965 |
+
"learning_rate": 2.6508327176671953e-09,
|
| 6966 |
+
"loss": 2.5139,
|
| 6967 |
+
"step": 971
|
| 6968 |
+
},
|
| 6969 |
+
{
|
| 6970 |
+
"epoch": 1.975609756097561,
|
| 6971 |
+
"grad_norm": 0.6044580340385437,
|
| 6972 |
+
"learning_rate": 2.285722641647836e-09,
|
| 6973 |
+
"loss": 2.5668,
|
| 6974 |
+
"step": 972
|
| 6975 |
+
},
|
| 6976 |
+
{
|
| 6977 |
+
"epoch": 1.9776422764227641,
|
| 6978 |
+
"grad_norm": 0.389663964509964,
|
| 6979 |
+
"learning_rate": 1.947641901001995e-09,
|
| 6980 |
+
"loss": 2.4824,
|
| 6981 |
+
"step": 973
|
| 6982 |
+
},
|
| 6983 |
+
{
|
| 6984 |
+
"epoch": 1.9796747967479673,
|
| 6985 |
+
"grad_norm": 0.34469953179359436,
|
| 6986 |
+
"learning_rate": 1.6365941543131093e-09,
|
| 6987 |
+
"loss": 2.5478,
|
| 6988 |
+
"step": 974
|
| 6989 |
+
},
|
| 6990 |
+
{
|
| 6991 |
+
"epoch": 1.9817073170731707,
|
| 6992 |
+
"grad_norm": 0.34834548830986023,
|
| 6993 |
+
"learning_rate": 1.3525827676247327e-09,
|
| 6994 |
+
"loss": 2.6576,
|
| 6995 |
+
"step": 975
|
| 6996 |
+
},
|
| 6997 |
+
{
|
| 6998 |
+
"epoch": 1.9837398373983741,
|
| 6999 |
+
"grad_norm": 0.39558902382850647,
|
| 7000 |
+
"learning_rate": 1.0956108144025145e-09,
|
| 7001 |
+
"loss": 2.3903,
|
| 7002 |
+
"step": 976
|
| 7003 |
+
},
|
| 7004 |
+
{
|
| 7005 |
+
"epoch": 1.9857723577235773,
|
| 7006 |
+
"grad_norm": 0.38120120763778687,
|
| 7007 |
+
"learning_rate": 8.656810755008904e-10,
|
| 7008 |
+
"loss": 2.7058,
|
| 7009 |
+
"step": 977
|
| 7010 |
+
},
|
| 7011 |
+
{
|
| 7012 |
+
"epoch": 1.9878048780487805,
|
| 7013 |
+
"grad_norm": 0.37062206864356995,
|
| 7014 |
+
"learning_rate": 6.62796039134772e-10,
|
| 7015 |
+
"loss": 2.5938,
|
| 7016 |
+
"step": 978
|
| 7017 |
+
},
|
| 7018 |
+
{
|
| 7019 |
+
"epoch": 1.9898373983739837,
|
| 7020 |
+
"grad_norm": 0.3332228362560272,
|
| 7021 |
+
"learning_rate": 4.869579008498493e-10,
|
| 7022 |
+
"loss": 2.4417,
|
| 7023 |
+
"step": 979
|
| 7024 |
+
},
|
| 7025 |
+
{
|
| 7026 |
+
"epoch": 1.9918699186991868,
|
| 7027 |
+
"grad_norm": 0.37683814764022827,
|
| 7028 |
+
"learning_rate": 3.3816856350177284e-10,
|
| 7029 |
+
"loss": 2.5171,
|
| 7030 |
+
"step": 980
|
| 7031 |
+
},
|
| 7032 |
+
{
|
| 7033 |
+
"epoch": 1.9939024390243902,
|
| 7034 |
+
"grad_norm": 0.5217941999435425,
|
| 7035 |
+
"learning_rate": 2.1642963723284006e-10,
|
| 7036 |
+
"loss": 3.2324,
|
| 7037 |
+
"step": 981
|
| 7038 |
+
},
|
| 7039 |
+
{
|
| 7040 |
+
"epoch": 1.9959349593495936,
|
| 7041 |
+
"grad_norm": 0.35729387402534485,
|
| 7042 |
+
"learning_rate": 1.2174243945672905e-10,
|
| 7043 |
+
"loss": 2.2729,
|
| 7044 |
+
"step": 982
|
| 7045 |
+
},
|
| 7046 |
+
{
|
| 7047 |
+
"epoch": 1.9979674796747968,
|
| 7048 |
+
"grad_norm": 0.3736533522605896,
|
| 7049 |
+
"learning_rate": 5.410799484323326e-11,
|
| 7050 |
+
"loss": 2.3411,
|
| 7051 |
+
"step": 983
|
| 7052 |
+
},
|
| 7053 |
+
{
|
| 7054 |
+
"epoch": 2.0,
|
| 7055 |
+
"grad_norm": 0.780532717704773,
|
| 7056 |
+
"learning_rate": 1.3527035306881708e-11,
|
| 7057 |
+
"loss": 2.3983,
|
| 7058 |
+
"step": 984
|
| 7059 |
}
|
| 7060 |
],
|
| 7061 |
"logging_steps": 1,
|
|
|
|
| 7070 |
"should_evaluate": false,
|
| 7071 |
"should_log": false,
|
| 7072 |
"should_save": true,
|
| 7073 |
+
"should_training_stop": true
|
| 7074 |
},
|
| 7075 |
"attributes": {}
|
| 7076 |
}
|
| 7077 |
},
|
| 7078 |
+
"total_flos": 9.518718229460025e+18,
|
| 7079 |
"train_batch_size": 1,
|
| 7080 |
"trial_name": null,
|
| 7081 |
"trial_params": null
|