| layer,module,loss,damp,time | |
| 0,self_attention.query_key_value,0.0000381449,0.05000,0.577 | |
| 0,self_attention.dense,0.0000001857,0.05000,0.267 | |
| 0,mlp.dense_h_to_4h,0.0000368609,0.05000,0.214 | |
| 0,mlp.dense_4h_to_h,0.0000006347,0.05000,0.887 | |
| 1,self_attention.query_key_value,0.0000500630,0.05000,0.227 | |
| 1,self_attention.dense,0.0000001825,0.05000,0.224 | |
| 1,mlp.dense_h_to_4h,0.0000487011,0.05000,0.235 | |
| 1,mlp.dense_4h_to_h,0.0000008219,0.05000,1.018 | |
| 2,self_attention.query_key_value,0.0000561378,0.05000,0.502 | |
| 2,self_attention.dense,0.0000001609,0.05000,0.506 | |
| 2,mlp.dense_h_to_4h,0.0000726370,0.05000,0.561 | |
| 2,mlp.dense_4h_to_h,0.0000010389,0.05000,2.045 | |
| 3,self_attention.query_key_value,0.0000580876,0.05000,0.572 | |
| 3,self_attention.dense,0.0000002400,0.05000,0.565 | |
| 3,mlp.dense_h_to_4h,0.0000801581,0.05000,0.576 | |
| 3,mlp.dense_4h_to_h,0.0000013766,0.05000,2.265 | |
| 4,self_attention.query_key_value,0.0000581960,0.05000,0.554 | |
| 4,self_attention.dense,0.0000003266,0.05000,0.386 | |
| 4,mlp.dense_h_to_4h,0.0000759618,0.05000,0.584 | |
| 4,mlp.dense_4h_to_h,0.0000015705,0.05000,2.402 | |
| 5,self_attention.query_key_value,0.0000669416,0.05000,0.624 | |
| 5,self_attention.dense,0.0000004256,0.05000,0.633 | |
| 5,mlp.dense_h_to_4h,0.0000628368,0.05000,0.610 | |
| 5,mlp.dense_4h_to_h,0.0000012865,0.05000,2.523 | |
| 6,self_attention.query_key_value,0.0000840233,0.05000,0.642 | |
| 6,self_attention.dense,0.0000005805,0.05000,0.608 | |
| 6,mlp.dense_h_to_4h,0.0000768948,0.05000,0.663 | |
| 6,mlp.dense_4h_to_h,0.0000013620,0.05000,2.504 | |
| 7,self_attention.query_key_value,0.0000712257,0.05000,0.658 | |
| 7,self_attention.dense,0.0000005356,0.05000,0.657 | |
| 7,mlp.dense_h_to_4h,0.0000416796,0.05000,0.678 | |
| 7,mlp.dense_4h_to_h,0.0000009094,0.05000,2.609 | |
| 8,self_attention.query_key_value,0.0001037150,0.05000,0.657 | |
| 8,self_attention.dense,0.0000006358,0.05000,0.647 | |
| 8,mlp.dense_h_to_4h,0.0000305985,0.05000,0.682 | |
| 8,mlp.dense_4h_to_h,0.0000007244,0.05000,2.597 | |
| 9,self_attention.query_key_value,0.0001000133,0.05000,0.672 | |
| 9,self_attention.dense,0.0000005241,0.05000,0.685 | |
| 9,mlp.dense_h_to_4h,0.0000235288,0.05000,0.719 | |
| 9,mlp.dense_4h_to_h,0.0000006183,0.05000,2.701 | |
| 10,self_attention.query_key_value,0.0001578843,0.05000,0.646 | |
| 10,self_attention.dense,0.0000007396,0.05000,0.697 | |
| 10,mlp.dense_h_to_4h,0.0000708152,0.05000,0.690 | |
| 10,mlp.dense_4h_to_h,0.0000014019,0.05000,2.684 | |
| 11,self_attention.query_key_value,0.0001074080,0.05000,0.675 | |
| 11,self_attention.dense,0.0000006555,0.05000,0.695 | |
| 11,mlp.dense_h_to_4h,0.0000729902,0.05000,0.668 | |
| 11,mlp.dense_4h_to_h,0.0000014114,0.05000,2.677 | |
| 12,self_attention.query_key_value,0.0001312283,0.05000,0.691 | |
| 12,self_attention.dense,0.0000007142,0.05000,0.702 | |
| 12,mlp.dense_h_to_4h,0.0000915113,0.05000,0.690 | |
| 12,mlp.dense_4h_to_h,0.0000016252,0.05000,2.804 | |
| 13,self_attention.query_key_value,0.0001273397,0.05000,0.693 | |
| 13,self_attention.dense,0.0000007266,0.05000,0.673 | |
| 13,mlp.dense_h_to_4h,0.0000935424,0.05000,0.680 | |
| 13,mlp.dense_4h_to_h,0.0000015646,0.05000,2.833 | |
| 14,self_attention.query_key_value,0.0001211264,0.05000,0.678 | |
| 14,self_attention.dense,0.0000004718,0.05000,0.681 | |
| 14,mlp.dense_h_to_4h,0.0000927910,0.05000,0.670 | |
| 14,mlp.dense_4h_to_h,0.0000015917,0.05000,2.816 | |
| 15,self_attention.query_key_value,0.0001268751,0.05000,0.705 | |
| 15,self_attention.dense,0.0000005188,0.05000,0.688 | |
| 15,mlp.dense_h_to_4h,0.0000943705,0.05000,0.730 | |
| 15,mlp.dense_4h_to_h,0.0000016674,0.05000,2.914 | |
| 16,self_attention.query_key_value,0.0001314014,0.05000,0.710 | |
| 16,self_attention.dense,0.0000006596,0.05000,0.696 | |
| 16,mlp.dense_h_to_4h,0.0000949185,0.05000,0.743 | |
| 16,mlp.dense_4h_to_h,0.0000019402,0.05000,2.793 | |
| 17,self_attention.query_key_value,0.0001188330,0.05000,0.688 | |
| 17,self_attention.dense,0.0000006238,0.05000,0.684 | |
| 17,mlp.dense_h_to_4h,0.0000951754,0.05000,0.707 | |
| 17,mlp.dense_4h_to_h,0.0000023557,0.05000,2.790 | |
| 18,self_attention.query_key_value,0.0001277351,0.05000,0.711 | |
| 18,self_attention.dense,0.0000008486,0.05000,0.696 | |
| 18,mlp.dense_h_to_4h,0.0000987540,0.05000,0.722 | |
| 18,mlp.dense_4h_to_h,0.0000028473,0.05000,2.822 | |
| 19,self_attention.query_key_value,0.0001311309,0.05000,0.717 | |
| 19,self_attention.dense,0.0000011270,0.05000,0.718 | |
| 19,mlp.dense_h_to_4h,0.0001040803,0.05000,0.720 | |
| 19,mlp.dense_4h_to_h,0.0000035554,0.05000,2.850 | |
| 20,self_attention.query_key_value,0.0001372522,0.05000,0.747 | |
| 20,self_attention.dense,0.0000016063,0.05000,0.716 | |
| 20,mlp.dense_h_to_4h,0.0001093306,0.05000,0.750 | |
| 20,mlp.dense_4h_to_h,0.0000055216,0.05000,2.812 | |
| 21,self_attention.query_key_value,0.0001562902,0.05000,0.747 | |
| 21,self_attention.dense,0.0000033466,0.05000,0.724 | |
| 21,mlp.dense_h_to_4h,0.0001095468,0.05000,0.742 | |
| 21,mlp.dense_4h_to_h,0.0000084182,0.05000,2.795 | |
| 22,self_attention.query_key_value,0.0001581889,0.05000,0.717 | |
| 22,self_attention.dense,0.0000028682,0.05000,0.696 | |
| 22,mlp.dense_h_to_4h,0.0001288263,0.05000,0.756 | |
| 22,mlp.dense_4h_to_h,0.0000098412,0.05000,2.843 | |
| 23,self_attention.query_key_value,0.0000716782,0.05000,0.750 | |
| 23,self_attention.dense,0.0000005194,0.05000,0.707 | |
| 23,mlp.dense_h_to_4h,0.0001178907,0.05000,0.723 | |
| 23,mlp.dense_4h_to_h,0.0000037693,0.05000,2.867 | |