|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.151515151515152, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 1735.82958984375, |
|
"learning_rate": 6.02409638554217e-06, |
|
"loss": 178.0369, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"eval_loss": 141.81396484375, |
|
"eval_runtime": 0.68, |
|
"eval_samples_per_second": 44.12, |
|
"eval_steps_per_second": 5.883, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0303030303030303, |
|
"grad_norm": 878.3958129882812, |
|
"learning_rate": 9.77088948787062e-06, |
|
"loss": 140.1058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.0303030303030303, |
|
"eval_loss": 125.39718627929688, |
|
"eval_runtime": 0.6771, |
|
"eval_samples_per_second": 44.308, |
|
"eval_steps_per_second": 5.908, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"grad_norm": 1332.434326171875, |
|
"learning_rate": 9.097035040431268e-06, |
|
"loss": 114.4198, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"eval_loss": 114.40504455566406, |
|
"eval_runtime": 0.6721, |
|
"eval_samples_per_second": 44.639, |
|
"eval_steps_per_second": 5.952, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.0606060606060606, |
|
"grad_norm": 1204.0079345703125, |
|
"learning_rate": 8.423180592991915e-06, |
|
"loss": 105.3163, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.0606060606060606, |
|
"eval_loss": 108.37521362304688, |
|
"eval_runtime": 0.6767, |
|
"eval_samples_per_second": 44.33, |
|
"eval_steps_per_second": 5.911, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.575757575757576, |
|
"grad_norm": 1180.3629150390625, |
|
"learning_rate": 7.749326145552561e-06, |
|
"loss": 94.5081, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.575757575757576, |
|
"eval_loss": 113.66868591308594, |
|
"eval_runtime": 0.6817, |
|
"eval_samples_per_second": 44.009, |
|
"eval_steps_per_second": 5.868, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"grad_norm": 1183.9482421875, |
|
"learning_rate": 7.0754716981132075e-06, |
|
"loss": 83.6025, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"eval_loss": 109.58412170410156, |
|
"eval_runtime": 0.6798, |
|
"eval_samples_per_second": 44.127, |
|
"eval_steps_per_second": 5.884, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.606060606060606, |
|
"grad_norm": 1322.03515625, |
|
"learning_rate": 6.401617250673855e-06, |
|
"loss": 80.1657, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.606060606060606, |
|
"eval_loss": 106.66357421875, |
|
"eval_runtime": 0.6829, |
|
"eval_samples_per_second": 43.928, |
|
"eval_steps_per_second": 5.857, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.121212121212121, |
|
"grad_norm": 1885.264404296875, |
|
"learning_rate": 5.727762803234502e-06, |
|
"loss": 73.8062, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.121212121212121, |
|
"eval_loss": 115.17276763916016, |
|
"eval_runtime": 0.6809, |
|
"eval_samples_per_second": 44.056, |
|
"eval_steps_per_second": 5.874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.636363636363637, |
|
"grad_norm": 869.5314331054688, |
|
"learning_rate": 5.053908355795149e-06, |
|
"loss": 68.8767, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 13.636363636363637, |
|
"eval_loss": 109.55915832519531, |
|
"eval_runtime": 0.6804, |
|
"eval_samples_per_second": 44.093, |
|
"eval_steps_per_second": 5.879, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.151515151515152, |
|
"grad_norm": 1756.1275634765625, |
|
"learning_rate": 4.380053908355795e-06, |
|
"loss": 69.1187, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.151515151515152, |
|
"eval_loss": 111.99463653564453, |
|
"eval_runtime": 0.6735, |
|
"eval_samples_per_second": 44.543, |
|
"eval_steps_per_second": 5.939, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 825, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|