{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.789564097058193, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 3.9659, "step": 72 }, { "epoch": 0.99, "eval_bleu": 10.8438, "eval_bp": 0.7379, "eval_counts_1": 7244, "eval_counts_2": 2547, "eval_counts_3": 1183, "eval_counts_4": 565, "eval_exact_match": 0.0136, "eval_f1": 0.3139, "eval_gen_len": 11.7786, "eval_loss": 1.4144511222839355, "eval_precisions_1": 44.4526, "eval_precisions_2": 18.0741, "eval_precisions_3": 9.9512, "eval_precisions_4": 5.8344, "eval_ref_len": 21250, "eval_rouge1": 0.3213, "eval_rouge2": 0.1608, "eval_rougeL": 0.3091, "eval_rougeLsum": 0.309, "eval_runtime": 2106.9539, "eval_samples_per_second": 1.046, "eval_steps_per_second": 1.046, "eval_sys_len": 16296, "eval_totals_1": 16296, "eval_totals_2": 14092, "eval_totals_3": 11888, "eval_totals_4": 9684, "step": 72 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 1.7081, "step": 145 }, { "epoch": 1.99, "eval_bleu": 13.2044, "eval_bp": 0.7697, "eval_counts_1": 7865, "eval_counts_2": 3037, "eval_counts_3": 1498, "eval_counts_4": 759, "eval_exact_match": 0.0181, "eval_f1": 0.3481, "eval_gen_len": 12.225, "eval_loss": 1.263157844543457, "eval_precisions_1": 46.7015, "eval_precisions_2": 20.7488, "eval_precisions_3": 12.0486, "eval_precisions_4": 7.4201, "eval_ref_len": 21250, "eval_rouge1": 0.3577, "eval_rouge2": 0.189, "eval_rougeL": 0.3438, "eval_rougeLsum": 0.3439, "eval_runtime": 3942.8178, "eval_samples_per_second": 0.559, "eval_steps_per_second": 0.559, "eval_sys_len": 16841, "eval_totals_1": 16841, "eval_totals_2": 14637, "eval_totals_3": 12433, "eval_totals_4": 10229, "step": 145 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 1.4856, "step": 218 }, { "epoch": 3.0, "eval_bleu": 15.6014, "eval_bp": 0.8142, "eval_counts_1": 8608, "eval_counts_2": 3519, "eval_counts_3": 1818, "eval_counts_4": 969, "eval_exact_match": 0.0268, "eval_f1": 0.3882, "eval_gen_len": 13.0027, "eval_loss": 1.1974213123321533, "eval_precisions_1": 48.8342, "eval_precisions_2": 22.8166, "eval_precisions_3": 13.7529, "eval_precisions_4": 8.7971, "eval_ref_len": 21250, "eval_rouge1": 0.3969, "eval_rouge2": 0.2181, "eval_rougeL": 0.381, "eval_rougeLsum": 0.3812, "eval_runtime": 4069.754, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.542, "eval_sys_len": 17627, "eval_totals_1": 17627, "eval_totals_2": 15423, "eval_totals_3": 13219, "eval_totals_4": 11015, "step": 218 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 1.3277, "step": 291 }, { "epoch": 4.0, "eval_bleu": 16.4313, "eval_bp": 0.8052, "eval_counts_1": 9018, "eval_counts_2": 3702, "eval_counts_3": 1907, "eval_counts_4": 1029, "eval_exact_match": 0.0313, "eval_f1": 0.4156, "eval_gen_len": 12.8716, "eval_loss": 1.1393847465515137, "eval_precisions_1": 51.6347, "eval_precisions_2": 24.2579, "eval_precisions_3": 14.6052, "eval_precisions_4": 9.4812, "eval_ref_len": 21250, "eval_rouge1": 0.424, "eval_rouge2": 0.2321, "eval_rougeL": 0.4087, "eval_rougeLsum": 0.4085, "eval_runtime": 4037.7601, "eval_samples_per_second": 0.546, "eval_steps_per_second": 0.546, "eval_sys_len": 17465, "eval_totals_1": 17465, "eval_totals_2": 15261, "eval_totals_3": 13057, "eval_totals_4": 10853, "step": 291 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 1.2314, "step": 363 }, { "epoch": 4.99, "eval_bleu": 17.0718, "eval_bp": 0.8235, "eval_counts_1": 9240, "eval_counts_2": 3869, "eval_counts_3": 1994, "eval_counts_4": 1076, "eval_exact_match": 0.0363, "eval_f1": 0.4256, "eval_gen_len": 13.2137, "eval_loss": 1.1193382740020752, "eval_precisions_1": 51.9276, "eval_precisions_2": 24.8172, "eval_precisions_3": 14.8962, "eval_precisions_4": 9.6226, "eval_ref_len": 21250, "eval_rouge1": 0.4336, "eval_rouge2": 0.2413, "eval_rougeL": 0.4183, "eval_rougeLsum": 0.418, "eval_runtime": 4116.6581, "eval_samples_per_second": 0.535, "eval_steps_per_second": 0.535, "eval_sys_len": 17794, "eval_totals_1": 17794, "eval_totals_2": 15590, "eval_totals_3": 13386, "eval_totals_4": 11182, "step": 363 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 1.1264, "step": 436 }, { "epoch": 5.99, "eval_bleu": 17.4744, "eval_bp": 0.8072, "eval_counts_1": 9263, "eval_counts_2": 3908, "eval_counts_3": 2055, "eval_counts_4": 1127, "eval_exact_match": 0.0372, "eval_f1": 0.4309, "eval_gen_len": 13.034, "eval_loss": 1.1085509061813354, "eval_precisions_1": 52.9254, "eval_precisions_2": 25.5458, "eval_precisions_3": 15.6942, "eval_precisions_4": 10.3489, "eval_ref_len": 21250, "eval_rouge1": 0.4383, "eval_rouge2": 0.2452, "eval_rougeL": 0.4239, "eval_rougeLsum": 0.4237, "eval_runtime": 3709.3886, "eval_samples_per_second": 0.594, "eval_steps_per_second": 0.594, "eval_sys_len": 17502, "eval_totals_1": 17502, "eval_totals_2": 15298, "eval_totals_3": 13094, "eval_totals_4": 10890, "step": 436 }, { "epoch": 7.0, "learning_rate": 0.0001, "loss": 1.0469, "step": 509 }, { "epoch": 7.0, "eval_bleu": 18.0906, "eval_bp": 0.8363, "eval_counts_1": 9434, "eval_counts_2": 4034, "eval_counts_3": 2146, "eval_counts_4": 1189, "eval_exact_match": 0.039, "eval_f1": 0.4348, "eval_gen_len": 13.422, "eval_loss": 1.103752851486206, "eval_precisions_1": 52.3297, "eval_precisions_2": 25.4929, "eval_precisions_3": 15.7562, "eval_precisions_4": 10.4152, "eval_ref_len": 21250, "eval_rouge1": 0.4433, "eval_rouge2": 0.2505, "eval_rougeL": 0.4286, "eval_rougeLsum": 0.4282, "eval_runtime": 4081.2971, "eval_samples_per_second": 0.54, "eval_steps_per_second": 0.54, "eval_sys_len": 18028, "eval_totals_1": 18028, "eval_totals_2": 15824, "eval_totals_3": 13620, "eval_totals_4": 11416, "step": 509 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 0.9874, "step": 582 }, { "epoch": 8.0, "eval_bleu": 19.1287, "eval_bp": 0.8539, "eval_counts_1": 9746, "eval_counts_2": 4265, "eval_counts_3": 2287, "eval_counts_4": 1285, "eval_exact_match": 0.0454, "eval_f1": 0.4498, "eval_gen_len": 13.6466, "eval_loss": 1.0989724397659302, "eval_precisions_1": 53.1088, "eval_precisions_2": 26.4136, "eval_precisions_3": 16.4025, "eval_precisions_4": 10.9464, "eval_ref_len": 21250, "eval_rouge1": 0.457, "eval_rouge2": 0.2627, "eval_rougeL": 0.4417, "eval_rougeLsum": 0.4416, "eval_runtime": 2875.9709, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.766, "eval_sys_len": 18351, "eval_totals_1": 18351, "eval_totals_2": 16147, "eval_totals_3": 13943, "eval_totals_4": 11739, "step": 582 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 0.9488, "step": 654 }, { "epoch": 8.99, "eval_bleu": 18.2172, "eval_bp": 0.8255, "eval_counts_1": 9484, "eval_counts_2": 4062, "eval_counts_3": 2158, "eval_counts_4": 1197, "eval_exact_match": 0.0431, "eval_f1": 0.4399, "eval_gen_len": 13.2763, "eval_loss": 1.1175453662872314, "eval_precisions_1": 53.1883, "eval_precisions_2": 25.9935, "eval_precisions_3": 16.0769, "eval_precisions_4": 10.6694, "eval_ref_len": 21250, "eval_rouge1": 0.4482, "eval_rouge2": 0.2548, "eval_rougeL": 0.4338, "eval_rougeLsum": 0.4333, "eval_runtime": 4231.6184, "eval_samples_per_second": 0.521, "eval_steps_per_second": 0.521, "eval_sys_len": 17831, "eval_totals_1": 17831, "eval_totals_2": 15627, "eval_totals_3": 13423, "eval_totals_4": 11219, "step": 654 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 0.8893, "step": 727 }, { "epoch": 9.99, "eval_bleu": 19.064, "eval_bp": 0.8357, "eval_counts_1": 9650, "eval_counts_2": 4205, "eval_counts_3": 2289, "eval_counts_4": 1289, "eval_exact_match": 0.0463, "eval_f1": 0.4472, "eval_gen_len": 13.4251, "eval_loss": 1.1221915483474731, "eval_precisions_1": 53.5605, "eval_precisions_2": 26.592, "eval_precisions_3": 16.8198, "eval_precisions_4": 11.3021, "eval_ref_len": 21250, "eval_rouge1": 0.4543, "eval_rouge2": 0.262, "eval_rougeL": 0.4396, "eval_rougeLsum": 0.4394, "eval_runtime": 4369.7974, "eval_samples_per_second": 0.504, "eval_steps_per_second": 0.504, "eval_sys_len": 18017, "eval_totals_1": 18017, "eval_totals_2": 15813, "eval_totals_3": 13609, "eval_totals_4": 11405, "step": 727 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 0.8362, "step": 800 }, { "epoch": 10.99, "eval_bleu": 19.052, "eval_bp": 0.8474, "eval_counts_1": 9706, "eval_counts_2": 4232, "eval_counts_3": 2279, "eval_counts_4": 1281, "eval_exact_match": 0.0472, "eval_f1": 0.4473, "eval_gen_len": 13.6021, "eval_loss": 1.1342219114303589, "eval_precisions_1": 53.2361, "eval_precisions_2": 26.4038, "eval_precisions_3": 16.4858, "eval_precisions_4": 11.0241, "eval_ref_len": 21250, "eval_rouge1": 0.4551, "eval_rouge2": 0.2632, "eval_rougeL": 0.4395, "eval_rougeLsum": 0.4393, "eval_runtime": 4741.4712, "eval_samples_per_second": 0.465, "eval_steps_per_second": 0.465, "eval_sys_len": 18232, "eval_totals_1": 18232, "eval_totals_2": 16028, "eval_totals_3": 13824, "eval_totals_4": 11620, "step": 800 }, { "epoch": 12.0, "learning_rate": 0.0001, "loss": 0.7835, "step": 873 }, { "epoch": 12.0, "eval_bleu": 19.169, "eval_bp": 0.8614, "eval_counts_1": 9802, "eval_counts_2": 4280, "eval_counts_3": 2292, "eval_counts_4": 1285, "eval_exact_match": 0.0472, "eval_f1": 0.4497, "eval_gen_len": 14.0168, "eval_loss": 1.1426819562911987, "eval_precisions_1": 53.0096, "eval_precisions_2": 26.2786, "eval_precisions_3": 16.2749, "eval_precisions_4": 10.8174, "eval_ref_len": 21250, "eval_rouge1": 0.458, "eval_rouge2": 0.2634, "eval_rougeL": 0.4414, "eval_rougeLsum": 0.4412, "eval_runtime": 2858.9204, "eval_samples_per_second": 0.771, "eval_steps_per_second": 0.771, "eval_sys_len": 18491, "eval_totals_1": 18491, "eval_totals_2": 16287, "eval_totals_3": 14083, "eval_totals_4": 11879, "step": 873 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 0.7441, "step": 945 }, { "epoch": 12.99, "eval_bleu": 19.3443, "eval_bp": 0.8618, "eval_counts_1": 9816, "eval_counts_2": 4323, "eval_counts_3": 2334, "eval_counts_4": 1294, "eval_exact_match": 0.0463, "eval_f1": 0.4493, "eval_gen_len": 13.8348, "eval_loss": 1.1669002771377563, "eval_precisions_1": 53.0652, "eval_precisions_2": 26.5312, "eval_precisions_3": 16.5649, "eval_precisions_4": 10.8868, "eval_ref_len": 21250, "eval_rouge1": 0.4577, "eval_rouge2": 0.2659, "eval_rougeL": 0.4418, "eval_rougeLsum": 0.4417, "eval_runtime": 2130.8, "eval_samples_per_second": 1.034, "eval_steps_per_second": 1.034, "eval_sys_len": 18498, "eval_totals_1": 18498, "eval_totals_2": 16294, "eval_totals_3": 14090, "eval_totals_4": 11886, "step": 945 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 0.7012, "step": 1018 }, { "epoch": 13.99, "eval_bleu": 19.7341, "eval_bp": 0.8639, "eval_counts_1": 9856, "eval_counts_2": 4364, "eval_counts_3": 2375, "eval_counts_4": 1360, "eval_exact_match": 0.0476, "eval_f1": 0.4514, "eval_gen_len": 13.976, "eval_loss": 1.1739834547042847, "eval_precisions_1": 53.1693, "eval_precisions_2": 26.7189, "eval_precisions_3": 16.8094, "eval_precisions_4": 11.4046, "eval_ref_len": 21250, "eval_rouge1": 0.4591, "eval_rouge2": 0.2653, "eval_rougeL": 0.443, "eval_rougeLsum": 0.4428, "eval_runtime": 2149.1056, "eval_samples_per_second": 1.026, "eval_steps_per_second": 1.026, "eval_sys_len": 18537, "eval_totals_1": 18537, "eval_totals_2": 16333, "eval_totals_3": 14129, "eval_totals_4": 11925, "step": 1018 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 0.6597, "step": 1091 }, { "epoch": 14.99, "eval_bleu": 19.3289, "eval_bp": 0.8602, "eval_counts_1": 9780, "eval_counts_2": 4292, "eval_counts_3": 2336, "eval_counts_4": 1302, "eval_exact_match": 0.0485, "eval_f1": 0.4492, "eval_gen_len": 13.8802, "eval_loss": 1.1987030506134033, "eval_precisions_1": 52.9565, "eval_precisions_2": 26.3896, "eval_precisions_3": 16.6145, "eval_precisions_4": 10.9818, "eval_ref_len": 21250, "eval_rouge1": 0.457, "eval_rouge2": 0.2633, "eval_rougeL": 0.4418, "eval_rougeLsum": 0.4416, "eval_runtime": 2149.2833, "eval_samples_per_second": 1.025, "eval_steps_per_second": 1.025, "eval_sys_len": 18468, "eval_totals_1": 18468, "eval_totals_2": 16264, "eval_totals_3": 14060, "eval_totals_4": 11856, "step": 1091 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 0.6236, "step": 1164 }, { "epoch": 16.0, "eval_bleu": 19.8055, "eval_bp": 0.8734, "eval_counts_1": 9931, "eval_counts_2": 4388, "eval_counts_3": 2390, "eval_counts_4": 1359, "eval_exact_match": 0.0495, "eval_f1": 0.4538, "eval_gen_len": 14.044, "eval_loss": 1.2135030031204224, "eval_precisions_1": 53.0587, "eval_precisions_2": 26.573, "eval_precisions_3": 16.7028, "eval_precisions_4": 11.2268, "eval_ref_len": 21250, "eval_rouge1": 0.4618, "eval_rouge2": 0.2682, "eval_rougeL": 0.4452, "eval_rougeLsum": 0.445, "eval_runtime": 2168.1341, "eval_samples_per_second": 1.017, "eval_steps_per_second": 1.017, "eval_sys_len": 18717, "eval_totals_1": 18717, "eval_totals_2": 16513, "eval_totals_3": 14309, "eval_totals_4": 12105, "step": 1164 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 0.5933, "step": 1237 }, { "epoch": 17.0, "eval_bleu": 19.5893, "eval_bp": 0.8654, "eval_counts_1": 9806, "eval_counts_2": 4316, "eval_counts_3": 2366, "eval_counts_4": 1348, "eval_exact_match": 0.049, "eval_f1": 0.4485, "eval_gen_len": 14.0622, "eval_loss": 1.2305341958999634, "eval_precisions_1": 52.817, "eval_precisions_2": 26.3782, "eval_precisions_3": 16.7114, "eval_precisions_4": 11.2766, "eval_ref_len": 21250, "eval_rouge1": 0.4571, "eval_rouge2": 0.2628, "eval_rougeL": 0.4407, "eval_rougeLsum": 0.4409, "eval_runtime": 2171.7325, "eval_samples_per_second": 1.015, "eval_steps_per_second": 1.015, "eval_sys_len": 18566, "eval_totals_1": 18566, "eval_totals_2": 16362, "eval_totals_3": 14158, "eval_totals_4": 11954, "step": 1237 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 0.5622, "step": 1309 }, { "epoch": 17.99, "eval_bleu": 19.4914, "eval_bp": 0.865, "eval_counts_1": 9787, "eval_counts_2": 4306, "eval_counts_3": 2346, "eval_counts_4": 1338, "eval_exact_match": 0.0476, "eval_f1": 0.447, "eval_gen_len": 13.7763, "eval_loss": 1.2796473503112793, "eval_precisions_1": 52.7345, "eval_precisions_2": 26.3283, "eval_precisions_3": 16.5783, "eval_precisions_4": 11.1995, "eval_ref_len": 21250, "eval_rouge1": 0.4549, "eval_rouge2": 0.2609, "eval_rougeL": 0.4383, "eval_rougeLsum": 0.4382, "eval_runtime": 2158.5699, "eval_samples_per_second": 1.021, "eval_steps_per_second": 1.021, "eval_sys_len": 18559, "eval_totals_1": 18559, "eval_totals_2": 16355, "eval_totals_3": 14151, "eval_totals_4": 11947, "step": 1309 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 0.5275, "step": 1382 }, { "epoch": 18.99, "eval_bleu": 19.6947, "eval_bp": 0.8857, "eval_counts_1": 9918, "eval_counts_2": 4363, "eval_counts_3": 2374, "eval_counts_4": 1355, "eval_exact_match": 0.0508, "eval_f1": 0.4499, "eval_gen_len": 14.1647, "eval_loss": 1.2833356857299805, "eval_precisions_1": 52.3377, "eval_precisions_2": 26.054, "eval_precisions_3": 16.3251, "eval_precisions_4": 10.9823, "eval_ref_len": 21250, "eval_rouge1": 0.4573, "eval_rouge2": 0.2624, "eval_rougeL": 0.441, "eval_rougeLsum": 0.4408, "eval_runtime": 2190.1704, "eval_samples_per_second": 1.006, "eval_steps_per_second": 1.006, "eval_sys_len": 18950, "eval_totals_1": 18950, "eval_totals_2": 16746, "eval_totals_3": 14542, "eval_totals_4": 12338, "step": 1382 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 0.4986, "step": 1440 }, { "epoch": 19.79, "eval_bleu": 19.4544, "eval_bp": 0.8847, "eval_counts_1": 9879, "eval_counts_2": 4315, "eval_counts_3": 2347, "eval_counts_4": 1324, "eval_exact_match": 0.0495, "eval_f1": 0.4478, "eval_gen_len": 14.2827, "eval_loss": 1.3059108257293701, "eval_precisions_1": 52.1842, "eval_precisions_2": 25.7966, "eval_precisions_3": 16.1606, "eval_precisions_4": 10.7476, "eval_ref_len": 21250, "eval_rouge1": 0.4564, "eval_rouge2": 0.2622, "eval_rougeL": 0.4407, "eval_rougeLsum": 0.4403, "eval_runtime": 3646.8693, "eval_samples_per_second": 0.604, "eval_steps_per_second": 0.604, "eval_sys_len": 18931, "eval_totals_1": 18931, "eval_totals_2": 16727, "eval_totals_3": 14523, "eval_totals_4": 12319, "step": 1440 }, { "epoch": 19.79, "step": 1440, "total_flos": 1.102412878184448e+18, "train_loss": 1.0667428798145717, "train_runtime": 140813.6912, "train_samples_per_second": 1.323, "train_steps_per_second": 0.01 } ], "logging_steps": 500, "max_steps": 1440, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.102412878184448e+18, "trial_name": null, "trial_params": null }