{
  "best_metric": 1.0874613523483276,
  "best_model_checkpoint": "_save_11-11-2023/checkpoint-1001200",
  "epoch": 7.0,
  "global_step": 1401680,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.2410107870555335e-07,
      "loss": 3.4578,
      "step": 500
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.4895125848981223e-07,
      "loss": 3.0472,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.738014382740711e-07,
      "loss": 2.7035,
      "step": 1500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9865161805833e-07,
      "loss": 2.5837,
      "step": 2000
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.235017978425889e-07,
      "loss": 2.4866,
      "step": 2500
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.483519776268478e-07,
      "loss": 2.3836,
      "step": 3000
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.732021574111068e-07,
      "loss": 2.3417,
      "step": 3500
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.980523371953656e-07,
      "loss": 2.2788,
      "step": 4000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1224031162604875e-06,
      "loss": 2.2242,
      "step": 4500
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.2472532960447463e-06,
      "loss": 2.1375,
      "step": 5000
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3721034758290053e-06,
      "loss": 2.1452,
      "step": 5500
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.496953655613264e-06,
      "loss": 2.1145,
      "step": 6000
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.6218038353975229e-06,
      "loss": 2.0481,
      "step": 6500
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.7466540151817818e-06,
      "loss": 2.0433,
      "step": 7000
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.8715041949660408e-06,
      "loss": 2.0522,
      "step": 7500
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9963543747503e-06,
      "loss": 2.0218,
      "step": 8000
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.1212045545345586e-06,
      "loss": 1.9539,
      "step": 8500
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.2458050339592492e-06,
      "loss": 1.9942,
      "step": 9000
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.3706552137435082e-06,
      "loss": 1.956,
      "step": 9500
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.495505393527767e-06,
      "loss": 1.925,
      "step": 10000
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.620355573312026e-06,
      "loss": 1.8949,
      "step": 10500
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.7452057530962848e-06,
      "loss": 1.8588,
      "step": 11000
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.8700559328805438e-06,
      "loss": 1.8517,
      "step": 11500
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9949061126648023e-06,
      "loss": 1.8297,
      "step": 12000
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.119506592089493e-06,
      "loss": 1.8312,
      "step": 12500
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.2443567718737516e-06,
      "loss": 1.8389,
      "step": 13000
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.368957251298442e-06,
      "loss": 1.7809,
      "step": 13500
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.493557730723132e-06,
      "loss": 1.831,
      "step": 14000
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.618407910507391e-06,
      "loss": 1.8014,
      "step": 14500
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.74325809029165e-06,
      "loss": 1.8165,
      "step": 15000
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.868108270075909e-06,
      "loss": 1.796,
      "step": 15500
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.992958449860168e-06,
      "loss": 1.795,
      "step": 16000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.117808629644427e-06,
      "loss": 1.7513,
      "step": 16500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.242658809428686e-06,
      "loss": 1.7725,
      "step": 17000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.3675089892129445e-06,
      "loss": 1.7347,
      "step": 17500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.492359168997204e-06,
      "loss": 1.8111,
      "step": 18000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.6172093487814625e-06,
      "loss": 1.6936,
      "step": 18500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.742059528565721e-06,
      "loss": 1.7192,
      "step": 19000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.8669097083499805e-06,
      "loss": 1.731,
      "step": 19500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.991510187774671e-06,
      "loss": 1.6963,
      "step": 20000
    },
    {
      "epoch": 0.1,
      "learning_rate": 5.11636036755893e-06,
      "loss": 1.6988,
      "step": 20500
    },
    {
      "epoch": 0.1,
      "learning_rate": 5.241210547343189e-06,
      "loss": 1.6991,
      "step": 21000
    },
    {
      "epoch": 0.11,
      "learning_rate": 5.366060727127447e-06,
      "loss": 1.6945,
      "step": 21500
    },
    {
      "epoch": 0.11,
      "learning_rate": 5.4906612065521375e-06,
      "loss": 1.6944,
      "step": 22000
    },
    {
      "epoch": 0.11,
      "learning_rate": 5.615511386336397e-06,
      "loss": 1.6637,
      "step": 22500
    },
    {
      "epoch": 0.11,
      "learning_rate": 5.7403615661206554e-06,
      "loss": 1.6884,
      "step": 23000
    },
    {
      "epoch": 0.12,
      "learning_rate": 5.865211745904915e-06,
      "loss": 1.6598,
      "step": 23500
    },
    {
      "epoch": 0.12,
      "learning_rate": 5.990061925689173e-06,
      "loss": 1.6603,
      "step": 24000
    },
    {
      "epoch": 0.12,
      "learning_rate": 6.114912105473433e-06,
      "loss": 1.6701,
      "step": 24500
    },
    {
      "epoch": 0.12,
      "learning_rate": 6.2397622852576906e-06,
      "loss": 1.6728,
      "step": 25000
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.36461246504195e-06,
      "loss": 1.658,
      "step": 25500
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.489462644826209e-06,
      "loss": 1.6716,
      "step": 26000
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.613813423891331e-06,
      "loss": 1.6472,
      "step": 26500
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.738663603675589e-06,
      "loss": 1.65,
      "step": 27000
    },
    {
      "epoch": 0.14,
      "learning_rate": 6.863513783459849e-06,
      "loss": 1.599,
      "step": 27500
    },
    {
      "epoch": 0.14,
      "learning_rate": 6.988363963244107e-06,
      "loss": 1.6091,
      "step": 28000
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.113214143028367e-06,
      "loss": 1.674,
      "step": 28500
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.238064322812625e-06,
      "loss": 1.6326,
      "step": 29000
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.3629145025968835e-06,
      "loss": 1.6274,
      "step": 29500
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.487764682381143e-06,
      "loss": 1.6078,
      "step": 30000
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.6126148621654015e-06,
      "loss": 1.5981,
      "step": 30500
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.737465041949662e-06,
      "loss": 1.6662,
      "step": 31000
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.862315221733919e-06,
      "loss": 1.5996,
      "step": 31500
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.987165401518179e-06,
      "loss": 1.6093,
      "step": 32000
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.112015581302437e-06,
      "loss": 1.6045,
      "step": 32500
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.236616060727128e-06,
      "loss": 1.6014,
      "step": 33000
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.361216540151819e-06,
      "loss": 1.5772,
      "step": 33500
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.486066719936077e-06,
      "loss": 1.5974,
      "step": 34000
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.610916899720336e-06,
      "loss": 1.594,
      "step": 34500
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.735767079504594e-06,
      "loss": 1.5636,
      "step": 35000
    },
    {
      "epoch": 0.18,
      "learning_rate": 8.860617259288853e-06,
      "loss": 1.5498,
      "step": 35500
    },
    {
      "epoch": 0.18,
      "learning_rate": 8.985467439073113e-06,
      "loss": 1.6006,
      "step": 36000
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.110317618857372e-06,
      "loss": 1.6172,
      "step": 36500
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.23516779864163e-06,
      "loss": 1.5328,
      "step": 37000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.360017978425889e-06,
      "loss": 1.58,
      "step": 37500
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.48461845785058e-06,
      "loss": 1.5854,
      "step": 38000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.60921893727527e-06,
      "loss": 1.5848,
      "step": 38500
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.734069117059529e-06,
      "loss": 1.5577,
      "step": 39000
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.858919296843787e-06,
      "loss": 1.5529,
      "step": 39500
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.983769476628046e-06,
      "loss": 1.5857,
      "step": 40000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.0108120255693169e-05,
      "loss": 1.5575,
      "step": 40500
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.0232970435477427e-05,
      "loss": 1.563,
      "step": 41000
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.0357820615261686e-05,
      "loss": 1.5404,
      "step": 41500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.0482670795045946e-05,
      "loss": 1.5715,
      "step": 42000
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.0607520974830205e-05,
      "loss": 1.5955,
      "step": 42500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.0732371154614463e-05,
      "loss": 1.5371,
      "step": 43000
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0857221334398722e-05,
      "loss": 1.568,
      "step": 43500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0982071514182982e-05,
      "loss": 1.5265,
      "step": 44000
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.1106671993607671e-05,
      "loss": 1.5436,
      "step": 44500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.123152217339193e-05,
      "loss": 1.5177,
      "step": 45000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.135637235317619e-05,
      "loss": 1.5491,
      "step": 45500
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.1481222532960448e-05,
      "loss": 1.5434,
      "step": 46000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.1606072712744707e-05,
      "loss": 1.547,
      "step": 46500
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.1730922892528966e-05,
      "loss": 1.5393,
      "step": 47000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.1855773072313224e-05,
      "loss": 1.5036,
      "step": 47500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.1980373551737915e-05,
      "loss": 1.5236,
      "step": 48000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.2105223731522173e-05,
      "loss": 1.5557,
      "step": 48500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.2230073911306434e-05,
      "loss": 1.5224,
      "step": 49000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.2354924091090692e-05,
      "loss": 1.5168,
      "step": 49500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.2479524570515381e-05,
      "loss": 1.551,
      "step": 50000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.2604374750299641e-05,
      "loss": 1.5109,
      "step": 50500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.27292249300839e-05,
      "loss": 1.531,
      "step": 51000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.2854075109868158e-05,
      "loss": 1.5001,
      "step": 51500
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.2978925289652419e-05,
      "loss": 1.5533,
      "step": 52000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.3103775469436677e-05,
      "loss": 1.5349,
      "step": 52500
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.3228625649220936e-05,
      "loss": 1.5097,
      "step": 53000
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.3353475829005194e-05,
      "loss": 1.5036,
      "step": 53500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.3477826608070315e-05,
      "loss": 1.4875,
      "step": 54000
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.3602676787854576e-05,
      "loss": 1.514,
      "step": 54500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.3727526967638834e-05,
      "loss": 1.5359,
      "step": 55000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.3852377147423093e-05,
      "loss": 1.5305,
      "step": 55500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.3977227327207351e-05,
      "loss": 1.5194,
      "step": 56000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.410207750699161e-05,
      "loss": 1.5095,
      "step": 56500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.422692768677587e-05,
      "loss": 1.5229,
      "step": 57000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.435152816620056e-05,
      "loss": 1.4926,
      "step": 57500
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.447637834598482e-05,
      "loss": 1.536,
      "step": 58000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.4601228525769078e-05,
      "loss": 1.4916,
      "step": 58500
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.4726078705553337e-05,
      "loss": 1.5029,
      "step": 59000
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.4850679184978027e-05,
      "loss": 1.5114,
      "step": 59500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.4975529364762286e-05,
      "loss": 1.5399,
      "step": 60000
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.5100379544546544e-05,
      "loss": 1.4844,
      "step": 60500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.5225229724330803e-05,
      "loss": 1.4936,
      "step": 61000
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.5350079904115063e-05,
      "loss": 1.4887,
      "step": 61500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.5474930083899323e-05,
      "loss": 1.5303,
      "step": 62000
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.559978026368358e-05,
      "loss": 1.4958,
      "step": 62500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.5724630443467837e-05,
      "loss": 1.5299,
      "step": 63000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.5849480623252097e-05,
      "loss": 1.5146,
      "step": 63500
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.597408110267679e-05,
      "loss": 1.4705,
      "step": 64000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.6098931282461047e-05,
      "loss": 1.4664,
      "step": 64500
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.6223781462245307e-05,
      "loss": 1.5142,
      "step": 65000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6348381941669996e-05,
      "loss": 1.459,
      "step": 65500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6473232121454256e-05,
      "loss": 1.4805,
      "step": 66000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6598082301238516e-05,
      "loss": 1.4857,
      "step": 66500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6722932481022773e-05,
      "loss": 1.4668,
      "step": 67000
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.6847782660807034e-05,
      "loss": 1.496,
      "step": 67500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.697263284059129e-05,
      "loss": 1.5133,
      "step": 68000
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.709748302037555e-05,
      "loss": 1.4868,
      "step": 68500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.722233320015981e-05,
      "loss": 1.4531,
      "step": 69000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.7347183379944068e-05,
      "loss": 1.4753,
      "step": 69500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.747178385936876e-05,
      "loss": 1.4623,
      "step": 70000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.7596634039153017e-05,
      "loss": 1.4668,
      "step": 70500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.7721484218937277e-05,
      "loss": 1.5053,
      "step": 71000
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.7846334398721534e-05,
      "loss": 1.5218,
      "step": 71500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.7971184578505794e-05,
      "loss": 1.4769,
      "step": 72000
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.8095785057930483e-05,
      "loss": 1.4967,
      "step": 72500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.8220635237714744e-05,
      "loss": 1.46,
      "step": 73000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.8345485417499004e-05,
      "loss": 1.4861,
      "step": 73500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.847033559728326e-05,
      "loss": 1.5179,
      "step": 74000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.859493607670795e-05,
      "loss": 1.5239,
      "step": 74500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.871978625649221e-05,
      "loss": 1.4649,
      "step": 75000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.884463643627647e-05,
      "loss": 1.4809,
      "step": 75500
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8969486616060727e-05,
      "loss": 1.4589,
      "step": 76000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9094336795844987e-05,
      "loss": 1.4644,
      "step": 76500
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9219186975629248e-05,
      "loss": 1.4814,
      "step": 77000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9344037155413505e-05,
      "loss": 1.4607,
      "step": 77500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9468887335197765e-05,
      "loss": 1.4951,
      "step": 78000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.959373751498202e-05,
      "loss": 1.4434,
      "step": 78500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.971858769476628e-05,
      "loss": 1.4315,
      "step": 79000
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9843437874550542e-05,
      "loss": 1.4781,
      "step": 79500
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.99682880543348e-05,
      "loss": 1.477,
      "step": 80000
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.009288853375949e-05,
      "loss": 1.48,
      "step": 80500
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.0217738713543748e-05,
      "loss": 1.4852,
      "step": 81000
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.034258889332801e-05,
      "loss": 1.4732,
      "step": 81500
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.0467439073112265e-05,
      "loss": 1.4968,
      "step": 82000
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.0592039552536958e-05,
      "loss": 1.4653,
      "step": 82500
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.0716640031961647e-05,
      "loss": 1.4591,
      "step": 83000
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.0841490211745907e-05,
      "loss": 1.5043,
      "step": 83500
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.0966340391530164e-05,
      "loss": 1.479,
      "step": 84000
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.1091190571314424e-05,
      "loss": 1.466,
      "step": 84500
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.121604075109868e-05,
      "loss": 1.4813,
      "step": 85000
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.134089093088294e-05,
      "loss": 1.5276,
      "step": 85500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.14657411106672e-05,
      "loss": 1.4346,
      "step": 86000
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.159059129045146e-05,
      "loss": 1.4844,
      "step": 86500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.171544147023572e-05,
      "loss": 1.466,
      "step": 87000
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.1840041949660408e-05,
      "loss": 1.4633,
      "step": 87500
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.1964892129444668e-05,
      "loss": 1.483,
      "step": 88000
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.2089742309228925e-05,
      "loss": 1.4759,
      "step": 88500
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.2214592489013185e-05,
      "loss": 1.4894,
      "step": 89000
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.2339192968437874e-05,
      "loss": 1.439,
      "step": 89500
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.2464043148222134e-05,
      "loss": 1.4774,
      "step": 90000
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.2588893328006394e-05,
      "loss": 1.4442,
      "step": 90500
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.271374350779065e-05,
      "loss": 1.456,
      "step": 91000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.283859368757491e-05,
      "loss": 1.46,
      "step": 91500
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.29631941669996e-05,
      "loss": 1.4778,
      "step": 92000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.308804434678386e-05,
      "loss": 1.4968,
      "step": 92500
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.3212894526568118e-05,
      "loss": 1.4858,
      "step": 93000
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.333774470635238e-05,
      "loss": 1.4862,
      "step": 93500
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.3462594886136638e-05,
      "loss": 1.4475,
      "step": 94000
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.3587445065920895e-05,
      "loss": 1.5057,
      "step": 94500
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.3712295245705155e-05,
      "loss": 1.457,
      "step": 95000
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.3837145425489412e-05,
      "loss": 1.4663,
      "step": 95500
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.3961745904914105e-05,
      "loss": 1.4571,
      "step": 96000
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.408659608469836e-05,
      "loss": 1.5009,
      "step": 96500
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.421144626448262e-05,
      "loss": 1.5043,
      "step": 97000
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.4336296444266882e-05,
      "loss": 1.4506,
      "step": 97500
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.446089692369157e-05,
      "loss": 1.4424,
      "step": 98000
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.4585497403116263e-05,
      "loss": 1.4328,
      "step": 98500
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.471034758290052e-05,
      "loss": 1.4627,
      "step": 99000
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.4835197762684777e-05,
      "loss": 1.4597,
      "step": 99500
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.496004794246904e-05,
      "loss": 1.4409,
      "step": 100000
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.5084898122253297e-05,
      "loss": 1.4582,
      "step": 100500
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.5209748302037554e-05,
      "loss": 1.4795,
      "step": 101000
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.5334598481821815e-05,
      "loss": 1.4349,
      "step": 101500
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.545944866160607e-05,
      "loss": 1.4816,
      "step": 102000
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.5584298841390332e-05,
      "loss": 1.4699,
      "step": 102500
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.5709149021174595e-05,
      "loss": 1.4489,
      "step": 103000
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.5833999200958852e-05,
      "loss": 1.4521,
      "step": 103500
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.5958599680383538e-05,
      "loss": 1.4874,
      "step": 104000
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.60834498601678e-05,
      "loss": 1.4806,
      "step": 104500
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.6208300039952062e-05,
      "loss": 1.4524,
      "step": 105000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.633315021973632e-05,
      "loss": 1.4219,
      "step": 105500
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.645775069916101e-05,
      "loss": 1.4903,
      "step": 106000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.6582600878945268e-05,
      "loss": 1.4491,
      "step": 106500
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.6707451058729528e-05,
      "loss": 1.4556,
      "step": 107000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.6832301238513785e-05,
      "loss": 1.4657,
      "step": 107500
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.6957151418298042e-05,
      "loss": 1.4684,
      "step": 108000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.7081751897722734e-05,
      "loss": 1.4843,
      "step": 108500
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.7206602077506994e-05,
      "loss": 1.4569,
      "step": 109000
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.733145225729125e-05,
      "loss": 1.4437,
      "step": 109500
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.7456302437075508e-05,
      "loss": 1.4934,
      "step": 110000
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.75809029165002e-05,
      "loss": 1.4498,
      "step": 110500
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.770575309628446e-05,
      "loss": 1.4642,
      "step": 111000
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.7830603276068718e-05,
      "loss": 1.4794,
      "step": 111500
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.7955453455852975e-05,
      "loss": 1.4609,
      "step": 112000
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.8080303635637235e-05,
      "loss": 1.5038,
      "step": 112500
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.82051538154215e-05,
      "loss": 1.452,
      "step": 113000
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.8329504594486616e-05,
      "loss": 1.4257,
      "step": 113500
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.8454354774270876e-05,
      "loss": 1.483,
      "step": 114000
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.8579204954055133e-05,
      "loss": 1.4829,
      "step": 114500
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.870405513383939e-05,
      "loss": 1.4577,
      "step": 115000
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.8828905313623654e-05,
      "loss": 1.4688,
      "step": 115500
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.8953755493407914e-05,
      "loss": 1.4465,
      "step": 116000
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.907860567319217e-05,
      "loss": 1.4617,
      "step": 116500
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.920345585297643e-05,
      "loss": 1.4797,
      "step": 117000
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.9328306032760688e-05,
      "loss": 1.4689,
      "step": 117500
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.945290651218538e-05,
      "loss": 1.4654,
      "step": 118000
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.9577756691969637e-05,
      "loss": 1.4124,
      "step": 118500
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.9702606871753897e-05,
      "loss": 1.498,
      "step": 119000
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.9827457051538154e-05,
      "loss": 1.4595,
      "step": 119500
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.995230723132241e-05,
      "loss": 1.4187,
      "step": 120000
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.007715741110667e-05,
      "loss": 1.4218,
      "step": 120500
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.0202007590890935e-05,
      "loss": 1.4525,
      "step": 121000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.032660807031562e-05,
      "loss": 1.4783,
      "step": 121500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.0451458250099878e-05,
      "loss": 1.5095,
      "step": 122000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.0576308429884145e-05,
      "loss": 1.4712,
      "step": 122500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.07011586096684e-05,
      "loss": 1.4217,
      "step": 123000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.082600878945266e-05,
      "loss": 1.4516,
      "step": 123500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.095085896923692e-05,
      "loss": 1.5002,
      "step": 124000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.107570914902118e-05,
      "loss": 1.4642,
      "step": 124500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.120055932880543e-05,
      "loss": 1.4427,
      "step": 125000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.132515980823013e-05,
      "loss": 1.4821,
      "step": 125500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.144976028765482e-05,
      "loss": 1.4516,
      "step": 126000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.157461046743908e-05,
      "loss": 1.4376,
      "step": 126500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.169946064722333e-05,
      "loss": 1.4411,
      "step": 127000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.182431082700759e-05,
      "loss": 1.4608,
      "step": 127500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.194916100679185e-05,
      "loss": 1.4477,
      "step": 128000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.207401118657611e-05,
      "loss": 1.4303,
      "step": 128500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.2198861366360365e-05,
      "loss": 1.4611,
      "step": 129000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.232371154614463e-05,
      "loss": 1.4877,
      "step": 129500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.2448561725928886e-05,
      "loss": 1.5076,
      "step": 130000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.2573162205353575e-05,
      "loss": 1.4485,
      "step": 130500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.269801238513784e-05,
      "loss": 1.4656,
      "step": 131000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.2822612864562524e-05,
      "loss": 1.4975,
      "step": 131500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.294746304434679e-05,
      "loss": 1.4555,
      "step": 132000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.3072313224131044e-05,
      "loss": 1.434,
      "step": 132500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.3197163403915305e-05,
      "loss": 1.468,
      "step": 133000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.3321763883339994e-05,
      "loss": 1.4499,
      "step": 133500
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.3446614063124254e-05,
      "loss": 1.4367,
      "step": 134000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.3571464242908514e-05,
      "loss": 1.4415,
      "step": 134500
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.369631442269277e-05,
      "loss": 1.5146,
      "step": 135000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.382116460247703e-05,
      "loss": 1.4601,
      "step": 135500
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.394601478226129e-05,
      "loss": 1.4335,
      "step": 136000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.407086496204555e-05,
      "loss": 1.4488,
      "step": 136500
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.41957151418298e-05,
      "loss": 1.4612,
      "step": 137000
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.432056532161406e-05,
      "loss": 1.418,
      "step": 137500
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.444541550139833e-05,
      "loss": 1.4817,
      "step": 138000
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.457026568118258e-05,
      "loss": 1.4322,
      "step": 138500
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.469511586096684e-05,
      "loss": 1.4039,
      "step": 139000
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.481971634039153e-05,
      "loss": 1.4704,
      "step": 139500
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.494431681981622e-05,
      "loss": 1.4412,
      "step": 140000
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.506916699960048e-05,
      "loss": 1.4852,
      "step": 140500
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.519401717938474e-05,
      "loss": 1.4531,
      "step": 141000
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.5318867359169e-05,
      "loss": 1.4989,
      "step": 141500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.544346783859369e-05,
      "loss": 1.4456,
      "step": 142000
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.556831801837795e-05,
      "loss": 1.4773,
      "step": 142500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.569316819816221e-05,
      "loss": 1.4313,
      "step": 143000
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.5818018377946464e-05,
      "loss": 1.4755,
      "step": 143500
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.5942868557730725e-05,
      "loss": 1.4167,
      "step": 144000
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.6067718737514985e-05,
      "loss": 1.4511,
      "step": 144500
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.619256891729924e-05,
      "loss": 1.4815,
      "step": 145000
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.63174190970835e-05,
      "loss": 1.4422,
      "step": 145500
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.6442269276867766e-05,
      "loss": 1.4707,
      "step": 146000
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.6566620055932883e-05,
      "loss": 1.4659,
      "step": 146500
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.669147023571714e-05,
      "loss": 1.431,
      "step": 147000
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.68163204155014e-05,
      "loss": 1.4571,
      "step": 147500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.694092089492609e-05,
      "loss": 1.4657,
      "step": 148000
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.7065771074710346e-05,
      "loss": 1.4526,
      "step": 148500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.719062125449461e-05,
      "loss": 1.451,
      "step": 149000
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.731547143427887e-05,
      "loss": 1.4446,
      "step": 149500
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.744032161406313e-05,
      "loss": 1.4179,
      "step": 150000
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.756517179384739e-05,
      "loss": 1.4378,
      "step": 150500
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.769002197363165e-05,
      "loss": 1.4459,
      "step": 151000
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.78148721534159e-05,
      "loss": 1.4741,
      "step": 151500
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.79394726328406e-05,
      "loss": 1.4415,
      "step": 152000
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.806432281262485e-05,
      "loss": 1.456,
      "step": 152500
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.818917299240911e-05,
      "loss": 1.4832,
      "step": 153000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.831402317219337e-05,
      "loss": 1.4356,
      "step": 153500
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.843887335197763e-05,
      "loss": 1.4558,
      "step": 154000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.856347383140232e-05,
      "loss": 1.4987,
      "step": 154500
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.868832401118658e-05,
      "loss": 1.4446,
      "step": 155000
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.8813174190970834e-05,
      "loss": 1.4152,
      "step": 155500
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.8938024370755094e-05,
      "loss": 1.4269,
      "step": 156000
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.9062874550539354e-05,
      "loss": 1.4571,
      "step": 156500
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.9187724730323615e-05,
      "loss": 1.4229,
      "step": 157000
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.9312325209748304e-05,
      "loss": 1.4429,
      "step": 157500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.943717538953256e-05,
      "loss": 1.4469,
      "step": 158000
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.9562025569316824e-05,
      "loss": 1.4311,
      "step": 158500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.968662604874151e-05,
      "loss": 1.4501,
      "step": 159000
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.981147622852577e-05,
      "loss": 1.4527,
      "step": 159500
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.9936326408310034e-05,
      "loss": 1.5065,
      "step": 160000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.0060926887734716e-05,
      "loss": 1.4811,
      "step": 160500
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.018577706751898e-05,
      "loss": 1.4526,
      "step": 161000
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.031062724730324e-05,
      "loss": 1.4387,
      "step": 161500
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.0435477427087497e-05,
      "loss": 1.4457,
      "step": 162000
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.056032760687176e-05,
      "loss": 1.4905,
      "step": 162500
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.068517778665602e-05,
      "loss": 1.4661,
      "step": 163000
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.081002796644027e-05,
      "loss": 1.4479,
      "step": 163500
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.093487814622453e-05,
      "loss": 1.4725,
      "step": 164000
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.105972832600879e-05,
      "loss": 1.4802,
      "step": 164500
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.1184578505793045e-05,
      "loss": 1.433,
      "step": 165000
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.130942868557731e-05,
      "loss": 1.432,
      "step": 165500
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1434029165002e-05,
      "loss": 1.4591,
      "step": 166000
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.155887934478626e-05,
      "loss": 1.4711,
      "step": 166500
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.168372952457052e-05,
      "loss": 1.4554,
      "step": 167000
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.1808579704354775e-05,
      "loss": 1.4672,
      "step": 167500
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.1933429884139035e-05,
      "loss": 1.4332,
      "step": 168000
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.2058280063923295e-05,
      "loss": 1.4679,
      "step": 168500
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.2183130243707555e-05,
      "loss": 1.4584,
      "step": 169000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.230798042349181e-05,
      "loss": 1.4208,
      "step": 169500
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.243283060327607e-05,
      "loss": 1.4596,
      "step": 170000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.255743108270076e-05,
      "loss": 1.4446,
      "step": 170500
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.268228126248502e-05,
      "loss": 1.4636,
      "step": 171000
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.280713144226928e-05,
      "loss": 1.4662,
      "step": 171500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.293198162205353e-05,
      "loss": 1.4546,
      "step": 172000
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.30568318018378e-05,
      "loss": 1.4565,
      "step": 172500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.318143228126249e-05,
      "loss": 1.5003,
      "step": 173000
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.330628246104674e-05,
      "loss": 1.4631,
      "step": 173500
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.343113264083101e-05,
      "loss": 1.4398,
      "step": 174000
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.355598282061526e-05,
      "loss": 1.4654,
      "step": 174500
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.368083300039952e-05,
      "loss": 1.4713,
      "step": 175000
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.380543347982422e-05,
      "loss": 1.4766,
      "step": 175500
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.393028365960847e-05,
      "loss": 1.4628,
      "step": 176000
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.405513383939273e-05,
      "loss": 1.4834,
      "step": 176500
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.417998401917699e-05,
      "loss": 1.4594,
      "step": 177000
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.4304834198961246e-05,
      "loss": 1.4703,
      "step": 177500
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.4429684378745506e-05,
      "loss": 1.4395,
      "step": 178000
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.4554284858170195e-05,
      "loss": 1.4602,
      "step": 178500
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.4679135037954455e-05,
      "loss": 1.4557,
      "step": 179000
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.4803985217738715e-05,
      "loss": 1.4673,
      "step": 179500
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.4928835397522976e-05,
      "loss": 1.4467,
      "step": 180000
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.5053435876947664e-05,
      "loss": 1.5001,
      "step": 180500
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.5178286056731925e-05,
      "loss": 1.4868,
      "step": 181000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.5302886536156614e-05,
      "loss": 1.4649,
      "step": 181500
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.5427736715940874e-05,
      "loss": 1.4427,
      "step": 182000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.555258689572513e-05,
      "loss": 1.4397,
      "step": 182500
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.567743707550939e-05,
      "loss": 1.4666,
      "step": 183000
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.5802287255293655e-05,
      "loss": 1.4373,
      "step": 183500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.592713743507791e-05,
      "loss": 1.437,
      "step": 184000
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.605198761486217e-05,
      "loss": 1.4771,
      "step": 184500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.617683779464643e-05,
      "loss": 1.4554,
      "step": 185000
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.630143827407112e-05,
      "loss": 1.4347,
      "step": 185500
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.642628845385538e-05,
      "loss": 1.4422,
      "step": 186000
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.655113863363964e-05,
      "loss": 1.4437,
      "step": 186500
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.667598881342389e-05,
      "loss": 1.4733,
      "step": 187000
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.680083899320815e-05,
      "loss": 1.4635,
      "step": 187500
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.692543947263284e-05,
      "loss": 1.4325,
      "step": 188000
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.705003995205754e-05,
      "loss": 1.4384,
      "step": 188500
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.717489013184179e-05,
      "loss": 1.4598,
      "step": 189000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.729974031162605e-05,
      "loss": 1.4687,
      "step": 189500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.742459049141031e-05,
      "loss": 1.4671,
      "step": 190000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.7549440671194564e-05,
      "loss": 1.4524,
      "step": 190500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.767404115061926e-05,
      "loss": 1.4646,
      "step": 191000
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.779889133040352e-05,
      "loss": 1.4562,
      "step": 191500
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.7923741510187774e-05,
      "loss": 1.4408,
      "step": 192000
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.8048591689972034e-05,
      "loss": 1.4606,
      "step": 192500
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.8173441869756294e-05,
      "loss": 1.4644,
      "step": 193000
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.8298292049540554e-05,
      "loss": 1.4459,
      "step": 193500
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.842289252896524e-05,
      "loss": 1.4621,
      "step": 194000
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.8547742708749504e-05,
      "loss": 1.4567,
      "step": 194500
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.8672592888533764e-05,
      "loss": 1.4202,
      "step": 195000
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.8797443068318024e-05,
      "loss": 1.489,
      "step": 195500
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.892229324810228e-05,
      "loss": 1.4573,
      "step": 196000
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.904714342788654e-05,
      "loss": 1.4363,
      "step": 196500
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.91719936076708e-05,
      "loss": 1.4576,
      "step": 197000
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.929684378745505e-05,
      "loss": 1.4733,
      "step": 197500
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.942169396723931e-05,
      "loss": 1.4598,
      "step": 198000
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.954629444666401e-05,
      "loss": 1.4295,
      "step": 198500
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.967114462644826e-05,
      "loss": 1.4793,
      "step": 199000
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.979599480623252e-05,
      "loss": 1.4627,
      "step": 199500
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.992084498601678e-05,
      "loss": 1.4313,
      "step": 200000
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.4563966989517212,
      "eval_runtime": 2516.1945,
      "eval_samples_per_second": 46.299,
      "eval_steps_per_second": 11.575,
      "step": 200240
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.999495050383984e-05,
      "loss": 1.4208,
      "step": 200500
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.998107826164159e-05,
      "loss": 1.3879,
      "step": 201000
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.996720601944334e-05,
      "loss": 1.3907,
      "step": 201500
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.995333377724509e-05,
      "loss": 1.3531,
      "step": 202000
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.993946153504683e-05,
      "loss": 1.3505,
      "step": 202500
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.992561703733298e-05,
      "loss": 1.371,
      "step": 203000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.991174479513473e-05,
      "loss": 1.3653,
      "step": 203500
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.989787255293648e-05,
      "loss": 1.3361,
      "step": 204000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.988400031073823e-05,
      "loss": 1.3285,
      "step": 204500
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.9870183557508774e-05,
      "loss": 1.375,
      "step": 205000
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.985631131531052e-05,
      "loss": 1.3511,
      "step": 205500
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.984243907311227e-05,
      "loss": 1.3561,
      "step": 206000
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.982856683091401e-05,
      "loss": 1.3748,
      "step": 206500
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.981469458871577e-05,
      "loss": 1.3754,
      "step": 207000
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.980082234651752e-05,
      "loss": 1.341,
      "step": 207500
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.978697784880366e-05,
      "loss": 1.3558,
      "step": 208000
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.9773133351089807e-05,
      "loss": 1.3702,
      "step": 208500
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.975926110889155e-05,
      "loss": 1.3548,
      "step": 209000
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.974538886669331e-05,
      "loss": 1.3747,
      "step": 209500
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.973151662449505e-05,
      "loss": 1.3648,
      "step": 210000
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.97176443822968e-05,
      "loss": 1.3758,
      "step": 210500
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.970377214009855e-05,
      "loss": 1.3715,
      "step": 211000
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.96898998979003e-05,
      "loss": 1.3755,
      "step": 211500
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.967602765570205e-05,
      "loss": 1.3851,
      "step": 212000
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.966218315798819e-05,
      "loss": 1.3845,
      "step": 212500
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.964831091578994e-05,
      "loss": 1.3633,
      "step": 213000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.9634438673591694e-05,
      "loss": 1.3522,
      "step": 213500
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.962056643139344e-05,
      "loss": 1.3855,
      "step": 214000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.960669418919519e-05,
      "loss": 1.3785,
      "step": 214500
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.959282194699694e-05,
      "loss": 1.3861,
      "step": 215000
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.957894970479869e-05,
      "loss": 1.3716,
      "step": 215500
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.956507746260044e-05,
      "loss": 1.3827,
      "step": 216000
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.9551232964886586e-05,
      "loss": 1.3651,
      "step": 216500
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.953736072268833e-05,
      "loss": 1.3829,
      "step": 217000
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.952348848049008e-05,
      "loss": 1.3753,
      "step": 217500
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.950961623829183e-05,
      "loss": 1.3416,
      "step": 218000
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.949577174057797e-05,
      "loss": 1.3547,
      "step": 218500
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.948189949837973e-05,
      "loss": 1.3754,
      "step": 219000
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.946802725618148e-05,
      "loss": 1.3919,
      "step": 219500
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.945415501398322e-05,
      "loss": 1.3312,
      "step": 220000
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.944031051626937e-05,
      "loss": 1.3704,
      "step": 220500
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.942643827407112e-05,
      "loss": 1.3703,
      "step": 221000
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.941256603187286e-05,
      "loss": 1.3831,
      "step": 221500
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.939869378967461e-05,
      "loss": 1.3408,
      "step": 222000
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.9384821547476363e-05,
      "loss": 1.3882,
      "step": 222500
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.9370949305278114e-05,
      "loss": 1.3806,
      "step": 223000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.935710480756426e-05,
      "loss": 1.353,
      "step": 223500
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.934323256536601e-05,
      "loss": 1.3723,
      "step": 224000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.9329360323167755e-05,
      "loss": 1.3419,
      "step": 224500
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.9315488080969505e-05,
      "loss": 1.3507,
      "step": 225000
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.9301615838771256e-05,
      "loss": 1.3154,
      "step": 225500
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.9287743596573006e-05,
      "loss": 1.3258,
      "step": 226000
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.927387135437475e-05,
      "loss": 1.324,
      "step": 226500
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.92599991121765e-05,
      "loss": 1.3536,
      "step": 227000
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.924615461446265e-05,
      "loss": 1.318,
      "step": 227500
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.92322823722644e-05,
      "loss": 1.3288,
      "step": 228000
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.921841013006615e-05,
      "loss": 1.3475,
      "step": 228500
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.92045378878679e-05,
      "loss": 1.3361,
      "step": 229000
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.919066564566964e-05,
      "loss": 1.3554,
      "step": 229500
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.917682114795579e-05,
      "loss": 1.3873,
      "step": 230000
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.916294890575754e-05,
      "loss": 1.3753,
      "step": 230500
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.914907666355928e-05,
      "loss": 1.3623,
      "step": 231000
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.913523216584543e-05,
      "loss": 1.3568,
      "step": 231500
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.912135992364718e-05,
      "loss": 1.3624,
      "step": 232000
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.910748768144893e-05,
      "loss": 1.3308,
      "step": 232500
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.909361543925068e-05,
      "loss": 1.3636,
      "step": 233000
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.907974319705243e-05,
      "loss": 1.3502,
      "step": 233500
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.906589869933857e-05,
      "loss": 1.3749,
      "step": 234000
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.905202645714032e-05,
      "loss": 1.3725,
      "step": 234500
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.903815421494207e-05,
      "loss": 1.344,
      "step": 235000
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.9024281972743816e-05,
      "loss": 1.3671,
      "step": 235500
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.901040973054557e-05,
      "loss": 1.4075,
      "step": 236000
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.8996537488347324e-05,
      "loss": 1.366,
      "step": 236500
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.898266524614907e-05,
      "loss": 1.3422,
      "step": 237000
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.896879300395082e-05,
      "loss": 1.361,
      "step": 237500
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.8954948506236965e-05,
      "loss": 1.3096,
      "step": 238000
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.894107626403871e-05,
      "loss": 1.3631,
      "step": 238500
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.8927231766324856e-05,
      "loss": 1.3503,
      "step": 239000
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.8913359524126606e-05,
      "loss": 1.3679,
      "step": 239500
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.889948728192835e-05,
      "loss": 1.3112,
      "step": 240000
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.888561503973011e-05,
      "loss": 1.3265,
      "step": 240500
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.887174279753186e-05,
      "loss": 1.3667,
      "step": 241000
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.88578705553336e-05,
      "loss": 1.3415,
      "step": 241500
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.884399831313535e-05,
      "loss": 1.3625,
      "step": 242000
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.88301260709371e-05,
      "loss": 1.3603,
      "step": 242500
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.881628157322324e-05,
      "loss": 1.3902,
      "step": 243000
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.880240933102499e-05,
      "loss": 1.3413,
      "step": 243500
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.878853708882674e-05,
      "loss": 1.3088,
      "step": 244000
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.877466484662849e-05,
      "loss": 1.3458,
      "step": 244500
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.8760792604430243e-05,
      "loss": 1.3286,
      "step": 245000
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.874692036223199e-05,
      "loss": 1.3683,
      "step": 245500
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.8733048120033744e-05,
      "loss": 1.3349,
      "step": 246000
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.871917587783549e-05,
      "loss": 1.3221,
      "step": 246500
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.8705331380121635e-05,
      "loss": 1.3223,
      "step": 247000
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.8691459137923385e-05,
      "loss": 1.3135,
      "step": 247500
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.867758689572513e-05,
      "loss": 1.3001,
      "step": 248000
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.866371465352688e-05,
      "loss": 1.3433,
      "step": 248500
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.8649870155813026e-05,
      "loss": 1.3482,
      "step": 249000
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.863599791361478e-05,
      "loss": 1.3694,
      "step": 249500
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.862212567141653e-05,
      "loss": 1.3353,
      "step": 250000
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.860825342921828e-05,
      "loss": 1.3506,
      "step": 250500
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.859440893150442e-05,
      "loss": 1.3457,
      "step": 251000
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.858053668930617e-05,
      "loss": 1.3705,
      "step": 251500
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.856666444710792e-05,
      "loss": 1.3015,
      "step": 252000
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.855279220490966e-05,
      "loss": 1.3915,
      "step": 252500
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.853891996271141e-05,
      "loss": 1.3534,
      "step": 253000
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.852504772051317e-05,
      "loss": 1.3353,
      "step": 253500
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.851117547831491e-05,
      "loss": 1.3541,
      "step": 254000
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.8497303236116664e-05,
      "loss": 1.3503,
      "step": 254500
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.84834864828872e-05,
      "loss": 1.3415,
      "step": 255000
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.846961424068895e-05,
      "loss": 1.3313,
      "step": 255500
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.84557419984907e-05,
      "loss": 1.3433,
      "step": 256000
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.844186975629245e-05,
      "loss": 1.3172,
      "step": 256500
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.8427997514094196e-05,
      "loss": 1.3275,
      "step": 257000
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.8414125271895946e-05,
      "loss": 1.3579,
      "step": 257500
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.84002530296977e-05,
      "loss": 1.3232,
      "step": 258000
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.838638078749945e-05,
      "loss": 1.3482,
      "step": 258500
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.8372536289785594e-05,
      "loss": 1.3177,
      "step": 259000
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.8358664047587344e-05,
      "loss": 1.3034,
      "step": 259500
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.834479180538909e-05,
      "loss": 1.3291,
      "step": 260000
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.833091956319084e-05,
      "loss": 1.3378,
      "step": 260500
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.8317075065476985e-05,
      "loss": 1.3314,
      "step": 261000
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.8303202823278736e-05,
      "loss": 1.3243,
      "step": 261500
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.8289330581080486e-05,
      "loss": 1.313,
      "step": 262000
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.8275458338882236e-05,
      "loss": 1.2869,
      "step": 262500
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.826161384116838e-05,
      "loss": 1.3344,
      "step": 263000
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.824774159897013e-05,
      "loss": 1.3188,
      "step": 263500
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.823386935677188e-05,
      "loss": 1.3312,
      "step": 264000
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.821999711457362e-05,
      "loss": 1.3407,
      "step": 264500
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.8206152616859775e-05,
      "loss": 1.3387,
      "step": 265000
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.819228037466152e-05,
      "loss": 1.3095,
      "step": 265500
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.817840813246327e-05,
      "loss": 1.3298,
      "step": 266000
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.816453589026502e-05,
      "loss": 1.3309,
      "step": 266500
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.815069139255116e-05,
      "loss": 1.334,
      "step": 267000
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.813681915035291e-05,
      "loss": 1.3459,
      "step": 267500
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.812294690815466e-05,
      "loss": 1.3318,
      "step": 268000
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.810907466595641e-05,
      "loss": 1.3446,
      "step": 268500
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.809523016824256e-05,
      "loss": 1.3404,
      "step": 269000
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.808135792604431e-05,
      "loss": 1.3227,
      "step": 269500
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.806748568384605e-05,
      "loss": 1.3357,
      "step": 270000
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.80536134416478e-05,
      "loss": 1.3389,
      "step": 270500
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.803974119944955e-05,
      "loss": 1.323,
      "step": 271000
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.80258689572513e-05,
      "loss": 1.3206,
      "step": 271500
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.801199671505305e-05,
      "loss": 1.3287,
      "step": 272000
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.79981244728548e-05,
      "loss": 1.3157,
      "step": 272500
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.7984279975140944e-05,
      "loss": 1.31,
      "step": 273000
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.7970407732942695e-05,
      "loss": 1.3425,
      "step": 273500
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.7956535490744445e-05,
      "loss": 1.3561,
      "step": 274000
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.7942663248546195e-05,
      "loss": 1.345,
      "step": 274500
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.7928818750832336e-05,
      "loss": 1.3272,
      "step": 275000
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.7914946508634086e-05,
      "loss": 1.3361,
      "step": 275500
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.7901074266435837e-05,
      "loss": 1.31,
      "step": 276000
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.788720202423758e-05,
      "loss": 1.3102,
      "step": 276500
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.787335752652373e-05,
      "loss": 1.3493,
      "step": 277000
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.785948528432548e-05,
      "loss": 1.3135,
      "step": 277500
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.784561304212723e-05,
      "loss": 1.297,
      "step": 278000
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.783174079992898e-05,
      "loss": 1.3359,
      "step": 278500
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.781792404669952e-05,
      "loss": 1.3437,
      "step": 279000
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.7804051804501266e-05,
      "loss": 1.3432,
      "step": 279500
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.7790179562303016e-05,
      "loss": 1.2729,
      "step": 280000
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.777630732010476e-05,
      "loss": 1.3253,
      "step": 280500
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.7762462822390914e-05,
      "loss": 1.3552,
      "step": 281000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.774859058019266e-05,
      "loss": 1.3395,
      "step": 281500
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.773471833799441e-05,
      "loss": 1.273,
      "step": 282000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.772084609579616e-05,
      "loss": 1.3137,
      "step": 282500
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.770697385359791e-05,
      "loss": 1.3078,
      "step": 283000
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.769310161139965e-05,
      "loss": 1.3247,
      "step": 283500
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.76792293692014e-05,
      "loss": 1.3042,
      "step": 284000
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.766538487148755e-05,
      "loss": 1.2906,
      "step": 284500
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.76515403737737e-05,
      "loss": 1.3243,
      "step": 285000
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.763766813157545e-05,
      "loss": 1.3719,
      "step": 285500
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.762379588937719e-05,
      "loss": 1.3113,
      "step": 286000
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.760992364717894e-05,
      "loss": 1.3103,
      "step": 286500
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.759605140498069e-05,
      "loss": 1.2904,
      "step": 287000
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.758217916278244e-05,
      "loss": 1.3344,
      "step": 287500
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.7568306920584186e-05,
      "loss": 1.3021,
      "step": 288000
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.755443467838594e-05,
      "loss": 1.3723,
      "step": 288500
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.7540562436187686e-05,
      "loss": 1.3532,
      "step": 289000
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.752669019398944e-05,
      "loss": 1.3245,
      "step": 289500
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.7512845696275584e-05,
      "loss": 1.3453,
      "step": 290000
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.7498973454077334e-05,
      "loss": 1.3119,
      "step": 290500
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.748510121187908e-05,
      "loss": 1.3234,
      "step": 291000
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.747122896968083e-05,
      "loss": 1.3102,
      "step": 291500
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.745735672748258e-05,
      "loss": 1.3051,
      "step": 292000
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.744351222976872e-05,
      "loss": 1.3589,
      "step": 292500
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.7429639987570476e-05,
      "loss": 1.3058,
      "step": 293000
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.7415767745372226e-05,
      "loss": 1.3334,
      "step": 293500
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.740189550317397e-05,
      "loss": 1.3294,
      "step": 294000
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.738802326097572e-05,
      "loss": 1.2933,
      "step": 294500
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.737415101877747e-05,
      "loss": 1.3376,
      "step": 295000
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.736030652106361e-05,
      "loss": 1.2835,
      "step": 295500
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.734643427886536e-05,
      "loss": 1.321,
      "step": 296000
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.733256203666711e-05,
      "loss": 1.2582,
      "step": 296500
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.731868979446886e-05,
      "loss": 1.2864,
      "step": 297000
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.7304817552270606e-05,
      "loss": 1.3298,
      "step": 297500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.729094531007236e-05,
      "loss": 1.3206,
      "step": 298000
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.72771008123585e-05,
      "loss": 1.3118,
      "step": 298500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.7263228570160254e-05,
      "loss": 1.3081,
      "step": 299000
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.7249356327962004e-05,
      "loss": 1.2876,
      "step": 299500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.7235484085763754e-05,
      "loss": 1.3226,
      "step": 300000
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.72216118435655e-05,
      "loss": 1.3127,
      "step": 300500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.7207767345851645e-05,
      "loss": 1.29,
      "step": 301000
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.7193895103653396e-05,
      "loss": 1.3011,
      "step": 301500
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.7180022861455146e-05,
      "loss": 1.3011,
      "step": 302000
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.7166150619256896e-05,
      "loss": 1.3014,
      "step": 302500
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.715227837705865e-05,
      "loss": 1.3127,
      "step": 303000
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.713840613486039e-05,
      "loss": 1.2935,
      "step": 303500
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.712453389266214e-05,
      "loss": 1.3107,
      "step": 304000
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.711066165046389e-05,
      "loss": 1.2991,
      "step": 304500
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.709681715275003e-05,
      "loss": 1.2907,
      "step": 305000
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.7082972655036185e-05,
      "loss": 1.2879,
      "step": 305500
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.706910041283793e-05,
      "loss": 1.3135,
      "step": 306000
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.705522817063968e-05,
      "loss": 1.2922,
      "step": 306500
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.704135592844143e-05,
      "loss": 1.3146,
      "step": 307000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.702751143072757e-05,
      "loss": 1.2842,
      "step": 307500
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.701363918852932e-05,
      "loss": 1.3103,
      "step": 308000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.699976694633107e-05,
      "loss": 1.2861,
      "step": 308500
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.698589470413282e-05,
      "loss": 1.3134,
      "step": 309000
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.697205020641897e-05,
      "loss": 1.3005,
      "step": 309500
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.695817796422072e-05,
      "loss": 1.2971,
      "step": 310000
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.694430572202246e-05,
      "loss": 1.2832,
      "step": 310500
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.693043347982421e-05,
      "loss": 1.3099,
      "step": 311000
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.691656123762596e-05,
      "loss": 1.3203,
      "step": 311500
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.6902688995427713e-05,
      "loss": 1.2847,
      "step": 312000
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.688881675322946e-05,
      "loss": 1.2896,
      "step": 312500
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.6874972255515604e-05,
      "loss": 1.2826,
      "step": 313000
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.6861100013317355e-05,
      "loss": 1.298,
      "step": 313500
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.68472277711191e-05,
      "loss": 1.2922,
      "step": 314000
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.6833355528920855e-05,
      "loss": 1.2957,
      "step": 314500
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.6819483286722606e-05,
      "loss": 1.2934,
      "step": 315000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.6805638789008746e-05,
      "loss": 1.3029,
      "step": 315500
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.6791766546810496e-05,
      "loss": 1.3037,
      "step": 316000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.677789430461225e-05,
      "loss": 1.2823,
      "step": 316500
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.676402206241399e-05,
      "loss": 1.3058,
      "step": 317000
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.675014982021574e-05,
      "loss": 1.3105,
      "step": 317500
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.673627757801749e-05,
      "loss": 1.248,
      "step": 318000
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.672243308030364e-05,
      "loss": 1.2707,
      "step": 318500
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.670856083810539e-05,
      "loss": 1.3229,
      "step": 319000
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.669468859590714e-05,
      "loss": 1.2902,
      "step": 319500
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.668081635370888e-05,
      "loss": 1.2678,
      "step": 320000
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.666694411151063e-05,
      "loss": 1.3075,
      "step": 320500
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.665309961379678e-05,
      "loss": 1.2832,
      "step": 321000
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.6639227371598524e-05,
      "loss": 1.2886,
      "step": 321500
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.662535512940028e-05,
      "loss": 1.3049,
      "step": 322000
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.6611482887202024e-05,
      "loss": 1.2485,
      "step": 322500
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.6597610645003775e-05,
      "loss": 1.2815,
      "step": 323000
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.658376614728992e-05,
      "loss": 1.3188,
      "step": 323500
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.656992164957606e-05,
      "loss": 1.3256,
      "step": 324000
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.655604940737782e-05,
      "loss": 1.269,
      "step": 324500
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.654217716517956e-05,
      "loss": 1.3165,
      "step": 325000
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.6528304922981313e-05,
      "loss": 1.2916,
      "step": 325500
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.651443268078306e-05,
      "loss": 1.3085,
      "step": 326000
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.6500560438584814e-05,
      "loss": 1.3153,
      "step": 326500
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.6486688196386565e-05,
      "loss": 1.3079,
      "step": 327000
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.647281595418831e-05,
      "loss": 1.3126,
      "step": 327500
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.645894371199006e-05,
      "loss": 1.3031,
      "step": 328000
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.644507146979181e-05,
      "loss": 1.3136,
      "step": 328500
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.643119922759356e-05,
      "loss": 1.3009,
      "step": 329000
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.64173547298797e-05,
      "loss": 1.2933,
      "step": 329500
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.640348248768145e-05,
      "loss": 1.2836,
      "step": 330000
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.63896102454832e-05,
      "loss": 1.2893,
      "step": 330500
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.6375738003284944e-05,
      "loss": 1.2711,
      "step": 331000
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.63618935055711e-05,
      "loss": 1.2959,
      "step": 331500
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.634802126337284e-05,
      "loss": 1.2881,
      "step": 332000
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.633414902117459e-05,
      "loss": 1.2638,
      "step": 332500
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.632027677897634e-05,
      "loss": 1.2692,
      "step": 333000
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.630640453677809e-05,
      "loss": 1.284,
      "step": 333500
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.6292532294579836e-05,
      "loss": 1.2902,
      "step": 334000
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.6278687796865983e-05,
      "loss": 1.2538,
      "step": 334500
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.6264815554667734e-05,
      "loss": 1.2696,
      "step": 335000
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.6250943312469484e-05,
      "loss": 1.2782,
      "step": 335500
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.6237071070271234e-05,
      "loss": 1.3024,
      "step": 336000
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.6223198828072985e-05,
      "loss": 1.2837,
      "step": 336500
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.620932658587473e-05,
      "loss": 1.2803,
      "step": 337000
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.6195482088160876e-05,
      "loss": 1.2449,
      "step": 337500
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.6181609845962626e-05,
      "loss": 1.2975,
      "step": 338000
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.616773760376437e-05,
      "loss": 1.2743,
      "step": 338500
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.615386536156612e-05,
      "loss": 1.2704,
      "step": 339000
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.613999311936787e-05,
      "loss": 1.2868,
      "step": 339500
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.612614862165402e-05,
      "loss": 1.2899,
      "step": 340000
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.611227637945577e-05,
      "loss": 1.3224,
      "step": 340500
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.609840413725752e-05,
      "loss": 1.2663,
      "step": 341000
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.608453189505926e-05,
      "loss": 1.2835,
      "step": 341500
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.607068739734541e-05,
      "loss": 1.2671,
      "step": 342000
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.605681515514716e-05,
      "loss": 1.2504,
      "step": 342500
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.60429429129489e-05,
      "loss": 1.3007,
      "step": 343000
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.602909841523506e-05,
      "loss": 1.2662,
      "step": 343500
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.60152261730368e-05,
      "loss": 1.2757,
      "step": 344000
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.600135393083855e-05,
      "loss": 1.2698,
      "step": 344500
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.59874816886403e-05,
      "loss": 1.272,
      "step": 345000
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.597360944644205e-05,
      "loss": 1.2693,
      "step": 345500
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.5959737204243795e-05,
      "loss": 1.3116,
      "step": 346000
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.5945864962045546e-05,
      "loss": 1.2794,
      "step": 346500
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.5931992719847296e-05,
      "loss": 1.2252,
      "step": 347000
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.591814822213344e-05,
      "loss": 1.2587,
      "step": 347500
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.5904275979935193e-05,
      "loss": 1.2447,
      "step": 348000
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.5890403737736944e-05,
      "loss": 1.2944,
      "step": 348500
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.587653149553869e-05,
      "loss": 1.2759,
      "step": 349000
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.586265925334044e-05,
      "loss": 1.2658,
      "step": 349500
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.5848814755626585e-05,
      "loss": 1.2826,
      "step": 350000
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.583494251342833e-05,
      "loss": 1.2592,
      "step": 350500
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.582107027123008e-05,
      "loss": 1.2461,
      "step": 351000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.580719802903183e-05,
      "loss": 1.2691,
      "step": 351500
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.579332578683358e-05,
      "loss": 1.2966,
      "step": 352000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.577945354463533e-05,
      "loss": 1.2993,
      "step": 352500
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.576560904692148e-05,
      "loss": 1.254,
      "step": 353000
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.575173680472322e-05,
      "loss": 1.2732,
      "step": 353500
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.573786456252497e-05,
      "loss": 1.2816,
      "step": 354000
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.572399232032672e-05,
      "loss": 1.2768,
      "step": 354500
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.571014782261286e-05,
      "loss": 1.279,
      "step": 355000
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.569627558041462e-05,
      "loss": 1.2957,
      "step": 355500
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.568240333821637e-05,
      "loss": 1.2515,
      "step": 356000
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.566853109601811e-05,
      "loss": 1.2891,
      "step": 356500
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.565468659830426e-05,
      "loss": 1.2723,
      "step": 357000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.564081435610601e-05,
      "loss": 1.2763,
      "step": 357500
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.5626942113907754e-05,
      "loss": 1.2471,
      "step": 358000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.5613069871709505e-05,
      "loss": 1.2291,
      "step": 358500
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.559922537399565e-05,
      "loss": 1.2655,
      "step": 359000
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.55853531317974e-05,
      "loss": 1.2443,
      "step": 359500
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.557148088959915e-05,
      "loss": 1.2998,
      "step": 360000
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.55576086474009e-05,
      "loss": 1.2553,
      "step": 360500
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.554376414968704e-05,
      "loss": 1.2723,
      "step": 361000
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.5529891907488794e-05,
      "loss": 1.2735,
      "step": 361500
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.5516019665290544e-05,
      "loss": 1.2581,
      "step": 362000
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.550214742309229e-05,
      "loss": 1.2624,
      "step": 362500
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.548827518089404e-05,
      "loss": 1.2685,
      "step": 363000
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.5474430683180185e-05,
      "loss": 1.2706,
      "step": 363500
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.5460558440981935e-05,
      "loss": 1.2788,
      "step": 364000
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.5446686198783686e-05,
      "loss": 1.3057,
      "step": 364500
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.5432813956585436e-05,
      "loss": 1.2847,
      "step": 365000
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.541894171438718e-05,
      "loss": 1.2807,
      "step": 365500
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.540509721667333e-05,
      "loss": 1.2659,
      "step": 366000
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.539122497447508e-05,
      "loss": 1.2732,
      "step": 366500
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.537735273227682e-05,
      "loss": 1.2695,
      "step": 367000
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.536348049007858e-05,
      "loss": 1.2648,
      "step": 367500
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.534963599236472e-05,
      "loss": 1.2207,
      "step": 368000
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.533576375016647e-05,
      "loss": 1.2936,
      "step": 368500
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.532189150796822e-05,
      "loss": 1.2724,
      "step": 369000
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.530801926576997e-05,
      "loss": 1.2814,
      "step": 369500
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.529414702357171e-05,
      "loss": 1.2753,
      "step": 370000
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.528030252585786e-05,
      "loss": 1.252,
      "step": 370500
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.526643028365961e-05,
      "loss": 1.2635,
      "step": 371000
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.5252558041461354e-05,
      "loss": 1.2968,
      "step": 371500
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.523868579926311e-05,
      "loss": 1.2547,
      "step": 372000
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.522484130154925e-05,
      "loss": 1.235,
      "step": 372500
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.5210969059351e-05,
      "loss": 1.255,
      "step": 373000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.519709681715275e-05,
      "loss": 1.2676,
      "step": 373500
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.51832245749545e-05,
      "loss": 1.2566,
      "step": 374000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.516938007724065e-05,
      "loss": 1.2292,
      "step": 374500
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.5155507835042394e-05,
      "loss": 1.2656,
      "step": 375000
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.5141635592844144e-05,
      "loss": 1.2738,
      "step": 375500
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.5127763350645894e-05,
      "loss": 1.229,
      "step": 376000
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.5113891108447645e-05,
      "loss": 1.3016,
      "step": 376500
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.5100018866249395e-05,
      "loss": 1.2916,
      "step": 377000
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.508614662405114e-05,
      "loss": 1.2814,
      "step": 377500
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.507227438185289e-05,
      "loss": 1.2738,
      "step": 378000
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.505840213965464e-05,
      "loss": 1.2265,
      "step": 378500
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.504455764194078e-05,
      "loss": 1.2091,
      "step": 379000
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.503068539974254e-05,
      "loss": 1.2825,
      "step": 379500
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.501681315754428e-05,
      "loss": 1.253,
      "step": 380000
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.500294091534603e-05,
      "loss": 1.2273,
      "step": 380500
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.498909641763218e-05,
      "loss": 1.2472,
      "step": 381000
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.497525191991832e-05,
      "loss": 1.285,
      "step": 381500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.4961379677720076e-05,
      "loss": 1.2579,
      "step": 382000
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.494750743552182e-05,
      "loss": 1.2309,
      "step": 382500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.493363519332357e-05,
      "loss": 1.2488,
      "step": 383000
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.491976295112531e-05,
      "loss": 1.2319,
      "step": 383500
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.490589070892707e-05,
      "loss": 1.3062,
      "step": 384000
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.489201846672882e-05,
      "loss": 1.2417,
      "step": 384500
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.4878146224530564e-05,
      "loss": 1.2496,
      "step": 385000
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.4864273982332315e-05,
      "loss": 1.249,
      "step": 385500
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.485042948461846e-05,
      "loss": 1.2136,
      "step": 386000
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.4836557242420205e-05,
      "loss": 1.2657,
      "step": 386500
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.4822685000221956e-05,
      "loss": 1.2413,
      "step": 387000
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.4808812758023706e-05,
      "loss": 1.2245,
      "step": 387500
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.4794940515825457e-05,
      "loss": 1.2395,
      "step": 388000
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.4781096018111604e-05,
      "loss": 1.2537,
      "step": 388500
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.4767223775913354e-05,
      "loss": 1.2553,
      "step": 389000
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.47533515337151e-05,
      "loss": 1.2622,
      "step": 389500
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.473947929151685e-05,
      "loss": 1.2503,
      "step": 390000
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.4725634793802995e-05,
      "loss": 1.3117,
      "step": 390500
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.471176255160474e-05,
      "loss": 1.2587,
      "step": 391000
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.4697890309406496e-05,
      "loss": 1.2507,
      "step": 391500
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.468401806720824e-05,
      "loss": 1.2427,
      "step": 392000
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.467014582500999e-05,
      "loss": 1.1994,
      "step": 392500
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.465627358281174e-05,
      "loss": 1.2408,
      "step": 393000
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.464240134061349e-05,
      "loss": 1.237,
      "step": 393500
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.462852909841524e-05,
      "loss": 1.2105,
      "step": 394000
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.4614656856216985e-05,
      "loss": 1.2598,
      "step": 394500
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.460081235850313e-05,
      "loss": 1.2353,
      "step": 395000
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.458696786078927e-05,
      "loss": 1.2353,
      "step": 395500
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.457309561859103e-05,
      "loss": 1.2567,
      "step": 396000
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.455922337639278e-05,
      "loss": 1.2429,
      "step": 396500
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.454535113419452e-05,
      "loss": 1.2592,
      "step": 397000
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.4531478891996274e-05,
      "loss": 1.259,
      "step": 397500
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.4517606649798024e-05,
      "loss": 1.2563,
      "step": 398000
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.4503734407599774e-05,
      "loss": 1.2384,
      "step": 398500
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.448986216540152e-05,
      "loss": 1.2497,
      "step": 399000
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.4476017667687665e-05,
      "loss": 1.2643,
      "step": 399500
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.4462145425489415e-05,
      "loss": 1.2586,
      "step": 400000
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.2657673358917236,
      "eval_runtime": 2292.1899,
      "eval_samples_per_second": 50.824,
      "eval_steps_per_second": 12.706,
      "step": 400480
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.444827318329116e-05,
      "loss": 1.2327,
      "step": 400500
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.4434400941092916e-05,
      "loss": 1.0655,
      "step": 401000
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.4420528698894667e-05,
      "loss": 1.1033,
      "step": 401500
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.440668420118081e-05,
      "loss": 1.0973,
      "step": 402000
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.439281195898256e-05,
      "loss": 1.0936,
      "step": 402500
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.437893971678431e-05,
      "loss": 1.0838,
      "step": 403000
    },
    {
      "epoch": 2.02,
      "learning_rate": 4.436506747458605e-05,
      "loss": 1.0732,
      "step": 403500
    },
    {
      "epoch": 2.02,
      "learning_rate": 4.43512229768722e-05,
      "loss": 1.0999,
      "step": 404000
    },
    {
      "epoch": 2.02,
      "learning_rate": 4.433735073467395e-05,
      "loss": 1.1151,
      "step": 404500
    },
    {
      "epoch": 2.02,
      "learning_rate": 4.43234784924757e-05,
      "loss": 1.0716,
      "step": 405000
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.4309633994761846e-05,
      "loss": 1.1045,
      "step": 405500
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.429576175256359e-05,
      "loss": 1.0497,
      "step": 406000
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.428188951036534e-05,
      "loss": 1.0898,
      "step": 406500
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.426801726816709e-05,
      "loss": 1.0862,
      "step": 407000
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.425414502596884e-05,
      "loss": 1.1012,
      "step": 407500
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.4240272783770585e-05,
      "loss": 1.0961,
      "step": 408000
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.4226400541572335e-05,
      "loss": 1.0975,
      "step": 408500
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.4212528299374085e-05,
      "loss": 1.0917,
      "step": 409000
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.419868380166023e-05,
      "loss": 1.0844,
      "step": 409500
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.418481155946198e-05,
      "loss": 1.0518,
      "step": 410000
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.417093931726373e-05,
      "loss": 1.1364,
      "step": 410500
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.415706707506548e-05,
      "loss": 1.0604,
      "step": 411000
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.414319483286723e-05,
      "loss": 1.0885,
      "step": 411500
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.4129350335153374e-05,
      "loss": 1.1087,
      "step": 412000
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.411547809295512e-05,
      "loss": 1.092,
      "step": 412500
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.4101605850756875e-05,
      "loss": 1.091,
      "step": 413000
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.408773360855862e-05,
      "loss": 1.0414,
      "step": 413500
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.4073889110844766e-05,
      "loss": 1.1122,
      "step": 414000
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.4060016868646516e-05,
      "loss": 1.0946,
      "step": 414500
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.404614462644827e-05,
      "loss": 1.093,
      "step": 415000
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.403227238425001e-05,
      "loss": 1.0725,
      "step": 415500
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.401840014205176e-05,
      "loss": 1.0945,
      "step": 416000
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.400455564433791e-05,
      "loss": 1.104,
      "step": 416500
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.399068340213965e-05,
      "loss": 1.0747,
      "step": 417000
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.397681115994141e-05,
      "loss": 1.0999,
      "step": 417500
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.396293891774316e-05,
      "loss": 1.1117,
      "step": 418000
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.39490944200293e-05,
      "loss": 1.0901,
      "step": 418500
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.393522217783105e-05,
      "loss": 1.0883,
      "step": 419000
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.39213499356328e-05,
      "loss": 1.0798,
      "step": 419500
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.3907477693434544e-05,
      "loss": 1.086,
      "step": 420000
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.3893605451236294e-05,
      "loss": 1.1024,
      "step": 420500
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.387976095352244e-05,
      "loss": 1.0672,
      "step": 421000
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.386588871132419e-05,
      "loss": 1.0902,
      "step": 421500
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.385201646912594e-05,
      "loss": 1.1149,
      "step": 422000
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.383814422692769e-05,
      "loss": 1.0958,
      "step": 422500
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.382429972921383e-05,
      "loss": 1.1035,
      "step": 423000
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.381042748701558e-05,
      "loss": 1.0739,
      "step": 423500
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.379655524481733e-05,
      "loss": 1.0844,
      "step": 424000
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.378268300261908e-05,
      "loss": 1.1205,
      "step": 424500
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.3768810760420834e-05,
      "loss": 1.0836,
      "step": 425000
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.3754966262706975e-05,
      "loss": 1.0796,
      "step": 425500
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.3741094020508725e-05,
      "loss": 1.0996,
      "step": 426000
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.3727221778310475e-05,
      "loss": 1.0967,
      "step": 426500
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.3713349536112226e-05,
      "loss": 1.1074,
      "step": 427000
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.369950503839837e-05,
      "loss": 1.0822,
      "step": 427500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.3685632796200116e-05,
      "loss": 1.0917,
      "step": 428000
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.367176055400187e-05,
      "loss": 1.0884,
      "step": 428500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.365788831180361e-05,
      "loss": 1.0985,
      "step": 429000
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.364401606960537e-05,
      "loss": 1.081,
      "step": 429500
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.363017157189151e-05,
      "loss": 1.0867,
      "step": 430000
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.361629932969326e-05,
      "loss": 1.1071,
      "step": 430500
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.360242708749501e-05,
      "loss": 1.0765,
      "step": 431000
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.358855484529676e-05,
      "loss": 1.0979,
      "step": 431500
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.35746826030985e-05,
      "loss": 1.0881,
      "step": 432000
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.356083810538465e-05,
      "loss": 1.0822,
      "step": 432500
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.35469658631864e-05,
      "loss": 1.1083,
      "step": 433000
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.353309362098815e-05,
      "loss": 1.136,
      "step": 433500
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.35192213787899e-05,
      "loss": 1.1163,
      "step": 434000
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.350537688107604e-05,
      "loss": 1.1421,
      "step": 434500
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.349150463887779e-05,
      "loss": 1.1119,
      "step": 435000
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.347763239667954e-05,
      "loss": 1.0897,
      "step": 435500
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.346376015448129e-05,
      "loss": 1.083,
      "step": 436000
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.3449887912283036e-05,
      "loss": 1.0644,
      "step": 436500
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.343604341456918e-05,
      "loss": 1.1132,
      "step": 437000
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.3422171172370933e-05,
      "loss": 1.1225,
      "step": 437500
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.340832667465708e-05,
      "loss": 1.1152,
      "step": 438000
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.339445443245883e-05,
      "loss": 1.0896,
      "step": 438500
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.3380582190260575e-05,
      "loss": 1.091,
      "step": 439000
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.336670994806233e-05,
      "loss": 1.1104,
      "step": 439500
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.3352837705864075e-05,
      "loss": 1.1043,
      "step": 440000
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.3338965463665826e-05,
      "loss": 1.0988,
      "step": 440500
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.332509322146757e-05,
      "loss": 1.0753,
      "step": 441000
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.3311220979269326e-05,
      "loss": 1.1075,
      "step": 441500
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.329737648155547e-05,
      "loss": 1.1,
      "step": 442000
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.3283531983841614e-05,
      "loss": 1.1051,
      "step": 442500
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.3269659741643364e-05,
      "loss": 1.0745,
      "step": 443000
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.325578749944511e-05,
      "loss": 1.1184,
      "step": 443500
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.3241915257246865e-05,
      "loss": 1.1368,
      "step": 444000
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.322804301504861e-05,
      "loss": 1.1034,
      "step": 444500
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.321417077285036e-05,
      "loss": 1.0925,
      "step": 445000
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.32002985306521e-05,
      "loss": 1.1288,
      "step": 445500
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.318642628845386e-05,
      "loss": 1.0803,
      "step": 446000
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.317258179074e-05,
      "loss": 1.1356,
      "step": 446500
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.315870954854175e-05,
      "loss": 1.1141,
      "step": 447000
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.31448373063435e-05,
      "loss": 1.0926,
      "step": 447500
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.313096506414525e-05,
      "loss": 1.12,
      "step": 448000
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.3117092821946995e-05,
      "loss": 1.1076,
      "step": 448500
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.310324832423314e-05,
      "loss": 1.1147,
      "step": 449000
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.308937608203489e-05,
      "loss": 1.098,
      "step": 449500
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.307550383983664e-05,
      "loss": 1.0824,
      "step": 450000
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.306163159763839e-05,
      "loss": 1.1417,
      "step": 450500
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.3047759355440144e-05,
      "loss": 1.1199,
      "step": 451000
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.303394260221068e-05,
      "loss": 1.1545,
      "step": 451500
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.302007036001243e-05,
      "loss": 1.1059,
      "step": 452000
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.300619811781418e-05,
      "loss": 1.0453,
      "step": 452500
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.299232587561593e-05,
      "loss": 1.1085,
      "step": 453000
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.297845363341768e-05,
      "loss": 1.0962,
      "step": 453500
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.2964581391219426e-05,
      "loss": 1.1348,
      "step": 454000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.2950709149021176e-05,
      "loss": 1.0984,
      "step": 454500
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.2936836906822927e-05,
      "loss": 1.1013,
      "step": 455000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.292299240910907e-05,
      "loss": 1.1083,
      "step": 455500
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.2909120166910824e-05,
      "loss": 1.1032,
      "step": 456000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.289524792471257e-05,
      "loss": 1.1065,
      "step": 456500
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.2881403426998715e-05,
      "loss": 1.1183,
      "step": 457000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.2867531184800465e-05,
      "loss": 1.1236,
      "step": 457500
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.2853658942602216e-05,
      "loss": 1.1211,
      "step": 458000
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.283978670040396e-05,
      "loss": 1.117,
      "step": 458500
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.282591445820571e-05,
      "loss": 1.0777,
      "step": 459000
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.281204221600746e-05,
      "loss": 1.1025,
      "step": 459500
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.279816997380921e-05,
      "loss": 1.1627,
      "step": 460000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.278432547609536e-05,
      "loss": 1.1341,
      "step": 460500
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.27704532338971e-05,
      "loss": 1.1444,
      "step": 461000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.275658099169885e-05,
      "loss": 1.1139,
      "step": 461500
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.27427087495006e-05,
      "loss": 1.1051,
      "step": 462000
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.272883650730235e-05,
      "loss": 1.1291,
      "step": 462500
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.27149642651041e-05,
      "loss": 1.0814,
      "step": 463000
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.2701092022905846e-05,
      "loss": 1.1142,
      "step": 463500
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.2687219780707596e-05,
      "loss": 1.1263,
      "step": 464000
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.2673375282993744e-05,
      "loss": 1.13,
      "step": 464500
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.265950304079549e-05,
      "loss": 1.1257,
      "step": 465000
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.2645630798597244e-05,
      "loss": 1.0925,
      "step": 465500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.263175855639899e-05,
      "loss": 1.1491,
      "step": 466000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.261788631420074e-05,
      "loss": 1.1021,
      "step": 466500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.2604041816486885e-05,
      "loss": 1.1233,
      "step": 467000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.2590169574288636e-05,
      "loss": 1.1222,
      "step": 467500
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.257629733209038e-05,
      "loss": 1.061,
      "step": 468000
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.256242508989213e-05,
      "loss": 1.089,
      "step": 468500
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.254858059217828e-05,
      "loss": 1.1274,
      "step": 469000
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.253470834998002e-05,
      "loss": 1.1332,
      "step": 469500
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.252083610778178e-05,
      "loss": 1.0956,
      "step": 470000
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.250696386558353e-05,
      "loss": 1.0845,
      "step": 470500
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.249311936786967e-05,
      "loss": 1.0919,
      "step": 471000
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.247924712567142e-05,
      "loss": 1.1001,
      "step": 471500
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.246537488347317e-05,
      "loss": 1.1168,
      "step": 472000
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.245150264127491e-05,
      "loss": 1.071,
      "step": 472500
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.243765814356106e-05,
      "loss": 1.0977,
      "step": 473000
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.242378590136281e-05,
      "loss": 1.0885,
      "step": 473500
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.240991365916456e-05,
      "loss": 1.0984,
      "step": 474000
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.239604141696631e-05,
      "loss": 1.1272,
      "step": 474500
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.238219691925245e-05,
      "loss": 1.0955,
      "step": 475000
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.23683524215386e-05,
      "loss": 1.1229,
      "step": 475500
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.235448017934035e-05,
      "loss": 1.119,
      "step": 476000
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.23406079371421e-05,
      "loss": 1.1319,
      "step": 476500
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.232673569494385e-05,
      "loss": 1.1302,
      "step": 477000
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.231286345274559e-05,
      "loss": 1.1348,
      "step": 477500
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.2298991210547344e-05,
      "loss": 1.1264,
      "step": 478000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.2285118968349094e-05,
      "loss": 1.135,
      "step": 478500
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.2271246726150844e-05,
      "loss": 1.1015,
      "step": 479000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.2257402228436985e-05,
      "loss": 1.1008,
      "step": 479500
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.224352998623874e-05,
      "loss": 1.099,
      "step": 480000
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.2229657744040486e-05,
      "loss": 1.1121,
      "step": 480500
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.2215785501842236e-05,
      "loss": 1.1028,
      "step": 481000
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.220194100412838e-05,
      "loss": 1.1097,
      "step": 481500
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.2188068761930133e-05,
      "loss": 1.1354,
      "step": 482000
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.217422426421628e-05,
      "loss": 1.0963,
      "step": 482500
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.2160352022018024e-05,
      "loss": 1.0982,
      "step": 483000
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.2146479779819775e-05,
      "loss": 1.1094,
      "step": 483500
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.213260753762152e-05,
      "loss": 1.1051,
      "step": 484000
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.2118735295423275e-05,
      "loss": 1.0978,
      "step": 484500
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.210486305322502e-05,
      "loss": 1.0913,
      "step": 485000
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.209099081102677e-05,
      "loss": 1.1152,
      "step": 485500
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.2077146313312916e-05,
      "loss": 1.1406,
      "step": 486000
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.206327407111467e-05,
      "loss": 1.1645,
      "step": 486500
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.204940182891641e-05,
      "loss": 1.0845,
      "step": 487000
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.203552958671817e-05,
      "loss": 1.0932,
      "step": 487500
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.202165734451991e-05,
      "loss": 1.113,
      "step": 488000
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.200778510232166e-05,
      "loss": 1.1081,
      "step": 488500
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.1993912860123405e-05,
      "loss": 1.1152,
      "step": 489000
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.198004061792516e-05,
      "loss": 1.1242,
      "step": 489500
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.19661961202113e-05,
      "loss": 1.1411,
      "step": 490000
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.195235162249745e-05,
      "loss": 1.0706,
      "step": 490500
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.19384793802992e-05,
      "loss": 1.124,
      "step": 491000
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.1924607138100944e-05,
      "loss": 1.1307,
      "step": 491500
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.19107348959027e-05,
      "loss": 1.0815,
      "step": 492000
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.1896862653704445e-05,
      "loss": 1.0534,
      "step": 492500
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.1882990411506195e-05,
      "loss": 1.1443,
      "step": 493000
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.186911816930794e-05,
      "loss": 1.0887,
      "step": 493500
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.1855245927109696e-05,
      "loss": 1.1223,
      "step": 494000
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.1841401429395836e-05,
      "loss": 1.1186,
      "step": 494500
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.1827529187197586e-05,
      "loss": 1.1148,
      "step": 495000
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.181365694499934e-05,
      "loss": 1.145,
      "step": 495500
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.179978470280109e-05,
      "loss": 1.1121,
      "step": 496000
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.178591246060283e-05,
      "loss": 1.1051,
      "step": 496500
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.177204021840459e-05,
      "loss": 1.1094,
      "step": 497000
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.175819572069073e-05,
      "loss": 1.0992,
      "step": 497500
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.174432347849248e-05,
      "loss": 1.0994,
      "step": 498000
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.173045123629423e-05,
      "loss": 1.095,
      "step": 498500
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.171657899409598e-05,
      "loss": 1.1039,
      "step": 499000
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.170270675189772e-05,
      "loss": 1.1136,
      "step": 499500
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.168883450969947e-05,
      "loss": 1.0978,
      "step": 500000
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.1674962267501224e-05,
      "loss": 1.1082,
      "step": 500500
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.1661090025302974e-05,
      "loss": 1.0924,
      "step": 501000
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.164721778310472e-05,
      "loss": 1.0882,
      "step": 501500
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.1633373285390865e-05,
      "loss": 1.1354,
      "step": 502000
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.1619501043192615e-05,
      "loss": 1.1038,
      "step": 502500
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.160565654547876e-05,
      "loss": 1.0737,
      "step": 503000
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.159178430328051e-05,
      "loss": 1.1022,
      "step": 503500
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.1577912061082256e-05,
      "loss": 1.0954,
      "step": 504000
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.156403981888401e-05,
      "loss": 1.1156,
      "step": 504500
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.155016757668576e-05,
      "loss": 1.1346,
      "step": 505000
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.15363230789719e-05,
      "loss": 1.1189,
      "step": 505500
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.1522450836773655e-05,
      "loss": 1.0778,
      "step": 506000
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.15085785945754e-05,
      "loss": 1.1144,
      "step": 506500
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.149470635237715e-05,
      "loss": 1.0853,
      "step": 507000
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.14808341101789e-05,
      "loss": 1.125,
      "step": 507500
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.146696186798065e-05,
      "loss": 1.1305,
      "step": 508000
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.145311737026679e-05,
      "loss": 1.1017,
      "step": 508500
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.143924512806855e-05,
      "loss": 1.0967,
      "step": 509000
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.142537288587029e-05,
      "loss": 1.1069,
      "step": 509500
    },
    {
      "epoch": 2.55,
      "learning_rate": 4.141150064367204e-05,
      "loss": 1.1048,
      "step": 510000
    },
    {
      "epoch": 2.55,
      "learning_rate": 4.1397628401473784e-05,
      "loss": 1.1098,
      "step": 510500
    },
    {
      "epoch": 2.55,
      "learning_rate": 4.138378390375993e-05,
      "loss": 1.1263,
      "step": 511000
    },
    {
      "epoch": 2.55,
      "learning_rate": 4.136991166156168e-05,
      "loss": 1.1172,
      "step": 511500
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.135603941936343e-05,
      "loss": 1.0926,
      "step": 512000
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.134216717716518e-05,
      "loss": 1.1185,
      "step": 512500
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.132829493496693e-05,
      "loss": 1.1351,
      "step": 513000
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.1314422692768677e-05,
      "loss": 1.1204,
      "step": 513500
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.130055045057043e-05,
      "loss": 1.1484,
      "step": 514000
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.128667820837218e-05,
      "loss": 1.0971,
      "step": 514500
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.127283371065832e-05,
      "loss": 1.116,
      "step": 515000
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.1258961468460075e-05,
      "loss": 1.109,
      "step": 515500
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.1245089226261825e-05,
      "loss": 1.0935,
      "step": 516000
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.123121698406357e-05,
      "loss": 1.1423,
      "step": 516500
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.121734474186532e-05,
      "loss": 1.1505,
      "step": 517000
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.120347249966707e-05,
      "loss": 1.0841,
      "step": 517500
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.118960025746882e-05,
      "loss": 1.0756,
      "step": 518000
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.1175728015270563e-05,
      "loss": 1.1074,
      "step": 518500
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.116188351755671e-05,
      "loss": 1.1054,
      "step": 519000
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.114801127535846e-05,
      "loss": 1.0905,
      "step": 519500
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.1134139033160205e-05,
      "loss": 1.1113,
      "step": 520000
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.112026679096196e-05,
      "loss": 1.0938,
      "step": 520500
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.11064222932481e-05,
      "loss": 1.0985,
      "step": 521000
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.109255005104985e-05,
      "loss": 1.0955,
      "step": 521500
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.1078705553336e-05,
      "loss": 1.1235,
      "step": 522000
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.106483331113774e-05,
      "loss": 1.0996,
      "step": 522500
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.10509610689395e-05,
      "loss": 1.101,
      "step": 523000
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.1037088826741244e-05,
      "loss": 1.1064,
      "step": 523500
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.1023216584542994e-05,
      "loss": 1.0665,
      "step": 524000
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.100937208682914e-05,
      "loss": 1.0797,
      "step": 524500
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.099549984463089e-05,
      "loss": 1.0958,
      "step": 525000
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.0981627602432636e-05,
      "loss": 1.1071,
      "step": 525500
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.0967755360234386e-05,
      "loss": 1.1177,
      "step": 526000
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.0953883118036136e-05,
      "loss": 1.0834,
      "step": 526500
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.094001087583789e-05,
      "loss": 1.1253,
      "step": 527000
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.092613863363963e-05,
      "loss": 1.0928,
      "step": 527500
    },
    {
      "epoch": 2.64,
      "learning_rate": 4.091226639144138e-05,
      "loss": 1.1232,
      "step": 528000
    },
    {
      "epoch": 2.64,
      "learning_rate": 4.089839414924313e-05,
      "loss": 1.0798,
      "step": 528500
    },
    {
      "epoch": 2.64,
      "learning_rate": 4.088454965152928e-05,
      "loss": 1.0863,
      "step": 529000
    },
    {
      "epoch": 2.64,
      "learning_rate": 4.087067740933103e-05,
      "loss": 1.0987,
      "step": 529500
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.085683291161717e-05,
      "loss": 1.1093,
      "step": 530000
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.084296066941892e-05,
      "loss": 1.0878,
      "step": 530500
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.082908842722067e-05,
      "loss": 1.0839,
      "step": 531000
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.081521618502242e-05,
      "loss": 1.1009,
      "step": 531500
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.0801343942824164e-05,
      "loss": 1.0937,
      "step": 532000
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.078747170062592e-05,
      "loss": 1.1018,
      "step": 532500
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.077362720291206e-05,
      "loss": 1.1453,
      "step": 533000
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.075975496071381e-05,
      "loss": 1.1311,
      "step": 533500
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.074588271851556e-05,
      "loss": 1.1424,
      "step": 534000
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.073201047631731e-05,
      "loss": 1.1194,
      "step": 534500
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.071816597860346e-05,
      "loss": 1.1085,
      "step": 535000
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.07042937364052e-05,
      "loss": 1.0833,
      "step": 535500
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.069042149420695e-05,
      "loss": 1.08,
      "step": 536000
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.06765492520087e-05,
      "loss": 1.1101,
      "step": 536500
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.0662677009810454e-05,
      "loss": 1.0785,
      "step": 537000
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.0648804767612204e-05,
      "loss": 1.1189,
      "step": 537500
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.063493252541395e-05,
      "loss": 1.119,
      "step": 538000
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.06210602832157e-05,
      "loss": 1.0982,
      "step": 538500
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.0607215785501846e-05,
      "loss": 1.0951,
      "step": 539000
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.059334354330359e-05,
      "loss": 1.087,
      "step": 539500
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.057947130110534e-05,
      "loss": 1.0691,
      "step": 540000
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.056559905890709e-05,
      "loss": 1.1077,
      "step": 540500
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.055175456119324e-05,
      "loss": 1.0765,
      "step": 541000
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.053788231899499e-05,
      "loss": 1.0911,
      "step": 541500
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.052401007679674e-05,
      "loss": 1.1488,
      "step": 542000
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.051016557908288e-05,
      "loss": 1.0862,
      "step": 542500
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.049629333688463e-05,
      "loss": 1.139,
      "step": 543000
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.048242109468638e-05,
      "loss": 1.1142,
      "step": 543500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.046854885248812e-05,
      "loss": 1.1093,
      "step": 544000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.0454704354774276e-05,
      "loss": 1.0768,
      "step": 544500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.044083211257602e-05,
      "loss": 1.0967,
      "step": 545000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.042695987037777e-05,
      "loss": 1.076,
      "step": 545500
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.041308762817952e-05,
      "loss": 1.0979,
      "step": 546000
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.039921538598127e-05,
      "loss": 1.1001,
      "step": 546500
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.038537088826742e-05,
      "loss": 1.0963,
      "step": 547000
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.037149864606916e-05,
      "loss": 1.0873,
      "step": 547500
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.035762640387091e-05,
      "loss": 1.1024,
      "step": 548000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.0343754161672656e-05,
      "loss": 1.0893,
      "step": 548500
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.032988191947441e-05,
      "loss": 1.1132,
      "step": 549000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.0316009677276163e-05,
      "loss": 1.0923,
      "step": 549500
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.0302165179562304e-05,
      "loss": 1.09,
      "step": 550000
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.0288292937364054e-05,
      "loss": 1.0656,
      "step": 550500
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.0274420695165805e-05,
      "loss": 1.0778,
      "step": 551000
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.026054845296755e-05,
      "loss": 1.0686,
      "step": 551500
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.02466762107693e-05,
      "loss": 1.0693,
      "step": 552000
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.023280396857105e-05,
      "loss": 1.0223,
      "step": 552500
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.0218959470857196e-05,
      "loss": 1.1032,
      "step": 553000
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.0205087228658946e-05,
      "loss": 1.0982,
      "step": 553500
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.01912149864607e-05,
      "loss": 1.0819,
      "step": 554000
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.017734274426244e-05,
      "loss": 1.0925,
      "step": 554500
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.016347050206419e-05,
      "loss": 1.0636,
      "step": 555000
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.014959825986594e-05,
      "loss": 1.0749,
      "step": 555500
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.013572601766769e-05,
      "loss": 1.118,
      "step": 556000
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.0121853775469435e-05,
      "loss": 1.1173,
      "step": 556500
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.010800927775558e-05,
      "loss": 1.0461,
      "step": 557000
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.009413703555733e-05,
      "loss": 1.1185,
      "step": 557500
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.008026479335908e-05,
      "loss": 1.1213,
      "step": 558000
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.006642029564523e-05,
      "loss": 1.1101,
      "step": 558500
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.0052548053446974e-05,
      "loss": 1.1088,
      "step": 559000
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.0038675811248724e-05,
      "loss": 1.0732,
      "step": 559500
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.0024803569050474e-05,
      "loss": 1.0912,
      "step": 560000
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.0010931326852225e-05,
      "loss": 1.0618,
      "step": 560500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.999705908465397e-05,
      "loss": 1.1204,
      "step": 561000
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.998321458694012e-05,
      "loss": 1.1075,
      "step": 561500
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.9969342344741866e-05,
      "loss": 1.1045,
      "step": 562000
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.9955470102543616e-05,
      "loss": 1.0815,
      "step": 562500
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.994159786034537e-05,
      "loss": 1.1111,
      "step": 563000
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.992772561814712e-05,
      "loss": 1.1399,
      "step": 563500
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.991388112043326e-05,
      "loss": 1.0952,
      "step": 564000
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.990000887823501e-05,
      "loss": 1.1219,
      "step": 564500
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.988613663603676e-05,
      "loss": 1.1349,
      "step": 565000
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.98722643938385e-05,
      "loss": 1.0847,
      "step": 565500
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.985839215164026e-05,
      "loss": 1.1166,
      "step": 566000
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.98445476539264e-05,
      "loss": 1.1165,
      "step": 566500
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.983067541172815e-05,
      "loss": 1.1057,
      "step": 567000
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.98168031695299e-05,
      "loss": 1.1162,
      "step": 567500
    },
    {
      "epoch": 2.84,
      "learning_rate": 3.980293092733165e-05,
      "loss": 1.1167,
      "step": 568000
    },
    {
      "epoch": 2.84,
      "learning_rate": 3.9789058685133394e-05,
      "loss": 1.1194,
      "step": 568500
    },
    {
      "epoch": 2.84,
      "learning_rate": 3.977521418741954e-05,
      "loss": 1.0923,
      "step": 569000
    },
    {
      "epoch": 2.84,
      "learning_rate": 3.976134194522129e-05,
      "loss": 1.0792,
      "step": 569500
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.974746970302304e-05,
      "loss": 1.1047,
      "step": 570000
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.973359746082479e-05,
      "loss": 1.0987,
      "step": 570500
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.971972521862654e-05,
      "loss": 1.0963,
      "step": 571000
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.9705852976428286e-05,
      "loss": 1.0841,
      "step": 571500
    },
    {
      "epoch": 2.86,
      "learning_rate": 3.9691980734230037e-05,
      "loss": 1.1015,
      "step": 572000
    },
    {
      "epoch": 2.86,
      "learning_rate": 3.967810849203179e-05,
      "loss": 1.1341,
      "step": 572500
    },
    {
      "epoch": 2.86,
      "learning_rate": 3.966426399431793e-05,
      "loss": 1.0903,
      "step": 573000
    },
    {
      "epoch": 2.86,
      "learning_rate": 3.965039175211968e-05,
      "loss": 1.0936,
      "step": 573500
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.9636547254405825e-05,
      "loss": 1.0917,
      "step": 574000
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.9622675012207575e-05,
      "loss": 1.07,
      "step": 574500
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.9608802770009326e-05,
      "loss": 1.0916,
      "step": 575000
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.9594930527811076e-05,
      "loss": 1.074,
      "step": 575500
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.958105828561282e-05,
      "loss": 1.1075,
      "step": 576000
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.956718604341457e-05,
      "loss": 1.0674,
      "step": 576500
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.955331380121632e-05,
      "loss": 1.0929,
      "step": 577000
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.953944155901807e-05,
      "loss": 1.0728,
      "step": 577500
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.952559706130422e-05,
      "loss": 1.0697,
      "step": 578000
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.951172481910597e-05,
      "loss": 1.0574,
      "step": 578500
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.949788032139211e-05,
      "loss": 1.1078,
      "step": 579000
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.948400807919386e-05,
      "loss": 1.0723,
      "step": 579500
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.947013583699561e-05,
      "loss": 1.0879,
      "step": 580000
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.945626359479735e-05,
      "loss": 1.0655,
      "step": 580500
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.94423913525991e-05,
      "loss": 1.0959,
      "step": 581000
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.9428519110400854e-05,
      "loss": 1.118,
      "step": 581500
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.9414646868202604e-05,
      "loss": 1.0341,
      "step": 582000
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.940077462600435e-05,
      "loss": 1.1139,
      "step": 582500
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.93869301282905e-05,
      "loss": 1.0957,
      "step": 583000
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.937308563057664e-05,
      "loss": 1.0957,
      "step": 583500
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.935921338837839e-05,
      "loss": 1.105,
      "step": 584000
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.934534114618014e-05,
      "loss": 1.0748,
      "step": 584500
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.9331468903981886e-05,
      "loss": 1.1071,
      "step": 585000
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.931759666178364e-05,
      "loss": 1.1259,
      "step": 585500
    },
    {
      "epoch": 2.93,
      "learning_rate": 3.9303752164069784e-05,
      "loss": 1.084,
      "step": 586000
    },
    {
      "epoch": 2.93,
      "learning_rate": 3.9289879921871534e-05,
      "loss": 1.1081,
      "step": 586500
    },
    {
      "epoch": 2.93,
      "learning_rate": 3.9276007679673285e-05,
      "loss": 1.0677,
      "step": 587000
    },
    {
      "epoch": 2.93,
      "learning_rate": 3.9262135437475035e-05,
      "loss": 1.0736,
      "step": 587500
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.924826319527678e-05,
      "loss": 1.0918,
      "step": 588000
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.923439095307853e-05,
      "loss": 1.0801,
      "step": 588500
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.922051871088028e-05,
      "loss": 1.082,
      "step": 589000
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.920667421316642e-05,
      "loss": 1.1083,
      "step": 589500
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.919280197096818e-05,
      "loss": 1.1054,
      "step": 590000
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.917892972876992e-05,
      "loss": 1.0868,
      "step": 590500
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.916505748657167e-05,
      "loss": 1.094,
      "step": 591000
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.915118524437342e-05,
      "loss": 1.1091,
      "step": 591500
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.913731300217517e-05,
      "loss": 1.0918,
      "step": 592000
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.912344075997692e-05,
      "loss": 1.1179,
      "step": 592500
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.910959626226306e-05,
      "loss": 1.101,
      "step": 593000
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.909572402006481e-05,
      "loss": 1.0977,
      "step": 593500
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.908185177786656e-05,
      "loss": 1.1326,
      "step": 594000
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.9067979535668307e-05,
      "loss": 1.1518,
      "step": 594500
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.905410729347006e-05,
      "loss": 1.1017,
      "step": 595000
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.9040235051271814e-05,
      "loss": 1.1119,
      "step": 595500
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.902641829804235e-05,
      "loss": 1.0979,
      "step": 596000
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.90125460558441e-05,
      "loss": 1.0659,
      "step": 596500
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.8998673813645845e-05,
      "loss": 1.1008,
      "step": 597000
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.8984801571447596e-05,
      "loss": 1.1047,
      "step": 597500
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.8970929329249346e-05,
      "loss": 1.0828,
      "step": 598000
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.8957057087051096e-05,
      "loss": 1.0944,
      "step": 598500
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.894318484485284e-05,
      "loss": 1.0751,
      "step": 599000
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.89293126026546e-05,
      "loss": 1.1355,
      "step": 599500
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.891544036045635e-05,
      "loss": 1.091,
      "step": 600000
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.890156811825809e-05,
      "loss": 1.0794,
      "step": 600500
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.174296498298645,
      "eval_runtime": 2890.815,
      "eval_samples_per_second": 40.299,
      "eval_steps_per_second": 10.075,
      "step": 600720
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.888772362054424e-05,
      "loss": 0.9755,
      "step": 601000
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.887385137834599e-05,
      "loss": 0.884,
      "step": 601500
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.885997913614773e-05,
      "loss": 0.9047,
      "step": 602000
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.884610689394948e-05,
      "loss": 0.9205,
      "step": 602500
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.883223465175123e-05,
      "loss": 0.8909,
      "step": 603000
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.881836240955298e-05,
      "loss": 0.8974,
      "step": 603500
    },
    {
      "epoch": 3.02,
      "learning_rate": 3.880449016735473e-05,
      "loss": 0.8712,
      "step": 604000
    },
    {
      "epoch": 3.02,
      "learning_rate": 3.879061792515648e-05,
      "loss": 0.8829,
      "step": 604500
    },
    {
      "epoch": 3.02,
      "learning_rate": 3.8776745682958234e-05,
      "loss": 0.9126,
      "step": 605000
    },
    {
      "epoch": 3.02,
      "learning_rate": 3.876292892972877e-05,
      "loss": 0.8997,
      "step": 605500
    },
    {
      "epoch": 3.03,
      "learning_rate": 3.874905668753052e-05,
      "loss": 0.916,
      "step": 606000
    },
    {
      "epoch": 3.03,
      "learning_rate": 3.8735184445332266e-05,
      "loss": 0.8956,
      "step": 606500
    },
    {
      "epoch": 3.03,
      "learning_rate": 3.8721312203134016e-05,
      "loss": 0.9245,
      "step": 607000
    },
    {
      "epoch": 3.03,
      "learning_rate": 3.8707439960935766e-05,
      "loss": 0.8832,
      "step": 607500
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.8693595463221913e-05,
      "loss": 0.9069,
      "step": 608000
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.8679723221023664e-05,
      "loss": 0.9059,
      "step": 608500
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.8665850978825414e-05,
      "loss": 0.9102,
      "step": 609000
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.865197873662716e-05,
      "loss": 0.9267,
      "step": 609500
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.863810649442891e-05,
      "loss": 0.8839,
      "step": 610000
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.8624261996715055e-05,
      "loss": 0.9095,
      "step": 610500
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.86103897545168e-05,
      "loss": 0.9016,
      "step": 611000
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.8596517512318556e-05,
      "loss": 0.888,
      "step": 611500
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.8582645270120306e-05,
      "loss": 0.9022,
      "step": 612000
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.856877302792205e-05,
      "loss": 0.8968,
      "step": 612500
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.85549007857238e-05,
      "loss": 0.894,
      "step": 613000
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.854102854352555e-05,
      "loss": 0.9111,
      "step": 613500
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.85271563013273e-05,
      "loss": 0.9453,
      "step": 614000
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.851331180361344e-05,
      "loss": 0.9204,
      "step": 614500
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.849946730589959e-05,
      "loss": 0.8829,
      "step": 615000
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.848559506370134e-05,
      "loss": 0.9192,
      "step": 615500
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.847172282150309e-05,
      "loss": 0.8969,
      "step": 616000
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.845785057930484e-05,
      "loss": 0.9024,
      "step": 616500
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.844397833710658e-05,
      "loss": 0.9147,
      "step": 617000
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.843013383939273e-05,
      "loss": 0.9149,
      "step": 617500
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.841626159719448e-05,
      "loss": 0.8996,
      "step": 618000
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.8402389354996225e-05,
      "loss": 0.9007,
      "step": 618500
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.8388517112797975e-05,
      "loss": 0.9248,
      "step": 619000
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.8374644870599725e-05,
      "loss": 0.9232,
      "step": 619500
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.836080037288587e-05,
      "loss": 0.9383,
      "step": 620000
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.834692813068762e-05,
      "loss": 0.9127,
      "step": 620500
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.833305588848937e-05,
      "loss": 0.8906,
      "step": 621000
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.831918364629112e-05,
      "loss": 0.8873,
      "step": 621500
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.830531140409287e-05,
      "loss": 0.9107,
      "step": 622000
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.8291466906379014e-05,
      "loss": 0.9131,
      "step": 622500
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.827759466418076e-05,
      "loss": 0.9131,
      "step": 623000
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.8263722421982515e-05,
      "loss": 0.9096,
      "step": 623500
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.8249850179784265e-05,
      "loss": 0.9231,
      "step": 624000
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.823597793758601e-05,
      "loss": 0.9184,
      "step": 624500
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.822210569538776e-05,
      "loss": 0.9311,
      "step": 625000
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.820823345318951e-05,
      "loss": 0.9352,
      "step": 625500
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.819436121099126e-05,
      "loss": 0.9326,
      "step": 626000
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.81805167132774e-05,
      "loss": 0.9368,
      "step": 626500
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.816664447107915e-05,
      "loss": 0.9298,
      "step": 627000
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.81527722288809e-05,
      "loss": 0.9033,
      "step": 627500
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.8138899986682645e-05,
      "loss": 0.8821,
      "step": 628000
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.8125027744484395e-05,
      "loss": 0.9039,
      "step": 628500
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.811118324677054e-05,
      "loss": 0.9305,
      "step": 629000
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.809731100457229e-05,
      "loss": 0.9387,
      "step": 629500
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.808346650685844e-05,
      "loss": 0.9338,
      "step": 630000
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.8069594264660183e-05,
      "loss": 0.9255,
      "step": 630500
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.8055722022461934e-05,
      "loss": 0.9017,
      "step": 631000
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.8041849780263684e-05,
      "loss": 0.928,
      "step": 631500
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.8027977538065435e-05,
      "loss": 0.9072,
      "step": 632000
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.8014105295867185e-05,
      "loss": 0.9037,
      "step": 632500
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.8000233053668935e-05,
      "loss": 0.9244,
      "step": 633000
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.7986360811470686e-05,
      "loss": 0.9185,
      "step": 633500
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.7972516313756826e-05,
      "loss": 0.9212,
      "step": 634000
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.7958644071558576e-05,
      "loss": 0.9466,
      "step": 634500
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.794477182936033e-05,
      "loss": 0.9501,
      "step": 635000
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.793089958716207e-05,
      "loss": 0.9214,
      "step": 635500
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.7917082833932614e-05,
      "loss": 0.9204,
      "step": 636000
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.7903210591734365e-05,
      "loss": 0.9254,
      "step": 636500
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.7889338349536115e-05,
      "loss": 0.9566,
      "step": 637000
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.7875466107337865e-05,
      "loss": 0.9398,
      "step": 637500
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.786159386513961e-05,
      "loss": 0.955,
      "step": 638000
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.784772162294136e-05,
      "loss": 0.9227,
      "step": 638500
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.783384938074311e-05,
      "loss": 0.9306,
      "step": 639000
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.781997713854486e-05,
      "loss": 0.922,
      "step": 639500
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.780613264083101e-05,
      "loss": 0.9704,
      "step": 640000
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.779226039863276e-05,
      "loss": 0.9309,
      "step": 640500
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.77783881564345e-05,
      "loss": 0.9061,
      "step": 641000
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.776454365872065e-05,
      "loss": 0.9418,
      "step": 641500
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.77506714165224e-05,
      "loss": 0.9279,
      "step": 642000
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.773679917432414e-05,
      "loss": 0.922,
      "step": 642500
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.772292693212589e-05,
      "loss": 0.9447,
      "step": 643000
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.770905468992764e-05,
      "loss": 0.9561,
      "step": 643500
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.769521019221379e-05,
      "loss": 0.9373,
      "step": 644000
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.768133795001554e-05,
      "loss": 0.9343,
      "step": 644500
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.766746570781729e-05,
      "loss": 0.9211,
      "step": 645000
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.7653593465619035e-05,
      "loss": 0.935,
      "step": 645500
    },
    {
      "epoch": 3.23,
      "learning_rate": 3.763974896790518e-05,
      "loss": 0.9209,
      "step": 646000
    },
    {
      "epoch": 3.23,
      "learning_rate": 3.762587672570693e-05,
      "loss": 0.9508,
      "step": 646500
    },
    {
      "epoch": 3.23,
      "learning_rate": 3.7612004483508676e-05,
      "loss": 0.9323,
      "step": 647000
    },
    {
      "epoch": 3.23,
      "learning_rate": 3.759813224131043e-05,
      "loss": 0.9406,
      "step": 647500
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.7584259999112177e-05,
      "loss": 0.9469,
      "step": 648000
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.757038775691393e-05,
      "loss": 0.9369,
      "step": 648500
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.7556543259200074e-05,
      "loss": 0.94,
      "step": 649000
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.7542671017001824e-05,
      "loss": 0.9289,
      "step": 649500
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.752879877480357e-05,
      "loss": 0.9597,
      "step": 650000
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.751492653260532e-05,
      "loss": 0.9292,
      "step": 650500
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.750105429040707e-05,
      "loss": 0.9522,
      "step": 651000
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.748718204820882e-05,
      "loss": 0.9733,
      "step": 651500
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.747330980601056e-05,
      "loss": 0.9505,
      "step": 652000
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.745949305278111e-05,
      "loss": 0.9068,
      "step": 652500
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.744562081058286e-05,
      "loss": 0.9329,
      "step": 653000
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.743174856838461e-05,
      "loss": 0.9943,
      "step": 653500
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.741787632618636e-05,
      "loss": 0.9518,
      "step": 654000
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.74040040839881e-05,
      "loss": 0.9662,
      "step": 654500
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.739013184178985e-05,
      "loss": 0.9452,
      "step": 655000
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.73762595995916e-05,
      "loss": 0.9301,
      "step": 655500
    },
    {
      "epoch": 3.28,
      "learning_rate": 3.736238735739335e-05,
      "loss": 0.9773,
      "step": 656000
    },
    {
      "epoch": 3.28,
      "learning_rate": 3.73485428596795e-05,
      "loss": 0.9339,
      "step": 656500
    },
    {
      "epoch": 3.28,
      "learning_rate": 3.733467061748125e-05,
      "loss": 0.9553,
      "step": 657000
    },
    {
      "epoch": 3.28,
      "learning_rate": 3.7320798375282994e-05,
      "loss": 0.9533,
      "step": 657500
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.7306926133084744e-05,
      "loss": 0.9273,
      "step": 658000
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.7293053890886494e-05,
      "loss": 0.9396,
      "step": 658500
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.7279181648688245e-05,
      "loss": 0.9544,
      "step": 659000
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.726533715097439e-05,
      "loss": 0.927,
      "step": 659500
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.7251464908776135e-05,
      "loss": 0.9375,
      "step": 660000
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.7237592666577886e-05,
      "loss": 0.9363,
      "step": 660500
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.7223720424379636e-05,
      "loss": 0.9574,
      "step": 661000
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.7209848182181387e-05,
      "loss": 0.9568,
      "step": 661500
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.719597593998314e-05,
      "loss": 0.9379,
      "step": 662000
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.718210369778488e-05,
      "loss": 0.9658,
      "step": 662500
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.716823145558663e-05,
      "loss": 0.9329,
      "step": 663000
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.715438695787278e-05,
      "loss": 0.9565,
      "step": 663500
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.714051471567452e-05,
      "loss": 0.9747,
      "step": 664000
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.712664247347627e-05,
      "loss": 0.9359,
      "step": 664500
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.711277023127802e-05,
      "loss": 0.9304,
      "step": 665000
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.709892573356417e-05,
      "loss": 0.9446,
      "step": 665500
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.708505349136592e-05,
      "loss": 0.9223,
      "step": 666000
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.707118124916767e-05,
      "loss": 0.9588,
      "step": 666500
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.7057309006969414e-05,
      "loss": 0.9197,
      "step": 667000
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.7043436764771164e-05,
      "loss": 0.9523,
      "step": 667500
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.7029564522572915e-05,
      "loss": 0.9693,
      "step": 668000
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.7015692280374665e-05,
      "loss": 0.9552,
      "step": 668500
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.700184778266081e-05,
      "loss": 0.9512,
      "step": 669000
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.698797554046256e-05,
      "loss": 0.9655,
      "step": 669500
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.6974103298264306e-05,
      "loss": 0.9285,
      "step": 670000
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.6960231056066056e-05,
      "loss": 0.9173,
      "step": 670500
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.694635881386781e-05,
      "loss": 0.9655,
      "step": 671000
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.693248657166956e-05,
      "loss": 0.9395,
      "step": 671500
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.69186420739557e-05,
      "loss": 0.9527,
      "step": 672000
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.690476983175745e-05,
      "loss": 0.9543,
      "step": 672500
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.68908975895592e-05,
      "loss": 0.9641,
      "step": 673000
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.687702534736094e-05,
      "loss": 0.981,
      "step": 673500
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.6863180849647096e-05,
      "loss": 0.9253,
      "step": 674000
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.684930860744884e-05,
      "loss": 0.9657,
      "step": 674500
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.683543636525059e-05,
      "loss": 0.9024,
      "step": 675000
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.682156412305234e-05,
      "loss": 0.9256,
      "step": 675500
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.680769188085409e-05,
      "loss": 0.9549,
      "step": 676000
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.6793819638655834e-05,
      "loss": 0.956,
      "step": 676500
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.6779947396457585e-05,
      "loss": 0.9307,
      "step": 677000
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.676610289874373e-05,
      "loss": 0.9272,
      "step": 677500
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.675223065654548e-05,
      "loss": 0.9559,
      "step": 678000
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.673835841434723e-05,
      "loss": 0.9658,
      "step": 678500
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.672448617214898e-05,
      "loss": 0.9447,
      "step": 679000
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.671064167443512e-05,
      "loss": 0.9718,
      "step": 679500
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.6696769432236874e-05,
      "loss": 0.9592,
      "step": 680000
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.6682897190038624e-05,
      "loss": 0.9668,
      "step": 680500
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.666902494784037e-05,
      "loss": 0.9767,
      "step": 681000
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.665515270564212e-05,
      "loss": 0.9449,
      "step": 681500
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.664128046344387e-05,
      "loss": 0.9177,
      "step": 682000
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.662740822124562e-05,
      "loss": 0.949,
      "step": 682500
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.661353597904737e-05,
      "loss": 0.9651,
      "step": 683000
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.6599719225817906e-05,
      "loss": 0.943,
      "step": 683500
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.6585846983619657e-05,
      "loss": 0.9467,
      "step": 684000
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.657197474142141e-05,
      "loss": 0.9323,
      "step": 684500
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.655810249922316e-05,
      "loss": 0.9861,
      "step": 685000
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.65442302570249e-05,
      "loss": 0.9185,
      "step": 685500
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.653035801482665e-05,
      "loss": 0.9715,
      "step": 686000
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.65165135171128e-05,
      "loss": 0.9548,
      "step": 686500
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.650264127491455e-05,
      "loss": 0.9559,
      "step": 687000
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.64887690327163e-05,
      "loss": 0.9627,
      "step": 687500
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.647489679051805e-05,
      "loss": 0.9946,
      "step": 688000
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.646102454831979e-05,
      "loss": 0.9409,
      "step": 688500
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.6447152306121544e-05,
      "loss": 0.9806,
      "step": 689000
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.6433280063923294e-05,
      "loss": 0.9729,
      "step": 689500
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.6419407821725044e-05,
      "loss": 0.9759,
      "step": 690000
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.640559106849559e-05,
      "loss": 0.9498,
      "step": 690500
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.639171882629733e-05,
      "loss": 0.9642,
      "step": 691000
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.637784658409908e-05,
      "loss": 0.9583,
      "step": 691500
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.636397434190083e-05,
      "loss": 0.9424,
      "step": 692000
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.635010209970258e-05,
      "loss": 0.9334,
      "step": 692500
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.6336229857504326e-05,
      "loss": 0.9584,
      "step": 693000
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.6322385359790474e-05,
      "loss": 0.9939,
      "step": 693500
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.6308513117592224e-05,
      "loss": 0.9782,
      "step": 694000
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.6294640875393974e-05,
      "loss": 0.9888,
      "step": 694500
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.6280768633195725e-05,
      "loss": 0.9489,
      "step": 695000
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.6266924135481865e-05,
      "loss": 0.9544,
      "step": 695500
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.6253051893283616e-05,
      "loss": 0.9498,
      "step": 696000
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.6239179651085366e-05,
      "loss": 0.9403,
      "step": 696500
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.6225307408887116e-05,
      "loss": 0.9712,
      "step": 697000
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.621143516668886e-05,
      "loss": 0.9438,
      "step": 697500
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.6197590668975014e-05,
      "loss": 0.9819,
      "step": 698000
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.618371842677676e-05,
      "loss": 0.9606,
      "step": 698500
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.616984618457851e-05,
      "loss": 0.9625,
      "step": 699000
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.615597394238026e-05,
      "loss": 0.9617,
      "step": 699500
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.614210170018201e-05,
      "loss": 0.9583,
      "step": 700000
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.612825720246815e-05,
      "loss": 0.9577,
      "step": 700500
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.61143849602699e-05,
      "loss": 0.9692,
      "step": 701000
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.610051271807165e-05,
      "loss": 0.9751,
      "step": 701500
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.608664047587339e-05,
      "loss": 0.9446,
      "step": 702000
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.607276823367515e-05,
      "loss": 0.9745,
      "step": 702500
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.60588959914769e-05,
      "loss": 0.9323,
      "step": 703000
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.6045023749278644e-05,
      "loss": 0.9402,
      "step": 703500
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.6031151507080395e-05,
      "loss": 0.9245,
      "step": 704000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.601730700936654e-05,
      "loss": 0.9801,
      "step": 704500
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.600346251165269e-05,
      "loss": 0.9593,
      "step": 705000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.598959026945443e-05,
      "loss": 0.959,
      "step": 705500
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.597571802725618e-05,
      "loss": 0.9839,
      "step": 706000
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.596184578505793e-05,
      "loss": 0.9695,
      "step": 706500
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.5947973542859684e-05,
      "loss": 0.988,
      "step": 707000
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.5934101300661434e-05,
      "loss": 0.9811,
      "step": 707500
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.5920256802947574e-05,
      "loss": 0.9549,
      "step": 708000
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.5906384560749325e-05,
      "loss": 0.9819,
      "step": 708500
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.5892512318551075e-05,
      "loss": 0.9617,
      "step": 709000
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.587864007635282e-05,
      "loss": 0.9295,
      "step": 709500
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.586476783415457e-05,
      "loss": 0.9468,
      "step": 710000
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.585089559195632e-05,
      "loss": 0.981,
      "step": 710500
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.583702334975807e-05,
      "loss": 0.9196,
      "step": 711000
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.582315110755982e-05,
      "loss": 0.9435,
      "step": 711500
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.580930660984597e-05,
      "loss": 0.9271,
      "step": 712000
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.579546211213211e-05,
      "loss": 0.9744,
      "step": 712500
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.578158986993386e-05,
      "loss": 0.9337,
      "step": 713000
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.576771762773561e-05,
      "loss": 0.9646,
      "step": 713500
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.575384538553735e-05,
      "loss": 0.9732,
      "step": 714000
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.5740000887823506e-05,
      "loss": 0.9646,
      "step": 714500
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.572612864562525e-05,
      "loss": 0.9775,
      "step": 715000
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.5712256403427e-05,
      "loss": 0.9895,
      "step": 715500
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.569841190571315e-05,
      "loss": 0.9603,
      "step": 716000
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.568453966351489e-05,
      "loss": 0.9575,
      "step": 716500
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.567066742131665e-05,
      "loss": 0.9748,
      "step": 717000
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.565679517911839e-05,
      "loss": 0.9392,
      "step": 717500
    },
    {
      "epoch": 3.59,
      "learning_rate": 3.564292293692014e-05,
      "loss": 0.9656,
      "step": 718000
    },
    {
      "epoch": 3.59,
      "learning_rate": 3.5629050694721886e-05,
      "loss": 0.9681,
      "step": 718500
    },
    {
      "epoch": 3.59,
      "learning_rate": 3.561517845252364e-05,
      "loss": 0.9332,
      "step": 719000
    },
    {
      "epoch": 3.59,
      "learning_rate": 3.560130621032539e-05,
      "loss": 0.9565,
      "step": 719500
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.5587461712611533e-05,
      "loss": 0.9809,
      "step": 720000
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.5573589470413284e-05,
      "loss": 0.956,
      "step": 720500
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.5559717228215034e-05,
      "loss": 0.9593,
      "step": 721000
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.554584498601678e-05,
      "loss": 0.9493,
      "step": 721500
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.553197274381853e-05,
      "loss": 0.9228,
      "step": 722000
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.5518128246104675e-05,
      "loss": 0.9616,
      "step": 722500
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.5504256003906426e-05,
      "loss": 0.9573,
      "step": 723000
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.5490383761708176e-05,
      "loss": 0.9408,
      "step": 723500
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.5476511519509926e-05,
      "loss": 0.9636,
      "step": 724000
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.546266702179607e-05,
      "loss": 0.9432,
      "step": 724500
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.544879477959782e-05,
      "loss": 0.9722,
      "step": 725000
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.543492253739957e-05,
      "loss": 0.9547,
      "step": 725500
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.542105029520131e-05,
      "loss": 0.9437,
      "step": 726000
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.540717805300307e-05,
      "loss": 0.954,
      "step": 726500
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.539333355528921e-05,
      "loss": 0.9634,
      "step": 727000
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.537946131309096e-05,
      "loss": 0.9364,
      "step": 727500
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.536558907089271e-05,
      "loss": 0.9632,
      "step": 728000
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.535171682869446e-05,
      "loss": 0.9575,
      "step": 728500
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.533787233098061e-05,
      "loss": 0.9726,
      "step": 729000
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.532400008878235e-05,
      "loss": 0.9545,
      "step": 729500
    },
    {
      "epoch": 3.65,
      "learning_rate": 3.53101278465841e-05,
      "loss": 0.9412,
      "step": 730000
    },
    {
      "epoch": 3.65,
      "learning_rate": 3.5296255604385844e-05,
      "loss": 0.9645,
      "step": 730500
    },
    {
      "epoch": 3.65,
      "learning_rate": 3.52823833621876e-05,
      "loss": 0.9969,
      "step": 731000
    },
    {
      "epoch": 3.65,
      "learning_rate": 3.526853886447374e-05,
      "loss": 0.9559,
      "step": 731500
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.525466662227549e-05,
      "loss": 0.9712,
      "step": 732000
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.524079438007724e-05,
      "loss": 0.9624,
      "step": 732500
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.522692213787899e-05,
      "loss": 0.9588,
      "step": 733000
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.521304989568074e-05,
      "loss": 0.9414,
      "step": 733500
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.5199205397966884e-05,
      "loss": 0.9481,
      "step": 734000
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.5185333155768634e-05,
      "loss": 0.9475,
      "step": 734500
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.5171460913570385e-05,
      "loss": 0.9941,
      "step": 735000
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.5157588671372135e-05,
      "loss": 0.9529,
      "step": 735500
    },
    {
      "epoch": 3.68,
      "learning_rate": 3.5143716429173885e-05,
      "loss": 0.9926,
      "step": 736000
    },
    {
      "epoch": 3.68,
      "learning_rate": 3.512984418697563e-05,
      "loss": 0.9803,
      "step": 736500
    },
    {
      "epoch": 3.68,
      "learning_rate": 3.5115999689261776e-05,
      "loss": 0.9315,
      "step": 737000
    },
    {
      "epoch": 3.68,
      "learning_rate": 3.5102127447063527e-05,
      "loss": 0.954,
      "step": 737500
    },
    {
      "epoch": 3.69,
      "learning_rate": 3.508825520486527e-05,
      "loss": 0.9588,
      "step": 738000
    },
    {
      "epoch": 3.69,
      "learning_rate": 3.507438296266703e-05,
      "loss": 0.9252,
      "step": 738500
    },
    {
      "epoch": 3.69,
      "learning_rate": 3.506051072046877e-05,
      "loss": 0.945,
      "step": 739000
    },
    {
      "epoch": 3.69,
      "learning_rate": 3.504663847827052e-05,
      "loss": 0.9741,
      "step": 739500
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.503279398055667e-05,
      "loss": 0.9472,
      "step": 740000
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.501892173835842e-05,
      "loss": 0.9405,
      "step": 740500
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.500504949616016e-05,
      "loss": 0.9632,
      "step": 741000
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.499117725396191e-05,
      "loss": 0.9362,
      "step": 741500
    },
    {
      "epoch": 3.71,
      "learning_rate": 3.497733275624806e-05,
      "loss": 0.9455,
      "step": 742000
    },
    {
      "epoch": 3.71,
      "learning_rate": 3.4963460514049803e-05,
      "loss": 0.9369,
      "step": 742500
    },
    {
      "epoch": 3.71,
      "learning_rate": 3.494958827185156e-05,
      "loss": 0.9565,
      "step": 743000
    },
    {
      "epoch": 3.71,
      "learning_rate": 3.493571602965331e-05,
      "loss": 0.9357,
      "step": 743500
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.492187153193945e-05,
      "loss": 0.9621,
      "step": 744000
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.49079992897412e-05,
      "loss": 0.9717,
      "step": 744500
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.489412704754295e-05,
      "loss": 0.9353,
      "step": 745000
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.4880254805344696e-05,
      "loss": 0.9377,
      "step": 745500
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.4866382563146446e-05,
      "loss": 0.9518,
      "step": 746000
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.485253806543259e-05,
      "loss": 0.9494,
      "step": 746500
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.483866582323434e-05,
      "loss": 0.9593,
      "step": 747000
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.4824793581036094e-05,
      "loss": 0.967,
      "step": 747500
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.4810921338837844e-05,
      "loss": 0.9495,
      "step": 748000
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.4797076841123985e-05,
      "loss": 0.9496,
      "step": 748500
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.4783204598925735e-05,
      "loss": 0.9739,
      "step": 749000
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.4769332356727485e-05,
      "loss": 0.9679,
      "step": 749500
    },
    {
      "epoch": 3.75,
      "learning_rate": 3.475546011452923e-05,
      "loss": 0.959,
      "step": 750000
    },
    {
      "epoch": 3.75,
      "learning_rate": 3.4741615616815376e-05,
      "loss": 0.9738,
      "step": 750500
    },
    {
      "epoch": 3.75,
      "learning_rate": 3.4727743374617127e-05,
      "loss": 0.9623,
      "step": 751000
    },
    {
      "epoch": 3.75,
      "learning_rate": 3.471387113241888e-05,
      "loss": 0.9706,
      "step": 751500
    },
    {
      "epoch": 3.76,
      "learning_rate": 3.469999889022063e-05,
      "loss": 0.9221,
      "step": 752000
    },
    {
      "epoch": 3.76,
      "learning_rate": 3.468618213699117e-05,
      "loss": 0.9642,
      "step": 752500
    },
    {
      "epoch": 3.76,
      "learning_rate": 3.4672309894792915e-05,
      "loss": 0.9569,
      "step": 753000
    },
    {
      "epoch": 3.76,
      "learning_rate": 3.4658437652594665e-05,
      "loss": 0.9559,
      "step": 753500
    },
    {
      "epoch": 3.77,
      "learning_rate": 3.4644565410396416e-05,
      "loss": 0.9537,
      "step": 754000
    },
    {
      "epoch": 3.77,
      "learning_rate": 3.4630693168198166e-05,
      "loss": 0.9646,
      "step": 754500
    },
    {
      "epoch": 3.77,
      "learning_rate": 3.4616820925999916e-05,
      "loss": 0.9562,
      "step": 755000
    },
    {
      "epoch": 3.77,
      "learning_rate": 3.4602976428286064e-05,
      "loss": 0.9569,
      "step": 755500
    },
    {
      "epoch": 3.78,
      "learning_rate": 3.458910418608781e-05,
      "loss": 0.9659,
      "step": 756000
    },
    {
      "epoch": 3.78,
      "learning_rate": 3.457523194388956e-05,
      "loss": 0.9381,
      "step": 756500
    },
    {
      "epoch": 3.78,
      "learning_rate": 3.45613597016913e-05,
      "loss": 0.9842,
      "step": 757000
    },
    {
      "epoch": 3.78,
      "learning_rate": 3.454748745949306e-05,
      "loss": 0.9599,
      "step": 757500
    },
    {
      "epoch": 3.79,
      "learning_rate": 3.45336152172948e-05,
      "loss": 0.9537,
      "step": 758000
    },
    {
      "epoch": 3.79,
      "learning_rate": 3.451974297509655e-05,
      "loss": 0.9676,
      "step": 758500
    },
    {
      "epoch": 3.79,
      "learning_rate": 3.4505870732898296e-05,
      "loss": 0.9722,
      "step": 759000
    },
    {
      "epoch": 3.79,
      "learning_rate": 3.449199849070005e-05,
      "loss": 0.9518,
      "step": 759500
    },
    {
      "epoch": 3.8,
      "learning_rate": 3.447815399298619e-05,
      "loss": 0.977,
      "step": 760000
    },
    {
      "epoch": 3.8,
      "learning_rate": 3.4464281750787944e-05,
      "loss": 0.9497,
      "step": 760500
    },
    {
      "epoch": 3.8,
      "learning_rate": 3.4450409508589694e-05,
      "loss": 0.9554,
      "step": 761000
    },
    {
      "epoch": 3.8,
      "learning_rate": 3.4436537266391444e-05,
      "loss": 0.9666,
      "step": 761500
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.442269276867759e-05,
      "loss": 0.9768,
      "step": 762000
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.4408820526479335e-05,
      "loss": 0.9498,
      "step": 762500
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.4394948284281086e-05,
      "loss": 0.9459,
      "step": 763000
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.4381076042082836e-05,
      "loss": 0.9512,
      "step": 763500
    },
    {
      "epoch": 3.82,
      "learning_rate": 3.4367203799884586e-05,
      "loss": 0.9579,
      "step": 764000
    },
    {
      "epoch": 3.82,
      "learning_rate": 3.435335930217073e-05,
      "loss": 0.9714,
      "step": 764500
    },
    {
      "epoch": 3.82,
      "learning_rate": 3.4339487059972484e-05,
      "loss": 0.9656,
      "step": 765000
    },
    {
      "epoch": 3.82,
      "learning_rate": 3.432561481777423e-05,
      "loss": 0.9499,
      "step": 765500
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.431174257557598e-05,
      "loss": 0.9318,
      "step": 766000
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.4297898077862125e-05,
      "loss": 0.9827,
      "step": 766500
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.4284025835663875e-05,
      "loss": 0.9511,
      "step": 767000
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.427015359346562e-05,
      "loss": 0.9603,
      "step": 767500
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.425628135126737e-05,
      "loss": 0.951,
      "step": 768000
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.424240910906912e-05,
      "loss": 0.9724,
      "step": 768500
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.422856461135526e-05,
      "loss": 0.9763,
      "step": 769000
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.421469236915702e-05,
      "loss": 0.9636,
      "step": 769500
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.420082012695876e-05,
      "loss": 0.9942,
      "step": 770000
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.418694788476051e-05,
      "loss": 0.942,
      "step": 770500
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.417310338704666e-05,
      "loss": 0.974,
      "step": 771000
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.415923114484841e-05,
      "loss": 0.9388,
      "step": 771500
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.414535890265015e-05,
      "loss": 0.9647,
      "step": 772000
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.41314866604519e-05,
      "loss": 0.9807,
      "step": 772500
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.411761441825365e-05,
      "loss": 0.9684,
      "step": 773000
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.4103769920539793e-05,
      "loss": 0.9738,
      "step": 773500
    },
    {
      "epoch": 3.87,
      "learning_rate": 3.408989767834155e-05,
      "loss": 0.968,
      "step": 774000
    },
    {
      "epoch": 3.87,
      "learning_rate": 3.4076025436143294e-05,
      "loss": 0.9681,
      "step": 774500
    },
    {
      "epoch": 3.87,
      "learning_rate": 3.4062153193945045e-05,
      "loss": 0.9416,
      "step": 775000
    },
    {
      "epoch": 3.87,
      "learning_rate": 3.4048280951746795e-05,
      "loss": 0.9688,
      "step": 775500
    },
    {
      "epoch": 3.88,
      "learning_rate": 3.4034408709548545e-05,
      "loss": 0.9923,
      "step": 776000
    },
    {
      "epoch": 3.88,
      "learning_rate": 3.4020536467350296e-05,
      "loss": 0.9392,
      "step": 776500
    },
    {
      "epoch": 3.88,
      "learning_rate": 3.400669196963644e-05,
      "loss": 0.9497,
      "step": 777000
    },
    {
      "epoch": 3.88,
      "learning_rate": 3.3992819727438186e-05,
      "loss": 0.9623,
      "step": 777500
    },
    {
      "epoch": 3.89,
      "learning_rate": 3.397894748523994e-05,
      "loss": 0.9729,
      "step": 778000
    },
    {
      "epoch": 3.89,
      "learning_rate": 3.396507524304168e-05,
      "loss": 0.9836,
      "step": 778500
    },
    {
      "epoch": 3.89,
      "learning_rate": 3.395120300084344e-05,
      "loss": 0.9671,
      "step": 779000
    },
    {
      "epoch": 3.89,
      "learning_rate": 3.393733075864518e-05,
      "loss": 0.9249,
      "step": 779500
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.392348626093133e-05,
      "loss": 0.9523,
      "step": 780000
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.390961401873308e-05,
      "loss": 0.9401,
      "step": 780500
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.389574177653483e-05,
      "loss": 0.9747,
      "step": 781000
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.388186953433657e-05,
      "loss": 0.9713,
      "step": 781500
    },
    {
      "epoch": 3.91,
      "learning_rate": 3.386802503662272e-05,
      "loss": 0.9588,
      "step": 782000
    },
    {
      "epoch": 3.91,
      "learning_rate": 3.385415279442447e-05,
      "loss": 0.9721,
      "step": 782500
    },
    {
      "epoch": 3.91,
      "learning_rate": 3.3840280552226214e-05,
      "loss": 0.9743,
      "step": 783000
    },
    {
      "epoch": 3.91,
      "learning_rate": 3.382640831002797e-05,
      "loss": 0.9522,
      "step": 783500
    },
    {
      "epoch": 3.92,
      "learning_rate": 3.381253606782972e-05,
      "loss": 0.9883,
      "step": 784000
    },
    {
      "epoch": 3.92,
      "learning_rate": 3.379869157011586e-05,
      "loss": 0.9876,
      "step": 784500
    },
    {
      "epoch": 3.92,
      "learning_rate": 3.378481932791761e-05,
      "loss": 0.9464,
      "step": 785000
    },
    {
      "epoch": 3.92,
      "learning_rate": 3.377094708571936e-05,
      "loss": 0.9795,
      "step": 785500
    },
    {
      "epoch": 3.93,
      "learning_rate": 3.3757074843521106e-05,
      "loss": 0.9886,
      "step": 786000
    },
    {
      "epoch": 3.93,
      "learning_rate": 3.374320260132286e-05,
      "loss": 0.968,
      "step": 786500
    },
    {
      "epoch": 3.93,
      "learning_rate": 3.3729358103609003e-05,
      "loss": 0.9361,
      "step": 787000
    },
    {
      "epoch": 3.93,
      "learning_rate": 3.3715485861410754e-05,
      "loss": 0.9615,
      "step": 787500
    },
    {
      "epoch": 3.94,
      "learning_rate": 3.3701613619212504e-05,
      "loss": 0.9669,
      "step": 788000
    },
    {
      "epoch": 3.94,
      "learning_rate": 3.3687741377014255e-05,
      "loss": 0.9435,
      "step": 788500
    },
    {
      "epoch": 3.94,
      "learning_rate": 3.3673869134816e-05,
      "loss": 0.9702,
      "step": 789000
    },
    {
      "epoch": 3.94,
      "learning_rate": 3.3660024637102145e-05,
      "loss": 0.9573,
      "step": 789500
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.3646152394903896e-05,
      "loss": 0.9893,
      "step": 790000
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.363228015270564e-05,
      "loss": 0.9359,
      "step": 790500
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.3618407910507396e-05,
      "loss": 0.9614,
      "step": 791000
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.360453566830914e-05,
      "loss": 0.9161,
      "step": 791500
    },
    {
      "epoch": 3.96,
      "learning_rate": 3.359066342611089e-05,
      "loss": 0.9249,
      "step": 792000
    },
    {
      "epoch": 3.96,
      "learning_rate": 3.357681892839704e-05,
      "loss": 0.9503,
      "step": 792500
    },
    {
      "epoch": 3.96,
      "learning_rate": 3.356297443068318e-05,
      "loss": 0.9393,
      "step": 793000
    },
    {
      "epoch": 3.96,
      "learning_rate": 3.3549102188484935e-05,
      "loss": 0.9643,
      "step": 793500
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.353522994628668e-05,
      "loss": 0.9567,
      "step": 794000
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.352135770408843e-05,
      "loss": 0.9664,
      "step": 794500
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.350748546189017e-05,
      "loss": 0.9547,
      "step": 795000
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.349361321969193e-05,
      "loss": 0.9509,
      "step": 795500
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.347976872197807e-05,
      "loss": 0.934,
      "step": 796000
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.346589647977982e-05,
      "loss": 0.966,
      "step": 796500
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.345202423758157e-05,
      "loss": 0.9433,
      "step": 797000
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.343815199538332e-05,
      "loss": 0.9208,
      "step": 797500
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.3424279753185065e-05,
      "loss": 0.9883,
      "step": 798000
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.341040751098682e-05,
      "loss": 0.9474,
      "step": 798500
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.3396535268788566e-05,
      "loss": 0.9681,
      "step": 799000
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.3382663026590316e-05,
      "loss": 0.9499,
      "step": 799500
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.336881852887646e-05,
      "loss": 0.967,
      "step": 800000
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.3354946286678214e-05,
      "loss": 0.958,
      "step": 800500
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.1211416721343994,
      "eval_runtime": 2618.6075,
      "eval_samples_per_second": 44.489,
      "eval_steps_per_second": 11.122,
      "step": 800960
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.334107404447996e-05,
      "loss": 0.9451,
      "step": 801000
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.332720180228171e-05,
      "loss": 0.7567,
      "step": 801500
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.331332956008346e-05,
      "loss": 0.7645,
      "step": 802000
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.32994850623696e-05,
      "loss": 0.7537,
      "step": 802500
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.3285612820171355e-05,
      "loss": 0.7549,
      "step": 803000
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.32717405779731e-05,
      "loss": 0.7726,
      "step": 803500
    },
    {
      "epoch": 4.02,
      "learning_rate": 3.3257896080259246e-05,
      "loss": 0.7614,
      "step": 804000
    },
    {
      "epoch": 4.02,
      "learning_rate": 3.3244023838060997e-05,
      "loss": 0.7549,
      "step": 804500
    },
    {
      "epoch": 4.02,
      "learning_rate": 3.323015159586275e-05,
      "loss": 0.7834,
      "step": 805000
    },
    {
      "epoch": 4.02,
      "learning_rate": 3.321627935366449e-05,
      "loss": 0.782,
      "step": 805500
    },
    {
      "epoch": 4.03,
      "learning_rate": 3.320240711146624e-05,
      "loss": 0.7667,
      "step": 806000
    },
    {
      "epoch": 4.03,
      "learning_rate": 3.318853486926799e-05,
      "loss": 0.766,
      "step": 806500
    },
    {
      "epoch": 4.03,
      "learning_rate": 3.317466262706974e-05,
      "loss": 0.7635,
      "step": 807000
    },
    {
      "epoch": 4.03,
      "learning_rate": 3.3160790384871485e-05,
      "loss": 0.7994,
      "step": 807500
    },
    {
      "epoch": 4.04,
      "learning_rate": 3.314691814267324e-05,
      "loss": 0.7706,
      "step": 808000
    },
    {
      "epoch": 4.04,
      "learning_rate": 3.313307364495938e-05,
      "loss": 0.7653,
      "step": 808500
    },
    {
      "epoch": 4.04,
      "learning_rate": 3.311920140276113e-05,
      "loss": 0.7316,
      "step": 809000
    },
    {
      "epoch": 4.04,
      "learning_rate": 3.3105329160562883e-05,
      "loss": 0.7481,
      "step": 809500
    },
    {
      "epoch": 4.05,
      "learning_rate": 3.3091456918364634e-05,
      "loss": 0.7586,
      "step": 810000
    },
    {
      "epoch": 4.05,
      "learning_rate": 3.307758467616638e-05,
      "loss": 0.7432,
      "step": 810500
    },
    {
      "epoch": 4.05,
      "learning_rate": 3.306371243396813e-05,
      "loss": 0.7403,
      "step": 811000
    },
    {
      "epoch": 4.05,
      "learning_rate": 3.304984019176988e-05,
      "loss": 0.802,
      "step": 811500
    },
    {
      "epoch": 4.06,
      "learning_rate": 3.303596794957163e-05,
      "loss": 0.772,
      "step": 812000
    },
    {
      "epoch": 4.06,
      "learning_rate": 3.302215119634217e-05,
      "loss": 0.7436,
      "step": 812500
    },
    {
      "epoch": 4.06,
      "learning_rate": 3.3008278954143916e-05,
      "loss": 0.761,
      "step": 813000
    },
    {
      "epoch": 4.06,
      "learning_rate": 3.2994406711945666e-05,
      "loss": 0.7815,
      "step": 813500
    },
    {
      "epoch": 4.07,
      "learning_rate": 3.298053446974742e-05,
      "loss": 0.7364,
      "step": 814000
    },
    {
      "epoch": 4.07,
      "learning_rate": 3.296666222754917e-05,
      "loss": 0.7529,
      "step": 814500
    },
    {
      "epoch": 4.07,
      "learning_rate": 3.295278998535091e-05,
      "loss": 0.7633,
      "step": 815000
    },
    {
      "epoch": 4.07,
      "learning_rate": 3.293894548763706e-05,
      "loss": 0.7741,
      "step": 815500
    },
    {
      "epoch": 4.08,
      "learning_rate": 3.292507324543881e-05,
      "loss": 0.7898,
      "step": 816000
    },
    {
      "epoch": 4.08,
      "learning_rate": 3.291120100324055e-05,
      "loss": 0.7595,
      "step": 816500
    },
    {
      "epoch": 4.08,
      "learning_rate": 3.289732876104231e-05,
      "loss": 0.7977,
      "step": 817000
    },
    {
      "epoch": 4.08,
      "learning_rate": 3.288345651884406e-05,
      "loss": 0.7821,
      "step": 817500
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.28696120211302e-05,
      "loss": 0.7867,
      "step": 818000
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.285573977893195e-05,
      "loss": 0.7922,
      "step": 818500
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.28418675367337e-05,
      "loss": 0.7953,
      "step": 819000
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.2827995294535444e-05,
      "loss": 0.7751,
      "step": 819500
    },
    {
      "epoch": 4.1,
      "learning_rate": 3.2814123052337194e-05,
      "loss": 0.7861,
      "step": 820000
    },
    {
      "epoch": 4.1,
      "learning_rate": 3.280027855462334e-05,
      "loss": 0.7771,
      "step": 820500
    },
    {
      "epoch": 4.1,
      "learning_rate": 3.278640631242509e-05,
      "loss": 0.7827,
      "step": 821000
    },
    {
      "epoch": 4.1,
      "learning_rate": 3.277253407022684e-05,
      "loss": 0.7778,
      "step": 821500
    },
    {
      "epoch": 4.11,
      "learning_rate": 3.275866182802859e-05,
      "loss": 0.7942,
      "step": 822000
    },
    {
      "epoch": 4.11,
      "learning_rate": 3.2744789585830336e-05,
      "loss": 0.7961,
      "step": 822500
    },
    {
      "epoch": 4.11,
      "learning_rate": 3.2730945088116484e-05,
      "loss": 0.7775,
      "step": 823000
    },
    {
      "epoch": 4.11,
      "learning_rate": 3.2717072845918234e-05,
      "loss": 0.8011,
      "step": 823500
    },
    {
      "epoch": 4.12,
      "learning_rate": 3.270320060371998e-05,
      "loss": 0.7441,
      "step": 824000
    },
    {
      "epoch": 4.12,
      "learning_rate": 3.2689328361521735e-05,
      "loss": 0.7635,
      "step": 824500
    },
    {
      "epoch": 4.12,
      "learning_rate": 3.267545611932348e-05,
      "loss": 0.7849,
      "step": 825000
    },
    {
      "epoch": 4.12,
      "learning_rate": 3.2661611621609625e-05,
      "loss": 0.7931,
      "step": 825500
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.2647739379411376e-05,
      "loss": 0.7893,
      "step": 826000
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.2633867137213126e-05,
      "loss": 0.767,
      "step": 826500
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.261999489501487e-05,
      "loss": 0.8096,
      "step": 827000
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.260615039730102e-05,
      "loss": 0.7847,
      "step": 827500
    },
    {
      "epoch": 4.14,
      "learning_rate": 3.259227815510277e-05,
      "loss": 0.7966,
      "step": 828000
    },
    {
      "epoch": 4.14,
      "learning_rate": 3.257840591290451e-05,
      "loss": 0.7554,
      "step": 828500
    },
    {
      "epoch": 4.14,
      "learning_rate": 3.256453367070627e-05,
      "loss": 0.8046,
      "step": 829000
    },
    {
      "epoch": 4.14,
      "learning_rate": 3.255066142850802e-05,
      "loss": 0.7922,
      "step": 829500
    },
    {
      "epoch": 4.15,
      "learning_rate": 3.253681693079416e-05,
      "loss": 0.7857,
      "step": 830000
    },
    {
      "epoch": 4.15,
      "learning_rate": 3.252294468859591e-05,
      "loss": 0.7793,
      "step": 830500
    },
    {
      "epoch": 4.15,
      "learning_rate": 3.250907244639766e-05,
      "loss": 0.7799,
      "step": 831000
    },
    {
      "epoch": 4.15,
      "learning_rate": 3.24952002041994e-05,
      "loss": 0.8033,
      "step": 831500
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.248135570648555e-05,
      "loss": 0.7978,
      "step": 832000
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.24674834642873e-05,
      "loss": 0.78,
      "step": 832500
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.2453611222089044e-05,
      "loss": 0.7705,
      "step": 833000
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.24397389798908e-05,
      "loss": 0.7784,
      "step": 833500
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.242586673769255e-05,
      "loss": 0.7951,
      "step": 834000
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.241202223997869e-05,
      "loss": 0.8211,
      "step": 834500
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.239814999778044e-05,
      "loss": 0.8055,
      "step": 835000
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.238427775558219e-05,
      "loss": 0.7712,
      "step": 835500
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.2370405513383936e-05,
      "loss": 0.8143,
      "step": 836000
    },
    {
      "epoch": 4.18,
      "learning_rate": 3.2356533271185694e-05,
      "loss": 0.8101,
      "step": 836500
    },
    {
      "epoch": 4.18,
      "learning_rate": 3.2342688773471834e-05,
      "loss": 0.756,
      "step": 837000
    },
    {
      "epoch": 4.18,
      "learning_rate": 3.2328816531273584e-05,
      "loss": 0.7973,
      "step": 837500
    },
    {
      "epoch": 4.18,
      "learning_rate": 3.2314944289075335e-05,
      "loss": 0.7874,
      "step": 838000
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.2301099791361475e-05,
      "loss": 0.794,
      "step": 838500
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.228722754916323e-05,
      "loss": 0.8058,
      "step": 839000
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.2273355306964976e-05,
      "loss": 0.8026,
      "step": 839500
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.2259483064766726e-05,
      "loss": 0.7931,
      "step": 840000
    },
    {
      "epoch": 4.2,
      "learning_rate": 3.224561082256847e-05,
      "loss": 0.7854,
      "step": 840500
    },
    {
      "epoch": 4.2,
      "learning_rate": 3.223173858037023e-05,
      "loss": 0.8166,
      "step": 841000
    },
    {
      "epoch": 4.2,
      "learning_rate": 3.221786633817197e-05,
      "loss": 0.7869,
      "step": 841500
    },
    {
      "epoch": 4.2,
      "learning_rate": 3.220399409597372e-05,
      "loss": 0.7911,
      "step": 842000
    },
    {
      "epoch": 4.21,
      "learning_rate": 3.219012185377547e-05,
      "loss": 0.7906,
      "step": 842500
    },
    {
      "epoch": 4.21,
      "learning_rate": 3.217624961157722e-05,
      "loss": 0.8091,
      "step": 843000
    },
    {
      "epoch": 4.21,
      "learning_rate": 3.216240511386336e-05,
      "loss": 0.794,
      "step": 843500
    },
    {
      "epoch": 4.21,
      "learning_rate": 3.214853287166511e-05,
      "loss": 0.7849,
      "step": 844000
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.213466062946686e-05,
      "loss": 0.7999,
      "step": 844500
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.2120816131753e-05,
      "loss": 0.7864,
      "step": 845000
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.210694388955476e-05,
      "loss": 0.8044,
      "step": 845500
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.209307164735651e-05,
      "loss": 0.7934,
      "step": 846000
    },
    {
      "epoch": 4.23,
      "learning_rate": 3.2079199405158254e-05,
      "loss": 0.7901,
      "step": 846500
    },
    {
      "epoch": 4.23,
      "learning_rate": 3.2065327162960005e-05,
      "loss": 0.8011,
      "step": 847000
    },
    {
      "epoch": 4.23,
      "learning_rate": 3.2051454920761755e-05,
      "loss": 0.8196,
      "step": 847500
    },
    {
      "epoch": 4.23,
      "learning_rate": 3.2037582678563505e-05,
      "loss": 0.7792,
      "step": 848000
    },
    {
      "epoch": 4.24,
      "learning_rate": 3.202371043636525e-05,
      "loss": 0.8133,
      "step": 848500
    },
    {
      "epoch": 4.24,
      "learning_rate": 3.2009865938651396e-05,
      "loss": 0.8091,
      "step": 849000
    },
    {
      "epoch": 4.24,
      "learning_rate": 3.1995993696453146e-05,
      "loss": 0.7698,
      "step": 849500
    },
    {
      "epoch": 4.24,
      "learning_rate": 3.198212145425489e-05,
      "loss": 0.8078,
      "step": 850000
    },
    {
      "epoch": 4.25,
      "learning_rate": 3.196824921205665e-05,
      "loss": 0.8048,
      "step": 850500
    },
    {
      "epoch": 4.25,
      "learning_rate": 3.19543769698584e-05,
      "loss": 0.8061,
      "step": 851000
    },
    {
      "epoch": 4.25,
      "learning_rate": 3.194053247214454e-05,
      "loss": 0.7961,
      "step": 851500
    },
    {
      "epoch": 4.25,
      "learning_rate": 3.192666022994629e-05,
      "loss": 0.8158,
      "step": 852000
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.191278798774804e-05,
      "loss": 0.8026,
      "step": 852500
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.189891574554978e-05,
      "loss": 0.7919,
      "step": 853000
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.188504350335153e-05,
      "loss": 0.7896,
      "step": 853500
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.187117126115328e-05,
      "loss": 0.7847,
      "step": 854000
    },
    {
      "epoch": 4.27,
      "learning_rate": 3.185732676343943e-05,
      "loss": 0.8135,
      "step": 854500
    },
    {
      "epoch": 4.27,
      "learning_rate": 3.184345452124118e-05,
      "loss": 0.7805,
      "step": 855000
    },
    {
      "epoch": 4.27,
      "learning_rate": 3.182958227904293e-05,
      "loss": 0.8274,
      "step": 855500
    },
    {
      "epoch": 4.27,
      "learning_rate": 3.1815710036844675e-05,
      "loss": 0.8311,
      "step": 856000
    },
    {
      "epoch": 4.28,
      "learning_rate": 3.1801837794646425e-05,
      "loss": 0.7755,
      "step": 856500
    },
    {
      "epoch": 4.28,
      "learning_rate": 3.178799329693257e-05,
      "loss": 0.7917,
      "step": 857000
    },
    {
      "epoch": 4.28,
      "learning_rate": 3.1774121054734316e-05,
      "loss": 0.7885,
      "step": 857500
    },
    {
      "epoch": 4.28,
      "learning_rate": 3.176024881253607e-05,
      "loss": 0.8066,
      "step": 858000
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.1746376570337816e-05,
      "loss": 0.812,
      "step": 858500
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.173250432813957e-05,
      "loss": 0.789,
      "step": 859000
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.1718659830425714e-05,
      "loss": 0.7939,
      "step": 859500
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.1704787588227464e-05,
      "loss": 0.8163,
      "step": 860000
    },
    {
      "epoch": 4.3,
      "learning_rate": 3.169091534602921e-05,
      "loss": 0.7945,
      "step": 860500
    },
    {
      "epoch": 4.3,
      "learning_rate": 3.167704310383096e-05,
      "loss": 0.8203,
      "step": 861000
    },
    {
      "epoch": 4.3,
      "learning_rate": 3.1663198606117105e-05,
      "loss": 0.8033,
      "step": 861500
    },
    {
      "epoch": 4.3,
      "learning_rate": 3.164932636391885e-05,
      "loss": 0.8127,
      "step": 862000
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.1635454121720606e-05,
      "loss": 0.7923,
      "step": 862500
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.1621581879522357e-05,
      "loss": 0.8071,
      "step": 863000
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.16077373818085e-05,
      "loss": 0.8308,
      "step": 863500
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.159386513961025e-05,
      "loss": 0.8438,
      "step": 864000
    },
    {
      "epoch": 4.32,
      "learning_rate": 3.1579992897412e-05,
      "loss": 0.7915,
      "step": 864500
    },
    {
      "epoch": 4.32,
      "learning_rate": 3.156612065521374e-05,
      "loss": 0.7914,
      "step": 865000
    },
    {
      "epoch": 4.32,
      "learning_rate": 3.155224841301549e-05,
      "loss": 0.8056,
      "step": 865500
    },
    {
      "epoch": 4.32,
      "learning_rate": 3.153837617081724e-05,
      "loss": 0.8192,
      "step": 866000
    },
    {
      "epoch": 4.33,
      "learning_rate": 3.152453167310339e-05,
      "loss": 0.8376,
      "step": 866500
    },
    {
      "epoch": 4.33,
      "learning_rate": 3.151065943090514e-05,
      "loss": 0.8012,
      "step": 867000
    },
    {
      "epoch": 4.33,
      "learning_rate": 3.149678718870689e-05,
      "loss": 0.8178,
      "step": 867500
    },
    {
      "epoch": 4.33,
      "learning_rate": 3.1482914946508633e-05,
      "loss": 0.784,
      "step": 868000
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.1469042704310384e-05,
      "loss": 0.833,
      "step": 868500
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.1455170462112134e-05,
      "loss": 0.8075,
      "step": 869000
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.1441325964398275e-05,
      "loss": 0.8037,
      "step": 869500
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.142745372220003e-05,
      "loss": 0.814,
      "step": 870000
    },
    {
      "epoch": 4.35,
      "learning_rate": 3.1413581480001775e-05,
      "loss": 0.8211,
      "step": 870500
    },
    {
      "epoch": 4.35,
      "learning_rate": 3.1399709237803526e-05,
      "loss": 0.7995,
      "step": 871000
    },
    {
      "epoch": 4.35,
      "learning_rate": 3.138586474008967e-05,
      "loss": 0.8193,
      "step": 871500
    },
    {
      "epoch": 4.35,
      "learning_rate": 3.137199249789142e-05,
      "loss": 0.8085,
      "step": 872000
    },
    {
      "epoch": 4.36,
      "learning_rate": 3.135812025569317e-05,
      "loss": 0.8061,
      "step": 872500
    },
    {
      "epoch": 4.36,
      "learning_rate": 3.134424801349492e-05,
      "loss": 0.7964,
      "step": 873000
    },
    {
      "epoch": 4.36,
      "learning_rate": 3.133037577129667e-05,
      "loss": 0.8011,
      "step": 873500
    },
    {
      "epoch": 4.36,
      "learning_rate": 3.131650352909842e-05,
      "loss": 0.794,
      "step": 874000
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.1302659031384565e-05,
      "loss": 0.8218,
      "step": 874500
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.1288814533670706e-05,
      "loss": 0.7945,
      "step": 875000
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.1274942291472456e-05,
      "loss": 0.8122,
      "step": 875500
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.1261070049274206e-05,
      "loss": 0.83,
      "step": 876000
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.124719780707596e-05,
      "loss": 0.8119,
      "step": 876500
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.12333255648777e-05,
      "loss": 0.7982,
      "step": 877000
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.121945332267945e-05,
      "loss": 0.8158,
      "step": 877500
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.12055810804812e-05,
      "loss": 0.8019,
      "step": 878000
    },
    {
      "epoch": 4.39,
      "learning_rate": 3.119170883828295e-05,
      "loss": 0.8406,
      "step": 878500
    },
    {
      "epoch": 4.39,
      "learning_rate": 3.11778643405691e-05,
      "loss": 0.8006,
      "step": 879000
    },
    {
      "epoch": 4.39,
      "learning_rate": 3.116399209837085e-05,
      "loss": 0.8371,
      "step": 879500
    },
    {
      "epoch": 4.39,
      "learning_rate": 3.115011985617259e-05,
      "loss": 0.7991,
      "step": 880000
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.113627535845874e-05,
      "loss": 0.8229,
      "step": 880500
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.112240311626049e-05,
      "loss": 0.7984,
      "step": 881000
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.1108530874062234e-05,
      "loss": 0.8093,
      "step": 881500
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.109465863186399e-05,
      "loss": 0.8353,
      "step": 882000
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.1080786389665734e-05,
      "loss": 0.8706,
      "step": 882500
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.1066914147467485e-05,
      "loss": 0.8218,
      "step": 883000
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.1053041905269235e-05,
      "loss": 0.7969,
      "step": 883500
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.103919740755538e-05,
      "loss": 0.8147,
      "step": 884000
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.1025325165357126e-05,
      "loss": 0.8208,
      "step": 884500
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.1011452923158876e-05,
      "loss": 0.8244,
      "step": 885000
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.0997580680960627e-05,
      "loss": 0.8225,
      "step": 885500
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.098370843876238e-05,
      "loss": 0.8353,
      "step": 886000
    },
    {
      "epoch": 4.43,
      "learning_rate": 3.096983619656412e-05,
      "loss": 0.8172,
      "step": 886500
    },
    {
      "epoch": 4.43,
      "learning_rate": 3.095596395436587e-05,
      "loss": 0.8023,
      "step": 887000
    },
    {
      "epoch": 4.43,
      "learning_rate": 3.094209171216762e-05,
      "loss": 0.8118,
      "step": 887500
    },
    {
      "epoch": 4.43,
      "learning_rate": 3.092821946996937e-05,
      "loss": 0.8186,
      "step": 888000
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.091437497225552e-05,
      "loss": 0.8401,
      "step": 888500
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.090050273005727e-05,
      "loss": 0.7928,
      "step": 889000
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.088663048785901e-05,
      "loss": 0.8397,
      "step": 889500
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.087278599014516e-05,
      "loss": 0.817,
      "step": 890000
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.085891374794691e-05,
      "loss": 0.8163,
      "step": 890500
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.0845041505748654e-05,
      "loss": 0.8189,
      "step": 891000
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.083116926355041e-05,
      "loss": 0.7854,
      "step": 891500
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.0817297021352155e-05,
      "loss": 0.8212,
      "step": 892000
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.0803424779153905e-05,
      "loss": 0.81,
      "step": 892500
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.0789552536955655e-05,
      "loss": 0.8546,
      "step": 893000
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.0775680294757406e-05,
      "loss": 0.8256,
      "step": 893500
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.0761808052559156e-05,
      "loss": 0.8275,
      "step": 894000
    },
    {
      "epoch": 4.47,
      "learning_rate": 3.0747963554845296e-05,
      "loss": 0.8132,
      "step": 894500
    },
    {
      "epoch": 4.47,
      "learning_rate": 3.073409131264705e-05,
      "loss": 0.8155,
      "step": 895000
    },
    {
      "epoch": 4.47,
      "learning_rate": 3.07202190704488e-05,
      "loss": 0.8218,
      "step": 895500
    },
    {
      "epoch": 4.47,
      "learning_rate": 3.070634682825054e-05,
      "loss": 0.8259,
      "step": 896000
    },
    {
      "epoch": 4.48,
      "learning_rate": 3.0692502330536695e-05,
      "loss": 0.8322,
      "step": 896500
    },
    {
      "epoch": 4.48,
      "learning_rate": 3.067863008833844e-05,
      "loss": 0.8328,
      "step": 897000
    },
    {
      "epoch": 4.48,
      "learning_rate": 3.066475784614019e-05,
      "loss": 0.8132,
      "step": 897500
    },
    {
      "epoch": 4.48,
      "learning_rate": 3.065088560394194e-05,
      "loss": 0.8138,
      "step": 898000
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.063701336174369e-05,
      "loss": 0.8494,
      "step": 898500
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.062316886402983e-05,
      "loss": 0.8209,
      "step": 899000
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.060929662183158e-05,
      "loss": 0.817,
      "step": 899500
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.059542437963333e-05,
      "loss": 0.8043,
      "step": 900000
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.0581552137435074e-05,
      "loss": 0.8257,
      "step": 900500
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.056767989523683e-05,
      "loss": 0.8389,
      "step": 901000
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.055386314200737e-05,
      "loss": 0.8203,
      "step": 901500
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.053999089980912e-05,
      "loss": 0.8136,
      "step": 902000
    },
    {
      "epoch": 4.51,
      "learning_rate": 3.052611865761087e-05,
      "loss": 0.8073,
      "step": 902500
    },
    {
      "epoch": 4.51,
      "learning_rate": 3.0512246415412616e-05,
      "loss": 0.7981,
      "step": 903000
    },
    {
      "epoch": 4.51,
      "learning_rate": 3.0498374173214367e-05,
      "loss": 0.8275,
      "step": 903500
    },
    {
      "epoch": 4.51,
      "learning_rate": 3.0484501931016114e-05,
      "loss": 0.8456,
      "step": 904000
    },
    {
      "epoch": 4.52,
      "learning_rate": 3.0470629688817864e-05,
      "loss": 0.8152,
      "step": 904500
    },
    {
      "epoch": 4.52,
      "learning_rate": 3.0456757446619614e-05,
      "loss": 0.812,
      "step": 905000
    },
    {
      "epoch": 4.52,
      "learning_rate": 3.044288520442136e-05,
      "loss": 0.7891,
      "step": 905500
    },
    {
      "epoch": 4.52,
      "learning_rate": 3.0429040706707505e-05,
      "loss": 0.8093,
      "step": 906000
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.041516846450926e-05,
      "loss": 0.8189,
      "step": 906500
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.0401296222311006e-05,
      "loss": 0.8276,
      "step": 907000
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.0387423980112756e-05,
      "loss": 0.8126,
      "step": 907500
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.03735794823989e-05,
      "loss": 0.8287,
      "step": 908000
    },
    {
      "epoch": 4.54,
      "learning_rate": 3.0359707240200654e-05,
      "loss": 0.8245,
      "step": 908500
    },
    {
      "epoch": 4.54,
      "learning_rate": 3.0345834998002397e-05,
      "loss": 0.841,
      "step": 909000
    },
    {
      "epoch": 4.54,
      "learning_rate": 3.033196275580415e-05,
      "loss": 0.8446,
      "step": 909500
    },
    {
      "epoch": 4.54,
      "learning_rate": 3.0318090513605895e-05,
      "loss": 0.8006,
      "step": 910000
    },
    {
      "epoch": 4.55,
      "learning_rate": 3.030421827140765e-05,
      "loss": 0.8547,
      "step": 910500
    },
    {
      "epoch": 4.55,
      "learning_rate": 3.0290373773693792e-05,
      "loss": 0.8194,
      "step": 911000
    },
    {
      "epoch": 4.55,
      "learning_rate": 3.027650153149554e-05,
      "loss": 0.809,
      "step": 911500
    },
    {
      "epoch": 4.55,
      "learning_rate": 3.026262928929729e-05,
      "loss": 0.8392,
      "step": 912000
    },
    {
      "epoch": 4.56,
      "learning_rate": 3.0248757047099036e-05,
      "loss": 0.8172,
      "step": 912500
    },
    {
      "epoch": 4.56,
      "learning_rate": 3.0234884804900787e-05,
      "loss": 0.8376,
      "step": 913000
    },
    {
      "epoch": 4.56,
      "learning_rate": 3.022104030718693e-05,
      "loss": 0.8151,
      "step": 913500
    },
    {
      "epoch": 4.56,
      "learning_rate": 3.0207168064988684e-05,
      "loss": 0.8229,
      "step": 914000
    },
    {
      "epoch": 4.57,
      "learning_rate": 3.0193295822790428e-05,
      "loss": 0.8111,
      "step": 914500
    },
    {
      "epoch": 4.57,
      "learning_rate": 3.0179423580592182e-05,
      "loss": 0.8173,
      "step": 915000
    },
    {
      "epoch": 4.57,
      "learning_rate": 3.0165579082878326e-05,
      "loss": 0.8481,
      "step": 915500
    },
    {
      "epoch": 4.57,
      "learning_rate": 3.0151706840680073e-05,
      "loss": 0.8158,
      "step": 916000
    },
    {
      "epoch": 4.58,
      "learning_rate": 3.0137834598481823e-05,
      "loss": 0.8123,
      "step": 916500
    },
    {
      "epoch": 4.58,
      "learning_rate": 3.0123962356283573e-05,
      "loss": 0.7957,
      "step": 917000
    },
    {
      "epoch": 4.58,
      "learning_rate": 3.011009011408532e-05,
      "loss": 0.8267,
      "step": 917500
    },
    {
      "epoch": 4.58,
      "learning_rate": 3.0096245616371464e-05,
      "loss": 0.8121,
      "step": 918000
    },
    {
      "epoch": 4.59,
      "learning_rate": 3.0082373374173218e-05,
      "loss": 0.8024,
      "step": 918500
    },
    {
      "epoch": 4.59,
      "learning_rate": 3.0068501131974965e-05,
      "loss": 0.8101,
      "step": 919000
    },
    {
      "epoch": 4.59,
      "learning_rate": 3.0054628889776715e-05,
      "loss": 0.8478,
      "step": 919500
    },
    {
      "epoch": 4.59,
      "learning_rate": 3.0040756647578462e-05,
      "loss": 0.8262,
      "step": 920000
    },
    {
      "epoch": 4.6,
      "learning_rate": 3.0026912149864613e-05,
      "loss": 0.8193,
      "step": 920500
    },
    {
      "epoch": 4.6,
      "learning_rate": 3.0013039907666356e-05,
      "loss": 0.8071,
      "step": 921000
    },
    {
      "epoch": 4.6,
      "learning_rate": 2.999916766546811e-05,
      "loss": 0.8102,
      "step": 921500
    },
    {
      "epoch": 4.6,
      "learning_rate": 2.9985295423269854e-05,
      "loss": 0.8219,
      "step": 922000
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.9971423181071607e-05,
      "loss": 0.8422,
      "step": 922500
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.995757868335775e-05,
      "loss": 0.8186,
      "step": 923000
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.9943706441159498e-05,
      "loss": 0.8203,
      "step": 923500
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.992983419896125e-05,
      "loss": 0.833,
      "step": 924000
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.9915961956762995e-05,
      "loss": 0.8372,
      "step": 924500
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.9902117459049146e-05,
      "loss": 0.8265,
      "step": 925000
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.988827296133529e-05,
      "loss": 0.8418,
      "step": 925500
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.9874400719137037e-05,
      "loss": 0.8341,
      "step": 926000
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.9860528476938787e-05,
      "loss": 0.806,
      "step": 926500
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.9846656234740534e-05,
      "loss": 0.862,
      "step": 927000
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.9832783992542284e-05,
      "loss": 0.8022,
      "step": 927500
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.981891175034403e-05,
      "loss": 0.8364,
      "step": 928000
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.9805039508145782e-05,
      "loss": 0.8339,
      "step": 928500
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.9791195010431926e-05,
      "loss": 0.8288,
      "step": 929000
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.977732276823368e-05,
      "loss": 0.8381,
      "step": 929500
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.9763450526035423e-05,
      "loss": 0.8303,
      "step": 930000
    },
    {
      "epoch": 4.65,
      "learning_rate": 2.9749578283837177e-05,
      "loss": 0.8289,
      "step": 930500
    },
    {
      "epoch": 4.65,
      "learning_rate": 2.9735706041638924e-05,
      "loss": 0.885,
      "step": 931000
    },
    {
      "epoch": 4.65,
      "learning_rate": 2.9721833799440674e-05,
      "loss": 0.8216,
      "step": 931500
    },
    {
      "epoch": 4.65,
      "learning_rate": 2.970796155724242e-05,
      "loss": 0.8186,
      "step": 932000
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.969408931504417e-05,
      "loss": 0.8183,
      "step": 932500
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.968021707284592e-05,
      "loss": 0.802,
      "step": 933000
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.966637257513207e-05,
      "loss": 0.8372,
      "step": 933500
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.9652500332933813e-05,
      "loss": 0.841,
      "step": 934000
    },
    {
      "epoch": 4.67,
      "learning_rate": 2.9638628090735566e-05,
      "loss": 0.8073,
      "step": 934500
    },
    {
      "epoch": 4.67,
      "learning_rate": 2.962475584853731e-05,
      "loss": 0.8498,
      "step": 935000
    },
    {
      "epoch": 4.67,
      "learning_rate": 2.9610911350823457e-05,
      "loss": 0.8229,
      "step": 935500
    },
    {
      "epoch": 4.67,
      "learning_rate": 2.9597039108625207e-05,
      "loss": 0.8461,
      "step": 936000
    },
    {
      "epoch": 4.68,
      "learning_rate": 2.9583166866426954e-05,
      "loss": 0.8314,
      "step": 936500
    },
    {
      "epoch": 4.68,
      "learning_rate": 2.9569294624228705e-05,
      "loss": 0.8289,
      "step": 937000
    },
    {
      "epoch": 4.68,
      "learning_rate": 2.9555422382030452e-05,
      "loss": 0.8334,
      "step": 937500
    },
    {
      "epoch": 4.68,
      "learning_rate": 2.9541577884316602e-05,
      "loss": 0.8159,
      "step": 938000
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.9527705642118346e-05,
      "loss": 0.8288,
      "step": 938500
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.95138333999201e-05,
      "loss": 0.8348,
      "step": 939000
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.9499961157721843e-05,
      "loss": 0.8078,
      "step": 939500
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.948611666000799e-05,
      "loss": 0.8257,
      "step": 940000
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.947224441780974e-05,
      "loss": 0.8204,
      "step": 940500
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.9458372175611488e-05,
      "loss": 0.8334,
      "step": 941000
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.9444499933413238e-05,
      "loss": 0.8435,
      "step": 941500
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.9430655435699382e-05,
      "loss": 0.8146,
      "step": 942000
    },
    {
      "epoch": 4.71,
      "learning_rate": 2.9416783193501136e-05,
      "loss": 0.7994,
      "step": 942500
    },
    {
      "epoch": 4.71,
      "learning_rate": 2.9402910951302883e-05,
      "loss": 0.8369,
      "step": 943000
    },
    {
      "epoch": 4.71,
      "learning_rate": 2.9389038709104633e-05,
      "loss": 0.8192,
      "step": 943500
    },
    {
      "epoch": 4.71,
      "learning_rate": 2.9375194211390777e-05,
      "loss": 0.833,
      "step": 944000
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.9361321969192524e-05,
      "loss": 0.7825,
      "step": 944500
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.9347449726994274e-05,
      "loss": 0.8148,
      "step": 945000
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.9333577484796028e-05,
      "loss": 0.8055,
      "step": 945500
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.931973298708217e-05,
      "loss": 0.8125,
      "step": 946000
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.930586074488392e-05,
      "loss": 0.8294,
      "step": 946500
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.929198850268567e-05,
      "loss": 0.8302,
      "step": 947000
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.9278116260487416e-05,
      "loss": 0.8151,
      "step": 947500
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.9264244018289166e-05,
      "loss": 0.8387,
      "step": 948000
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.925039952057531e-05,
      "loss": 0.841,
      "step": 948500
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.9236527278377064e-05,
      "loss": 0.8143,
      "step": 949000
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.9222655036178808e-05,
      "loss": 0.843,
      "step": 949500
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.920878279398056e-05,
      "loss": 0.8201,
      "step": 950000
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.9194910551782305e-05,
      "loss": 0.839,
      "step": 950500
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.918103830958406e-05,
      "loss": 0.8114,
      "step": 951000
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.9167193811870202e-05,
      "loss": 0.8407,
      "step": 951500
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.915332156967195e-05,
      "loss": 0.8081,
      "step": 952000
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.91394493274737e-05,
      "loss": 0.806,
      "step": 952500
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.9125577085275447e-05,
      "loss": 0.8465,
      "step": 953000
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.9111704843077197e-05,
      "loss": 0.8157,
      "step": 953500
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.909783260087895e-05,
      "loss": 0.8062,
      "step": 954000
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.9083988103165095e-05,
      "loss": 0.8314,
      "step": 954500
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.907011586096684e-05,
      "loss": 0.8224,
      "step": 955000
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.9056243618768592e-05,
      "loss": 0.8342,
      "step": 955500
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.904237137657034e-05,
      "loss": 0.8306,
      "step": 956000
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.9028526878856483e-05,
      "loss": 0.8211,
      "step": 956500
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.9014654636658233e-05,
      "loss": 0.819,
      "step": 957000
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.9000782394459987e-05,
      "loss": 0.8271,
      "step": 957500
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.898691015226173e-05,
      "loss": 0.8193,
      "step": 958000
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.8973037910063484e-05,
      "loss": 0.8531,
      "step": 958500
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.8959193412349628e-05,
      "loss": 0.8292,
      "step": 959000
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.8945321170151375e-05,
      "loss": 0.8519,
      "step": 959500
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.8931448927953125e-05,
      "loss": 0.8057,
      "step": 960000
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.8917576685754872e-05,
      "loss": 0.7889,
      "step": 960500
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.8903732188041016e-05,
      "loss": 0.8585,
      "step": 961000
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.8889887690327167e-05,
      "loss": 0.8141,
      "step": 961500
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.8876015448128914e-05,
      "loss": 0.8349,
      "step": 962000
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.8862143205930664e-05,
      "loss": 0.8498,
      "step": 962500
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.884827096373241e-05,
      "loss": 0.8441,
      "step": 963000
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.883439872153416e-05,
      "loss": 0.8151,
      "step": 963500
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.882052647933591e-05,
      "loss": 0.8206,
      "step": 964000
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.880665423713766e-05,
      "loss": 0.8324,
      "step": 964500
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.8792781994939406e-05,
      "loss": 0.823,
      "step": 965000
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.8778937497225556e-05,
      "loss": 0.8289,
      "step": 965500
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.87650652550273e-05,
      "loss": 0.8227,
      "step": 966000
    },
    {
      "epoch": 4.83,
      "learning_rate": 2.8751193012829054e-05,
      "loss": 0.8284,
      "step": 966500
    },
    {
      "epoch": 4.83,
      "learning_rate": 2.87373207706308e-05,
      "loss": 0.8221,
      "step": 967000
    },
    {
      "epoch": 4.83,
      "learning_rate": 2.8723476272916944e-05,
      "loss": 0.8369,
      "step": 967500
    },
    {
      "epoch": 4.83,
      "learning_rate": 2.8709604030718695e-05,
      "loss": 0.8391,
      "step": 968000
    },
    {
      "epoch": 4.84,
      "learning_rate": 2.869573178852044e-05,
      "loss": 0.8018,
      "step": 968500
    },
    {
      "epoch": 4.84,
      "learning_rate": 2.8681859546322192e-05,
      "loss": 0.8385,
      "step": 969000
    },
    {
      "epoch": 4.84,
      "learning_rate": 2.866798730412394e-05,
      "loss": 0.8503,
      "step": 969500
    },
    {
      "epoch": 4.84,
      "learning_rate": 2.865411506192569e-05,
      "loss": 0.7973,
      "step": 970000
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.8640242819727443e-05,
      "loss": 0.8364,
      "step": 970500
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.8626398322013587e-05,
      "loss": 0.8346,
      "step": 971000
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.8612526079815334e-05,
      "loss": 0.8092,
      "step": 971500
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.8598653837617084e-05,
      "loss": 0.8294,
      "step": 972000
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.858478159541883e-05,
      "loss": 0.8076,
      "step": 972500
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.8570937097704975e-05,
      "loss": 0.8361,
      "step": 973000
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.8557064855506725e-05,
      "loss": 0.8146,
      "step": 973500
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.854319261330848e-05,
      "loss": 0.826,
      "step": 974000
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.8529348115594623e-05,
      "loss": 0.8254,
      "step": 974500
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.851547587339637e-05,
      "loss": 0.8201,
      "step": 975000
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.850160363119812e-05,
      "loss": 0.8175,
      "step": 975500
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.8487731388999867e-05,
      "loss": 0.8518,
      "step": 976000
    },
    {
      "epoch": 4.88,
      "learning_rate": 2.8473859146801618e-05,
      "loss": 0.8288,
      "step": 976500
    },
    {
      "epoch": 4.88,
      "learning_rate": 2.8459986904603365e-05,
      "loss": 0.8301,
      "step": 977000
    },
    {
      "epoch": 4.88,
      "learning_rate": 2.8446114662405115e-05,
      "loss": 0.833,
      "step": 977500
    },
    {
      "epoch": 4.88,
      "learning_rate": 2.8432242420206862e-05,
      "loss": 0.8243,
      "step": 978000
    },
    {
      "epoch": 4.89,
      "learning_rate": 2.8418397922493013e-05,
      "loss": 0.8466,
      "step": 978500
    },
    {
      "epoch": 4.89,
      "learning_rate": 2.8404553424779156e-05,
      "loss": 0.8527,
      "step": 979000
    },
    {
      "epoch": 4.89,
      "learning_rate": 2.8390681182580903e-05,
      "loss": 0.8059,
      "step": 979500
    },
    {
      "epoch": 4.89,
      "learning_rate": 2.8376808940382654e-05,
      "loss": 0.8421,
      "step": 980000
    },
    {
      "epoch": 4.9,
      "learning_rate": 2.83629366981844e-05,
      "loss": 0.8169,
      "step": 980500
    },
    {
      "epoch": 4.9,
      "learning_rate": 2.834906445598615e-05,
      "loss": 0.8322,
      "step": 981000
    },
    {
      "epoch": 4.9,
      "learning_rate": 2.8335192213787898e-05,
      "loss": 0.8449,
      "step": 981500
    },
    {
      "epoch": 4.9,
      "learning_rate": 2.832131997158965e-05,
      "loss": 0.8049,
      "step": 982000
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.8307447729391402e-05,
      "loss": 0.8537,
      "step": 982500
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.8293603231677546e-05,
      "loss": 0.7908,
      "step": 983000
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.8279730989479293e-05,
      "loss": 0.8413,
      "step": 983500
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.8265858747281043e-05,
      "loss": 0.8011,
      "step": 984000
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.825198650508279e-05,
      "loss": 0.8192,
      "step": 984500
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.8238142007368934e-05,
      "loss": 0.8225,
      "step": 985000
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.8224269765170684e-05,
      "loss": 0.8327,
      "step": 985500
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.8210397522972438e-05,
      "loss": 0.8316,
      "step": 986000
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.8196525280774182e-05,
      "loss": 0.8163,
      "step": 986500
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.818268078306033e-05,
      "loss": 0.8209,
      "step": 987000
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.816880854086208e-05,
      "loss": 0.8384,
      "step": 987500
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.8154936298663826e-05,
      "loss": 0.8027,
      "step": 988000
    },
    {
      "epoch": 4.94,
      "learning_rate": 2.8141064056465577e-05,
      "loss": 0.8347,
      "step": 988500
    },
    {
      "epoch": 4.94,
      "learning_rate": 2.8127191814267324e-05,
      "loss": 0.8512,
      "step": 989000
    },
    {
      "epoch": 4.94,
      "learning_rate": 2.8113319572069074e-05,
      "loss": 0.8224,
      "step": 989500
    },
    {
      "epoch": 4.94,
      "learning_rate": 2.8099475074355218e-05,
      "loss": 0.8436,
      "step": 990000
    },
    {
      "epoch": 4.95,
      "learning_rate": 2.808560283215697e-05,
      "loss": 0.8263,
      "step": 990500
    },
    {
      "epoch": 4.95,
      "learning_rate": 2.807173058995872e-05,
      "loss": 0.8297,
      "step": 991000
    },
    {
      "epoch": 4.95,
      "learning_rate": 2.805785834776047e-05,
      "loss": 0.8275,
      "step": 991500
    },
    {
      "epoch": 4.95,
      "learning_rate": 2.8044013850046613e-05,
      "loss": 0.8358,
      "step": 992000
    },
    {
      "epoch": 4.96,
      "learning_rate": 2.803014160784836e-05,
      "loss": 0.8094,
      "step": 992500
    },
    {
      "epoch": 4.96,
      "learning_rate": 2.801626936565011e-05,
      "loss": 0.8499,
      "step": 993000
    },
    {
      "epoch": 4.96,
      "learning_rate": 2.8002397123451857e-05,
      "loss": 0.839,
      "step": 993500
    },
    {
      "epoch": 4.96,
      "learning_rate": 2.7988524881253607e-05,
      "loss": 0.8222,
      "step": 994000
    },
    {
      "epoch": 4.97,
      "learning_rate": 2.7974680383539754e-05,
      "loss": 0.8568,
      "step": 994500
    },
    {
      "epoch": 4.97,
      "learning_rate": 2.7960808141341505e-05,
      "loss": 0.8386,
      "step": 995000
    },
    {
      "epoch": 4.97,
      "learning_rate": 2.7946935899143252e-05,
      "loss": 0.8461,
      "step": 995500
    },
    {
      "epoch": 4.97,
      "learning_rate": 2.7933063656945002e-05,
      "loss": 0.8088,
      "step": 996000
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.7919219159231146e-05,
      "loss": 0.8248,
      "step": 996500
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.7905374661517293e-05,
      "loss": 0.8086,
      "step": 997000
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.7891502419319044e-05,
      "loss": 0.8322,
      "step": 997500
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.787763017712079e-05,
      "loss": 0.8477,
      "step": 998000
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.786375793492254e-05,
      "loss": 0.8299,
      "step": 998500
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.7849885692724288e-05,
      "loss": 0.8361,
      "step": 999000
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.7836013450526038e-05,
      "loss": 0.8324,
      "step": 999500
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.7822141208327785e-05,
      "loss": 0.8493,
      "step": 1000000
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.7808268966129536e-05,
      "loss": 0.8202,
      "step": 1000500
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.7794396723931283e-05,
      "loss": 0.8326,
      "step": 1001000
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.0874613523483276,
      "eval_runtime": 2238.5705,
      "eval_samples_per_second": 52.041,
      "eval_steps_per_second": 13.011,
      "step": 1001200
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.7780552226217426e-05,
      "loss": 0.7118,
      "step": 1001500
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.7766679984019177e-05,
      "loss": 0.6279,
      "step": 1002000
    },
    {
      "epoch": 5.01,
      "learning_rate": 2.775280774182093e-05,
      "loss": 0.6261,
      "step": 1002500
    },
    {
      "epoch": 5.01,
      "learning_rate": 2.7738935499622677e-05,
      "loss": 0.6433,
      "step": 1003000
    },
    {
      "epoch": 5.01,
      "learning_rate": 2.772509100190882e-05,
      "loss": 0.6499,
      "step": 1003500
    },
    {
      "epoch": 5.01,
      "learning_rate": 2.771121875971057e-05,
      "loss": 0.6081,
      "step": 1004000
    },
    {
      "epoch": 5.02,
      "learning_rate": 2.769734651751232e-05,
      "loss": 0.6242,
      "step": 1004500
    },
    {
      "epoch": 5.02,
      "learning_rate": 2.768347427531407e-05,
      "loss": 0.6246,
      "step": 1005000
    },
    {
      "epoch": 5.02,
      "learning_rate": 2.7669602033115816e-05,
      "loss": 0.6455,
      "step": 1005500
    },
    {
      "epoch": 5.02,
      "learning_rate": 2.7655757535401966e-05,
      "loss": 0.6238,
      "step": 1006000
    },
    {
      "epoch": 5.03,
      "learning_rate": 2.7641885293203713e-05,
      "loss": 0.6394,
      "step": 1006500
    },
    {
      "epoch": 5.03,
      "learning_rate": 2.7628013051005464e-05,
      "loss": 0.6233,
      "step": 1007000
    },
    {
      "epoch": 5.03,
      "learning_rate": 2.761414080880721e-05,
      "loss": 0.6066,
      "step": 1007500
    },
    {
      "epoch": 5.03,
      "learning_rate": 2.7600296311093355e-05,
      "loss": 0.636,
      "step": 1008000
    },
    {
      "epoch": 5.04,
      "learning_rate": 2.7586424068895105e-05,
      "loss": 0.6174,
      "step": 1008500
    },
    {
      "epoch": 5.04,
      "learning_rate": 2.7572551826696852e-05,
      "loss": 0.6383,
      "step": 1009000
    },
    {
      "epoch": 5.04,
      "learning_rate": 2.7558679584498602e-05,
      "loss": 0.6361,
      "step": 1009500
    },
    {
      "epoch": 5.04,
      "learning_rate": 2.754483508678475e-05,
      "loss": 0.6264,
      "step": 1010000
    },
    {
      "epoch": 5.05,
      "learning_rate": 2.75309628445865e-05,
      "loss": 0.643,
      "step": 1010500
    },
    {
      "epoch": 5.05,
      "learning_rate": 2.7517090602388247e-05,
      "loss": 0.6427,
      "step": 1011000
    },
    {
      "epoch": 5.05,
      "learning_rate": 2.7503218360189997e-05,
      "loss": 0.6322,
      "step": 1011500
    },
    {
      "epoch": 5.05,
      "learning_rate": 2.748937386247614e-05,
      "loss": 0.6328,
      "step": 1012000
    },
    {
      "epoch": 5.06,
      "learning_rate": 2.7475501620277888e-05,
      "loss": 0.6332,
      "step": 1012500
    },
    {
      "epoch": 5.06,
      "learning_rate": 2.7461629378079638e-05,
      "loss": 0.6407,
      "step": 1013000
    },
    {
      "epoch": 5.06,
      "learning_rate": 2.7447757135881385e-05,
      "loss": 0.6393,
      "step": 1013500
    },
    {
      "epoch": 5.06,
      "learning_rate": 2.7433884893683136e-05,
      "loss": 0.6291,
      "step": 1014000
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.7420040395969283e-05,
      "loss": 0.6579,
      "step": 1014500
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.7406168153771033e-05,
      "loss": 0.6457,
      "step": 1015000
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.739229591157278e-05,
      "loss": 0.6519,
      "step": 1015500
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.737842366937453e-05,
      "loss": 0.6581,
      "step": 1016000
    },
    {
      "epoch": 5.08,
      "learning_rate": 2.7364551427176278e-05,
      "loss": 0.6401,
      "step": 1016500
    },
    {
      "epoch": 5.08,
      "learning_rate": 2.735070692946242e-05,
      "loss": 0.6495,
      "step": 1017000
    },
    {
      "epoch": 5.08,
      "learning_rate": 2.7336862431748572e-05,
      "loss": 0.6492,
      "step": 1017500
    },
    {
      "epoch": 5.08,
      "learning_rate": 2.732299018955032e-05,
      "loss": 0.6596,
      "step": 1018000
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.730911794735207e-05,
      "loss": 0.6583,
      "step": 1018500
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.7295273449638213e-05,
      "loss": 0.6502,
      "step": 1019000
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.728140120743996e-05,
      "loss": 0.656,
      "step": 1019500
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.7267528965241714e-05,
      "loss": 0.6158,
      "step": 1020000
    },
    {
      "epoch": 5.1,
      "learning_rate": 2.7253656723043457e-05,
      "loss": 0.646,
      "step": 1020500
    },
    {
      "epoch": 5.1,
      "learning_rate": 2.723978448084521e-05,
      "loss": 0.6526,
      "step": 1021000
    },
    {
      "epoch": 5.1,
      "learning_rate": 2.7225912238646955e-05,
      "loss": 0.6587,
      "step": 1021500
    },
    {
      "epoch": 5.1,
      "learning_rate": 2.721203999644871e-05,
      "loss": 0.6535,
      "step": 1022000
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.719816775425046e-05,
      "loss": 0.6659,
      "step": 1022500
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.7184295512052206e-05,
      "loss": 0.6478,
      "step": 1023000
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.7170423269853956e-05,
      "loss": 0.6559,
      "step": 1023500
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.7156551027655703e-05,
      "loss": 0.6307,
      "step": 1024000
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.7142678785457453e-05,
      "loss": 0.6525,
      "step": 1024500
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.71288065432592e-05,
      "loss": 0.6599,
      "step": 1025000
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.7114962045545344e-05,
      "loss": 0.6505,
      "step": 1025500
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.7101089803347095e-05,
      "loss": 0.6219,
      "step": 1026000
    },
    {
      "epoch": 5.13,
      "learning_rate": 2.708721756114885e-05,
      "loss": 0.6538,
      "step": 1026500
    },
    {
      "epoch": 5.13,
      "learning_rate": 2.7073345318950595e-05,
      "loss": 0.653,
      "step": 1027000
    },
    {
      "epoch": 5.13,
      "learning_rate": 2.705950082123674e-05,
      "loss": 0.6334,
      "step": 1027500
    },
    {
      "epoch": 5.13,
      "learning_rate": 2.7045656323522883e-05,
      "loss": 0.641,
      "step": 1028000
    },
    {
      "epoch": 5.14,
      "learning_rate": 2.7031784081324633e-05,
      "loss": 0.6652,
      "step": 1028500
    },
    {
      "epoch": 5.14,
      "learning_rate": 2.701791183912638e-05,
      "loss": 0.633,
      "step": 1029000
    },
    {
      "epoch": 5.14,
      "learning_rate": 2.7004039596928134e-05,
      "loss": 0.659,
      "step": 1029500
    },
    {
      "epoch": 5.14,
      "learning_rate": 2.6990167354729878e-05,
      "loss": 0.6523,
      "step": 1030000
    },
    {
      "epoch": 5.15,
      "learning_rate": 2.697629511253163e-05,
      "loss": 0.6231,
      "step": 1030500
    },
    {
      "epoch": 5.15,
      "learning_rate": 2.6962422870333382e-05,
      "loss": 0.6689,
      "step": 1031000
    },
    {
      "epoch": 5.15,
      "learning_rate": 2.694855062813513e-05,
      "loss": 0.6738,
      "step": 1031500
    },
    {
      "epoch": 5.15,
      "learning_rate": 2.6934706130421272e-05,
      "loss": 0.6577,
      "step": 1032000
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.6920833888223023e-05,
      "loss": 0.6641,
      "step": 1032500
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.690696164602477e-05,
      "loss": 0.6579,
      "step": 1033000
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.689308940382652e-05,
      "loss": 0.6513,
      "step": 1033500
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.6879244906112667e-05,
      "loss": 0.688,
      "step": 1034000
    },
    {
      "epoch": 5.17,
      "learning_rate": 2.6865372663914418e-05,
      "loss": 0.6535,
      "step": 1034500
    },
    {
      "epoch": 5.17,
      "learning_rate": 2.6851500421716165e-05,
      "loss": 0.6706,
      "step": 1035000
    },
    {
      "epoch": 5.17,
      "learning_rate": 2.6837628179517915e-05,
      "loss": 0.6535,
      "step": 1035500
    },
    {
      "epoch": 5.17,
      "learning_rate": 2.682378368180406e-05,
      "loss": 0.6729,
      "step": 1036000
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.6809911439605806e-05,
      "loss": 0.6391,
      "step": 1036500
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.6796039197407556e-05,
      "loss": 0.6539,
      "step": 1037000
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.6782166955209303e-05,
      "loss": 0.6482,
      "step": 1037500
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.6768294713011054e-05,
      "loss": 0.6664,
      "step": 1038000
    },
    {
      "epoch": 5.19,
      "learning_rate": 2.67544224708128e-05,
      "loss": 0.6269,
      "step": 1038500
    },
    {
      "epoch": 5.19,
      "learning_rate": 2.6740550228614554e-05,
      "loss": 0.6482,
      "step": 1039000
    },
    {
      "epoch": 5.19,
      "learning_rate": 2.6726705730900698e-05,
      "loss": 0.6794,
      "step": 1039500
    },
    {
      "epoch": 5.19,
      "learning_rate": 2.671283348870245e-05,
      "loss": 0.6749,
      "step": 1040000
    },
    {
      "epoch": 5.2,
      "learning_rate": 2.6698961246504195e-05,
      "loss": 0.6755,
      "step": 1040500
    },
    {
      "epoch": 5.2,
      "learning_rate": 2.6685089004305946e-05,
      "loss": 0.6583,
      "step": 1041000
    },
    {
      "epoch": 5.2,
      "learning_rate": 2.6671244506592093e-05,
      "loss": 0.6594,
      "step": 1041500
    },
    {
      "epoch": 5.2,
      "learning_rate": 2.6657372264393837e-05,
      "loss": 0.6764,
      "step": 1042000
    },
    {
      "epoch": 5.21,
      "learning_rate": 2.664350002219559e-05,
      "loss": 0.6612,
      "step": 1042500
    },
    {
      "epoch": 5.21,
      "learning_rate": 2.662962777999734e-05,
      "loss": 0.6439,
      "step": 1043000
    },
    {
      "epoch": 5.21,
      "learning_rate": 2.6615755537799088e-05,
      "loss": 0.6824,
      "step": 1043500
    },
    {
      "epoch": 5.21,
      "learning_rate": 2.660191104008523e-05,
      "loss": 0.6508,
      "step": 1044000
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.6588066542371375e-05,
      "loss": 0.6655,
      "step": 1044500
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.657419430017313e-05,
      "loss": 0.6534,
      "step": 1045000
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.6560322057974873e-05,
      "loss": 0.6868,
      "step": 1045500
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.6546449815776626e-05,
      "loss": 0.6507,
      "step": 1046000
    },
    {
      "epoch": 5.23,
      "learning_rate": 2.6532577573578377e-05,
      "loss": 0.6583,
      "step": 1046500
    },
    {
      "epoch": 5.23,
      "learning_rate": 2.6518705331380124e-05,
      "loss": 0.6318,
      "step": 1047000
    },
    {
      "epoch": 5.23,
      "learning_rate": 2.6504833089181874e-05,
      "loss": 0.6761,
      "step": 1047500
    },
    {
      "epoch": 5.23,
      "learning_rate": 2.649096084698362e-05,
      "loss": 0.6703,
      "step": 1048000
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.647708860478537e-05,
      "loss": 0.6752,
      "step": 1048500
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.6463244107071515e-05,
      "loss": 0.6543,
      "step": 1049000
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.6449371864873262e-05,
      "loss": 0.6802,
      "step": 1049500
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.6435527367159406e-05,
      "loss": 0.6719,
      "step": 1050000
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.642165512496116e-05,
      "loss": 0.6768,
      "step": 1050500
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.640778288276291e-05,
      "loss": 0.6577,
      "step": 1051000
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.6393910640564657e-05,
      "loss": 0.6873,
      "step": 1051500
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.6380038398366407e-05,
      "loss": 0.6536,
      "step": 1052000
    },
    {
      "epoch": 5.26,
      "learning_rate": 2.6366166156168154e-05,
      "loss": 0.6698,
      "step": 1052500
    },
    {
      "epoch": 5.26,
      "learning_rate": 2.6352293913969905e-05,
      "loss": 0.6758,
      "step": 1053000
    },
    {
      "epoch": 5.26,
      "learning_rate": 2.6338421671771652e-05,
      "loss": 0.6843,
      "step": 1053500
    },
    {
      "epoch": 5.26,
      "learning_rate": 2.6324577174057796e-05,
      "loss": 0.6832,
      "step": 1054000
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.631070493185955e-05,
      "loss": 0.677,
      "step": 1054500
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.62968326896613e-05,
      "loss": 0.648,
      "step": 1055000
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.6282960447463047e-05,
      "loss": 0.6613,
      "step": 1055500
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.6269088205264797e-05,
      "loss": 0.6689,
      "step": 1056000
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.625524370755094e-05,
      "loss": 0.672,
      "step": 1056500
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.6241371465352688e-05,
      "loss": 0.6481,
      "step": 1057000
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.622752696763883e-05,
      "loss": 0.6635,
      "step": 1057500
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.6213654725440585e-05,
      "loss": 0.6861,
      "step": 1058000
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.619978248324233e-05,
      "loss": 0.6816,
      "step": 1058500
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.6185910241044083e-05,
      "loss": 0.6766,
      "step": 1059000
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.6172037998845833e-05,
      "loss": 0.6808,
      "step": 1059500
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.615816575664758e-05,
      "loss": 0.6917,
      "step": 1060000
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.614429351444933e-05,
      "loss": 0.6841,
      "step": 1060500
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.6130421272251077e-05,
      "loss": 0.6814,
      "step": 1061000
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.611657677453722e-05,
      "loss": 0.6549,
      "step": 1061500
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.610270453233897e-05,
      "loss": 0.6544,
      "step": 1062000
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.608883229014072e-05,
      "loss": 0.6687,
      "step": 1062500
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.607496004794247e-05,
      "loss": 0.6748,
      "step": 1063000
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.6061115550228616e-05,
      "loss": 0.6733,
      "step": 1063500
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.6047243308030366e-05,
      "loss": 0.682,
      "step": 1064000
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.6033371065832113e-05,
      "loss": 0.7032,
      "step": 1064500
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.6019498823633864e-05,
      "loss": 0.6775,
      "step": 1065000
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.6005654325920007e-05,
      "loss": 0.6472,
      "step": 1065500
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.5991782083721754e-05,
      "loss": 0.6729,
      "step": 1066000
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.5977909841523508e-05,
      "loss": 0.6733,
      "step": 1066500
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.5964037599325252e-05,
      "loss": 0.6847,
      "step": 1067000
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.5950165357127006e-05,
      "loss": 0.6699,
      "step": 1067500
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.593632085941315e-05,
      "loss": 0.676,
      "step": 1068000
    },
    {
      "epoch": 5.34,
      "learning_rate": 2.5922476361699293e-05,
      "loss": 0.6627,
      "step": 1068500
    },
    {
      "epoch": 5.34,
      "learning_rate": 2.5908604119501047e-05,
      "loss": 0.6802,
      "step": 1069000
    },
    {
      "epoch": 5.34,
      "learning_rate": 2.589473187730279e-05,
      "loss": 0.6986,
      "step": 1069500
    },
    {
      "epoch": 5.34,
      "learning_rate": 2.5880859635104544e-05,
      "loss": 0.6846,
      "step": 1070000
    },
    {
      "epoch": 5.35,
      "learning_rate": 2.5866987392906288e-05,
      "loss": 0.6829,
      "step": 1070500
    },
    {
      "epoch": 5.35,
      "learning_rate": 2.585311515070804e-05,
      "loss": 0.6604,
      "step": 1071000
    },
    {
      "epoch": 5.35,
      "learning_rate": 2.5839242908509792e-05,
      "loss": 0.6578,
      "step": 1071500
    },
    {
      "epoch": 5.35,
      "learning_rate": 2.582537066631154e-05,
      "loss": 0.6757,
      "step": 1072000
    },
    {
      "epoch": 5.36,
      "learning_rate": 2.581149842411329e-05,
      "loss": 0.6485,
      "step": 1072500
    },
    {
      "epoch": 5.36,
      "learning_rate": 2.5797653926399433e-05,
      "loss": 0.6684,
      "step": 1073000
    },
    {
      "epoch": 5.36,
      "learning_rate": 2.578378168420118e-05,
      "loss": 0.6863,
      "step": 1073500
    },
    {
      "epoch": 5.36,
      "learning_rate": 2.576990944200293e-05,
      "loss": 0.7075,
      "step": 1074000
    },
    {
      "epoch": 5.37,
      "learning_rate": 2.5756037199804677e-05,
      "loss": 0.7014,
      "step": 1074500
    },
    {
      "epoch": 5.37,
      "learning_rate": 2.5742164957606428e-05,
      "loss": 0.6756,
      "step": 1075000
    },
    {
      "epoch": 5.37,
      "learning_rate": 2.5728320459892575e-05,
      "loss": 0.6829,
      "step": 1075500
    },
    {
      "epoch": 5.37,
      "learning_rate": 2.5714448217694325e-05,
      "loss": 0.6736,
      "step": 1076000
    },
    {
      "epoch": 5.38,
      "learning_rate": 2.5700575975496072e-05,
      "loss": 0.6736,
      "step": 1076500
    },
    {
      "epoch": 5.38,
      "learning_rate": 2.5686703733297823e-05,
      "loss": 0.6846,
      "step": 1077000
    },
    {
      "epoch": 5.38,
      "learning_rate": 2.5672859235583966e-05,
      "loss": 0.6889,
      "step": 1077500
    },
    {
      "epoch": 5.38,
      "learning_rate": 2.5658986993385713e-05,
      "loss": 0.6925,
      "step": 1078000
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.5645114751187467e-05,
      "loss": 0.6718,
      "step": 1078500
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.563124250898921e-05,
      "loss": 0.6791,
      "step": 1079000
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.561739801127536e-05,
      "loss": 0.667,
      "step": 1079500
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.5603553513561505e-05,
      "loss": 0.6773,
      "step": 1080000
    },
    {
      "epoch": 5.4,
      "learning_rate": 2.5589681271363252e-05,
      "loss": 0.7037,
      "step": 1080500
    },
    {
      "epoch": 5.4,
      "learning_rate": 2.5575809029165006e-05,
      "loss": 0.699,
      "step": 1081000
    },
    {
      "epoch": 5.4,
      "learning_rate": 2.556193678696675e-05,
      "loss": 0.689,
      "step": 1081500
    },
    {
      "epoch": 5.4,
      "learning_rate": 2.5548064544768503e-05,
      "loss": 0.6742,
      "step": 1082000
    },
    {
      "epoch": 5.41,
      "learning_rate": 2.5534192302570247e-05,
      "loss": 0.6854,
      "step": 1082500
    },
    {
      "epoch": 5.41,
      "learning_rate": 2.5520320060372e-05,
      "loss": 0.6899,
      "step": 1083000
    },
    {
      "epoch": 5.41,
      "learning_rate": 2.550644781817375e-05,
      "loss": 0.6673,
      "step": 1083500
    },
    {
      "epoch": 5.41,
      "learning_rate": 2.5492603320459895e-05,
      "loss": 0.6664,
      "step": 1084000
    },
    {
      "epoch": 5.42,
      "learning_rate": 2.547873107826164e-05,
      "loss": 0.6685,
      "step": 1084500
    },
    {
      "epoch": 5.42,
      "learning_rate": 2.5464858836063392e-05,
      "loss": 0.6802,
      "step": 1085000
    },
    {
      "epoch": 5.42,
      "learning_rate": 2.545098659386514e-05,
      "loss": 0.6728,
      "step": 1085500
    },
    {
      "epoch": 5.42,
      "learning_rate": 2.543711435166689e-05,
      "loss": 0.6805,
      "step": 1086000
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.5423269853953037e-05,
      "loss": 0.6753,
      "step": 1086500
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.540939761175478e-05,
      "loss": 0.6878,
      "step": 1087000
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.5395525369556534e-05,
      "loss": 0.6817,
      "step": 1087500
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.5381653127358284e-05,
      "loss": 0.6876,
      "step": 1088000
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.5367808629644428e-05,
      "loss": 0.6836,
      "step": 1088500
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.5353936387446175e-05,
      "loss": 0.6848,
      "step": 1089000
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.5340064145247925e-05,
      "loss": 0.691,
      "step": 1089500
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.5326191903049672e-05,
      "loss": 0.7009,
      "step": 1090000
    },
    {
      "epoch": 5.45,
      "learning_rate": 2.5312319660851426e-05,
      "loss": 0.6907,
      "step": 1090500
    },
    {
      "epoch": 5.45,
      "learning_rate": 2.529847516313757e-05,
      "loss": 0.6939,
      "step": 1091000
    },
    {
      "epoch": 5.45,
      "learning_rate": 2.528460292093932e-05,
      "loss": 0.6838,
      "step": 1091500
    },
    {
      "epoch": 5.45,
      "learning_rate": 2.5270758423225464e-05,
      "loss": 0.708,
      "step": 1092000
    },
    {
      "epoch": 5.46,
      "learning_rate": 2.525688618102721e-05,
      "loss": 0.6896,
      "step": 1092500
    },
    {
      "epoch": 5.46,
      "learning_rate": 2.5243013938828965e-05,
      "loss": 0.6913,
      "step": 1093000
    },
    {
      "epoch": 5.46,
      "learning_rate": 2.522914169663071e-05,
      "loss": 0.7024,
      "step": 1093500
    },
    {
      "epoch": 5.46,
      "learning_rate": 2.5215269454432462e-05,
      "loss": 0.6716,
      "step": 1094000
    },
    {
      "epoch": 5.47,
      "learning_rate": 2.5201397212234206e-05,
      "loss": 0.7027,
      "step": 1094500
    },
    {
      "epoch": 5.47,
      "learning_rate": 2.518752497003596e-05,
      "loss": 0.6861,
      "step": 1095000
    },
    {
      "epoch": 5.47,
      "learning_rate": 2.5173652727837703e-05,
      "loss": 0.6899,
      "step": 1095500
    },
    {
      "epoch": 5.47,
      "learning_rate": 2.5159808230123854e-05,
      "loss": 0.6704,
      "step": 1096000
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.51459359879256e-05,
      "loss": 0.6865,
      "step": 1096500
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.513206374572735e-05,
      "loss": 0.6679,
      "step": 1097000
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.5118191503529098e-05,
      "loss": 0.7013,
      "step": 1097500
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.510431926133085e-05,
      "loss": 0.6729,
      "step": 1098000
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.5090474763616996e-05,
      "loss": 0.7199,
      "step": 1098500
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.507660252141874e-05,
      "loss": 0.6902,
      "step": 1099000
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.5062730279220493e-05,
      "loss": 0.6954,
      "step": 1099500
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.5048858037022243e-05,
      "loss": 0.7123,
      "step": 1100000
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.5035013539308387e-05,
      "loss": 0.6847,
      "step": 1100500
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.5021141297110134e-05,
      "loss": 0.694,
      "step": 1101000
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.5007269054911884e-05,
      "loss": 0.685,
      "step": 1101500
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.4993396812713635e-05,
      "loss": 0.694,
      "step": 1102000
    },
    {
      "epoch": 5.51,
      "learning_rate": 2.497955231499978e-05,
      "loss": 0.6854,
      "step": 1102500
    },
    {
      "epoch": 5.51,
      "learning_rate": 2.496568007280153e-05,
      "loss": 0.6932,
      "step": 1103000
    },
    {
      "epoch": 5.51,
      "learning_rate": 2.4951807830603276e-05,
      "loss": 0.6885,
      "step": 1103500
    },
    {
      "epoch": 5.51,
      "learning_rate": 2.4937935588405026e-05,
      "loss": 0.6858,
      "step": 1104000
    },
    {
      "epoch": 5.52,
      "learning_rate": 2.4924063346206773e-05,
      "loss": 0.6915,
      "step": 1104500
    },
    {
      "epoch": 5.52,
      "learning_rate": 2.4910191104008524e-05,
      "loss": 0.6806,
      "step": 1105000
    },
    {
      "epoch": 5.52,
      "learning_rate": 2.489631886181027e-05,
      "loss": 0.7054,
      "step": 1105500
    },
    {
      "epoch": 5.52,
      "learning_rate": 2.488244661961202e-05,
      "loss": 0.6822,
      "step": 1106000
    },
    {
      "epoch": 5.53,
      "learning_rate": 2.4868602121898168e-05,
      "loss": 0.6867,
      "step": 1106500
    },
    {
      "epoch": 5.53,
      "learning_rate": 2.485472987969992e-05,
      "loss": 0.6895,
      "step": 1107000
    },
    {
      "epoch": 5.53,
      "learning_rate": 2.4840857637501665e-05,
      "loss": 0.7178,
      "step": 1107500
    },
    {
      "epoch": 5.53,
      "learning_rate": 2.4826985395303416e-05,
      "loss": 0.6963,
      "step": 1108000
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.4813113153105163e-05,
      "loss": 0.6937,
      "step": 1108500
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.4799240910906913e-05,
      "loss": 0.7112,
      "step": 1109000
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.4785396413193057e-05,
      "loss": 0.6984,
      "step": 1109500
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.4771524170994807e-05,
      "loss": 0.7129,
      "step": 1110000
    },
    {
      "epoch": 5.55,
      "learning_rate": 2.4757651928796558e-05,
      "loss": 0.7168,
      "step": 1110500
    },
    {
      "epoch": 5.55,
      "learning_rate": 2.4743779686598305e-05,
      "loss": 0.6939,
      "step": 1111000
    },
    {
      "epoch": 5.55,
      "learning_rate": 2.4729935188884452e-05,
      "loss": 0.7012,
      "step": 1111500
    },
    {
      "epoch": 5.55,
      "learning_rate": 2.47160629466862e-05,
      "loss": 0.6841,
      "step": 1112000
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.4702218448972346e-05,
      "loss": 0.7118,
      "step": 1112500
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.4688346206774093e-05,
      "loss": 0.696,
      "step": 1113000
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.4674473964575843e-05,
      "loss": 0.7128,
      "step": 1113500
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.4660601722377594e-05,
      "loss": 0.7022,
      "step": 1114000
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.4646757224663737e-05,
      "loss": 0.6931,
      "step": 1114500
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.4632884982465488e-05,
      "loss": 0.6889,
      "step": 1115000
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.4619012740267235e-05,
      "loss": 0.69,
      "step": 1115500
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.4605140498068985e-05,
      "loss": 0.6989,
      "step": 1116000
    },
    {
      "epoch": 5.58,
      "learning_rate": 2.4591268255870732e-05,
      "loss": 0.7188,
      "step": 1116500
    },
    {
      "epoch": 5.58,
      "learning_rate": 2.4577396013672483e-05,
      "loss": 0.6803,
      "step": 1117000
    },
    {
      "epoch": 5.58,
      "learning_rate": 2.456352377147423e-05,
      "loss": 0.6971,
      "step": 1117500
    },
    {
      "epoch": 5.58,
      "learning_rate": 2.454965152927598e-05,
      "loss": 0.7172,
      "step": 1118000
    },
    {
      "epoch": 5.59,
      "learning_rate": 2.453577928707773e-05,
      "loss": 0.6834,
      "step": 1118500
    },
    {
      "epoch": 5.59,
      "learning_rate": 2.4521934789363877e-05,
      "loss": 0.6746,
      "step": 1119000
    },
    {
      "epoch": 5.59,
      "learning_rate": 2.4508062547165624e-05,
      "loss": 0.6798,
      "step": 1119500
    },
    {
      "epoch": 5.59,
      "learning_rate": 2.4494190304967375e-05,
      "loss": 0.68,
      "step": 1120000
    },
    {
      "epoch": 5.6,
      "learning_rate": 2.4480318062769122e-05,
      "loss": 0.6953,
      "step": 1120500
    },
    {
      "epoch": 5.6,
      "learning_rate": 2.4466445820570872e-05,
      "loss": 0.69,
      "step": 1121000
    },
    {
      "epoch": 5.6,
      "learning_rate": 2.445257357837262e-05,
      "loss": 0.6992,
      "step": 1121500
    },
    {
      "epoch": 5.6,
      "learning_rate": 2.4438729080658766e-05,
      "loss": 0.6962,
      "step": 1122000
    },
    {
      "epoch": 5.61,
      "learning_rate": 2.4424884582944913e-05,
      "loss": 0.6951,
      "step": 1122500
    },
    {
      "epoch": 5.61,
      "learning_rate": 2.441101234074666e-05,
      "loss": 0.6832,
      "step": 1123000
    },
    {
      "epoch": 5.61,
      "learning_rate": 2.439714009854841e-05,
      "loss": 0.6824,
      "step": 1123500
    },
    {
      "epoch": 5.61,
      "learning_rate": 2.4383267856350158e-05,
      "loss": 0.6925,
      "step": 1124000
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.4369395614151908e-05,
      "loss": 0.6901,
      "step": 1124500
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.4355523371953655e-05,
      "loss": 0.6721,
      "step": 1125000
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.4341651129755405e-05,
      "loss": 0.7094,
      "step": 1125500
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.4327778887557152e-05,
      "loss": 0.7232,
      "step": 1126000
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.4313906645358903e-05,
      "loss": 0.7023,
      "step": 1126500
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.430006214764505e-05,
      "loss": 0.7102,
      "step": 1127000
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.42861899054468e-05,
      "loss": 0.6914,
      "step": 1127500
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.4272317663248547e-05,
      "loss": 0.6917,
      "step": 1128000
    },
    {
      "epoch": 5.64,
      "learning_rate": 2.4258445421050298e-05,
      "loss": 0.7015,
      "step": 1128500
    },
    {
      "epoch": 5.64,
      "learning_rate": 2.4244573178852045e-05,
      "loss": 0.7173,
      "step": 1129000
    },
    {
      "epoch": 5.64,
      "learning_rate": 2.4230756425622585e-05,
      "loss": 0.7163,
      "step": 1129500
    },
    {
      "epoch": 5.64,
      "learning_rate": 2.421688418342434e-05,
      "loss": 0.7092,
      "step": 1130000
    },
    {
      "epoch": 5.65,
      "learning_rate": 2.4203011941226086e-05,
      "loss": 0.703,
      "step": 1130500
    },
    {
      "epoch": 5.65,
      "learning_rate": 2.4189139699027836e-05,
      "loss": 0.7087,
      "step": 1131000
    },
    {
      "epoch": 5.65,
      "learning_rate": 2.4175267456829583e-05,
      "loss": 0.7269,
      "step": 1131500
    },
    {
      "epoch": 5.65,
      "learning_rate": 2.4161422959115727e-05,
      "loss": 0.7057,
      "step": 1132000
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.4147550716917477e-05,
      "loss": 0.6838,
      "step": 1132500
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.4133678474719228e-05,
      "loss": 0.7046,
      "step": 1133000
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.4119806232520975e-05,
      "loss": 0.693,
      "step": 1133500
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.4105933990322725e-05,
      "loss": 0.7057,
      "step": 1134000
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.4092061748124476e-05,
      "loss": 0.6724,
      "step": 1134500
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.4078189505926223e-05,
      "loss": 0.7014,
      "step": 1135000
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.4064317263727973e-05,
      "loss": 0.7044,
      "step": 1135500
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.4050445021529723e-05,
      "loss": 0.6895,
      "step": 1136000
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.4036600523815867e-05,
      "loss": 0.7006,
      "step": 1136500
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.4022728281617614e-05,
      "loss": 0.6986,
      "step": 1137000
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.4008856039419364e-05,
      "loss": 0.7078,
      "step": 1137500
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.399498379722111e-05,
      "loss": 0.6816,
      "step": 1138000
    },
    {
      "epoch": 5.69,
      "learning_rate": 2.3981111555022862e-05,
      "loss": 0.6973,
      "step": 1138500
    },
    {
      "epoch": 5.69,
      "learning_rate": 2.396726705730901e-05,
      "loss": 0.7052,
      "step": 1139000
    },
    {
      "epoch": 5.69,
      "learning_rate": 2.395339481511076e-05,
      "loss": 0.6989,
      "step": 1139500
    },
    {
      "epoch": 5.69,
      "learning_rate": 2.3939522572912506e-05,
      "loss": 0.7015,
      "step": 1140000
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.3925650330714257e-05,
      "loss": 0.6972,
      "step": 1140500
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.39118058330004e-05,
      "loss": 0.6779,
      "step": 1141000
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.3897933590802147e-05,
      "loss": 0.7035,
      "step": 1141500
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.3884061348603898e-05,
      "loss": 0.7086,
      "step": 1142000
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.3870189106405648e-05,
      "loss": 0.6784,
      "step": 1142500
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.3856344608691795e-05,
      "loss": 0.7053,
      "step": 1143000
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.3842472366493542e-05,
      "loss": 0.685,
      "step": 1143500
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.3828627868779686e-05,
      "loss": 0.7098,
      "step": 1144000
    },
    {
      "epoch": 5.72,
      "learning_rate": 2.3814755626581436e-05,
      "loss": 0.7145,
      "step": 1144500
    },
    {
      "epoch": 5.72,
      "learning_rate": 2.3800883384383187e-05,
      "loss": 0.6975,
      "step": 1145000
    },
    {
      "epoch": 5.72,
      "learning_rate": 2.3787011142184934e-05,
      "loss": 0.6805,
      "step": 1145500
    },
    {
      "epoch": 5.72,
      "learning_rate": 2.3773138899986684e-05,
      "loss": 0.7007,
      "step": 1146000
    },
    {
      "epoch": 5.73,
      "learning_rate": 2.375926665778843e-05,
      "loss": 0.6947,
      "step": 1146500
    },
    {
      "epoch": 5.73,
      "learning_rate": 2.374539441559018e-05,
      "loss": 0.684,
      "step": 1147000
    },
    {
      "epoch": 5.73,
      "learning_rate": 2.3731522173391932e-05,
      "loss": 0.6988,
      "step": 1147500
    },
    {
      "epoch": 5.73,
      "learning_rate": 2.3717649931193682e-05,
      "loss": 0.7208,
      "step": 1148000
    },
    {
      "epoch": 5.74,
      "learning_rate": 2.370377768899543e-05,
      "loss": 0.668,
      "step": 1148500
    },
    {
      "epoch": 5.74,
      "learning_rate": 2.3689933191281573e-05,
      "loss": 0.6789,
      "step": 1149000
    },
    {
      "epoch": 5.74,
      "learning_rate": 2.3676060949083323e-05,
      "loss": 0.7617,
      "step": 1149500
    },
    {
      "epoch": 5.74,
      "learning_rate": 2.366218870688507e-05,
      "loss": 0.6686,
      "step": 1150000
    },
    {
      "epoch": 5.75,
      "learning_rate": 2.364831646468682e-05,
      "loss": 0.7141,
      "step": 1150500
    },
    {
      "epoch": 5.75,
      "learning_rate": 2.3634444222488568e-05,
      "loss": 0.7181,
      "step": 1151000
    },
    {
      "epoch": 5.75,
      "learning_rate": 2.3620599724774718e-05,
      "loss": 0.725,
      "step": 1151500
    },
    {
      "epoch": 5.75,
      "learning_rate": 2.3606727482576465e-05,
      "loss": 0.692,
      "step": 1152000
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.3592855240378216e-05,
      "loss": 0.7108,
      "step": 1152500
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.3578982998179963e-05,
      "loss": 0.7031,
      "step": 1153000
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.3565138500466106e-05,
      "loss": 0.7001,
      "step": 1153500
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.3551266258267857e-05,
      "loss": 0.6714,
      "step": 1154000
    },
    {
      "epoch": 5.77,
      "learning_rate": 2.3537394016069607e-05,
      "loss": 0.6984,
      "step": 1154500
    },
    {
      "epoch": 5.77,
      "learning_rate": 2.3523521773871354e-05,
      "loss": 0.6968,
      "step": 1155000
    },
    {
      "epoch": 5.77,
      "learning_rate": 2.35096772761575e-05,
      "loss": 0.7061,
      "step": 1155500
    },
    {
      "epoch": 5.77,
      "learning_rate": 2.349580503395925e-05,
      "loss": 0.6982,
      "step": 1156000
    },
    {
      "epoch": 5.78,
      "learning_rate": 2.3481932791761e-05,
      "loss": 0.6999,
      "step": 1156500
    },
    {
      "epoch": 5.78,
      "learning_rate": 2.346806054956275e-05,
      "loss": 0.6957,
      "step": 1157000
    },
    {
      "epoch": 5.78,
      "learning_rate": 2.3454188307364496e-05,
      "loss": 0.6989,
      "step": 1157500
    },
    {
      "epoch": 5.78,
      "learning_rate": 2.3440343809650643e-05,
      "loss": 0.6963,
      "step": 1158000
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.342647156745239e-05,
      "loss": 0.7185,
      "step": 1158500
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.341259932525414e-05,
      "loss": 0.7058,
      "step": 1159000
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.339872708305589e-05,
      "loss": 0.6936,
      "step": 1159500
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.338485484085764e-05,
      "loss": 0.7033,
      "step": 1160000
    },
    {
      "epoch": 5.8,
      "learning_rate": 2.3371010343143785e-05,
      "loss": 0.6936,
      "step": 1160500
    },
    {
      "epoch": 5.8,
      "learning_rate": 2.3357138100945532e-05,
      "loss": 0.6856,
      "step": 1161000
    },
    {
      "epoch": 5.8,
      "learning_rate": 2.3343265858747282e-05,
      "loss": 0.6963,
      "step": 1161500
    },
    {
      "epoch": 5.8,
      "learning_rate": 2.332939361654903e-05,
      "loss": 0.6948,
      "step": 1162000
    },
    {
      "epoch": 5.81,
      "learning_rate": 2.3315549118835176e-05,
      "loss": 0.6988,
      "step": 1162500
    },
    {
      "epoch": 5.81,
      "learning_rate": 2.3301676876636927e-05,
      "loss": 0.6768,
      "step": 1163000
    },
    {
      "epoch": 5.81,
      "learning_rate": 2.3287804634438677e-05,
      "loss": 0.7068,
      "step": 1163500
    },
    {
      "epoch": 5.81,
      "learning_rate": 2.3273932392240424e-05,
      "loss": 0.6693,
      "step": 1164000
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.3260060150042175e-05,
      "loss": 0.7287,
      "step": 1164500
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.324618790784392e-05,
      "loss": 0.7204,
      "step": 1165000
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.3232315665645672e-05,
      "loss": 0.7019,
      "step": 1165500
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.321844342344742e-05,
      "loss": 0.7233,
      "step": 1166000
    },
    {
      "epoch": 5.83,
      "learning_rate": 2.3204598925733566e-05,
      "loss": 0.7207,
      "step": 1166500
    },
    {
      "epoch": 5.83,
      "learning_rate": 2.3190726683535313e-05,
      "loss": 0.713,
      "step": 1167000
    },
    {
      "epoch": 5.83,
      "learning_rate": 2.3176854441337063e-05,
      "loss": 0.6861,
      "step": 1167500
    },
    {
      "epoch": 5.83,
      "learning_rate": 2.3162982199138814e-05,
      "loss": 0.6947,
      "step": 1168000
    },
    {
      "epoch": 5.84,
      "learning_rate": 2.314910995694056e-05,
      "loss": 0.713,
      "step": 1168500
    },
    {
      "epoch": 5.84,
      "learning_rate": 2.3135265459226708e-05,
      "loss": 0.7096,
      "step": 1169000
    },
    {
      "epoch": 5.84,
      "learning_rate": 2.3121393217028455e-05,
      "loss": 0.7055,
      "step": 1169500
    },
    {
      "epoch": 5.84,
      "learning_rate": 2.3107520974830205e-05,
      "loss": 0.7126,
      "step": 1170000
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.3093648732631952e-05,
      "loss": 0.6992,
      "step": 1170500
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.3079776490433703e-05,
      "loss": 0.7107,
      "step": 1171000
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.306593199271985e-05,
      "loss": 0.6998,
      "step": 1171500
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.30520597505216e-05,
      "loss": 0.7152,
      "step": 1172000
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.3038187508323347e-05,
      "loss": 0.7175,
      "step": 1172500
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.3024315266125097e-05,
      "loss": 0.7193,
      "step": 1173000
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.301047076841124e-05,
      "loss": 0.6747,
      "step": 1173500
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.2996598526212988e-05,
      "loss": 0.7042,
      "step": 1174000
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.298272628401474e-05,
      "loss": 0.6944,
      "step": 1174500
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.2968854041816486e-05,
      "loss": 0.6973,
      "step": 1175000
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.2954981799618236e-05,
      "loss": 0.7022,
      "step": 1175500
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.2941137301904383e-05,
      "loss": 0.7098,
      "step": 1176000
    },
    {
      "epoch": 5.88,
      "learning_rate": 2.2927265059706134e-05,
      "loss": 0.6937,
      "step": 1176500
    },
    {
      "epoch": 5.88,
      "learning_rate": 2.291339281750788e-05,
      "loss": 0.7048,
      "step": 1177000
    },
    {
      "epoch": 5.88,
      "learning_rate": 2.289952057530963e-05,
      "loss": 0.7204,
      "step": 1177500
    },
    {
      "epoch": 5.88,
      "learning_rate": 2.2885676077595775e-05,
      "loss": 0.7185,
      "step": 1178000
    },
    {
      "epoch": 5.89,
      "learning_rate": 2.2871803835397525e-05,
      "loss": 0.7044,
      "step": 1178500
    },
    {
      "epoch": 5.89,
      "learning_rate": 2.2857931593199272e-05,
      "loss": 0.7265,
      "step": 1179000
    },
    {
      "epoch": 5.89,
      "learning_rate": 2.284408709548542e-05,
      "loss": 0.7075,
      "step": 1179500
    },
    {
      "epoch": 5.89,
      "learning_rate": 2.283021485328717e-05,
      "loss": 0.6978,
      "step": 1180000
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.2816342611088917e-05,
      "loss": 0.6895,
      "step": 1180500
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.2802470368890667e-05,
      "loss": 0.7285,
      "step": 1181000
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.2788598126692414e-05,
      "loss": 0.722,
      "step": 1181500
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.2774725884494164e-05,
      "loss": 0.7016,
      "step": 1182000
    },
    {
      "epoch": 5.91,
      "learning_rate": 2.276085364229591e-05,
      "loss": 0.7205,
      "step": 1182500
    },
    {
      "epoch": 5.91,
      "learning_rate": 2.274698140009766e-05,
      "loss": 0.7179,
      "step": 1183000
    },
    {
      "epoch": 5.91,
      "learning_rate": 2.273310915789941e-05,
      "loss": 0.6987,
      "step": 1183500
    },
    {
      "epoch": 5.91,
      "learning_rate": 2.271926466018556e-05,
      "loss": 0.7147,
      "step": 1184000
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.2705392417987306e-05,
      "loss": 0.711,
      "step": 1184500
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.2691520175789056e-05,
      "loss": 0.7176,
      "step": 1185000
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.2677647933590803e-05,
      "loss": 0.6923,
      "step": 1185500
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.2663775691392554e-05,
      "loss": 0.6943,
      "step": 1186000
    },
    {
      "epoch": 5.93,
      "learning_rate": 2.2649931193678698e-05,
      "loss": 0.6998,
      "step": 1186500
    },
    {
      "epoch": 5.93,
      "learning_rate": 2.2636058951480445e-05,
      "loss": 0.6999,
      "step": 1187000
    },
    {
      "epoch": 5.93,
      "learning_rate": 2.2622186709282195e-05,
      "loss": 0.6929,
      "step": 1187500
    },
    {
      "epoch": 5.93,
      "learning_rate": 2.2608314467083945e-05,
      "loss": 0.6907,
      "step": 1188000
    },
    {
      "epoch": 5.94,
      "learning_rate": 2.2594469969370092e-05,
      "loss": 0.7042,
      "step": 1188500
    },
    {
      "epoch": 5.94,
      "learning_rate": 2.258059772717184e-05,
      "loss": 0.669,
      "step": 1189000
    },
    {
      "epoch": 5.94,
      "learning_rate": 2.256672548497359e-05,
      "loss": 0.7111,
      "step": 1189500
    },
    {
      "epoch": 5.94,
      "learning_rate": 2.2552880987259734e-05,
      "loss": 0.7415,
      "step": 1190000
    },
    {
      "epoch": 5.95,
      "learning_rate": 2.2539008745061484e-05,
      "loss": 0.7129,
      "step": 1190500
    },
    {
      "epoch": 5.95,
      "learning_rate": 2.252513650286323e-05,
      "loss": 0.6936,
      "step": 1191000
    },
    {
      "epoch": 5.95,
      "learning_rate": 2.251126426066498e-05,
      "loss": 0.7028,
      "step": 1191500
    },
    {
      "epoch": 5.95,
      "learning_rate": 2.2497392018466728e-05,
      "loss": 0.7394,
      "step": 1192000
    },
    {
      "epoch": 5.96,
      "learning_rate": 2.248351977626848e-05,
      "loss": 0.6914,
      "step": 1192500
    },
    {
      "epoch": 5.96,
      "learning_rate": 2.246964753407023e-05,
      "loss": 0.6946,
      "step": 1193000
    },
    {
      "epoch": 5.96,
      "learning_rate": 2.245577529187198e-05,
      "loss": 0.7232,
      "step": 1193500
    },
    {
      "epoch": 5.96,
      "learning_rate": 2.2441903049673726e-05,
      "loss": 0.7171,
      "step": 1194000
    },
    {
      "epoch": 5.97,
      "learning_rate": 2.242805855195987e-05,
      "loss": 0.6945,
      "step": 1194500
    },
    {
      "epoch": 5.97,
      "learning_rate": 2.241418630976162e-05,
      "loss": 0.6837,
      "step": 1195000
    },
    {
      "epoch": 5.97,
      "learning_rate": 2.2400314067563367e-05,
      "loss": 0.7063,
      "step": 1195500
    },
    {
      "epoch": 5.97,
      "learning_rate": 2.2386441825365118e-05,
      "loss": 0.6954,
      "step": 1196000
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.2372569583166865e-05,
      "loss": 0.7113,
      "step": 1196500
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.2358725085453015e-05,
      "loss": 0.6897,
      "step": 1197000
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.2344852843254762e-05,
      "loss": 0.7058,
      "step": 1197500
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.2330980601056513e-05,
      "loss": 0.7125,
      "step": 1198000
    },
    {
      "epoch": 5.99,
      "learning_rate": 2.231710835885826e-05,
      "loss": 0.6993,
      "step": 1198500
    },
    {
      "epoch": 5.99,
      "learning_rate": 2.230323611666001e-05,
      "loss": 0.7013,
      "step": 1199000
    },
    {
      "epoch": 5.99,
      "learning_rate": 2.2289391618946154e-05,
      "loss": 0.7146,
      "step": 1199500
    },
    {
      "epoch": 5.99,
      "learning_rate": 2.2275519376747904e-05,
      "loss": 0.6979,
      "step": 1200000
    },
    {
      "epoch": 6.0,
      "learning_rate": 2.226164713454965e-05,
      "loss": 0.706,
      "step": 1200500
    },
    {
      "epoch": 6.0,
      "learning_rate": 2.22477748923514e-05,
      "loss": 0.6788,
      "step": 1201000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.0897212028503418,
      "eval_runtime": 1604.2722,
      "eval_samples_per_second": 72.617,
      "eval_steps_per_second": 18.155,
      "step": 1201440
    },
    {
      "epoch": 6.0,
      "learning_rate": 2.2233902650153152e-05,
      "loss": 0.6831,
      "step": 1201500
    },
    {
      "epoch": 6.0,
      "learning_rate": 2.2220058152439296e-05,
      "loss": 0.5066,
      "step": 1202000
    },
    {
      "epoch": 6.01,
      "learning_rate": 2.2206185910241046e-05,
      "loss": 0.4997,
      "step": 1202500
    },
    {
      "epoch": 6.01,
      "learning_rate": 2.2192313668042793e-05,
      "loss": 0.4977,
      "step": 1203000
    },
    {
      "epoch": 6.01,
      "learning_rate": 2.2178441425844543e-05,
      "loss": 0.5086,
      "step": 1203500
    },
    {
      "epoch": 6.01,
      "learning_rate": 2.216456918364629e-05,
      "loss": 0.5177,
      "step": 1204000
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.2150724685932438e-05,
      "loss": 0.5094,
      "step": 1204500
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.2136852443734188e-05,
      "loss": 0.5072,
      "step": 1205000
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.2122980201535935e-05,
      "loss": 0.5035,
      "step": 1205500
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.2109107959337685e-05,
      "loss": 0.525,
      "step": 1206000
    },
    {
      "epoch": 6.03,
      "learning_rate": 2.2095235717139436e-05,
      "loss": 0.5185,
      "step": 1206500
    },
    {
      "epoch": 6.03,
      "learning_rate": 2.2081363474941183e-05,
      "loss": 0.5157,
      "step": 1207000
    },
    {
      "epoch": 6.03,
      "learning_rate": 2.2067518977227326e-05,
      "loss": 0.5108,
      "step": 1207500
    },
    {
      "epoch": 6.03,
      "learning_rate": 2.2053646735029077e-05,
      "loss": 0.5272,
      "step": 1208000
    },
    {
      "epoch": 6.04,
      "learning_rate": 2.2039774492830824e-05,
      "loss": 0.5243,
      "step": 1208500
    },
    {
      "epoch": 6.04,
      "learning_rate": 2.2025902250632574e-05,
      "loss": 0.5195,
      "step": 1209000
    },
    {
      "epoch": 6.04,
      "learning_rate": 2.2012030008434325e-05,
      "loss": 0.4861,
      "step": 1209500
    },
    {
      "epoch": 6.04,
      "learning_rate": 2.1998157766236075e-05,
      "loss": 0.5057,
      "step": 1210000
    },
    {
      "epoch": 6.05,
      "learning_rate": 2.198431326852222e-05,
      "loss": 0.5024,
      "step": 1210500
    },
    {
      "epoch": 6.05,
      "learning_rate": 2.197044102632397e-05,
      "loss": 0.5167,
      "step": 1211000
    },
    {
      "epoch": 6.05,
      "learning_rate": 2.1956568784125716e-05,
      "loss": 0.531,
      "step": 1211500
    },
    {
      "epoch": 6.05,
      "learning_rate": 2.1942696541927466e-05,
      "loss": 0.5482,
      "step": 1212000
    },
    {
      "epoch": 6.06,
      "learning_rate": 2.1928824299729213e-05,
      "loss": 0.5323,
      "step": 1212500
    },
    {
      "epoch": 6.06,
      "learning_rate": 2.191497980201536e-05,
      "loss": 0.5213,
      "step": 1213000
    },
    {
      "epoch": 6.06,
      "learning_rate": 2.190110755981711e-05,
      "loss": 0.5123,
      "step": 1213500
    },
    {
      "epoch": 6.06,
      "learning_rate": 2.1887235317618858e-05,
      "loss": 0.5334,
      "step": 1214000
    },
    {
      "epoch": 6.07,
      "learning_rate": 2.1873363075420608e-05,
      "loss": 0.5026,
      "step": 1214500
    },
    {
      "epoch": 6.07,
      "learning_rate": 2.1859518577706752e-05,
      "loss": 0.5411,
      "step": 1215000
    },
    {
      "epoch": 6.07,
      "learning_rate": 2.1845646335508502e-05,
      "loss": 0.5411,
      "step": 1215500
    },
    {
      "epoch": 6.07,
      "learning_rate": 2.183177409331025e-05,
      "loss": 0.5232,
      "step": 1216000
    },
    {
      "epoch": 6.08,
      "learning_rate": 2.1817901851112e-05,
      "loss": 0.5121,
      "step": 1216500
    },
    {
      "epoch": 6.08,
      "learning_rate": 2.1804029608913747e-05,
      "loss": 0.5203,
      "step": 1217000
    },
    {
      "epoch": 6.08,
      "learning_rate": 2.1790157366715497e-05,
      "loss": 0.5271,
      "step": 1217500
    },
    {
      "epoch": 6.08,
      "learning_rate": 2.1776312869001644e-05,
      "loss": 0.5177,
      "step": 1218000
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.1762468371287788e-05,
      "loss": 0.5266,
      "step": 1218500
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.174859612908954e-05,
      "loss": 0.5187,
      "step": 1219000
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.1734723886891285e-05,
      "loss": 0.5101,
      "step": 1219500
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.1720851644693036e-05,
      "loss": 0.5489,
      "step": 1220000
    },
    {
      "epoch": 6.1,
      "learning_rate": 2.1706979402494783e-05,
      "loss": 0.5355,
      "step": 1220500
    },
    {
      "epoch": 6.1,
      "learning_rate": 2.1693107160296533e-05,
      "loss": 0.5364,
      "step": 1221000
    },
    {
      "epoch": 6.1,
      "learning_rate": 2.167923491809828e-05,
      "loss": 0.527,
      "step": 1221500
    },
    {
      "epoch": 6.1,
      "learning_rate": 2.1665362675900034e-05,
      "loss": 0.5316,
      "step": 1222000
    },
    {
      "epoch": 6.11,
      "learning_rate": 2.165149043370178e-05,
      "loss": 0.5297,
      "step": 1222500
    },
    {
      "epoch": 6.11,
      "learning_rate": 2.163761819150353e-05,
      "loss": 0.5215,
      "step": 1223000
    },
    {
      "epoch": 6.11,
      "learning_rate": 2.1623773693789675e-05,
      "loss": 0.5167,
      "step": 1223500
    },
    {
      "epoch": 6.11,
      "learning_rate": 2.1609901451591425e-05,
      "loss": 0.5123,
      "step": 1224000
    },
    {
      "epoch": 6.12,
      "learning_rate": 2.1596029209393172e-05,
      "loss": 0.516,
      "step": 1224500
    },
    {
      "epoch": 6.12,
      "learning_rate": 2.1582156967194923e-05,
      "loss": 0.5268,
      "step": 1225000
    },
    {
      "epoch": 6.12,
      "learning_rate": 2.156831246948107e-05,
      "loss": 0.5465,
      "step": 1225500
    },
    {
      "epoch": 6.12,
      "learning_rate": 2.1554440227282817e-05,
      "loss": 0.5203,
      "step": 1226000
    },
    {
      "epoch": 6.13,
      "learning_rate": 2.1540567985084567e-05,
      "loss": 0.5247,
      "step": 1226500
    },
    {
      "epoch": 6.13,
      "learning_rate": 2.152672348737071e-05,
      "loss": 0.5237,
      "step": 1227000
    },
    {
      "epoch": 6.13,
      "learning_rate": 2.151285124517246e-05,
      "loss": 0.5375,
      "step": 1227500
    },
    {
      "epoch": 6.13,
      "learning_rate": 2.149897900297421e-05,
      "loss": 0.5292,
      "step": 1228000
    },
    {
      "epoch": 6.14,
      "learning_rate": 2.148510676077596e-05,
      "loss": 0.5275,
      "step": 1228500
    },
    {
      "epoch": 6.14,
      "learning_rate": 2.1471234518577706e-05,
      "loss": 0.5266,
      "step": 1229000
    },
    {
      "epoch": 6.14,
      "learning_rate": 2.1457362276379456e-05,
      "loss": 0.5341,
      "step": 1229500
    },
    {
      "epoch": 6.14,
      "learning_rate": 2.1443490034181203e-05,
      "loss": 0.5186,
      "step": 1230000
    },
    {
      "epoch": 6.15,
      "learning_rate": 2.1429617791982957e-05,
      "loss": 0.534,
      "step": 1230500
    },
    {
      "epoch": 6.15,
      "learning_rate": 2.1415745549784704e-05,
      "loss": 0.5274,
      "step": 1231000
    },
    {
      "epoch": 6.15,
      "learning_rate": 2.1401873307586454e-05,
      "loss": 0.5256,
      "step": 1231500
    },
    {
      "epoch": 6.15,
      "learning_rate": 2.1388028809872598e-05,
      "loss": 0.5363,
      "step": 1232000
    },
    {
      "epoch": 6.16,
      "learning_rate": 2.1374156567674348e-05,
      "loss": 0.5345,
      "step": 1232500
    },
    {
      "epoch": 6.16,
      "learning_rate": 2.1360284325476095e-05,
      "loss": 0.5431,
      "step": 1233000
    },
    {
      "epoch": 6.16,
      "learning_rate": 2.1346412083277846e-05,
      "loss": 0.5502,
      "step": 1233500
    },
    {
      "epoch": 6.16,
      "learning_rate": 2.1332539841079593e-05,
      "loss": 0.5338,
      "step": 1234000
    },
    {
      "epoch": 6.17,
      "learning_rate": 2.131869534336574e-05,
      "loss": 0.5339,
      "step": 1234500
    },
    {
      "epoch": 6.17,
      "learning_rate": 2.130482310116749e-05,
      "loss": 0.5585,
      "step": 1235000
    },
    {
      "epoch": 6.17,
      "learning_rate": 2.1290950858969237e-05,
      "loss": 0.5193,
      "step": 1235500
    },
    {
      "epoch": 6.17,
      "learning_rate": 2.1277078616770987e-05,
      "loss": 0.5353,
      "step": 1236000
    },
    {
      "epoch": 6.18,
      "learning_rate": 2.1263206374572734e-05,
      "loss": 0.5415,
      "step": 1236500
    },
    {
      "epoch": 6.18,
      "learning_rate": 2.124936187685888e-05,
      "loss": 0.5359,
      "step": 1237000
    },
    {
      "epoch": 6.18,
      "learning_rate": 2.123548963466063e-05,
      "loss": 0.5087,
      "step": 1237500
    },
    {
      "epoch": 6.18,
      "learning_rate": 2.122161739246238e-05,
      "loss": 0.5228,
      "step": 1238000
    },
    {
      "epoch": 6.19,
      "learning_rate": 2.1207745150264126e-05,
      "loss": 0.5417,
      "step": 1238500
    },
    {
      "epoch": 6.19,
      "learning_rate": 2.1193900652550273e-05,
      "loss": 0.5493,
      "step": 1239000
    },
    {
      "epoch": 6.19,
      "learning_rate": 2.1180028410352024e-05,
      "loss": 0.5399,
      "step": 1239500
    },
    {
      "epoch": 6.19,
      "learning_rate": 2.1166156168153774e-05,
      "loss": 0.5415,
      "step": 1240000
    },
    {
      "epoch": 6.2,
      "learning_rate": 2.1152311670439918e-05,
      "loss": 0.5546,
      "step": 1240500
    },
    {
      "epoch": 6.2,
      "learning_rate": 2.1138439428241665e-05,
      "loss": 0.5364,
      "step": 1241000
    },
    {
      "epoch": 6.2,
      "learning_rate": 2.1124567186043415e-05,
      "loss": 0.5497,
      "step": 1241500
    },
    {
      "epoch": 6.2,
      "learning_rate": 2.1110694943845162e-05,
      "loss": 0.5603,
      "step": 1242000
    },
    {
      "epoch": 6.21,
      "learning_rate": 2.1096822701646916e-05,
      "loss": 0.5277,
      "step": 1242500
    },
    {
      "epoch": 6.21,
      "learning_rate": 2.1082950459448663e-05,
      "loss": 0.5393,
      "step": 1243000
    },
    {
      "epoch": 6.21,
      "learning_rate": 2.1069078217250413e-05,
      "loss": 0.533,
      "step": 1243500
    },
    {
      "epoch": 6.21,
      "learning_rate": 2.105520597505216e-05,
      "loss": 0.5361,
      "step": 1244000
    },
    {
      "epoch": 6.22,
      "learning_rate": 2.1041361477338307e-05,
      "loss": 0.5451,
      "step": 1244500
    },
    {
      "epoch": 6.22,
      "learning_rate": 2.1027489235140054e-05,
      "loss": 0.5418,
      "step": 1245000
    },
    {
      "epoch": 6.22,
      "learning_rate": 2.1013616992941805e-05,
      "loss": 0.5393,
      "step": 1245500
    },
    {
      "epoch": 6.22,
      "learning_rate": 2.099974475074355e-05,
      "loss": 0.529,
      "step": 1246000
    },
    {
      "epoch": 6.23,
      "learning_rate": 2.0985872508545302e-05,
      "loss": 0.5508,
      "step": 1246500
    },
    {
      "epoch": 6.23,
      "learning_rate": 2.097200026634705e-05,
      "loss": 0.5339,
      "step": 1247000
    },
    {
      "epoch": 6.23,
      "learning_rate": 2.0958155768633196e-05,
      "loss": 0.5353,
      "step": 1247500
    },
    {
      "epoch": 6.23,
      "learning_rate": 2.0944283526434946e-05,
      "loss": 0.5449,
      "step": 1248000
    },
    {
      "epoch": 6.24,
      "learning_rate": 2.0930411284236693e-05,
      "loss": 0.5558,
      "step": 1248500
    },
    {
      "epoch": 6.24,
      "learning_rate": 2.0916539042038444e-05,
      "loss": 0.5423,
      "step": 1249000
    },
    {
      "epoch": 6.24,
      "learning_rate": 2.0902666799840194e-05,
      "loss": 0.5574,
      "step": 1249500
    },
    {
      "epoch": 6.24,
      "learning_rate": 2.088879455764194e-05,
      "loss": 0.5133,
      "step": 1250000
    },
    {
      "epoch": 6.25,
      "learning_rate": 2.0874950059928085e-05,
      "loss": 0.5425,
      "step": 1250500
    },
    {
      "epoch": 6.25,
      "learning_rate": 2.0861077817729835e-05,
      "loss": 0.5354,
      "step": 1251000
    },
    {
      "epoch": 6.25,
      "learning_rate": 2.0847205575531586e-05,
      "loss": 0.5418,
      "step": 1251500
    },
    {
      "epoch": 6.25,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.5647,
      "step": 1252000
    },
    {
      "epoch": 6.25,
      "learning_rate": 2.081948883561948e-05,
      "loss": 0.5493,
      "step": 1252500
    },
    {
      "epoch": 6.26,
      "learning_rate": 2.080561659342123e-05,
      "loss": 0.5407,
      "step": 1253000
    },
    {
      "epoch": 6.26,
      "learning_rate": 2.0791744351222977e-05,
      "loss": 0.5144,
      "step": 1253500
    },
    {
      "epoch": 6.26,
      "learning_rate": 2.0777872109024728e-05,
      "loss": 0.5471,
      "step": 1254000
    },
    {
      "epoch": 6.26,
      "learning_rate": 2.076402761131087e-05,
      "loss": 0.5505,
      "step": 1254500
    },
    {
      "epoch": 6.27,
      "learning_rate": 2.075015536911262e-05,
      "loss": 0.5657,
      "step": 1255000
    },
    {
      "epoch": 6.27,
      "learning_rate": 2.0736283126914372e-05,
      "loss": 0.5383,
      "step": 1255500
    },
    {
      "epoch": 6.27,
      "learning_rate": 2.072241088471612e-05,
      "loss": 0.5396,
      "step": 1256000
    },
    {
      "epoch": 6.27,
      "learning_rate": 2.070853864251787e-05,
      "loss": 0.5492,
      "step": 1256500
    },
    {
      "epoch": 6.28,
      "learning_rate": 2.069472188928841e-05,
      "loss": 0.5661,
      "step": 1257000
    },
    {
      "epoch": 6.28,
      "learning_rate": 2.0680849647090157e-05,
      "loss": 0.5766,
      "step": 1257500
    },
    {
      "epoch": 6.28,
      "learning_rate": 2.0666977404891907e-05,
      "loss": 0.5518,
      "step": 1258000
    },
    {
      "epoch": 6.28,
      "learning_rate": 2.0653105162693658e-05,
      "loss": 0.5552,
      "step": 1258500
    },
    {
      "epoch": 6.29,
      "learning_rate": 2.0639232920495408e-05,
      "loss": 0.5435,
      "step": 1259000
    },
    {
      "epoch": 6.29,
      "learning_rate": 2.0625360678297155e-05,
      "loss": 0.5341,
      "step": 1259500
    },
    {
      "epoch": 6.29,
      "learning_rate": 2.0611516180583302e-05,
      "loss": 0.5648,
      "step": 1260000
    },
    {
      "epoch": 6.29,
      "learning_rate": 2.059764393838505e-05,
      "loss": 0.5463,
      "step": 1260500
    },
    {
      "epoch": 6.3,
      "learning_rate": 2.05837716961868e-05,
      "loss": 0.5435,
      "step": 1261000
    },
    {
      "epoch": 6.3,
      "learning_rate": 2.0569899453988547e-05,
      "loss": 0.536,
      "step": 1261500
    },
    {
      "epoch": 6.3,
      "learning_rate": 2.0556027211790297e-05,
      "loss": 0.5519,
      "step": 1262000
    },
    {
      "epoch": 6.3,
      "learning_rate": 2.0542154969592044e-05,
      "loss": 0.5396,
      "step": 1262500
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.0528282727393794e-05,
      "loss": 0.5434,
      "step": 1263000
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.0514410485195545e-05,
      "loss": 0.5501,
      "step": 1263500
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.0500538242997295e-05,
      "loss": 0.5544,
      "step": 1264000
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.0486666000799042e-05,
      "loss": 0.5446,
      "step": 1264500
    },
    {
      "epoch": 6.32,
      "learning_rate": 2.047282150308519e-05,
      "loss": 0.5444,
      "step": 1265000
    },
    {
      "epoch": 6.32,
      "learning_rate": 2.0458949260886936e-05,
      "loss": 0.5597,
      "step": 1265500
    },
    {
      "epoch": 6.32,
      "learning_rate": 2.0445077018688686e-05,
      "loss": 0.5755,
      "step": 1266000
    },
    {
      "epoch": 6.32,
      "learning_rate": 2.0431204776490433e-05,
      "loss": 0.5573,
      "step": 1266500
    },
    {
      "epoch": 6.33,
      "learning_rate": 2.0417332534292184e-05,
      "loss": 0.5637,
      "step": 1267000
    },
    {
      "epoch": 6.33,
      "learning_rate": 2.040346029209393e-05,
      "loss": 0.5327,
      "step": 1267500
    },
    {
      "epoch": 6.33,
      "learning_rate": 2.038958804989568e-05,
      "loss": 0.5528,
      "step": 1268000
    },
    {
      "epoch": 6.33,
      "learning_rate": 2.037571580769743e-05,
      "loss": 0.5505,
      "step": 1268500
    },
    {
      "epoch": 6.34,
      "learning_rate": 2.0361899054467972e-05,
      "loss": 0.5552,
      "step": 1269000
    },
    {
      "epoch": 6.34,
      "learning_rate": 2.0348026812269722e-05,
      "loss": 0.5343,
      "step": 1269500
    },
    {
      "epoch": 6.34,
      "learning_rate": 2.033415457007147e-05,
      "loss": 0.5399,
      "step": 1270000
    },
    {
      "epoch": 6.34,
      "learning_rate": 2.032028232787322e-05,
      "loss": 0.5702,
      "step": 1270500
    },
    {
      "epoch": 6.35,
      "learning_rate": 2.0306410085674967e-05,
      "loss": 0.5557,
      "step": 1271000
    },
    {
      "epoch": 6.35,
      "learning_rate": 2.0292537843476717e-05,
      "loss": 0.5597,
      "step": 1271500
    },
    {
      "epoch": 6.35,
      "learning_rate": 2.0278665601278468e-05,
      "loss": 0.5241,
      "step": 1272000
    },
    {
      "epoch": 6.35,
      "learning_rate": 2.0264793359080218e-05,
      "loss": 0.5519,
      "step": 1272500
    },
    {
      "epoch": 6.36,
      "learning_rate": 2.025097660585076e-05,
      "loss": 0.5428,
      "step": 1273000
    },
    {
      "epoch": 6.36,
      "learning_rate": 2.0237104363652505e-05,
      "loss": 0.552,
      "step": 1273500
    },
    {
      "epoch": 6.36,
      "learning_rate": 2.0223232121454256e-05,
      "loss": 0.5592,
      "step": 1274000
    },
    {
      "epoch": 6.36,
      "learning_rate": 2.0209359879256003e-05,
      "loss": 0.5518,
      "step": 1274500
    },
    {
      "epoch": 6.37,
      "learning_rate": 2.019551538154215e-05,
      "loss": 0.5624,
      "step": 1275000
    },
    {
      "epoch": 6.37,
      "learning_rate": 2.01816431393439e-05,
      "loss": 0.5662,
      "step": 1275500
    },
    {
      "epoch": 6.37,
      "learning_rate": 2.016777089714565e-05,
      "loss": 0.5608,
      "step": 1276000
    },
    {
      "epoch": 6.37,
      "learning_rate": 2.0153898654947398e-05,
      "loss": 0.5444,
      "step": 1276500
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.014005415723354e-05,
      "loss": 0.5557,
      "step": 1277000
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.0126181915035292e-05,
      "loss": 0.5559,
      "step": 1277500
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.011230967283704e-05,
      "loss": 0.5638,
      "step": 1278000
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.009843743063879e-05,
      "loss": 0.5607,
      "step": 1278500
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.0084565188440536e-05,
      "loss": 0.5466,
      "step": 1279000
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.0070720690726687e-05,
      "loss": 0.5553,
      "step": 1279500
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.0056848448528434e-05,
      "loss": 0.5589,
      "step": 1280000
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.0042976206330184e-05,
      "loss": 0.5546,
      "step": 1280500
    },
    {
      "epoch": 6.4,
      "learning_rate": 2.002910396413193e-05,
      "loss": 0.5639,
      "step": 1281000
    },
    {
      "epoch": 6.4,
      "learning_rate": 2.0015259466418075e-05,
      "loss": 0.5662,
      "step": 1281500
    },
    {
      "epoch": 6.4,
      "learning_rate": 2.0001414968704222e-05,
      "loss": 0.5448,
      "step": 1282000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.9987542726505972e-05,
      "loss": 0.5498,
      "step": 1282500
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.9973670484307723e-05,
      "loss": 0.538,
      "step": 1283000
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.995979824210947e-05,
      "loss": 0.5578,
      "step": 1283500
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.994592599991122e-05,
      "loss": 0.5595,
      "step": 1284000
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.9932053757712967e-05,
      "loss": 0.561,
      "step": 1284500
    },
    {
      "epoch": 6.42,
      "learning_rate": 1.9918181515514717e-05,
      "loss": 0.565,
      "step": 1285000
    },
    {
      "epoch": 6.42,
      "learning_rate": 1.9904309273316464e-05,
      "loss": 0.5553,
      "step": 1285500
    },
    {
      "epoch": 6.42,
      "learning_rate": 1.989046477560261e-05,
      "loss": 0.5561,
      "step": 1286000
    },
    {
      "epoch": 6.42,
      "learning_rate": 1.987659253340436e-05,
      "loss": 0.5499,
      "step": 1286500
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.986272029120611e-05,
      "loss": 0.5571,
      "step": 1287000
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.984884804900786e-05,
      "loss": 0.5872,
      "step": 1287500
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.983497580680961e-05,
      "loss": 0.5627,
      "step": 1288000
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.982115905358015e-05,
      "loss": 0.5751,
      "step": 1288500
    },
    {
      "epoch": 6.44,
      "learning_rate": 1.9807286811381897e-05,
      "loss": 0.5677,
      "step": 1289000
    },
    {
      "epoch": 6.44,
      "learning_rate": 1.9793414569183648e-05,
      "loss": 0.57,
      "step": 1289500
    },
    {
      "epoch": 6.44,
      "learning_rate": 1.9779542326985395e-05,
      "loss": 0.5682,
      "step": 1290000
    },
    {
      "epoch": 6.44,
      "learning_rate": 1.9765670084787145e-05,
      "loss": 0.553,
      "step": 1290500
    },
    {
      "epoch": 6.45,
      "learning_rate": 1.9751797842588895e-05,
      "loss": 0.5569,
      "step": 1291000
    },
    {
      "epoch": 6.45,
      "learning_rate": 1.9737925600390646e-05,
      "loss": 0.5597,
      "step": 1291500
    },
    {
      "epoch": 6.45,
      "learning_rate": 1.9724053358192393e-05,
      "loss": 0.5553,
      "step": 1292000
    },
    {
      "epoch": 6.45,
      "learning_rate": 1.9710208860478536e-05,
      "loss": 0.5616,
      "step": 1292500
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.9696336618280287e-05,
      "loss": 0.5689,
      "step": 1293000
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.9682464376082034e-05,
      "loss": 0.5475,
      "step": 1293500
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.9668592133883784e-05,
      "loss": 0.5567,
      "step": 1294000
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.9654719891685535e-05,
      "loss": 0.5736,
      "step": 1294500
    },
    {
      "epoch": 6.47,
      "learning_rate": 1.9640875393971682e-05,
      "loss": 0.5443,
      "step": 1295000
    },
    {
      "epoch": 6.47,
      "learning_rate": 1.962700315177343e-05,
      "loss": 0.5368,
      "step": 1295500
    },
    {
      "epoch": 6.47,
      "learning_rate": 1.9613158654059573e-05,
      "loss": 0.5744,
      "step": 1296000
    },
    {
      "epoch": 6.47,
      "learning_rate": 1.9599286411861323e-05,
      "loss": 0.574,
      "step": 1296500
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.9585414169663073e-05,
      "loss": 0.5786,
      "step": 1297000
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.957154192746482e-05,
      "loss": 0.5608,
      "step": 1297500
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.955766968526657e-05,
      "loss": 0.5555,
      "step": 1298000
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.9543797443068318e-05,
      "loss": 0.5621,
      "step": 1298500
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.9529925200870068e-05,
      "loss": 0.5809,
      "step": 1299000
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.9516052958671818e-05,
      "loss": 0.5586,
      "step": 1299500
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.9502208460957962e-05,
      "loss": 0.581,
      "step": 1300000
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.9488336218759712e-05,
      "loss": 0.5612,
      "step": 1300500
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.947446397656146e-05,
      "loss": 0.566,
      "step": 1301000
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.946059173436321e-05,
      "loss": 0.5727,
      "step": 1301500
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.9446719492164957e-05,
      "loss": 0.5427,
      "step": 1302000
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.9432874994451104e-05,
      "loss": 0.5677,
      "step": 1302500
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.941900275225285e-05,
      "loss": 0.5793,
      "step": 1303000
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.9405130510054605e-05,
      "loss": 0.5536,
      "step": 1303500
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.939125826785635e-05,
      "loss": 0.5468,
      "step": 1304000
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.9377386025658102e-05,
      "loss": 0.5487,
      "step": 1304500
    },
    {
      "epoch": 6.52,
      "learning_rate": 1.936351378345985e-05,
      "loss": 0.5557,
      "step": 1305000
    },
    {
      "epoch": 6.52,
      "learning_rate": 1.9349669285745993e-05,
      "loss": 0.5599,
      "step": 1305500
    },
    {
      "epoch": 6.52,
      "learning_rate": 1.9335797043547743e-05,
      "loss": 0.5796,
      "step": 1306000
    },
    {
      "epoch": 6.52,
      "learning_rate": 1.9321924801349494e-05,
      "loss": 0.561,
      "step": 1306500
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.930808030363564e-05,
      "loss": 0.5833,
      "step": 1307000
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.9294208061437388e-05,
      "loss": 0.5633,
      "step": 1307500
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.9280335819239138e-05,
      "loss": 0.554,
      "step": 1308000
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.9266463577040885e-05,
      "loss": 0.5567,
      "step": 1308500
    },
    {
      "epoch": 6.54,
      "learning_rate": 1.9252591334842635e-05,
      "loss": 0.5635,
      "step": 1309000
    },
    {
      "epoch": 6.54,
      "learning_rate": 1.9238719092644382e-05,
      "loss": 0.5499,
      "step": 1309500
    },
    {
      "epoch": 6.54,
      "learning_rate": 1.9224846850446133e-05,
      "loss": 0.5575,
      "step": 1310000
    },
    {
      "epoch": 6.54,
      "learning_rate": 1.9211002352732277e-05,
      "loss": 0.5579,
      "step": 1310500
    },
    {
      "epoch": 6.55,
      "learning_rate": 1.9197130110534027e-05,
      "loss": 0.5485,
      "step": 1311000
    },
    {
      "epoch": 6.55,
      "learning_rate": 1.9183257868335774e-05,
      "loss": 0.5655,
      "step": 1311500
    },
    {
      "epoch": 6.55,
      "learning_rate": 1.9169385626137528e-05,
      "loss": 0.578,
      "step": 1312000
    },
    {
      "epoch": 6.55,
      "learning_rate": 1.9155513383939275e-05,
      "loss": 0.5738,
      "step": 1312500
    },
    {
      "epoch": 6.56,
      "learning_rate": 1.9141641141741025e-05,
      "loss": 0.5598,
      "step": 1313000
    },
    {
      "epoch": 6.56,
      "learning_rate": 1.912779664402717e-05,
      "loss": 0.5825,
      "step": 1313500
    },
    {
      "epoch": 6.56,
      "learning_rate": 1.9113924401828916e-05,
      "loss": 0.5671,
      "step": 1314000
    },
    {
      "epoch": 6.56,
      "learning_rate": 1.9100052159630666e-05,
      "loss": 0.5473,
      "step": 1314500
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.9086179917432413e-05,
      "loss": 0.5578,
      "step": 1315000
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.9072307675234163e-05,
      "loss": 0.5852,
      "step": 1315500
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.9058435433035914e-05,
      "loss": 0.5906,
      "step": 1316000
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.9044563190837664e-05,
      "loss": 0.5781,
      "step": 1316500
    },
    {
      "epoch": 6.58,
      "learning_rate": 1.9030718693123808e-05,
      "loss": 0.5535,
      "step": 1317000
    },
    {
      "epoch": 6.58,
      "learning_rate": 1.901684645092556e-05,
      "loss": 0.5793,
      "step": 1317500
    },
    {
      "epoch": 6.58,
      "learning_rate": 1.9002974208727305e-05,
      "loss": 0.5527,
      "step": 1318000
    },
    {
      "epoch": 6.58,
      "learning_rate": 1.8989101966529056e-05,
      "loss": 0.5328,
      "step": 1318500
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.89752574688152e-05,
      "loss": 0.5709,
      "step": 1319000
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.896138522661695e-05,
      "loss": 0.5741,
      "step": 1319500
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.8947512984418697e-05,
      "loss": 0.5794,
      "step": 1320000
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.8933640742220447e-05,
      "loss": 0.5607,
      "step": 1320500
    },
    {
      "epoch": 6.6,
      "learning_rate": 1.8919796244506594e-05,
      "loss": 0.5766,
      "step": 1321000
    },
    {
      "epoch": 6.6,
      "learning_rate": 1.890592400230834e-05,
      "loss": 0.5768,
      "step": 1321500
    },
    {
      "epoch": 6.6,
      "learning_rate": 1.889205176011009e-05,
      "loss": 0.5591,
      "step": 1322000
    },
    {
      "epoch": 6.6,
      "learning_rate": 1.887817951791184e-05,
      "loss": 0.5787,
      "step": 1322500
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.886430727571359e-05,
      "loss": 0.5762,
      "step": 1323000
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.8850435033515336e-05,
      "loss": 0.5771,
      "step": 1323500
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.8836562791317086e-05,
      "loss": 0.5692,
      "step": 1324000
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.8822690549118833e-05,
      "loss": 0.5737,
      "step": 1324500
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.8808818306920587e-05,
      "loss": 0.5443,
      "step": 1325000
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.8794946064722334e-05,
      "loss": 0.5812,
      "step": 1325500
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.8781073822524084e-05,
      "loss": 0.5564,
      "step": 1326000
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.8767229324810228e-05,
      "loss": 0.5653,
      "step": 1326500
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.875335708261198e-05,
      "loss": 0.5617,
      "step": 1327000
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.8739484840413726e-05,
      "loss": 0.5769,
      "step": 1327500
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.8725612598215476e-05,
      "loss": 0.5649,
      "step": 1328000
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.8711740356017223e-05,
      "loss": 0.56,
      "step": 1328500
    },
    {
      "epoch": 6.64,
      "learning_rate": 1.8697868113818973e-05,
      "loss": 0.5632,
      "step": 1329000
    },
    {
      "epoch": 6.64,
      "learning_rate": 1.868402361610512e-05,
      "loss": 0.5738,
      "step": 1329500
    },
    {
      "epoch": 6.64,
      "learning_rate": 1.8670151373906867e-05,
      "loss": 0.5687,
      "step": 1330000
    },
    {
      "epoch": 6.64,
      "learning_rate": 1.8656279131708618e-05,
      "loss": 0.5988,
      "step": 1330500
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.8642406889510368e-05,
      "loss": 0.5793,
      "step": 1331000
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.8628534647312115e-05,
      "loss": 0.5742,
      "step": 1331500
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.8614662405113866e-05,
      "loss": 0.5773,
      "step": 1332000
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.860081790740001e-05,
      "loss": 0.5648,
      "step": 1332500
    },
    {
      "epoch": 6.66,
      "learning_rate": 1.8586945665201756e-05,
      "loss": 0.5783,
      "step": 1333000
    },
    {
      "epoch": 6.66,
      "learning_rate": 1.857307342300351e-05,
      "loss": 0.5726,
      "step": 1333500
    },
    {
      "epoch": 6.66,
      "learning_rate": 1.8559201180805257e-05,
      "loss": 0.5518,
      "step": 1334000
    },
    {
      "epoch": 6.66,
      "learning_rate": 1.8545328938607007e-05,
      "loss": 0.5799,
      "step": 1334500
    },
    {
      "epoch": 6.67,
      "learning_rate": 1.8531456696408754e-05,
      "loss": 0.5664,
      "step": 1335000
    },
    {
      "epoch": 6.67,
      "learning_rate": 1.85176121986949e-05,
      "loss": 0.5803,
      "step": 1335500
    },
    {
      "epoch": 6.67,
      "learning_rate": 1.850373995649665e-05,
      "loss": 0.5598,
      "step": 1336000
    },
    {
      "epoch": 6.67,
      "learning_rate": 1.84898677142984e-05,
      "loss": 0.5569,
      "step": 1336500
    },
    {
      "epoch": 6.68,
      "learning_rate": 1.8475995472100146e-05,
      "loss": 0.5641,
      "step": 1337000
    },
    {
      "epoch": 6.68,
      "learning_rate": 1.8462123229901896e-05,
      "loss": 0.5962,
      "step": 1337500
    },
    {
      "epoch": 6.68,
      "learning_rate": 1.8448278732188043e-05,
      "loss": 0.5897,
      "step": 1338000
    },
    {
      "epoch": 6.68,
      "learning_rate": 1.843440648998979e-05,
      "loss": 0.5508,
      "step": 1338500
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.842053424779154e-05,
      "loss": 0.5978,
      "step": 1339000
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.8406662005593288e-05,
      "loss": 0.5749,
      "step": 1339500
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.8392817507879435e-05,
      "loss": 0.584,
      "step": 1340000
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.8378945265681182e-05,
      "loss": 0.5634,
      "step": 1340500
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.8365073023482932e-05,
      "loss": 0.5693,
      "step": 1341000
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.835120078128468e-05,
      "loss": 0.5749,
      "step": 1341500
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.833732853908643e-05,
      "loss": 0.5611,
      "step": 1342000
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.832345629688818e-05,
      "loss": 0.5697,
      "step": 1342500
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.830958405468993e-05,
      "loss": 0.5662,
      "step": 1343000
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.8295739556976074e-05,
      "loss": 0.6133,
      "step": 1343500
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.8281867314777824e-05,
      "loss": 0.5764,
      "step": 1344000
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.826799507257957e-05,
      "loss": 0.5647,
      "step": 1344500
    },
    {
      "epoch": 6.72,
      "learning_rate": 1.8254122830381322e-05,
      "loss": 0.5791,
      "step": 1345000
    },
    {
      "epoch": 6.72,
      "learning_rate": 1.824025058818307e-05,
      "loss": 0.5728,
      "step": 1345500
    },
    {
      "epoch": 6.72,
      "learning_rate": 1.822637834598482e-05,
      "loss": 0.5701,
      "step": 1346000
    },
    {
      "epoch": 6.72,
      "learning_rate": 1.8212506103786566e-05,
      "loss": 0.588,
      "step": 1346500
    },
    {
      "epoch": 6.73,
      "learning_rate": 1.819863386158832e-05,
      "loss": 0.5636,
      "step": 1347000
    },
    {
      "epoch": 6.73,
      "learning_rate": 1.8184761619390067e-05,
      "loss": 0.5651,
      "step": 1347500
    },
    {
      "epoch": 6.73,
      "learning_rate": 1.8170889377191817e-05,
      "loss": 0.5708,
      "step": 1348000
    },
    {
      "epoch": 6.73,
      "learning_rate": 1.815704487947796e-05,
      "loss": 0.5392,
      "step": 1348500
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.8143172637279708e-05,
      "loss": 0.5943,
      "step": 1349000
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.812930039508146e-05,
      "loss": 0.5637,
      "step": 1349500
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.8115455897367602e-05,
      "loss": 0.5818,
      "step": 1350000
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.8101583655169353e-05,
      "loss": 0.5612,
      "step": 1350500
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.8087711412971103e-05,
      "loss": 0.5764,
      "step": 1351000
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.8073839170772853e-05,
      "loss": 0.5728,
      "step": 1351500
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.80599669285746e-05,
      "loss": 0.5699,
      "step": 1352000
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.804609468637635e-05,
      "loss": 0.5856,
      "step": 1352500
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.8032222444178098e-05,
      "loss": 0.5678,
      "step": 1353000
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.8018350201979848e-05,
      "loss": 0.5697,
      "step": 1353500
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.8004477959781595e-05,
      "loss": 0.5554,
      "step": 1354000
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.7990605717583345e-05,
      "loss": 0.5896,
      "step": 1354500
    },
    {
      "epoch": 6.77,
      "learning_rate": 1.7976733475385092e-05,
      "loss": 0.5856,
      "step": 1355000
    },
    {
      "epoch": 6.77,
      "learning_rate": 1.7962861233186843e-05,
      "loss": 0.5726,
      "step": 1355500
    },
    {
      "epoch": 6.77,
      "learning_rate": 1.794901673547299e-05,
      "loss": 0.6005,
      "step": 1356000
    },
    {
      "epoch": 6.77,
      "learning_rate": 1.793514449327474e-05,
      "loss": 0.5721,
      "step": 1356500
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.7921272251076487e-05,
      "loss": 0.5658,
      "step": 1357000
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.790742775336263e-05,
      "loss": 0.5693,
      "step": 1357500
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.789355551116438e-05,
      "loss": 0.587,
      "step": 1358000
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.7879683268966128e-05,
      "loss": 0.6041,
      "step": 1358500
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.786581102676788e-05,
      "loss": 0.5814,
      "step": 1359000
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.7851938784569626e-05,
      "loss": 0.5652,
      "step": 1359500
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.7838066542371376e-05,
      "loss": 0.5938,
      "step": 1360000
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.7824222044657523e-05,
      "loss": 0.564,
      "step": 1360500
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.7810349802459274e-05,
      "loss": 0.585,
      "step": 1361000
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.779647756026102e-05,
      "loss": 0.5449,
      "step": 1361500
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.778260531806277e-05,
      "loss": 0.5677,
      "step": 1362000
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.7768733075864518e-05,
      "loss": 0.5894,
      "step": 1362500
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.7754860833666268e-05,
      "loss": 0.5755,
      "step": 1363000
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.7741016335952412e-05,
      "loss": 0.5834,
      "step": 1363500
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.7727144093754162e-05,
      "loss": 0.5636,
      "step": 1364000
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.7713271851555913e-05,
      "loss": 0.5571,
      "step": 1364500
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.769939960935766e-05,
      "loss": 0.5721,
      "step": 1365000
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.768552736715941e-05,
      "loss": 0.5636,
      "step": 1365500
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.7671682869445554e-05,
      "loss": 0.5736,
      "step": 1366000
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.7657810627247304e-05,
      "loss": 0.5858,
      "step": 1366500
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.764393838504905e-05,
      "loss": 0.5993,
      "step": 1367000
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.76300661428508e-05,
      "loss": 0.5769,
      "step": 1367500
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.761619390065255e-05,
      "loss": 0.5847,
      "step": 1368000
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.76023216584543e-05,
      "loss": 0.5653,
      "step": 1368500
    },
    {
      "epoch": 6.84,
      "learning_rate": 1.758844941625605e-05,
      "loss": 0.5688,
      "step": 1369000
    },
    {
      "epoch": 6.84,
      "learning_rate": 1.75745771740578e-05,
      "loss": 0.5891,
      "step": 1369500
    },
    {
      "epoch": 6.84,
      "learning_rate": 1.7560732676343943e-05,
      "loss": 0.5847,
      "step": 1370000
    },
    {
      "epoch": 6.84,
      "learning_rate": 1.7546888178630087e-05,
      "loss": 0.5764,
      "step": 1370500
    },
    {
      "epoch": 6.85,
      "learning_rate": 1.7533015936431838e-05,
      "loss": 0.5902,
      "step": 1371000
    },
    {
      "epoch": 6.85,
      "learning_rate": 1.7519143694233585e-05,
      "loss": 0.5625,
      "step": 1371500
    },
    {
      "epoch": 6.85,
      "learning_rate": 1.7505271452035335e-05,
      "loss": 0.6151,
      "step": 1372000
    },
    {
      "epoch": 6.85,
      "learning_rate": 1.7491399209837085e-05,
      "loss": 0.5677,
      "step": 1372500
    },
    {
      "epoch": 6.86,
      "learning_rate": 1.7477526967638836e-05,
      "loss": 0.5659,
      "step": 1373000
    },
    {
      "epoch": 6.86,
      "learning_rate": 1.7463654725440583e-05,
      "loss": 0.5557,
      "step": 1373500
    },
    {
      "epoch": 6.86,
      "learning_rate": 1.744981022772673e-05,
      "loss": 0.5543,
      "step": 1374000
    },
    {
      "epoch": 6.86,
      "learning_rate": 1.7435937985528477e-05,
      "loss": 0.5933,
      "step": 1374500
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.7422065743330227e-05,
      "loss": 0.5672,
      "step": 1375000
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.7408193501131974e-05,
      "loss": 0.588,
      "step": 1375500
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.7394321258933725e-05,
      "loss": 0.5767,
      "step": 1376000
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.738044901673547e-05,
      "loss": 0.5704,
      "step": 1376500
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.736660451902162e-05,
      "loss": 0.5635,
      "step": 1377000
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.735273227682337e-05,
      "loss": 0.6163,
      "step": 1377500
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.733886003462512e-05,
      "loss": 0.5728,
      "step": 1378000
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.7324987792426866e-05,
      "loss": 0.5892,
      "step": 1378500
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.731114329471301e-05,
      "loss": 0.592,
      "step": 1379000
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.729727105251476e-05,
      "loss": 0.5806,
      "step": 1379500
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.7283398810316508e-05,
      "loss": 0.5944,
      "step": 1380000
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.7269526568118258e-05,
      "loss": 0.5781,
      "step": 1380500
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.7255682070404405e-05,
      "loss": 0.5918,
      "step": 1381000
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.7241809828206155e-05,
      "loss": 0.5625,
      "step": 1381500
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.7227937586007902e-05,
      "loss": 0.5746,
      "step": 1382000
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.7214065343809653e-05,
      "loss": 0.5757,
      "step": 1382500
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.72001931016114e-05,
      "loss": 0.5582,
      "step": 1383000
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.718632085941315e-05,
      "loss": 0.5703,
      "step": 1383500
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.7172448617214897e-05,
      "loss": 0.6136,
      "step": 1384000
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.7158604119501044e-05,
      "loss": 0.5661,
      "step": 1384500
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.7144731877302795e-05,
      "loss": 0.5848,
      "step": 1385000
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.713085963510454e-05,
      "loss": 0.5921,
      "step": 1385500
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.7116987392906292e-05,
      "loss": 0.5936,
      "step": 1386000
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.710311515070804e-05,
      "loss": 0.5765,
      "step": 1386500
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.7089270652994186e-05,
      "loss": 0.5698,
      "step": 1387000
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.7075398410795933e-05,
      "loss": 0.5917,
      "step": 1387500
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.7061526168597683e-05,
      "loss": 0.5596,
      "step": 1388000
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.704765392639943e-05,
      "loss": 0.5586,
      "step": 1388500
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.703378168420118e-05,
      "loss": 0.5653,
      "step": 1389000
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.7019937186487328e-05,
      "loss": 0.5714,
      "step": 1389500
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.700606494428908e-05,
      "loss": 0.5621,
      "step": 1390000
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.6992192702090825e-05,
      "loss": 0.5755,
      "step": 1390500
    },
    {
      "epoch": 6.95,
      "learning_rate": 1.6978320459892576e-05,
      "loss": 0.5517,
      "step": 1391000
    },
    {
      "epoch": 6.95,
      "learning_rate": 1.6964448217694323e-05,
      "loss": 0.5913,
      "step": 1391500
    },
    {
      "epoch": 6.95,
      "learning_rate": 1.6950575975496073e-05,
      "loss": 0.5654,
      "step": 1392000
    },
    {
      "epoch": 6.95,
      "learning_rate": 1.693670373329782e-05,
      "loss": 0.5978,
      "step": 1392500
    },
    {
      "epoch": 6.96,
      "learning_rate": 1.6922859235583964e-05,
      "loss": 0.5859,
      "step": 1393000
    },
    {
      "epoch": 6.96,
      "learning_rate": 1.6908986993385718e-05,
      "loss": 0.5788,
      "step": 1393500
    },
    {
      "epoch": 6.96,
      "learning_rate": 1.6895114751187465e-05,
      "loss": 0.581,
      "step": 1394000
    },
    {
      "epoch": 6.96,
      "learning_rate": 1.6881242508989215e-05,
      "loss": 0.5895,
      "step": 1394500
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.686739801127536e-05,
      "loss": 0.5963,
      "step": 1395000
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.685352576907711e-05,
      "loss": 0.5689,
      "step": 1395500
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.6839653526878856e-05,
      "loss": 0.5909,
      "step": 1396000
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.6825781284680606e-05,
      "loss": 0.5829,
      "step": 1396500
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.6811909042482353e-05,
      "loss": 0.58,
      "step": 1397000
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.6798036800284104e-05,
      "loss": 0.5796,
      "step": 1397500
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.678416455808585e-05,
      "loss": 0.5841,
      "step": 1398000
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.6770292315887604e-05,
      "loss": 0.589,
      "step": 1398500
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.675642007368935e-05,
      "loss": 0.6025,
      "step": 1399000
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.6742547831491102e-05,
      "loss": 0.5958,
      "step": 1399500
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.6728703333777246e-05,
      "loss": 0.5801,
      "step": 1400000
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.6714831091578996e-05,
      "loss": 0.5878,
      "step": 1400500
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.6700958849380743e-05,
      "loss": 0.6044,
      "step": 1401000
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.6687086607182493e-05,
      "loss": 0.5878,
      "step": 1401500
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.0944536924362183,
      "eval_runtime": 1456.4218,
      "eval_samples_per_second": 79.989,
      "eval_steps_per_second": 19.998,
      "step": 1401680
    }
  ],
  "max_steps": 2002400,
  "num_train_epochs": 10,
  "total_flos": 2.665801044519056e+18,
  "trial_name": null,
  "trial_params": null
}
