| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2562, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00117096018735363, |
| "grad_norm": 3.2623555660247803, |
| "learning_rate": 1.5503875968992249e-07, |
| "loss": 0.591235339641571, |
| "memory(GiB)": 130.94, |
| "step": 1, |
| "token_acc": 0.8447411307509088, |
| "train_speed(iter/s)": 0.019417 |
| }, |
| { |
| "epoch": 0.00585480093676815, |
| "grad_norm": 3.0421900749206543, |
| "learning_rate": 7.751937984496125e-07, |
| "loss": 0.5568965077400208, |
| "memory(GiB)": 131.94, |
| "step": 5, |
| "token_acc": 0.8460283319735964, |
| "train_speed(iter/s)": 0.043842 |
| }, |
| { |
| "epoch": 0.0117096018735363, |
| "grad_norm": 2.8613500595092773, |
| "learning_rate": 1.550387596899225e-06, |
| "loss": 0.5551010131835937, |
| "memory(GiB)": 131.94, |
| "step": 10, |
| "token_acc": 0.8356083810191254, |
| "train_speed(iter/s)": 0.052588 |
| }, |
| { |
| "epoch": 0.01756440281030445, |
| "grad_norm": 2.1271445751190186, |
| "learning_rate": 2.3255813953488376e-06, |
| "loss": 0.5327572345733642, |
| "memory(GiB)": 132.78, |
| "step": 15, |
| "token_acc": 0.8394243615921753, |
| "train_speed(iter/s)": 0.057201 |
| }, |
| { |
| "epoch": 0.0234192037470726, |
| "grad_norm": 1.0847800970077515, |
| "learning_rate": 3.10077519379845e-06, |
| "loss": 0.46837658882141114, |
| "memory(GiB)": 132.78, |
| "step": 20, |
| "token_acc": 0.8431611509225723, |
| "train_speed(iter/s)": 0.059425 |
| }, |
| { |
| "epoch": 0.02927400468384075, |
| "grad_norm": 0.8750381469726562, |
| "learning_rate": 3.875968992248063e-06, |
| "loss": 0.44452896118164065, |
| "memory(GiB)": 132.78, |
| "step": 25, |
| "token_acc": 0.8543377731613794, |
| "train_speed(iter/s)": 0.060974 |
| }, |
| { |
| "epoch": 0.0351288056206089, |
| "grad_norm": 0.5234003663063049, |
| "learning_rate": 4.651162790697675e-06, |
| "loss": 0.42905311584472655, |
| "memory(GiB)": 132.78, |
| "step": 30, |
| "token_acc": 0.8542961149814849, |
| "train_speed(iter/s)": 0.062206 |
| }, |
| { |
| "epoch": 0.040983606557377046, |
| "grad_norm": 0.5018875598907471, |
| "learning_rate": 5.4263565891472865e-06, |
| "loss": 0.4269443988800049, |
| "memory(GiB)": 132.78, |
| "step": 35, |
| "token_acc": 0.8522571433054558, |
| "train_speed(iter/s)": 0.063069 |
| }, |
| { |
| "epoch": 0.0468384074941452, |
| "grad_norm": 0.38381680846214294, |
| "learning_rate": 6.2015503875969e-06, |
| "loss": 0.40071582794189453, |
| "memory(GiB)": 132.78, |
| "step": 40, |
| "token_acc": 0.8555349764923779, |
| "train_speed(iter/s)": 0.063736 |
| }, |
| { |
| "epoch": 0.05269320843091335, |
| "grad_norm": 0.3016009032726288, |
| "learning_rate": 6.976744186046513e-06, |
| "loss": 0.4113297462463379, |
| "memory(GiB)": 132.78, |
| "step": 45, |
| "token_acc": 0.8491327275191562, |
| "train_speed(iter/s)": 0.064231 |
| }, |
| { |
| "epoch": 0.0585480093676815, |
| "grad_norm": 0.2976464331150055, |
| "learning_rate": 7.751937984496126e-06, |
| "loss": 0.4019885540008545, |
| "memory(GiB)": 132.78, |
| "step": 50, |
| "token_acc": 0.8631093056438779, |
| "train_speed(iter/s)": 0.064655 |
| }, |
| { |
| "epoch": 0.06440281030444965, |
| "grad_norm": 0.24487970769405365, |
| "learning_rate": 8.527131782945736e-06, |
| "loss": 0.3938943386077881, |
| "memory(GiB)": 132.78, |
| "step": 55, |
| "token_acc": 0.8631337386589192, |
| "train_speed(iter/s)": 0.065047 |
| }, |
| { |
| "epoch": 0.0702576112412178, |
| "grad_norm": 0.23692984879016876, |
| "learning_rate": 9.30232558139535e-06, |
| "loss": 0.41377553939819334, |
| "memory(GiB)": 132.97, |
| "step": 60, |
| "token_acc": 0.8609983103219724, |
| "train_speed(iter/s)": 0.065302 |
| }, |
| { |
| "epoch": 0.07611241217798595, |
| "grad_norm": 0.23579329252243042, |
| "learning_rate": 1.0077519379844963e-05, |
| "loss": 0.3947890758514404, |
| "memory(GiB)": 132.97, |
| "step": 65, |
| "token_acc": 0.8729405159237655, |
| "train_speed(iter/s)": 0.065577 |
| }, |
| { |
| "epoch": 0.08196721311475409, |
| "grad_norm": 0.2210317999124527, |
| "learning_rate": 1.0852713178294573e-05, |
| "loss": 0.3936769962310791, |
| "memory(GiB)": 132.97, |
| "step": 70, |
| "token_acc": 0.860379465686213, |
| "train_speed(iter/s)": 0.065756 |
| }, |
| { |
| "epoch": 0.08782201405152225, |
| "grad_norm": 0.23814593255519867, |
| "learning_rate": 1.1627906976744187e-05, |
| "loss": 0.39299349784851073, |
| "memory(GiB)": 132.97, |
| "step": 75, |
| "token_acc": 0.8482827629927034, |
| "train_speed(iter/s)": 0.06599 |
| }, |
| { |
| "epoch": 0.0936768149882904, |
| "grad_norm": 0.24474237859249115, |
| "learning_rate": 1.24031007751938e-05, |
| "loss": 0.39170591831207274, |
| "memory(GiB)": 132.97, |
| "step": 80, |
| "token_acc": 0.8571129295007489, |
| "train_speed(iter/s)": 0.066134 |
| }, |
| { |
| "epoch": 0.09953161592505855, |
| "grad_norm": 0.232538640499115, |
| "learning_rate": 1.3178294573643412e-05, |
| "loss": 0.3822017669677734, |
| "memory(GiB)": 132.97, |
| "step": 85, |
| "token_acc": 0.8780878727095818, |
| "train_speed(iter/s)": 0.066366 |
| }, |
| { |
| "epoch": 0.1053864168618267, |
| "grad_norm": 0.22437641024589539, |
| "learning_rate": 1.3953488372093025e-05, |
| "loss": 0.38762218952178956, |
| "memory(GiB)": 132.97, |
| "step": 90, |
| "token_acc": 0.8614531845562612, |
| "train_speed(iter/s)": 0.066559 |
| }, |
| { |
| "epoch": 0.11124121779859485, |
| "grad_norm": 0.22185830771923065, |
| "learning_rate": 1.4728682170542636e-05, |
| "loss": 0.38779487609863283, |
| "memory(GiB)": 132.97, |
| "step": 95, |
| "token_acc": 0.8664786644726099, |
| "train_speed(iter/s)": 0.066708 |
| }, |
| { |
| "epoch": 0.117096018735363, |
| "grad_norm": 0.27393871545791626, |
| "learning_rate": 1.550387596899225e-05, |
| "loss": 0.3883920192718506, |
| "memory(GiB)": 132.97, |
| "step": 100, |
| "token_acc": 0.8665084805343176, |
| "train_speed(iter/s)": 0.066847 |
| }, |
| { |
| "epoch": 0.12295081967213115, |
| "grad_norm": 0.22931204736232758, |
| "learning_rate": 1.6279069767441862e-05, |
| "loss": 0.38483271598815916, |
| "memory(GiB)": 132.97, |
| "step": 105, |
| "token_acc": 0.86842660702191, |
| "train_speed(iter/s)": 0.066999 |
| }, |
| { |
| "epoch": 0.1288056206088993, |
| "grad_norm": 0.24479679763317108, |
| "learning_rate": 1.7054263565891473e-05, |
| "loss": 0.37220172882080077, |
| "memory(GiB)": 132.97, |
| "step": 110, |
| "token_acc": 0.867410052595701, |
| "train_speed(iter/s)": 0.067112 |
| }, |
| { |
| "epoch": 0.13466042154566746, |
| "grad_norm": 0.2648003101348877, |
| "learning_rate": 1.7829457364341087e-05, |
| "loss": 0.39059298038482665, |
| "memory(GiB)": 132.97, |
| "step": 115, |
| "token_acc": 0.8748554193704952, |
| "train_speed(iter/s)": 0.067218 |
| }, |
| { |
| "epoch": 0.1405152224824356, |
| "grad_norm": 0.26005980372428894, |
| "learning_rate": 1.86046511627907e-05, |
| "loss": 0.3818374156951904, |
| "memory(GiB)": 132.97, |
| "step": 120, |
| "token_acc": 0.8672951527027911, |
| "train_speed(iter/s)": 0.06731 |
| }, |
| { |
| "epoch": 0.14637002341920374, |
| "grad_norm": 0.25006258487701416, |
| "learning_rate": 1.937984496124031e-05, |
| "loss": 0.3956636428833008, |
| "memory(GiB)": 132.97, |
| "step": 125, |
| "token_acc": 0.8609716918038115, |
| "train_speed(iter/s)": 0.067383 |
| }, |
| { |
| "epoch": 0.1522248243559719, |
| "grad_norm": 0.2747514545917511, |
| "learning_rate": 1.9999991663467044e-05, |
| "loss": 0.3932375907897949, |
| "memory(GiB)": 132.97, |
| "step": 130, |
| "token_acc": 0.8660186100028765, |
| "train_speed(iter/s)": 0.06745 |
| }, |
| { |
| "epoch": 0.15807962529274006, |
| "grad_norm": 0.2641543745994568, |
| "learning_rate": 1.9999699886272926e-05, |
| "loss": 0.39503839015960696, |
| "memory(GiB)": 132.97, |
| "step": 135, |
| "token_acc": 0.8533355723899442, |
| "train_speed(iter/s)": 0.067497 |
| }, |
| { |
| "epoch": 0.16393442622950818, |
| "grad_norm": 0.2637743353843689, |
| "learning_rate": 1.9998991296330317e-05, |
| "loss": 0.39163637161254883, |
| "memory(GiB)": 132.97, |
| "step": 140, |
| "token_acc": 0.8673312165879645, |
| "train_speed(iter/s)": 0.067529 |
| }, |
| { |
| "epoch": 0.16978922716627634, |
| "grad_norm": 0.2526402175426483, |
| "learning_rate": 1.9997865923175027e-05, |
| "loss": 0.3822649002075195, |
| "memory(GiB)": 132.97, |
| "step": 145, |
| "token_acc": 0.8725527891092668, |
| "train_speed(iter/s)": 0.067555 |
| }, |
| { |
| "epoch": 0.1756440281030445, |
| "grad_norm": 0.2798239588737488, |
| "learning_rate": 1.999632381371545e-05, |
| "loss": 0.388509464263916, |
| "memory(GiB)": 133.05, |
| "step": 150, |
| "token_acc": 0.8570005695948406, |
| "train_speed(iter/s)": 0.067614 |
| }, |
| { |
| "epoch": 0.18149882903981265, |
| "grad_norm": 0.24978382885456085, |
| "learning_rate": 1.999436503223061e-05, |
| "loss": 0.38669638633728026, |
| "memory(GiB)": 133.05, |
| "step": 155, |
| "token_acc": 0.8657276078873382, |
| "train_speed(iter/s)": 0.067633 |
| }, |
| { |
| "epoch": 0.1873536299765808, |
| "grad_norm": 0.2820796072483063, |
| "learning_rate": 1.9991989660367463e-05, |
| "loss": 0.39322915077209475, |
| "memory(GiB)": 133.05, |
| "step": 160, |
| "token_acc": 0.8509120957934454, |
| "train_speed(iter/s)": 0.06766 |
| }, |
| { |
| "epoch": 0.19320843091334894, |
| "grad_norm": 0.25325024127960205, |
| "learning_rate": 1.998919779713751e-05, |
| "loss": 0.3963874578475952, |
| "memory(GiB)": 133.05, |
| "step": 165, |
| "token_acc": 0.8568015157690381, |
| "train_speed(iter/s)": 0.067694 |
| }, |
| { |
| "epoch": 0.1990632318501171, |
| "grad_norm": 0.23693059384822845, |
| "learning_rate": 1.998598955891266e-05, |
| "loss": 0.3861080169677734, |
| "memory(GiB)": 133.05, |
| "step": 170, |
| "token_acc": 0.8704777077082435, |
| "train_speed(iter/s)": 0.067738 |
| }, |
| { |
| "epoch": 0.20491803278688525, |
| "grad_norm": 0.24995002150535583, |
| "learning_rate": 1.9982365079420382e-05, |
| "loss": 0.3748037338256836, |
| "memory(GiB)": 133.05, |
| "step": 175, |
| "token_acc": 0.8639677636839712, |
| "train_speed(iter/s)": 0.06777 |
| }, |
| { |
| "epoch": 0.2107728337236534, |
| "grad_norm": 0.2528163492679596, |
| "learning_rate": 1.9978324509738147e-05, |
| "loss": 0.37778520584106445, |
| "memory(GiB)": 133.05, |
| "step": 180, |
| "token_acc": 0.8692558237224801, |
| "train_speed(iter/s)": 0.067808 |
| }, |
| { |
| "epoch": 0.21662763466042154, |
| "grad_norm": 0.26185593008995056, |
| "learning_rate": 1.9973868018287093e-05, |
| "loss": 0.37712826728820803, |
| "memory(GiB)": 133.05, |
| "step": 185, |
| "token_acc": 0.8629621624330818, |
| "train_speed(iter/s)": 0.067862 |
| }, |
| { |
| "epoch": 0.2224824355971897, |
| "grad_norm": 0.2565723955631256, |
| "learning_rate": 1.9968995790825048e-05, |
| "loss": 0.38217387199401853, |
| "memory(GiB)": 133.05, |
| "step": 190, |
| "token_acc": 0.8526548122357622, |
| "train_speed(iter/s)": 0.06787 |
| }, |
| { |
| "epoch": 0.22833723653395785, |
| "grad_norm": 0.24071918427944183, |
| "learning_rate": 1.9963708030438754e-05, |
| "loss": 0.38128018379211426, |
| "memory(GiB)": 133.05, |
| "step": 195, |
| "token_acc": 0.86564623713995, |
| "train_speed(iter/s)": 0.067888 |
| }, |
| { |
| "epoch": 0.234192037470726, |
| "grad_norm": 0.2468400001525879, |
| "learning_rate": 1.995800495753542e-05, |
| "loss": 0.38081438541412355, |
| "memory(GiB)": 133.05, |
| "step": 200, |
| "token_acc": 0.8573196660493942, |
| "train_speed(iter/s)": 0.0679 |
| }, |
| { |
| "epoch": 0.24004683840749413, |
| "grad_norm": 0.24025513231754303, |
| "learning_rate": 1.9951886809833537e-05, |
| "loss": 0.39122610092163085, |
| "memory(GiB)": 133.05, |
| "step": 205, |
| "token_acc": 0.8568699202170693, |
| "train_speed(iter/s)": 0.067926 |
| }, |
| { |
| "epoch": 0.2459016393442623, |
| "grad_norm": 0.262650728225708, |
| "learning_rate": 1.9945353842352943e-05, |
| "loss": 0.38733615875244143, |
| "memory(GiB)": 133.05, |
| "step": 210, |
| "token_acc": 0.8605185069498672, |
| "train_speed(iter/s)": 0.067945 |
| }, |
| { |
| "epoch": 0.25175644028103045, |
| "grad_norm": 0.2334696501493454, |
| "learning_rate": 1.9938406327404233e-05, |
| "loss": 0.38346500396728517, |
| "memory(GiB)": 133.05, |
| "step": 215, |
| "token_acc": 0.8689892435384466, |
| "train_speed(iter/s)": 0.067989 |
| }, |
| { |
| "epoch": 0.2576112412177986, |
| "grad_norm": 0.2296629697084427, |
| "learning_rate": 1.9931044554577373e-05, |
| "loss": 0.3805164575576782, |
| "memory(GiB)": 133.05, |
| "step": 220, |
| "token_acc": 0.862054141615526, |
| "train_speed(iter/s)": 0.068004 |
| }, |
| { |
| "epoch": 0.26346604215456676, |
| "grad_norm": 0.2337953746318817, |
| "learning_rate": 1.992326883072965e-05, |
| "loss": 0.38329010009765624, |
| "memory(GiB)": 133.05, |
| "step": 225, |
| "token_acc": 0.8576421234268423, |
| "train_speed(iter/s)": 0.068016 |
| }, |
| { |
| "epoch": 0.2693208430913349, |
| "grad_norm": 0.22751180827617645, |
| "learning_rate": 1.991507947997287e-05, |
| "loss": 0.3914541244506836, |
| "memory(GiB)": 133.05, |
| "step": 230, |
| "token_acc": 0.8647305257189656, |
| "train_speed(iter/s)": 0.068037 |
| }, |
| { |
| "epoch": 0.275175644028103, |
| "grad_norm": 0.23834733664989471, |
| "learning_rate": 1.9906476843659866e-05, |
| "loss": 0.3868813753128052, |
| "memory(GiB)": 133.05, |
| "step": 235, |
| "token_acc": 0.8718037707532127, |
| "train_speed(iter/s)": 0.068062 |
| }, |
| { |
| "epoch": 0.2810304449648712, |
| "grad_norm": 0.2157682329416275, |
| "learning_rate": 1.989746128037024e-05, |
| "loss": 0.3725996971130371, |
| "memory(GiB)": 133.05, |
| "step": 240, |
| "token_acc": 0.8637500196081507, |
| "train_speed(iter/s)": 0.068079 |
| }, |
| { |
| "epoch": 0.28688524590163933, |
| "grad_norm": 0.24432708323001862, |
| "learning_rate": 1.988803316589545e-05, |
| "loss": 0.38200843334198, |
| "memory(GiB)": 133.05, |
| "step": 245, |
| "token_acc": 0.863402893772779, |
| "train_speed(iter/s)": 0.068119 |
| }, |
| { |
| "epoch": 0.2927400468384075, |
| "grad_norm": 0.22754515707492828, |
| "learning_rate": 1.987819289322311e-05, |
| "loss": 0.38454749584198, |
| "memory(GiB)": 133.05, |
| "step": 250, |
| "token_acc": 0.8616220657129776, |
| "train_speed(iter/s)": 0.068158 |
| }, |
| { |
| "epoch": 0.29859484777517564, |
| "grad_norm": 0.22906067967414856, |
| "learning_rate": 1.9867940872520646e-05, |
| "loss": 0.38929970264434816, |
| "memory(GiB)": 133.05, |
| "step": 255, |
| "token_acc": 0.862697854653979, |
| "train_speed(iter/s)": 0.068143 |
| }, |
| { |
| "epoch": 0.3044496487119438, |
| "grad_norm": 0.2391372174024582, |
| "learning_rate": 1.9857277531118173e-05, |
| "loss": 0.38328697681427004, |
| "memory(GiB)": 133.05, |
| "step": 260, |
| "token_acc": 0.875577325482754, |
| "train_speed(iter/s)": 0.068151 |
| }, |
| { |
| "epoch": 0.31030444964871196, |
| "grad_norm": 0.23862990736961365, |
| "learning_rate": 1.9846203313490697e-05, |
| "loss": 0.3745781660079956, |
| "memory(GiB)": 133.05, |
| "step": 265, |
| "token_acc": 0.8789255692291267, |
| "train_speed(iter/s)": 0.068172 |
| }, |
| { |
| "epoch": 0.3161592505854801, |
| "grad_norm": 0.2886284291744232, |
| "learning_rate": 1.983471868123958e-05, |
| "loss": 0.37299673557281493, |
| "memory(GiB)": 133.05, |
| "step": 270, |
| "token_acc": 0.8619748050993121, |
| "train_speed(iter/s)": 0.068214 |
| }, |
| { |
| "epoch": 0.32201405152224827, |
| "grad_norm": 0.25015807151794434, |
| "learning_rate": 1.98228241130733e-05, |
| "loss": 0.39740839004516604, |
| "memory(GiB)": 133.05, |
| "step": 275, |
| "token_acc": 0.8667058589327261, |
| "train_speed(iter/s)": 0.068226 |
| }, |
| { |
| "epoch": 0.32786885245901637, |
| "grad_norm": 0.22695152461528778, |
| "learning_rate": 1.98105201047875e-05, |
| "loss": 0.3711256980895996, |
| "memory(GiB)": 133.05, |
| "step": 280, |
| "token_acc": 0.8709827404894823, |
| "train_speed(iter/s)": 0.068264 |
| }, |
| { |
| "epoch": 0.3337236533957845, |
| "grad_norm": 0.25948262214660645, |
| "learning_rate": 1.9797807169244326e-05, |
| "loss": 0.376755690574646, |
| "memory(GiB)": 133.05, |
| "step": 285, |
| "token_acc": 0.8627933786950365, |
| "train_speed(iter/s)": 0.068275 |
| }, |
| { |
| "epoch": 0.3395784543325527, |
| "grad_norm": 0.2252376824617386, |
| "learning_rate": 1.9784685836351045e-05, |
| "loss": 0.3907461166381836, |
| "memory(GiB)": 133.05, |
| "step": 290, |
| "token_acc": 0.8594050471419237, |
| "train_speed(iter/s)": 0.068273 |
| }, |
| { |
| "epoch": 0.34543325526932084, |
| "grad_norm": 0.2580513656139374, |
| "learning_rate": 1.9771156653037944e-05, |
| "loss": 0.38218297958374026, |
| "memory(GiB)": 133.05, |
| "step": 295, |
| "token_acc": 0.8619312594063512, |
| "train_speed(iter/s)": 0.068289 |
| }, |
| { |
| "epoch": 0.351288056206089, |
| "grad_norm": 0.21899765729904175, |
| "learning_rate": 1.975722018323556e-05, |
| "loss": 0.3749994277954102, |
| "memory(GiB)": 133.05, |
| "step": 300, |
| "token_acc": 0.8698979752198593, |
| "train_speed(iter/s)": 0.068281 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.2238709181547165, |
| "learning_rate": 1.974287700785116e-05, |
| "loss": 0.37110333442687987, |
| "memory(GiB)": 133.05, |
| "step": 305, |
| "token_acc": 0.8662300629837371, |
| "train_speed(iter/s)": 0.0683 |
| }, |
| { |
| "epoch": 0.3629976580796253, |
| "grad_norm": 0.24307382106781006, |
| "learning_rate": 1.9728127724744516e-05, |
| "loss": 0.36276865005493164, |
| "memory(GiB)": 133.05, |
| "step": 310, |
| "token_acc": 0.8663095601853296, |
| "train_speed(iter/s)": 0.068292 |
| }, |
| { |
| "epoch": 0.36885245901639346, |
| "grad_norm": 0.2318965196609497, |
| "learning_rate": 1.9712972948703006e-05, |
| "loss": 0.38519649505615233, |
| "memory(GiB)": 133.05, |
| "step": 315, |
| "token_acc": 0.8719233901258103, |
| "train_speed(iter/s)": 0.068305 |
| }, |
| { |
| "epoch": 0.3747072599531616, |
| "grad_norm": 0.22240430116653442, |
| "learning_rate": 1.9697413311415967e-05, |
| "loss": 0.3795146465301514, |
| "memory(GiB)": 133.05, |
| "step": 320, |
| "token_acc": 0.8447559871358541, |
| "train_speed(iter/s)": 0.06832 |
| }, |
| { |
| "epoch": 0.3805620608899297, |
| "grad_norm": 0.21727585792541504, |
| "learning_rate": 1.9681449461448386e-05, |
| "loss": 0.37623322010040283, |
| "memory(GiB)": 133.05, |
| "step": 325, |
| "token_acc": 0.868092485549133, |
| "train_speed(iter/s)": 0.068321 |
| }, |
| { |
| "epoch": 0.3864168618266979, |
| "grad_norm": 0.24871428310871124, |
| "learning_rate": 1.9665082064213856e-05, |
| "loss": 0.3804615497589111, |
| "memory(GiB)": 133.05, |
| "step": 330, |
| "token_acc": 0.8632469719807496, |
| "train_speed(iter/s)": 0.06834 |
| }, |
| { |
| "epoch": 0.39227166276346603, |
| "grad_norm": 0.2242128700017929, |
| "learning_rate": 1.9648311801946823e-05, |
| "loss": 0.37839736938476565, |
| "memory(GiB)": 133.05, |
| "step": 335, |
| "token_acc": 0.8620339267458229, |
| "train_speed(iter/s)": 0.068356 |
| }, |
| { |
| "epoch": 0.3981264637002342, |
| "grad_norm": 0.23243097960948944, |
| "learning_rate": 1.9631139373674188e-05, |
| "loss": 0.3759917736053467, |
| "memory(GiB)": 133.05, |
| "step": 340, |
| "token_acc": 0.8630340491154014, |
| "train_speed(iter/s)": 0.068364 |
| }, |
| { |
| "epoch": 0.40398126463700235, |
| "grad_norm": 0.2167743444442749, |
| "learning_rate": 1.9613565495186126e-05, |
| "loss": 0.36579113006591796, |
| "memory(GiB)": 133.05, |
| "step": 345, |
| "token_acc": 0.8630803983851985, |
| "train_speed(iter/s)": 0.068389 |
| }, |
| { |
| "epoch": 0.4098360655737705, |
| "grad_norm": 0.2554558515548706, |
| "learning_rate": 1.9595590899006288e-05, |
| "loss": 0.3840445280075073, |
| "memory(GiB)": 133.05, |
| "step": 350, |
| "token_acc": 0.8682752142033024, |
| "train_speed(iter/s)": 0.06839 |
| }, |
| { |
| "epoch": 0.41569086651053866, |
| "grad_norm": 0.23864524066448212, |
| "learning_rate": 1.957721633436124e-05, |
| "loss": 0.3817277908325195, |
| "memory(GiB)": 133.05, |
| "step": 355, |
| "token_acc": 0.8645090065366, |
| "train_speed(iter/s)": 0.068384 |
| }, |
| { |
| "epoch": 0.4215456674473068, |
| "grad_norm": 0.25255629420280457, |
| "learning_rate": 1.9558442567149244e-05, |
| "loss": 0.3791682720184326, |
| "memory(GiB)": 133.05, |
| "step": 360, |
| "token_acc": 0.8775300258130478, |
| "train_speed(iter/s)": 0.068404 |
| }, |
| { |
| "epoch": 0.4274004683840749, |
| "grad_norm": 0.2247135043144226, |
| "learning_rate": 1.953927037990834e-05, |
| "loss": 0.3860400915145874, |
| "memory(GiB)": 133.05, |
| "step": 365, |
| "token_acc": 0.8536377662766984, |
| "train_speed(iter/s)": 0.068414 |
| }, |
| { |
| "epoch": 0.4332552693208431, |
| "grad_norm": 0.29746949672698975, |
| "learning_rate": 1.9519700571783718e-05, |
| "loss": 0.3866363763809204, |
| "memory(GiB)": 133.05, |
| "step": 370, |
| "token_acc": 0.8695576843716825, |
| "train_speed(iter/s)": 0.0684 |
| }, |
| { |
| "epoch": 0.43911007025761123, |
| "grad_norm": 0.23039910197257996, |
| "learning_rate": 1.9499733958494405e-05, |
| "loss": 0.38268446922302246, |
| "memory(GiB)": 133.05, |
| "step": 375, |
| "token_acc": 0.8581758827531537, |
| "train_speed(iter/s)": 0.068407 |
| }, |
| { |
| "epoch": 0.4449648711943794, |
| "grad_norm": 0.23166924715042114, |
| "learning_rate": 1.947937137229928e-05, |
| "loss": 0.37559897899627687, |
| "memory(GiB)": 133.05, |
| "step": 380, |
| "token_acc": 0.8744556465509139, |
| "train_speed(iter/s)": 0.068418 |
| }, |
| { |
| "epoch": 0.45081967213114754, |
| "grad_norm": 0.22437815368175507, |
| "learning_rate": 1.9458613661962366e-05, |
| "loss": 0.37695770263671874, |
| "memory(GiB)": 133.05, |
| "step": 385, |
| "token_acc": 0.8771398753952836, |
| "train_speed(iter/s)": 0.068428 |
| }, |
| { |
| "epoch": 0.4566744730679157, |
| "grad_norm": 0.23045028746128082, |
| "learning_rate": 1.943746169271746e-05, |
| "loss": 0.37760295867919924, |
| "memory(GiB)": 133.05, |
| "step": 390, |
| "token_acc": 0.8759578109502548, |
| "train_speed(iter/s)": 0.068419 |
| }, |
| { |
| "epoch": 0.46252927400468385, |
| "grad_norm": 0.21340611577033997, |
| "learning_rate": 1.941591634623206e-05, |
| "loss": 0.38206305503845217, |
| "memory(GiB)": 133.05, |
| "step": 395, |
| "token_acc": 0.8683378180616532, |
| "train_speed(iter/s)": 0.068433 |
| }, |
| { |
| "epoch": 0.468384074941452, |
| "grad_norm": 0.2345254123210907, |
| "learning_rate": 1.9393978520570638e-05, |
| "loss": 0.3681832790374756, |
| "memory(GiB)": 133.05, |
| "step": 400, |
| "token_acc": 0.8685244618395304, |
| "train_speed(iter/s)": 0.068457 |
| }, |
| { |
| "epoch": 0.47423887587822017, |
| "grad_norm": 0.23758217692375183, |
| "learning_rate": 1.9371649130157166e-05, |
| "loss": 0.36426939964294436, |
| "memory(GiB)": 133.05, |
| "step": 405, |
| "token_acc": 0.8676219452965636, |
| "train_speed(iter/s)": 0.068464 |
| }, |
| { |
| "epoch": 0.48009367681498827, |
| "grad_norm": 0.2363872230052948, |
| "learning_rate": 1.9348929105737044e-05, |
| "loss": 0.37017192840576174, |
| "memory(GiB)": 133.05, |
| "step": 410, |
| "token_acc": 0.8679473812363037, |
| "train_speed(iter/s)": 0.068468 |
| }, |
| { |
| "epoch": 0.4859484777517564, |
| "grad_norm": 0.24642601609230042, |
| "learning_rate": 1.932581939433827e-05, |
| "loss": 0.38428258895874023, |
| "memory(GiB)": 133.05, |
| "step": 415, |
| "token_acc": 0.8687720441289789, |
| "train_speed(iter/s)": 0.06847 |
| }, |
| { |
| "epoch": 0.4918032786885246, |
| "grad_norm": 0.2268989235162735, |
| "learning_rate": 1.9302320959231997e-05, |
| "loss": 0.37460167407989503, |
| "memory(GiB)": 133.05, |
| "step": 420, |
| "token_acc": 0.8672426525809843, |
| "train_speed(iter/s)": 0.068479 |
| }, |
| { |
| "epoch": 0.49765807962529274, |
| "grad_norm": 0.21185266971588135, |
| "learning_rate": 1.927843477989234e-05, |
| "loss": 0.37124834060668943, |
| "memory(GiB)": 133.05, |
| "step": 425, |
| "token_acc": 0.8814642777451279, |
| "train_speed(iter/s)": 0.068488 |
| }, |
| { |
| "epoch": 0.5035128805620609, |
| "grad_norm": 0.21971659362316132, |
| "learning_rate": 1.9254161851955587e-05, |
| "loss": 0.3843217849731445, |
| "memory(GiB)": 133.05, |
| "step": 430, |
| "token_acc": 0.8714790057188723, |
| "train_speed(iter/s)": 0.068499 |
| }, |
| { |
| "epoch": 0.509367681498829, |
| "grad_norm": 0.26225098967552185, |
| "learning_rate": 1.9229503187178694e-05, |
| "loss": 0.3771937370300293, |
| "memory(GiB)": 133.05, |
| "step": 435, |
| "token_acc": 0.8658792102647854, |
| "train_speed(iter/s)": 0.068506 |
| }, |
| { |
| "epoch": 0.5152224824355972, |
| "grad_norm": 0.23551629483699799, |
| "learning_rate": 1.920445981339708e-05, |
| "loss": 0.37624967098236084, |
| "memory(GiB)": 133.05, |
| "step": 440, |
| "token_acc": 0.8641905035935222, |
| "train_speed(iter/s)": 0.068518 |
| }, |
| { |
| "epoch": 0.5210772833723654, |
| "grad_norm": 0.25343942642211914, |
| "learning_rate": 1.9179032774481822e-05, |
| "loss": 0.37384233474731443, |
| "memory(GiB)": 133.05, |
| "step": 445, |
| "token_acc": 0.8723531724486548, |
| "train_speed(iter/s)": 0.068533 |
| }, |
| { |
| "epoch": 0.5269320843091335, |
| "grad_norm": 0.22508122026920319, |
| "learning_rate": 1.9153223130296125e-05, |
| "loss": 0.3715523719787598, |
| "memory(GiB)": 133.05, |
| "step": 450, |
| "token_acc": 0.8742618455654583, |
| "train_speed(iter/s)": 0.068547 |
| }, |
| { |
| "epoch": 0.5327868852459017, |
| "grad_norm": 0.2273603081703186, |
| "learning_rate": 1.9127031956651153e-05, |
| "loss": 0.3753758192062378, |
| "memory(GiB)": 133.05, |
| "step": 455, |
| "token_acc": 0.8717887326571352, |
| "train_speed(iter/s)": 0.068556 |
| }, |
| { |
| "epoch": 0.5386416861826698, |
| "grad_norm": 0.24021831154823303, |
| "learning_rate": 1.9100460345261175e-05, |
| "loss": 0.3885939598083496, |
| "memory(GiB)": 133.05, |
| "step": 460, |
| "token_acc": 0.8648985264452413, |
| "train_speed(iter/s)": 0.068545 |
| }, |
| { |
| "epoch": 0.544496487119438, |
| "grad_norm": 0.25094419717788696, |
| "learning_rate": 1.9073509403698062e-05, |
| "loss": 0.3836202621459961, |
| "memory(GiB)": 133.05, |
| "step": 465, |
| "token_acc": 0.8716216427648316, |
| "train_speed(iter/s)": 0.068548 |
| }, |
| { |
| "epoch": 0.550351288056206, |
| "grad_norm": 0.2209528684616089, |
| "learning_rate": 1.9046180255345142e-05, |
| "loss": 0.3783407688140869, |
| "memory(GiB)": 133.05, |
| "step": 470, |
| "token_acc": 0.871262499689834, |
| "train_speed(iter/s)": 0.068558 |
| }, |
| { |
| "epoch": 0.5562060889929742, |
| "grad_norm": 0.2333252876996994, |
| "learning_rate": 1.9018474039350342e-05, |
| "loss": 0.37140965461730957, |
| "memory(GiB)": 133.05, |
| "step": 475, |
| "token_acc": 0.870434477460474, |
| "train_speed(iter/s)": 0.068559 |
| }, |
| { |
| "epoch": 0.5620608899297423, |
| "grad_norm": 0.22321061789989471, |
| "learning_rate": 1.899039191057872e-05, |
| "loss": 0.3732731819152832, |
| "memory(GiB)": 133.05, |
| "step": 480, |
| "token_acc": 0.8651847926051782, |
| "train_speed(iter/s)": 0.06856 |
| }, |
| { |
| "epoch": 0.5679156908665105, |
| "grad_norm": 0.24292093515396118, |
| "learning_rate": 1.8961935039564338e-05, |
| "loss": 0.3720050096511841, |
| "memory(GiB)": 133.05, |
| "step": 485, |
| "token_acc": 0.8644098695583844, |
| "train_speed(iter/s)": 0.06857 |
| }, |
| { |
| "epoch": 0.5737704918032787, |
| "grad_norm": 0.25076785683631897, |
| "learning_rate": 1.8933104612461454e-05, |
| "loss": 0.37432427406311036, |
| "memory(GiB)": 133.05, |
| "step": 490, |
| "token_acc": 0.865598108538928, |
| "train_speed(iter/s)": 0.068571 |
| }, |
| { |
| "epoch": 0.5796252927400468, |
| "grad_norm": 0.2353287786245346, |
| "learning_rate": 1.8903901830995093e-05, |
| "loss": 0.37787389755249023, |
| "memory(GiB)": 133.05, |
| "step": 495, |
| "token_acc": 0.8628752281343229, |
| "train_speed(iter/s)": 0.068571 |
| }, |
| { |
| "epoch": 0.585480093676815, |
| "grad_norm": 0.23301288485527039, |
| "learning_rate": 1.8874327912410945e-05, |
| "loss": 0.3894960880279541, |
| "memory(GiB)": 133.05, |
| "step": 500, |
| "token_acc": 0.8649986209317486, |
| "train_speed(iter/s)": 0.068584 |
| }, |
| { |
| "epoch": 0.5913348946135831, |
| "grad_norm": 0.23387756943702698, |
| "learning_rate": 1.884438408942463e-05, |
| "loss": 0.37682523727416994, |
| "memory(GiB)": 133.05, |
| "step": 505, |
| "token_acc": 0.8542796019209774, |
| "train_speed(iter/s)": 0.068582 |
| }, |
| { |
| "epoch": 0.5971896955503513, |
| "grad_norm": 0.2101481854915619, |
| "learning_rate": 1.881407161017033e-05, |
| "loss": 0.3712585210800171, |
| "memory(GiB)": 133.05, |
| "step": 510, |
| "token_acc": 0.8757052407221665, |
| "train_speed(iter/s)": 0.068594 |
| }, |
| { |
| "epoch": 0.6030444964871194, |
| "grad_norm": 0.2197055220603943, |
| "learning_rate": 1.8783391738148738e-05, |
| "loss": 0.3659008026123047, |
| "memory(GiB)": 133.05, |
| "step": 515, |
| "token_acc": 0.8690927312016535, |
| "train_speed(iter/s)": 0.068604 |
| }, |
| { |
| "epoch": 0.6088992974238876, |
| "grad_norm": 0.2129889726638794, |
| "learning_rate": 1.875234575217441e-05, |
| "loss": 0.36564500331878663, |
| "memory(GiB)": 133.05, |
| "step": 520, |
| "token_acc": 0.8682967700230018, |
| "train_speed(iter/s)": 0.068614 |
| }, |
| { |
| "epoch": 0.6147540983606558, |
| "grad_norm": 0.20078937709331512, |
| "learning_rate": 1.8720934946322466e-05, |
| "loss": 0.3801888465881348, |
| "memory(GiB)": 133.05, |
| "step": 525, |
| "token_acc": 0.8619188686453682, |
| "train_speed(iter/s)": 0.068624 |
| }, |
| { |
| "epoch": 0.6206088992974239, |
| "grad_norm": 0.20143865048885345, |
| "learning_rate": 1.8689160629874622e-05, |
| "loss": 0.3495650768280029, |
| "memory(GiB)": 133.05, |
| "step": 530, |
| "token_acc": 0.8823268736367693, |
| "train_speed(iter/s)": 0.068621 |
| }, |
| { |
| "epoch": 0.6264637002341921, |
| "grad_norm": 0.20651988685131073, |
| "learning_rate": 1.865702412726465e-05, |
| "loss": 0.36185364723205565, |
| "memory(GiB)": 133.05, |
| "step": 535, |
| "token_acc": 0.879171148410336, |
| "train_speed(iter/s)": 0.068634 |
| }, |
| { |
| "epoch": 0.6323185011709602, |
| "grad_norm": 0.2135830670595169, |
| "learning_rate": 1.8624526778023142e-05, |
| "loss": 0.36333141326904295, |
| "memory(GiB)": 133.05, |
| "step": 540, |
| "token_acc": 0.8760890123251218, |
| "train_speed(iter/s)": 0.068639 |
| }, |
| { |
| "epoch": 0.6381733021077284, |
| "grad_norm": 0.21670690178871155, |
| "learning_rate": 1.85916699367217e-05, |
| "loss": 0.36627764701843263, |
| "memory(GiB)": 133.05, |
| "step": 545, |
| "token_acc": 0.8693160130902993, |
| "train_speed(iter/s)": 0.068638 |
| }, |
| { |
| "epoch": 0.6440281030444965, |
| "grad_norm": 0.2082773894071579, |
| "learning_rate": 1.855845497291646e-05, |
| "loss": 0.3783770799636841, |
| "memory(GiB)": 133.05, |
| "step": 550, |
| "token_acc": 0.8656727592628988, |
| "train_speed(iter/s)": 0.068642 |
| }, |
| { |
| "epoch": 0.6498829039812647, |
| "grad_norm": 0.2064507156610489, |
| "learning_rate": 1.8524883271091004e-05, |
| "loss": 0.36701202392578125, |
| "memory(GiB)": 133.05, |
| "step": 555, |
| "token_acc": 0.874370974788701, |
| "train_speed(iter/s)": 0.068639 |
| }, |
| { |
| "epoch": 0.6557377049180327, |
| "grad_norm": 0.19167180359363556, |
| "learning_rate": 1.8490956230598668e-05, |
| "loss": 0.3856034755706787, |
| "memory(GiB)": 133.05, |
| "step": 560, |
| "token_acc": 0.8677655700574375, |
| "train_speed(iter/s)": 0.068642 |
| }, |
| { |
| "epoch": 0.6615925058548009, |
| "grad_norm": 0.22284165024757385, |
| "learning_rate": 1.8456675265604183e-05, |
| "loss": 0.36545207500457766, |
| "memory(GiB)": 133.05, |
| "step": 565, |
| "token_acc": 0.8674683330306996, |
| "train_speed(iter/s)": 0.068653 |
| }, |
| { |
| "epoch": 0.667447306791569, |
| "grad_norm": 0.2335020750761032, |
| "learning_rate": 1.842204180502476e-05, |
| "loss": 0.36900959014892576, |
| "memory(GiB)": 133.05, |
| "step": 570, |
| "token_acc": 0.8763767159865549, |
| "train_speed(iter/s)": 0.068659 |
| }, |
| { |
| "epoch": 0.6733021077283372, |
| "grad_norm": 0.2406488060951233, |
| "learning_rate": 1.8387057292470517e-05, |
| "loss": 0.3836709499359131, |
| "memory(GiB)": 133.05, |
| "step": 575, |
| "token_acc": 0.8667227047725787, |
| "train_speed(iter/s)": 0.068662 |
| }, |
| { |
| "epoch": 0.6791569086651054, |
| "grad_norm": 0.21748137474060059, |
| "learning_rate": 1.8351723186184295e-05, |
| "loss": 0.3724257707595825, |
| "memory(GiB)": 133.05, |
| "step": 580, |
| "token_acc": 0.8577895654245747, |
| "train_speed(iter/s)": 0.068669 |
| }, |
| { |
| "epoch": 0.6850117096018735, |
| "grad_norm": 0.2269269824028015, |
| "learning_rate": 1.8316040958980896e-05, |
| "loss": 0.3713605165481567, |
| "memory(GiB)": 133.05, |
| "step": 585, |
| "token_acc": 0.8802838494896842, |
| "train_speed(iter/s)": 0.068677 |
| }, |
| { |
| "epoch": 0.6908665105386417, |
| "grad_norm": 0.24186237156391144, |
| "learning_rate": 1.828001209818567e-05, |
| "loss": 0.3882193088531494, |
| "memory(GiB)": 133.05, |
| "step": 590, |
| "token_acc": 0.8647495837870993, |
| "train_speed(iter/s)": 0.068687 |
| }, |
| { |
| "epoch": 0.6967213114754098, |
| "grad_norm": 0.24182303249835968, |
| "learning_rate": 1.8243638105572547e-05, |
| "loss": 0.37105526924133303, |
| "memory(GiB)": 133.05, |
| "step": 595, |
| "token_acc": 0.8747415704995677, |
| "train_speed(iter/s)": 0.068693 |
| }, |
| { |
| "epoch": 0.702576112412178, |
| "grad_norm": 0.2169107049703598, |
| "learning_rate": 1.82069204973014e-05, |
| "loss": 0.3660942554473877, |
| "memory(GiB)": 133.05, |
| "step": 600, |
| "token_acc": 0.8819129326127438, |
| "train_speed(iter/s)": 0.068705 |
| }, |
| { |
| "epoch": 0.7084309133489461, |
| "grad_norm": 0.22826465964317322, |
| "learning_rate": 1.816986080385489e-05, |
| "loss": 0.38544516563415526, |
| "memory(GiB)": 133.05, |
| "step": 605, |
| "token_acc": 0.850805587726625, |
| "train_speed(iter/s)": 0.068706 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.22054514288902283, |
| "learning_rate": 1.813246056997465e-05, |
| "loss": 0.36968698501586916, |
| "memory(GiB)": 133.05, |
| "step": 610, |
| "token_acc": 0.8651287265831155, |
| "train_speed(iter/s)": 0.068707 |
| }, |
| { |
| "epoch": 0.7201405152224825, |
| "grad_norm": 0.2099841833114624, |
| "learning_rate": 1.809472135459688e-05, |
| "loss": 0.3711225509643555, |
| "memory(GiB)": 133.05, |
| "step": 615, |
| "token_acc": 0.8681505343933286, |
| "train_speed(iter/s)": 0.068708 |
| }, |
| { |
| "epoch": 0.7259953161592506, |
| "grad_norm": 0.21193836629390717, |
| "learning_rate": 1.8056644730787412e-05, |
| "loss": 0.3799697160720825, |
| "memory(GiB)": 133.05, |
| "step": 620, |
| "token_acc": 0.8738008866124044, |
| "train_speed(iter/s)": 0.068719 |
| }, |
| { |
| "epoch": 0.7318501170960188, |
| "grad_norm": 0.21255411207675934, |
| "learning_rate": 1.8018232285676092e-05, |
| "loss": 0.3608224391937256, |
| "memory(GiB)": 133.05, |
| "step": 625, |
| "token_acc": 0.8694407077081082, |
| "train_speed(iter/s)": 0.068731 |
| }, |
| { |
| "epoch": 0.7377049180327869, |
| "grad_norm": 0.21150043606758118, |
| "learning_rate": 1.797948562039066e-05, |
| "loss": 0.3775743246078491, |
| "memory(GiB)": 133.05, |
| "step": 630, |
| "token_acc": 0.8636316861199378, |
| "train_speed(iter/s)": 0.068723 |
| }, |
| { |
| "epoch": 0.7435597189695551, |
| "grad_norm": 0.21777065098285675, |
| "learning_rate": 1.7940406349989987e-05, |
| "loss": 0.3736081838607788, |
| "memory(GiB)": 133.05, |
| "step": 635, |
| "token_acc": 0.8663054996457302, |
| "train_speed(iter/s)": 0.068733 |
| }, |
| { |
| "epoch": 0.7494145199063232, |
| "grad_norm": 0.20919020473957062, |
| "learning_rate": 1.7900996103396772e-05, |
| "loss": 0.36686708927154543, |
| "memory(GiB)": 133.05, |
| "step": 640, |
| "token_acc": 0.8738849498577591, |
| "train_speed(iter/s)": 0.068733 |
| }, |
| { |
| "epoch": 0.7552693208430913, |
| "grad_norm": 0.2190757542848587, |
| "learning_rate": 1.7861256523329634e-05, |
| "loss": 0.3648522853851318, |
| "memory(GiB)": 133.05, |
| "step": 645, |
| "token_acc": 0.8633769063180828, |
| "train_speed(iter/s)": 0.068726 |
| }, |
| { |
| "epoch": 0.7611241217798594, |
| "grad_norm": 0.2133089303970337, |
| "learning_rate": 1.7821189266234647e-05, |
| "loss": 0.3695883274078369, |
| "memory(GiB)": 133.05, |
| "step": 650, |
| "token_acc": 0.86513161996683, |
| "train_speed(iter/s)": 0.068724 |
| }, |
| { |
| "epoch": 0.7669789227166276, |
| "grad_norm": 0.21529735624790192, |
| "learning_rate": 1.7780796002216285e-05, |
| "loss": 0.36347646713256837, |
| "memory(GiB)": 133.05, |
| "step": 655, |
| "token_acc": 0.8618205939317708, |
| "train_speed(iter/s)": 0.068726 |
| }, |
| { |
| "epoch": 0.7728337236533958, |
| "grad_norm": 0.2055824100971222, |
| "learning_rate": 1.7740078414967817e-05, |
| "loss": 0.3710654258728027, |
| "memory(GiB)": 133.05, |
| "step": 660, |
| "token_acc": 0.876207411310151, |
| "train_speed(iter/s)": 0.06873 |
| }, |
| { |
| "epoch": 0.7786885245901639, |
| "grad_norm": 0.20337671041488647, |
| "learning_rate": 1.7699038201701132e-05, |
| "loss": 0.363714861869812, |
| "memory(GiB)": 133.05, |
| "step": 665, |
| "token_acc": 0.8631361610960301, |
| "train_speed(iter/s)": 0.068736 |
| }, |
| { |
| "epoch": 0.7845433255269321, |
| "grad_norm": 0.2067345827817917, |
| "learning_rate": 1.7657677073075968e-05, |
| "loss": 0.3705836296081543, |
| "memory(GiB)": 133.05, |
| "step": 670, |
| "token_acc": 0.8667006816477769, |
| "train_speed(iter/s)": 0.068738 |
| }, |
| { |
| "epoch": 0.7903981264637002, |
| "grad_norm": 0.20614713430404663, |
| "learning_rate": 1.761599675312864e-05, |
| "loss": 0.37332298755645754, |
| "memory(GiB)": 133.05, |
| "step": 675, |
| "token_acc": 0.8799103822873227, |
| "train_speed(iter/s)": 0.068737 |
| }, |
| { |
| "epoch": 0.7962529274004684, |
| "grad_norm": 0.21380652487277985, |
| "learning_rate": 1.7573998979200163e-05, |
| "loss": 0.36742873191833497, |
| "memory(GiB)": 133.05, |
| "step": 680, |
| "token_acc": 0.8703528431892178, |
| "train_speed(iter/s)": 0.068746 |
| }, |
| { |
| "epoch": 0.8021077283372365, |
| "grad_norm": 0.19453544914722443, |
| "learning_rate": 1.753168550186383e-05, |
| "loss": 0.37564864158630373, |
| "memory(GiB)": 133.05, |
| "step": 685, |
| "token_acc": 0.8728679647922801, |
| "train_speed(iter/s)": 0.068749 |
| }, |
| { |
| "epoch": 0.8079625292740047, |
| "grad_norm": 0.19713324308395386, |
| "learning_rate": 1.7489058084852247e-05, |
| "loss": 0.37057785987854003, |
| "memory(GiB)": 133.05, |
| "step": 690, |
| "token_acc": 0.8620633488698441, |
| "train_speed(iter/s)": 0.068747 |
| }, |
| { |
| "epoch": 0.8138173302107728, |
| "grad_norm": 0.20321306586265564, |
| "learning_rate": 1.744611850498383e-05, |
| "loss": 0.3668221950531006, |
| "memory(GiB)": 133.05, |
| "step": 695, |
| "token_acc": 0.8725824053835161, |
| "train_speed(iter/s)": 0.068741 |
| }, |
| { |
| "epoch": 0.819672131147541, |
| "grad_norm": 0.22502325475215912, |
| "learning_rate": 1.7402868552088724e-05, |
| "loss": 0.3616886854171753, |
| "memory(GiB)": 133.05, |
| "step": 700, |
| "token_acc": 0.8672900381533646, |
| "train_speed(iter/s)": 0.068742 |
| }, |
| { |
| "epoch": 0.8255269320843092, |
| "grad_norm": 0.206443652510643, |
| "learning_rate": 1.73593100289342e-05, |
| "loss": 0.36960477828979493, |
| "memory(GiB)": 133.05, |
| "step": 705, |
| "token_acc": 0.8645310315863375, |
| "train_speed(iter/s)": 0.068749 |
| }, |
| { |
| "epoch": 0.8313817330210773, |
| "grad_norm": 0.2609001696109772, |
| "learning_rate": 1.7315444751149533e-05, |
| "loss": 0.3676512956619263, |
| "memory(GiB)": 133.05, |
| "step": 710, |
| "token_acc": 0.8703732566911265, |
| "train_speed(iter/s)": 0.068756 |
| }, |
| { |
| "epoch": 0.8372365339578455, |
| "grad_norm": 0.20213671028614044, |
| "learning_rate": 1.727127454715029e-05, |
| "loss": 0.36738247871398927, |
| "memory(GiB)": 133.05, |
| "step": 715, |
| "token_acc": 0.8776044347530407, |
| "train_speed(iter/s)": 0.068761 |
| }, |
| { |
| "epoch": 0.8430913348946136, |
| "grad_norm": 0.2078767567873001, |
| "learning_rate": 1.722680125806214e-05, |
| "loss": 0.3677778720855713, |
| "memory(GiB)": 133.05, |
| "step": 720, |
| "token_acc": 0.8627296514081535, |
| "train_speed(iter/s)": 0.068763 |
| }, |
| { |
| "epoch": 0.8489461358313818, |
| "grad_norm": 0.22138644754886627, |
| "learning_rate": 1.71820267376441e-05, |
| "loss": 0.37197351455688477, |
| "memory(GiB)": 133.05, |
| "step": 725, |
| "token_acc": 0.8676777818660314, |
| "train_speed(iter/s)": 0.068766 |
| }, |
| { |
| "epoch": 0.8548009367681498, |
| "grad_norm": 0.21397338807582855, |
| "learning_rate": 1.7136952852211274e-05, |
| "loss": 0.37579007148742677, |
| "memory(GiB)": 133.05, |
| "step": 730, |
| "token_acc": 0.8572162173097093, |
| "train_speed(iter/s)": 0.068772 |
| }, |
| { |
| "epoch": 0.860655737704918, |
| "grad_norm": 0.20828036963939667, |
| "learning_rate": 1.7091581480557057e-05, |
| "loss": 0.3636088132858276, |
| "memory(GiB)": 133.05, |
| "step": 735, |
| "token_acc": 0.8666745722408246, |
| "train_speed(iter/s)": 0.068774 |
| }, |
| { |
| "epoch": 0.8665105386416861, |
| "grad_norm": 0.21285265684127808, |
| "learning_rate": 1.7045914513874815e-05, |
| "loss": 0.37646629810333254, |
| "memory(GiB)": 133.05, |
| "step": 740, |
| "token_acc": 0.8666506652036757, |
| "train_speed(iter/s)": 0.068785 |
| }, |
| { |
| "epoch": 0.8723653395784543, |
| "grad_norm": 0.19855837523937225, |
| "learning_rate": 1.699995385567907e-05, |
| "loss": 0.37862300872802734, |
| "memory(GiB)": 133.05, |
| "step": 745, |
| "token_acc": 0.8584255151366506, |
| "train_speed(iter/s)": 0.068799 |
| }, |
| { |
| "epoch": 0.8782201405152225, |
| "grad_norm": 0.21356073021888733, |
| "learning_rate": 1.695370142172614e-05, |
| "loss": 0.370495080947876, |
| "memory(GiB)": 133.05, |
| "step": 750, |
| "token_acc": 0.8650399529081709, |
| "train_speed(iter/s)": 0.068798 |
| }, |
| { |
| "epoch": 0.8840749414519906, |
| "grad_norm": 0.21858234703540802, |
| "learning_rate": 1.690715913993429e-05, |
| "loss": 0.3731105089187622, |
| "memory(GiB)": 133.05, |
| "step": 755, |
| "token_acc": 0.8690419204765525, |
| "train_speed(iter/s)": 0.068799 |
| }, |
| { |
| "epoch": 0.8899297423887588, |
| "grad_norm": 0.21877680718898773, |
| "learning_rate": 1.6860328950303392e-05, |
| "loss": 0.3532438039779663, |
| "memory(GiB)": 133.05, |
| "step": 760, |
| "token_acc": 0.8752962281074447, |
| "train_speed(iter/s)": 0.068803 |
| }, |
| { |
| "epoch": 0.8957845433255269, |
| "grad_norm": 0.2116468995809555, |
| "learning_rate": 1.6813212804834033e-05, |
| "loss": 0.3690504550933838, |
| "memory(GiB)": 133.05, |
| "step": 765, |
| "token_acc": 0.861989263346257, |
| "train_speed(iter/s)": 0.068807 |
| }, |
| { |
| "epoch": 0.9016393442622951, |
| "grad_norm": 0.20343121886253357, |
| "learning_rate": 1.676581266744615e-05, |
| "loss": 0.3611701488494873, |
| "memory(GiB)": 133.05, |
| "step": 770, |
| "token_acc": 0.8671105242834544, |
| "train_speed(iter/s)": 0.06881 |
| }, |
| { |
| "epoch": 0.9074941451990632, |
| "grad_norm": 0.19857962429523468, |
| "learning_rate": 1.6718130513897207e-05, |
| "loss": 0.3600625038146973, |
| "memory(GiB)": 133.05, |
| "step": 775, |
| "token_acc": 0.8728194751658959, |
| "train_speed(iter/s)": 0.068813 |
| }, |
| { |
| "epoch": 0.9133489461358314, |
| "grad_norm": 0.23387958109378815, |
| "learning_rate": 1.667016833169979e-05, |
| "loss": 0.3759610176086426, |
| "memory(GiB)": 133.05, |
| "step": 780, |
| "token_acc": 0.8710922399514741, |
| "train_speed(iter/s)": 0.068813 |
| }, |
| { |
| "epoch": 0.9192037470725996, |
| "grad_norm": 0.2053619623184204, |
| "learning_rate": 1.6621928120038806e-05, |
| "loss": 0.36916725635528563, |
| "memory(GiB)": 133.05, |
| "step": 785, |
| "token_acc": 0.8602640020509871, |
| "train_speed(iter/s)": 0.068813 |
| }, |
| { |
| "epoch": 0.9250585480093677, |
| "grad_norm": 0.20847375690937042, |
| "learning_rate": 1.657341188968811e-05, |
| "loss": 0.36096744537353515, |
| "memory(GiB)": 133.05, |
| "step": 790, |
| "token_acc": 0.8631381808792282, |
| "train_speed(iter/s)": 0.068819 |
| }, |
| { |
| "epoch": 0.9309133489461359, |
| "grad_norm": 0.20935416221618652, |
| "learning_rate": 1.6524621662926733e-05, |
| "loss": 0.3602827310562134, |
| "memory(GiB)": 133.05, |
| "step": 795, |
| "token_acc": 0.8806607875578047, |
| "train_speed(iter/s)": 0.068825 |
| }, |
| { |
| "epoch": 0.936768149882904, |
| "grad_norm": 0.214552640914917, |
| "learning_rate": 1.6475559473454558e-05, |
| "loss": 0.369510293006897, |
| "memory(GiB)": 133.05, |
| "step": 800, |
| "token_acc": 0.8770849556632923, |
| "train_speed(iter/s)": 0.068828 |
| }, |
| { |
| "epoch": 0.9426229508196722, |
| "grad_norm": 0.21994450688362122, |
| "learning_rate": 1.6426227366307563e-05, |
| "loss": 0.37307014465332033, |
| "memory(GiB)": 133.05, |
| "step": 805, |
| "token_acc": 0.876770090527487, |
| "train_speed(iter/s)": 0.068823 |
| }, |
| { |
| "epoch": 0.9484777517564403, |
| "grad_norm": 0.20645499229431152, |
| "learning_rate": 1.6376627397772576e-05, |
| "loss": 0.37114017009735106, |
| "memory(GiB)": 133.05, |
| "step": 810, |
| "token_acc": 0.8619496040676315, |
| "train_speed(iter/s)": 0.068823 |
| }, |
| { |
| "epoch": 0.9543325526932084, |
| "grad_norm": 0.2126459777355194, |
| "learning_rate": 1.6326761635301572e-05, |
| "loss": 0.3650930166244507, |
| "memory(GiB)": 133.05, |
| "step": 815, |
| "token_acc": 0.870646124823141, |
| "train_speed(iter/s)": 0.068826 |
| }, |
| { |
| "epoch": 0.9601873536299765, |
| "grad_norm": 0.20105397701263428, |
| "learning_rate": 1.6276632157425475e-05, |
| "loss": 0.37223210334777834, |
| "memory(GiB)": 133.05, |
| "step": 820, |
| "token_acc": 0.8648889553764547, |
| "train_speed(iter/s)": 0.068826 |
| }, |
| { |
| "epoch": 0.9660421545667447, |
| "grad_norm": 0.2080501765012741, |
| "learning_rate": 1.6226241053667536e-05, |
| "loss": 0.37712783813476564, |
| "memory(GiB)": 133.05, |
| "step": 825, |
| "token_acc": 0.8605132566814988, |
| "train_speed(iter/s)": 0.06883 |
| }, |
| { |
| "epoch": 0.9718969555035128, |
| "grad_norm": 0.2141636610031128, |
| "learning_rate": 1.617559042445625e-05, |
| "loss": 0.37673077583312986, |
| "memory(GiB)": 133.05, |
| "step": 830, |
| "token_acc": 0.8719900238096734, |
| "train_speed(iter/s)": 0.06883 |
| }, |
| { |
| "epoch": 0.977751756440281, |
| "grad_norm": 0.21488763391971588, |
| "learning_rate": 1.6124682381037767e-05, |
| "loss": 0.3640845537185669, |
| "memory(GiB)": 133.05, |
| "step": 835, |
| "token_acc": 0.8693016352169747, |
| "train_speed(iter/s)": 0.068834 |
| }, |
| { |
| "epoch": 0.9836065573770492, |
| "grad_norm": 0.22521890699863434, |
| "learning_rate": 1.607351904538792e-05, |
| "loss": 0.3786426782608032, |
| "memory(GiB)": 133.05, |
| "step": 840, |
| "token_acc": 0.86982781737791, |
| "train_speed(iter/s)": 0.068827 |
| }, |
| { |
| "epoch": 0.9894613583138173, |
| "grad_norm": 0.2129945307970047, |
| "learning_rate": 1.6022102550123775e-05, |
| "loss": 0.365330171585083, |
| "memory(GiB)": 133.05, |
| "step": 845, |
| "token_acc": 0.864430874708757, |
| "train_speed(iter/s)": 0.06883 |
| }, |
| { |
| "epoch": 0.9953161592505855, |
| "grad_norm": 0.216830313205719, |
| "learning_rate": 1.597043503841471e-05, |
| "loss": 0.3653510093688965, |
| "memory(GiB)": 133.05, |
| "step": 850, |
| "token_acc": 0.878798859209881, |
| "train_speed(iter/s)": 0.068829 |
| }, |
| { |
| "epoch": 1.0011709601873535, |
| "grad_norm": 0.2833782732486725, |
| "learning_rate": 1.5918518663893124e-05, |
| "loss": 0.35915145874023435, |
| "memory(GiB)": 133.05, |
| "step": 855, |
| "token_acc": 0.873855282676776, |
| "train_speed(iter/s)": 0.068529 |
| }, |
| { |
| "epoch": 1.0070257611241218, |
| "grad_norm": 0.24765369296073914, |
| "learning_rate": 1.5866355590564637e-05, |
| "loss": 0.3397256851196289, |
| "memory(GiB)": 133.05, |
| "step": 860, |
| "token_acc": 0.8892689705247213, |
| "train_speed(iter/s)": 0.068517 |
| }, |
| { |
| "epoch": 1.0128805620608898, |
| "grad_norm": 0.2325168401002884, |
| "learning_rate": 1.5813947992717894e-05, |
| "loss": 0.327287483215332, |
| "memory(GiB)": 133.05, |
| "step": 865, |
| "token_acc": 0.8796502265193716, |
| "train_speed(iter/s)": 0.068508 |
| }, |
| { |
| "epoch": 1.018735362997658, |
| "grad_norm": 0.2461637407541275, |
| "learning_rate": 1.5761298054833947e-05, |
| "loss": 0.3370250701904297, |
| "memory(GiB)": 133.05, |
| "step": 870, |
| "token_acc": 0.8818223536926445, |
| "train_speed(iter/s)": 0.068498 |
| }, |
| { |
| "epoch": 1.0245901639344261, |
| "grad_norm": 0.22223389148712158, |
| "learning_rate": 1.5708407971495195e-05, |
| "loss": 0.3431839942932129, |
| "memory(GiB)": 133.05, |
| "step": 875, |
| "token_acc": 0.8771204606261637, |
| "train_speed(iter/s)": 0.068498 |
| }, |
| { |
| "epoch": 1.0304449648711944, |
| "grad_norm": 0.22983962297439575, |
| "learning_rate": 1.565527994729389e-05, |
| "loss": 0.333197808265686, |
| "memory(GiB)": 133.05, |
| "step": 880, |
| "token_acc": 0.8869119581976505, |
| "train_speed(iter/s)": 0.068497 |
| }, |
| { |
| "epoch": 1.0362997658079625, |
| "grad_norm": 0.21161960065364838, |
| "learning_rate": 1.5601916196740283e-05, |
| "loss": 0.32940354347229006, |
| "memory(GiB)": 133.05, |
| "step": 885, |
| "token_acc": 0.8834938944853924, |
| "train_speed(iter/s)": 0.068496 |
| }, |
| { |
| "epoch": 1.0421545667447307, |
| "grad_norm": 0.22903162240982056, |
| "learning_rate": 1.5548318944170276e-05, |
| "loss": 0.3256603956222534, |
| "memory(GiB)": 133.05, |
| "step": 890, |
| "token_acc": 0.8883952211008513, |
| "train_speed(iter/s)": 0.068494 |
| }, |
| { |
| "epoch": 1.0480093676814988, |
| "grad_norm": 0.21301260590553284, |
| "learning_rate": 1.5494490423652732e-05, |
| "loss": 0.3253190040588379, |
| "memory(GiB)": 133.05, |
| "step": 895, |
| "token_acc": 0.8813899275623074, |
| "train_speed(iter/s)": 0.068483 |
| }, |
| { |
| "epoch": 1.053864168618267, |
| "grad_norm": 0.2047208845615387, |
| "learning_rate": 1.544043287889635e-05, |
| "loss": 0.31666491031646726, |
| "memory(GiB)": 133.05, |
| "step": 900, |
| "token_acc": 0.8909019236833806, |
| "train_speed(iter/s)": 0.068481 |
| }, |
| { |
| "epoch": 1.059718969555035, |
| "grad_norm": 0.23390096426010132, |
| "learning_rate": 1.538614856315614e-05, |
| "loss": 0.330989408493042, |
| "memory(GiB)": 133.05, |
| "step": 905, |
| "token_acc": 0.8884555161039297, |
| "train_speed(iter/s)": 0.068482 |
| }, |
| { |
| "epoch": 1.0655737704918034, |
| "grad_norm": 0.20488137006759644, |
| "learning_rate": 1.5331639739139477e-05, |
| "loss": 0.3256430149078369, |
| "memory(GiB)": 133.05, |
| "step": 910, |
| "token_acc": 0.8721090848001792, |
| "train_speed(iter/s)": 0.068473 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.21736453473567963, |
| "learning_rate": 1.5276908678911837e-05, |
| "loss": 0.3228193521499634, |
| "memory(GiB)": 133.05, |
| "step": 915, |
| "token_acc": 0.8874282476871164, |
| "train_speed(iter/s)": 0.068469 |
| }, |
| { |
| "epoch": 1.0772833723653397, |
| "grad_norm": 0.206723153591156, |
| "learning_rate": 1.5221957663802043e-05, |
| "loss": 0.3333425521850586, |
| "memory(GiB)": 133.17, |
| "step": 920, |
| "token_acc": 0.886105330059943, |
| "train_speed(iter/s)": 0.068454 |
| }, |
| { |
| "epoch": 1.0831381733021077, |
| "grad_norm": 0.203144371509552, |
| "learning_rate": 1.5166788984307204e-05, |
| "loss": 0.33838639259338377, |
| "memory(GiB)": 133.17, |
| "step": 925, |
| "token_acc": 0.8802329092899476, |
| "train_speed(iter/s)": 0.068444 |
| }, |
| { |
| "epoch": 1.088992974238876, |
| "grad_norm": 0.24915394186973572, |
| "learning_rate": 1.5111404939997227e-05, |
| "loss": 0.33564419746398927, |
| "memory(GiB)": 133.17, |
| "step": 930, |
| "token_acc": 0.8793440099130728, |
| "train_speed(iter/s)": 0.068442 |
| }, |
| { |
| "epoch": 1.094847775175644, |
| "grad_norm": 0.2503604292869568, |
| "learning_rate": 1.5055807839418966e-05, |
| "loss": 0.3157151460647583, |
| "memory(GiB)": 133.17, |
| "step": 935, |
| "token_acc": 0.8862683405108546, |
| "train_speed(iter/s)": 0.068436 |
| }, |
| { |
| "epoch": 1.100702576112412, |
| "grad_norm": 0.20239044725894928, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.3377982139587402, |
| "memory(GiB)": 133.17, |
| "step": 940, |
| "token_acc": 0.8886980901742478, |
| "train_speed(iter/s)": 0.068436 |
| }, |
| { |
| "epoch": 1.1065573770491803, |
| "grad_norm": 0.20267418026924133, |
| "learning_rate": 1.494398374795204e-05, |
| "loss": 0.3253162145614624, |
| "memory(GiB)": 133.17, |
| "step": 945, |
| "token_acc": 0.8780125495417973, |
| "train_speed(iter/s)": 0.068437 |
| }, |
| { |
| "epoch": 1.1124121779859484, |
| "grad_norm": 0.2210346758365631, |
| "learning_rate": 1.4887761418173947e-05, |
| "loss": 0.3438437461853027, |
| "memory(GiB)": 133.17, |
| "step": 950, |
| "token_acc": 0.8874266802316089, |
| "train_speed(iter/s)": 0.068436 |
| }, |
| { |
| "epoch": 1.1182669789227166, |
| "grad_norm": 0.206399604678154, |
| "learning_rate": 1.4831335354154444e-05, |
| "loss": 0.3289347648620605, |
| "memory(GiB)": 133.17, |
| "step": 955, |
| "token_acc": 0.8831363419858116, |
| "train_speed(iter/s)": 0.068436 |
| }, |
| { |
| "epoch": 1.1241217798594847, |
| "grad_norm": 0.21163643896579742, |
| "learning_rate": 1.4774707907874392e-05, |
| "loss": 0.32750353813171384, |
| "memory(GiB)": 133.17, |
| "step": 960, |
| "token_acc": 0.8880904228882937, |
| "train_speed(iter/s)": 0.068427 |
| }, |
| { |
| "epoch": 1.129976580796253, |
| "grad_norm": 0.20707455277442932, |
| "learning_rate": 1.4717881439708786e-05, |
| "loss": 0.3284764289855957, |
| "memory(GiB)": 133.17, |
| "step": 965, |
| "token_acc": 0.8722379691636817, |
| "train_speed(iter/s)": 0.068425 |
| }, |
| { |
| "epoch": 1.135831381733021, |
| "grad_norm": 0.2046642154455185, |
| "learning_rate": 1.4660858318328348e-05, |
| "loss": 0.3317260265350342, |
| "memory(GiB)": 133.17, |
| "step": 970, |
| "token_acc": 0.8710549063749603, |
| "train_speed(iter/s)": 0.068419 |
| }, |
| { |
| "epoch": 1.1416861826697893, |
| "grad_norm": 0.20032472908496857, |
| "learning_rate": 1.4603640920600813e-05, |
| "loss": 0.33744547367095945, |
| "memory(GiB)": 133.17, |
| "step": 975, |
| "token_acc": 0.8676646558084457, |
| "train_speed(iter/s)": 0.068416 |
| }, |
| { |
| "epoch": 1.1475409836065573, |
| "grad_norm": 0.20992988348007202, |
| "learning_rate": 1.4546231631491827e-05, |
| "loss": 0.3331944704055786, |
| "memory(GiB)": 133.17, |
| "step": 980, |
| "token_acc": 0.8770167266237555, |
| "train_speed(iter/s)": 0.068406 |
| }, |
| { |
| "epoch": 1.1533957845433256, |
| "grad_norm": 0.2045455127954483, |
| "learning_rate": 1.4488632843965573e-05, |
| "loss": 0.32609896659851073, |
| "memory(GiB)": 133.17, |
| "step": 985, |
| "token_acc": 0.8671518193224592, |
| "train_speed(iter/s)": 0.068401 |
| }, |
| { |
| "epoch": 1.1592505854800936, |
| "grad_norm": 0.21106521785259247, |
| "learning_rate": 1.4430846958884995e-05, |
| "loss": 0.3347620010375977, |
| "memory(GiB)": 133.17, |
| "step": 990, |
| "token_acc": 0.8760981150071534, |
| "train_speed(iter/s)": 0.068396 |
| }, |
| { |
| "epoch": 1.165105386416862, |
| "grad_norm": 0.2021251767873764, |
| "learning_rate": 1.4372876384911741e-05, |
| "loss": 0.33538064956665037, |
| "memory(GiB)": 133.17, |
| "step": 995, |
| "token_acc": 0.8768674285536101, |
| "train_speed(iter/s)": 0.068392 |
| }, |
| { |
| "epoch": 1.17096018735363, |
| "grad_norm": 0.22672772407531738, |
| "learning_rate": 1.4314723538405752e-05, |
| "loss": 0.3422734260559082, |
| "memory(GiB)": 133.17, |
| "step": 1000, |
| "token_acc": 0.8671538988967151, |
| "train_speed(iter/s)": 0.068389 |
| }, |
| { |
| "epoch": 1.1768149882903982, |
| "grad_norm": 0.2139746993780136, |
| "learning_rate": 1.4256390843324556e-05, |
| "loss": 0.3371597766876221, |
| "memory(GiB)": 133.17, |
| "step": 1005, |
| "token_acc": 0.8732182530767119, |
| "train_speed(iter/s)": 0.068388 |
| }, |
| { |
| "epoch": 1.1826697892271663, |
| "grad_norm": 0.21347731351852417, |
| "learning_rate": 1.4197880731122221e-05, |
| "loss": 0.3339057922363281, |
| "memory(GiB)": 133.17, |
| "step": 1010, |
| "token_acc": 0.8729292778317514, |
| "train_speed(iter/s)": 0.06839 |
| }, |
| { |
| "epoch": 1.1885245901639343, |
| "grad_norm": 0.21436652541160583, |
| "learning_rate": 1.4139195640648008e-05, |
| "loss": 0.3371711730957031, |
| "memory(GiB)": 133.17, |
| "step": 1015, |
| "token_acc": 0.8857815368682034, |
| "train_speed(iter/s)": 0.068385 |
| }, |
| { |
| "epoch": 1.1943793911007026, |
| "grad_norm": 0.21145156025886536, |
| "learning_rate": 1.4080338018044712e-05, |
| "loss": 0.3415823459625244, |
| "memory(GiB)": 133.17, |
| "step": 1020, |
| "token_acc": 0.8745781005321704, |
| "train_speed(iter/s)": 0.068382 |
| }, |
| { |
| "epoch": 1.2002341920374708, |
| "grad_norm": 0.2704923748970032, |
| "learning_rate": 1.4021310316646708e-05, |
| "loss": 0.33098018169403076, |
| "memory(GiB)": 133.17, |
| "step": 1025, |
| "token_acc": 0.8810291608110821, |
| "train_speed(iter/s)": 0.06838 |
| }, |
| { |
| "epoch": 1.2060889929742389, |
| "grad_norm": 0.20703041553497314, |
| "learning_rate": 1.3962114996877685e-05, |
| "loss": 0.3177175045013428, |
| "memory(GiB)": 133.17, |
| "step": 1030, |
| "token_acc": 0.8884392410781509, |
| "train_speed(iter/s)": 0.068376 |
| }, |
| { |
| "epoch": 1.211943793911007, |
| "grad_norm": 0.20425967872142792, |
| "learning_rate": 1.390275452614808e-05, |
| "loss": 0.3208155155181885, |
| "memory(GiB)": 133.17, |
| "step": 1035, |
| "token_acc": 0.8798795706976164, |
| "train_speed(iter/s)": 0.068375 |
| }, |
| { |
| "epoch": 1.2177985948477752, |
| "grad_norm": 0.2199791669845581, |
| "learning_rate": 1.3843231378752252e-05, |
| "loss": 0.32726430892944336, |
| "memory(GiB)": 133.17, |
| "step": 1040, |
| "token_acc": 0.8785451315143307, |
| "train_speed(iter/s)": 0.068367 |
| }, |
| { |
| "epoch": 1.2236533957845432, |
| "grad_norm": 0.22237712144851685, |
| "learning_rate": 1.3783548035765327e-05, |
| "loss": 0.33181195259094237, |
| "memory(GiB)": 133.17, |
| "step": 1045, |
| "token_acc": 0.8834801207851032, |
| "train_speed(iter/s)": 0.068368 |
| }, |
| { |
| "epoch": 1.2295081967213115, |
| "grad_norm": 0.20910513401031494, |
| "learning_rate": 1.3723706984939783e-05, |
| "loss": 0.3189753532409668, |
| "memory(GiB)": 133.17, |
| "step": 1050, |
| "token_acc": 0.8769508605389209, |
| "train_speed(iter/s)": 0.068364 |
| }, |
| { |
| "epoch": 1.2353629976580796, |
| "grad_norm": 0.20491260290145874, |
| "learning_rate": 1.366371072060177e-05, |
| "loss": 0.33074491024017333, |
| "memory(GiB)": 133.17, |
| "step": 1055, |
| "token_acc": 0.8681569771445384, |
| "train_speed(iter/s)": 0.068361 |
| }, |
| { |
| "epoch": 1.2412177985948478, |
| "grad_norm": 0.1918231099843979, |
| "learning_rate": 1.3603561743547125e-05, |
| "loss": 0.3256643772125244, |
| "memory(GiB)": 133.17, |
| "step": 1060, |
| "token_acc": 0.8732954670333983, |
| "train_speed(iter/s)": 0.068363 |
| }, |
| { |
| "epoch": 1.2470725995316159, |
| "grad_norm": 0.21773004531860352, |
| "learning_rate": 1.3543262560937135e-05, |
| "loss": 0.33045885562896726, |
| "memory(GiB)": 133.17, |
| "step": 1065, |
| "token_acc": 0.8785313558157261, |
| "train_speed(iter/s)": 0.068363 |
| }, |
| { |
| "epoch": 1.2529274004683841, |
| "grad_norm": 0.21782302856445312, |
| "learning_rate": 1.3482815686194033e-05, |
| "loss": 0.3164831161499023, |
| "memory(GiB)": 133.17, |
| "step": 1070, |
| "token_acc": 0.8841838807462733, |
| "train_speed(iter/s)": 0.068363 |
| }, |
| { |
| "epoch": 1.2587822014051522, |
| "grad_norm": 0.21324488520622253, |
| "learning_rate": 1.3422223638896235e-05, |
| "loss": 0.32593531608581544, |
| "memory(GiB)": 133.17, |
| "step": 1075, |
| "token_acc": 0.8798167525312546, |
| "train_speed(iter/s)": 0.068363 |
| }, |
| { |
| "epoch": 1.2646370023419204, |
| "grad_norm": 0.22865289449691772, |
| "learning_rate": 1.3361488944673315e-05, |
| "loss": 0.3352835178375244, |
| "memory(GiB)": 133.17, |
| "step": 1080, |
| "token_acc": 0.8729886330661392, |
| "train_speed(iter/s)": 0.068362 |
| }, |
| { |
| "epoch": 1.2704918032786885, |
| "grad_norm": 0.20328956842422485, |
| "learning_rate": 1.3300614135100736e-05, |
| "loss": 0.332173490524292, |
| "memory(GiB)": 133.17, |
| "step": 1085, |
| "token_acc": 0.8806762689525037, |
| "train_speed(iter/s)": 0.068357 |
| }, |
| { |
| "epoch": 1.2763466042154565, |
| "grad_norm": 0.19926570355892181, |
| "learning_rate": 1.3239601747594319e-05, |
| "loss": 0.331054162979126, |
| "memory(GiB)": 133.17, |
| "step": 1090, |
| "token_acc": 0.8812650906933006, |
| "train_speed(iter/s)": 0.068351 |
| }, |
| { |
| "epoch": 1.2822014051522248, |
| "grad_norm": 0.19676311314105988, |
| "learning_rate": 1.3178454325304472e-05, |
| "loss": 0.33361315727233887, |
| "memory(GiB)": 133.17, |
| "step": 1095, |
| "token_acc": 0.8700881415265362, |
| "train_speed(iter/s)": 0.068351 |
| }, |
| { |
| "epoch": 1.288056206088993, |
| "grad_norm": 0.20788326859474182, |
| "learning_rate": 1.3117174417010213e-05, |
| "loss": 0.31841249465942384, |
| "memory(GiB)": 133.17, |
| "step": 1100, |
| "token_acc": 0.8749374970517477, |
| "train_speed(iter/s)": 0.06835 |
| }, |
| { |
| "epoch": 1.2939110070257611, |
| "grad_norm": 0.21633991599082947, |
| "learning_rate": 1.3055764577012892e-05, |
| "loss": 0.34844322204589845, |
| "memory(GiB)": 133.17, |
| "step": 1105, |
| "token_acc": 0.8857762459338606, |
| "train_speed(iter/s)": 0.068351 |
| }, |
| { |
| "epoch": 1.2997658079625292, |
| "grad_norm": 0.2159479707479477, |
| "learning_rate": 1.2994227365029752e-05, |
| "loss": 0.32929096221923826, |
| "memory(GiB)": 133.17, |
| "step": 1110, |
| "token_acc": 0.8831624401350396, |
| "train_speed(iter/s)": 0.06835 |
| }, |
| { |
| "epoch": 1.3056206088992974, |
| "grad_norm": 0.21510519087314606, |
| "learning_rate": 1.2932565346087218e-05, |
| "loss": 0.33609514236450194, |
| "memory(GiB)": 133.17, |
| "step": 1115, |
| "token_acc": 0.8789613142554319, |
| "train_speed(iter/s)": 0.068346 |
| }, |
| { |
| "epoch": 1.3114754098360657, |
| "grad_norm": 0.19823956489562988, |
| "learning_rate": 1.2870781090413991e-05, |
| "loss": 0.3340220212936401, |
| "memory(GiB)": 133.17, |
| "step": 1120, |
| "token_acc": 0.8802133820301311, |
| "train_speed(iter/s)": 0.068343 |
| }, |
| { |
| "epoch": 1.3173302107728337, |
| "grad_norm": 0.19969677925109863, |
| "learning_rate": 1.2808877173333896e-05, |
| "loss": 0.32896521091461184, |
| "memory(GiB)": 133.17, |
| "step": 1125, |
| "token_acc": 0.8884312591176619, |
| "train_speed(iter/s)": 0.068342 |
| }, |
| { |
| "epoch": 1.3231850117096018, |
| "grad_norm": 0.19414611160755157, |
| "learning_rate": 1.2746856175158556e-05, |
| "loss": 0.33699817657470704, |
| "memory(GiB)": 133.17, |
| "step": 1130, |
| "token_acc": 0.8808933080116763, |
| "train_speed(iter/s)": 0.068346 |
| }, |
| { |
| "epoch": 1.32903981264637, |
| "grad_norm": 0.20659878849983215, |
| "learning_rate": 1.2684720681079825e-05, |
| "loss": 0.33256163597106936, |
| "memory(GiB)": 133.17, |
| "step": 1135, |
| "token_acc": 0.8659905808672699, |
| "train_speed(iter/s)": 0.068345 |
| }, |
| { |
| "epoch": 1.334894613583138, |
| "grad_norm": 0.21766500174999237, |
| "learning_rate": 1.2622473281062042e-05, |
| "loss": 0.3360875129699707, |
| "memory(GiB)": 133.17, |
| "step": 1140, |
| "token_acc": 0.8805351128851191, |
| "train_speed(iter/s)": 0.068346 |
| }, |
| { |
| "epoch": 1.3407494145199064, |
| "grad_norm": 0.21836382150650024, |
| "learning_rate": 1.256011656973406e-05, |
| "loss": 0.3428370952606201, |
| "memory(GiB)": 133.17, |
| "step": 1145, |
| "token_acc": 0.882268280446507, |
| "train_speed(iter/s)": 0.068346 |
| }, |
| { |
| "epoch": 1.3466042154566744, |
| "grad_norm": 0.21305552124977112, |
| "learning_rate": 1.2497653146281113e-05, |
| "loss": 0.3323945999145508, |
| "memory(GiB)": 133.17, |
| "step": 1150, |
| "token_acc": 0.8799263041729795, |
| "train_speed(iter/s)": 0.06834 |
| }, |
| { |
| "epoch": 1.3524590163934427, |
| "grad_norm": 0.2115429788827896, |
| "learning_rate": 1.2435085614336459e-05, |
| "loss": 0.33839111328125, |
| "memory(GiB)": 133.17, |
| "step": 1155, |
| "token_acc": 0.8877846609149278, |
| "train_speed(iter/s)": 0.068336 |
| }, |
| { |
| "epoch": 1.3583138173302107, |
| "grad_norm": 0.20214448869228363, |
| "learning_rate": 1.2372416581872857e-05, |
| "loss": 0.3267178773880005, |
| "memory(GiB)": 133.17, |
| "step": 1160, |
| "token_acc": 0.8858182364221651, |
| "train_speed(iter/s)": 0.068334 |
| }, |
| { |
| "epoch": 1.364168618266979, |
| "grad_norm": 0.19922491908073425, |
| "learning_rate": 1.2309648661093878e-05, |
| "loss": 0.33157687187194823, |
| "memory(GiB)": 133.17, |
| "step": 1165, |
| "token_acc": 0.8862414604099004, |
| "train_speed(iter/s)": 0.068333 |
| }, |
| { |
| "epoch": 1.370023419203747, |
| "grad_norm": 0.20893344283103943, |
| "learning_rate": 1.2246784468324993e-05, |
| "loss": 0.3382421016693115, |
| "memory(GiB)": 133.17, |
| "step": 1170, |
| "token_acc": 0.8688796266876001, |
| "train_speed(iter/s)": 0.068327 |
| }, |
| { |
| "epoch": 1.3758782201405153, |
| "grad_norm": 0.219789519906044, |
| "learning_rate": 1.218382662390454e-05, |
| "loss": 0.3261989116668701, |
| "memory(GiB)": 133.17, |
| "step": 1175, |
| "token_acc": 0.8682563507122426, |
| "train_speed(iter/s)": 0.068328 |
| }, |
| { |
| "epoch": 1.3817330210772834, |
| "grad_norm": 0.2007785141468048, |
| "learning_rate": 1.2120777752074492e-05, |
| "loss": 0.33451414108276367, |
| "memory(GiB)": 133.17, |
| "step": 1180, |
| "token_acc": 0.8779171167786075, |
| "train_speed(iter/s)": 0.068325 |
| }, |
| { |
| "epoch": 1.3875878220140514, |
| "grad_norm": 0.20650921761989594, |
| "learning_rate": 1.2057640480871084e-05, |
| "loss": 0.33679168224334716, |
| "memory(GiB)": 133.17, |
| "step": 1185, |
| "token_acc": 0.8786453140578265, |
| "train_speed(iter/s)": 0.068325 |
| }, |
| { |
| "epoch": 1.3934426229508197, |
| "grad_norm": 0.20114493370056152, |
| "learning_rate": 1.1994417442015243e-05, |
| "loss": 0.33562412261962893, |
| "memory(GiB)": 133.17, |
| "step": 1190, |
| "token_acc": 0.8844727744979327, |
| "train_speed(iter/s)": 0.068325 |
| }, |
| { |
| "epoch": 1.399297423887588, |
| "grad_norm": 0.19498831033706665, |
| "learning_rate": 1.193111127080292e-05, |
| "loss": 0.3253043174743652, |
| "memory(GiB)": 133.17, |
| "step": 1195, |
| "token_acc": 0.8870853046866852, |
| "train_speed(iter/s)": 0.068326 |
| }, |
| { |
| "epoch": 1.405152224824356, |
| "grad_norm": 0.1827043890953064, |
| "learning_rate": 1.186772460599523e-05, |
| "loss": 0.3244746685028076, |
| "memory(GiB)": 133.17, |
| "step": 1200, |
| "token_acc": 0.8863151296717072, |
| "train_speed(iter/s)": 0.068323 |
| }, |
| { |
| "epoch": 1.411007025761124, |
| "grad_norm": 0.21396119892597198, |
| "learning_rate": 1.1804260089708464e-05, |
| "loss": 0.3355713367462158, |
| "memory(GiB)": 133.17, |
| "step": 1205, |
| "token_acc": 0.8714279485774079, |
| "train_speed(iter/s)": 0.068317 |
| }, |
| { |
| "epoch": 1.4168618266978923, |
| "grad_norm": 0.20849740505218506, |
| "learning_rate": 1.1740720367303958e-05, |
| "loss": 0.3293231725692749, |
| "memory(GiB)": 133.17, |
| "step": 1210, |
| "token_acc": 0.8799478293040041, |
| "train_speed(iter/s)": 0.068316 |
| }, |
| { |
| "epoch": 1.4227166276346606, |
| "grad_norm": 0.19985808432102203, |
| "learning_rate": 1.1677108087277835e-05, |
| "loss": 0.33586926460266114, |
| "memory(GiB)": 133.17, |
| "step": 1215, |
| "token_acc": 0.8803578911815663, |
| "train_speed(iter/s)": 0.068314 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.2120925784111023, |
| "learning_rate": 1.1613425901150595e-05, |
| "loss": 0.335320782661438, |
| "memory(GiB)": 133.17, |
| "step": 1220, |
| "token_acc": 0.8822237863291518, |
| "train_speed(iter/s)": 0.068311 |
| }, |
| { |
| "epoch": 1.4344262295081966, |
| "grad_norm": 0.20144475996494293, |
| "learning_rate": 1.15496764633566e-05, |
| "loss": 0.34459710121154785, |
| "memory(GiB)": 133.17, |
| "step": 1225, |
| "token_acc": 0.8714527101578114, |
| "train_speed(iter/s)": 0.068308 |
| }, |
| { |
| "epoch": 1.440281030444965, |
| "grad_norm": 0.1978883147239685, |
| "learning_rate": 1.1485862431133445e-05, |
| "loss": 0.334246826171875, |
| "memory(GiB)": 133.17, |
| "step": 1230, |
| "token_acc": 0.8850997230525071, |
| "train_speed(iter/s)": 0.068306 |
| }, |
| { |
| "epoch": 1.446135831381733, |
| "grad_norm": 0.20052959024906158, |
| "learning_rate": 1.1421986464411169e-05, |
| "loss": 0.33509197235107424, |
| "memory(GiB)": 133.17, |
| "step": 1235, |
| "token_acc": 0.8704968021392047, |
| "train_speed(iter/s)": 0.068299 |
| }, |
| { |
| "epoch": 1.4519906323185012, |
| "grad_norm": 0.19154983758926392, |
| "learning_rate": 1.1358051225701404e-05, |
| "loss": 0.32514162063598634, |
| "memory(GiB)": 133.17, |
| "step": 1240, |
| "token_acc": 0.8735484752584716, |
| "train_speed(iter/s)": 0.068296 |
| }, |
| { |
| "epoch": 1.4578454332552693, |
| "grad_norm": 0.20475593209266663, |
| "learning_rate": 1.1294059379986384e-05, |
| "loss": 0.33394522666931153, |
| "memory(GiB)": 133.17, |
| "step": 1245, |
| "token_acc": 0.8737611977698427, |
| "train_speed(iter/s)": 0.068289 |
| }, |
| { |
| "epoch": 1.4637002341920375, |
| "grad_norm": 0.20034635066986084, |
| "learning_rate": 1.1230013594607874e-05, |
| "loss": 0.33555524349212645, |
| "memory(GiB)": 133.17, |
| "step": 1250, |
| "token_acc": 0.8783339011605555, |
| "train_speed(iter/s)": 0.068288 |
| }, |
| { |
| "epoch": 1.4695550351288056, |
| "grad_norm": 0.206059530377388, |
| "learning_rate": 1.1165916539155968e-05, |
| "loss": 0.33289051055908203, |
| "memory(GiB)": 133.17, |
| "step": 1255, |
| "token_acc": 0.8821623108149916, |
| "train_speed(iter/s)": 0.068291 |
| }, |
| { |
| "epoch": 1.4754098360655736, |
| "grad_norm": 0.1955031454563141, |
| "learning_rate": 1.1101770885357843e-05, |
| "loss": 0.3284996509552002, |
| "memory(GiB)": 133.17, |
| "step": 1260, |
| "token_acc": 0.8824508468283658, |
| "train_speed(iter/s)": 0.068292 |
| }, |
| { |
| "epoch": 1.481264637002342, |
| "grad_norm": 0.18819548189640045, |
| "learning_rate": 1.1037579306966365e-05, |
| "loss": 0.32820711135864256, |
| "memory(GiB)": 133.17, |
| "step": 1265, |
| "token_acc": 0.8902697768320305, |
| "train_speed(iter/s)": 0.068288 |
| }, |
| { |
| "epoch": 1.4871194379391102, |
| "grad_norm": 0.20186524093151093, |
| "learning_rate": 1.0973344479648652e-05, |
| "loss": 0.3230982065200806, |
| "memory(GiB)": 133.17, |
| "step": 1270, |
| "token_acc": 0.8823814255348585, |
| "train_speed(iter/s)": 0.068286 |
| }, |
| { |
| "epoch": 1.4929742388758782, |
| "grad_norm": 0.19547297060489655, |
| "learning_rate": 1.0909069080874556e-05, |
| "loss": 0.3249845027923584, |
| "memory(GiB)": 133.17, |
| "step": 1275, |
| "token_acc": 0.8751095158692027, |
| "train_speed(iter/s)": 0.068285 |
| }, |
| { |
| "epoch": 1.4988290398126463, |
| "grad_norm": 0.21490275859832764, |
| "learning_rate": 1.0844755789805042e-05, |
| "loss": 0.3330803394317627, |
| "memory(GiB)": 133.17, |
| "step": 1280, |
| "token_acc": 0.8725501507719461, |
| "train_speed(iter/s)": 0.068283 |
| }, |
| { |
| "epoch": 1.5046838407494145, |
| "grad_norm": 0.21036967635154724, |
| "learning_rate": 1.0780407287180526e-05, |
| "loss": 0.33710570335388185, |
| "memory(GiB)": 133.17, |
| "step": 1285, |
| "token_acc": 0.8735995618184534, |
| "train_speed(iter/s)": 0.068276 |
| }, |
| { |
| "epoch": 1.5105386416861828, |
| "grad_norm": 0.21496160328388214, |
| "learning_rate": 1.0716026255209124e-05, |
| "loss": 0.3322149276733398, |
| "memory(GiB)": 133.17, |
| "step": 1290, |
| "token_acc": 0.8727818581461427, |
| "train_speed(iter/s)": 0.068276 |
| }, |
| { |
| "epoch": 1.5163934426229508, |
| "grad_norm": 0.19405636191368103, |
| "learning_rate": 1.0651615377454872e-05, |
| "loss": 0.33303227424621584, |
| "memory(GiB)": 133.17, |
| "step": 1295, |
| "token_acc": 0.8809517074473936, |
| "train_speed(iter/s)": 0.068274 |
| }, |
| { |
| "epoch": 1.5222482435597189, |
| "grad_norm": 0.20200887322425842, |
| "learning_rate": 1.0587177338725834e-05, |
| "loss": 0.3389185905456543, |
| "memory(GiB)": 133.17, |
| "step": 1300, |
| "token_acc": 0.8810081420102018, |
| "train_speed(iter/s)": 0.068274 |
| }, |
| { |
| "epoch": 1.5281030444964872, |
| "grad_norm": 0.19218453764915466, |
| "learning_rate": 1.0522714824962228e-05, |
| "loss": 0.32448182106018064, |
| "memory(GiB)": 133.17, |
| "step": 1305, |
| "token_acc": 0.8922085069580942, |
| "train_speed(iter/s)": 0.068274 |
| }, |
| { |
| "epoch": 1.5339578454332554, |
| "grad_norm": 0.2063508927822113, |
| "learning_rate": 1.0458230523124443e-05, |
| "loss": 0.3380331039428711, |
| "memory(GiB)": 133.17, |
| "step": 1310, |
| "token_acc": 0.8834363870742206, |
| "train_speed(iter/s)": 0.06827 |
| }, |
| { |
| "epoch": 1.5398126463700235, |
| "grad_norm": 0.20604784786701202, |
| "learning_rate": 1.0393727121081057e-05, |
| "loss": 0.33421056270599364, |
| "memory(GiB)": 133.17, |
| "step": 1315, |
| "token_acc": 0.8805816011032537, |
| "train_speed(iter/s)": 0.068273 |
| }, |
| { |
| "epoch": 1.5456674473067915, |
| "grad_norm": 0.1895345002412796, |
| "learning_rate": 1.0329207307496785e-05, |
| "loss": 0.3230136394500732, |
| "memory(GiB)": 133.17, |
| "step": 1320, |
| "token_acc": 0.8821661202321777, |
| "train_speed(iter/s)": 0.068273 |
| }, |
| { |
| "epoch": 1.5515222482435598, |
| "grad_norm": 0.20009098947048187, |
| "learning_rate": 1.0264673771720429e-05, |
| "loss": 0.331970739364624, |
| "memory(GiB)": 133.17, |
| "step": 1325, |
| "token_acc": 0.8856471632036539, |
| "train_speed(iter/s)": 0.06827 |
| }, |
| { |
| "epoch": 1.5573770491803278, |
| "grad_norm": 0.19756639003753662, |
| "learning_rate": 1.0200129203672754e-05, |
| "loss": 0.33203625679016113, |
| "memory(GiB)": 133.17, |
| "step": 1330, |
| "token_acc": 0.8719384623094173, |
| "train_speed(iter/s)": 0.068266 |
| }, |
| { |
| "epoch": 1.5632318501170959, |
| "grad_norm": 0.20041348040103912, |
| "learning_rate": 1.0135576293734381e-05, |
| "loss": 0.3236687660217285, |
| "memory(GiB)": 133.17, |
| "step": 1335, |
| "token_acc": 0.8890052192879956, |
| "train_speed(iter/s)": 0.068267 |
| }, |
| { |
| "epoch": 1.5690866510538641, |
| "grad_norm": 0.2091531604528427, |
| "learning_rate": 1.007101773263365e-05, |
| "loss": 0.3356754302978516, |
| "memory(GiB)": 133.17, |
| "step": 1340, |
| "token_acc": 0.881420303456906, |
| "train_speed(iter/s)": 0.068267 |
| }, |
| { |
| "epoch": 1.5749414519906324, |
| "grad_norm": 0.18961018323898315, |
| "learning_rate": 1.0006456211334445e-05, |
| "loss": 0.32959842681884766, |
| "memory(GiB)": 133.17, |
| "step": 1345, |
| "token_acc": 0.881056978636539, |
| "train_speed(iter/s)": 0.068269 |
| }, |
| { |
| "epoch": 1.5807962529274004, |
| "grad_norm": 0.18674606084823608, |
| "learning_rate": 9.941894420924044e-06, |
| "loss": 0.3274309396743774, |
| "memory(GiB)": 133.17, |
| "step": 1350, |
| "token_acc": 0.8911319303466276, |
| "train_speed(iter/s)": 0.068266 |
| }, |
| { |
| "epoch": 1.5866510538641685, |
| "grad_norm": 0.19703362882137299, |
| "learning_rate": 9.87733505250094e-06, |
| "loss": 0.33193011283874513, |
| "memory(GiB)": 133.17, |
| "step": 1355, |
| "token_acc": 0.880100249375002, |
| "train_speed(iter/s)": 0.068262 |
| }, |
| { |
| "epoch": 1.5925058548009368, |
| "grad_norm": 0.1925787329673767, |
| "learning_rate": 9.812780797062678e-06, |
| "loss": 0.328415060043335, |
| "memory(GiB)": 133.17, |
| "step": 1360, |
| "token_acc": 0.8847896196463753, |
| "train_speed(iter/s)": 0.068258 |
| }, |
| { |
| "epoch": 1.598360655737705, |
| "grad_norm": 0.19211165606975555, |
| "learning_rate": 9.748234345393672e-06, |
| "loss": 0.32412943840026853, |
| "memory(GiB)": 133.17, |
| "step": 1365, |
| "token_acc": 0.8819075272921836, |
| "train_speed(iter/s)": 0.068253 |
| }, |
| { |
| "epoch": 1.604215456674473, |
| "grad_norm": 0.19750450551509857, |
| "learning_rate": 9.68369838795306e-06, |
| "loss": 0.33218812942504883, |
| "memory(GiB)": 133.17, |
| "step": 1370, |
| "token_acc": 0.8781786390424615, |
| "train_speed(iter/s)": 0.068246 |
| }, |
| { |
| "epoch": 1.6100702576112411, |
| "grad_norm": 0.19090089201927185, |
| "learning_rate": 9.61917561476255e-06, |
| "loss": 0.3252577781677246, |
| "memory(GiB)": 133.17, |
| "step": 1375, |
| "token_acc": 0.8718890721275258, |
| "train_speed(iter/s)": 0.068245 |
| }, |
| { |
| "epoch": 1.6159250585480094, |
| "grad_norm": 0.2007261961698532, |
| "learning_rate": 9.554668715294305e-06, |
| "loss": 0.3365320205688477, |
| "memory(GiB)": 133.17, |
| "step": 1380, |
| "token_acc": 0.8808937423036773, |
| "train_speed(iter/s)": 0.068246 |
| }, |
| { |
| "epoch": 1.6217798594847777, |
| "grad_norm": 0.20129120349884033, |
| "learning_rate": 9.490180378358826e-06, |
| "loss": 0.33901381492614746, |
| "memory(GiB)": 133.17, |
| "step": 1385, |
| "token_acc": 0.8765135837259478, |
| "train_speed(iter/s)": 0.068245 |
| }, |
| { |
| "epoch": 1.6276346604215457, |
| "grad_norm": 0.18519413471221924, |
| "learning_rate": 9.425713291992878e-06, |
| "loss": 0.32805542945861815, |
| "memory(GiB)": 133.17, |
| "step": 1390, |
| "token_acc": 0.8837560234916173, |
| "train_speed(iter/s)": 0.068243 |
| }, |
| { |
| "epoch": 1.6334894613583137, |
| "grad_norm": 0.19597233831882477, |
| "learning_rate": 9.361270143347452e-06, |
| "loss": 0.3414484977722168, |
| "memory(GiB)": 133.17, |
| "step": 1395, |
| "token_acc": 0.8769078651119291, |
| "train_speed(iter/s)": 0.068243 |
| }, |
| { |
| "epoch": 1.639344262295082, |
| "grad_norm": 0.17986047267913818, |
| "learning_rate": 9.296853618575753e-06, |
| "loss": 0.32855379581451416, |
| "memory(GiB)": 133.17, |
| "step": 1400, |
| "token_acc": 0.8869200388717233, |
| "train_speed(iter/s)": 0.068244 |
| }, |
| { |
| "epoch": 1.6451990632318503, |
| "grad_norm": 0.2232111245393753, |
| "learning_rate": 9.232466402721241e-06, |
| "loss": 0.33907437324523926, |
| "memory(GiB)": 133.17, |
| "step": 1405, |
| "token_acc": 0.8805843000676505, |
| "train_speed(iter/s)": 0.068245 |
| }, |
| { |
| "epoch": 1.651053864168618, |
| "grad_norm": 0.19428326189517975, |
| "learning_rate": 9.1681111796057e-06, |
| "loss": 0.3294277906417847, |
| "memory(GiB)": 133.17, |
| "step": 1410, |
| "token_acc": 0.8820219796725579, |
| "train_speed(iter/s)": 0.068247 |
| }, |
| { |
| "epoch": 1.6569086651053864, |
| "grad_norm": 0.205523282289505, |
| "learning_rate": 9.103790631717375e-06, |
| "loss": 0.34450831413269045, |
| "memory(GiB)": 133.17, |
| "step": 1415, |
| "token_acc": 0.8722953184421034, |
| "train_speed(iter/s)": 0.068245 |
| }, |
| { |
| "epoch": 1.6627634660421546, |
| "grad_norm": 0.1955317109823227, |
| "learning_rate": 9.039507440099164e-06, |
| "loss": 0.32976531982421875, |
| "memory(GiB)": 133.17, |
| "step": 1420, |
| "token_acc": 0.887285426963314, |
| "train_speed(iter/s)": 0.068244 |
| }, |
| { |
| "epoch": 1.6686182669789227, |
| "grad_norm": 0.1974899172782898, |
| "learning_rate": 8.975264284236866e-06, |
| "loss": 0.33209028244018557, |
| "memory(GiB)": 133.17, |
| "step": 1425, |
| "token_acc": 0.8826060927102499, |
| "train_speed(iter/s)": 0.068246 |
| }, |
| { |
| "epoch": 1.6744730679156907, |
| "grad_norm": 0.20223510265350342, |
| "learning_rate": 8.911063841947476e-06, |
| "loss": 0.33354964256286623, |
| "memory(GiB)": 133.17, |
| "step": 1430, |
| "token_acc": 0.8795497702238948, |
| "train_speed(iter/s)": 0.068244 |
| }, |
| { |
| "epoch": 1.680327868852459, |
| "grad_norm": 0.19802114367485046, |
| "learning_rate": 8.846908789267589e-06, |
| "loss": 0.33350410461425783, |
| "memory(GiB)": 133.17, |
| "step": 1435, |
| "token_acc": 0.8820895522388059, |
| "train_speed(iter/s)": 0.068246 |
| }, |
| { |
| "epoch": 1.6861826697892273, |
| "grad_norm": 0.19948238134384155, |
| "learning_rate": 8.78280180034184e-06, |
| "loss": 0.3242588758468628, |
| "memory(GiB)": 133.17, |
| "step": 1440, |
| "token_acc": 0.8763353704232109, |
| "train_speed(iter/s)": 0.068246 |
| }, |
| { |
| "epoch": 1.6920374707259953, |
| "grad_norm": 0.19532591104507446, |
| "learning_rate": 8.718745547311458e-06, |
| "loss": 0.3360363721847534, |
| "memory(GiB)": 133.17, |
| "step": 1445, |
| "token_acc": 0.8764055183683731, |
| "train_speed(iter/s)": 0.068241 |
| }, |
| { |
| "epoch": 1.6978922716627634, |
| "grad_norm": 0.20000973343849182, |
| "learning_rate": 8.654742700202849e-06, |
| "loss": 0.33543264865875244, |
| "memory(GiB)": 133.17, |
| "step": 1450, |
| "token_acc": 0.8791397393130521, |
| "train_speed(iter/s)": 0.06824 |
| }, |
| { |
| "epoch": 1.7037470725995316, |
| "grad_norm": 0.193691685795784, |
| "learning_rate": 8.590795926816348e-06, |
| "loss": 0.32405283451080324, |
| "memory(GiB)": 133.17, |
| "step": 1455, |
| "token_acc": 0.8792053838888559, |
| "train_speed(iter/s)": 0.068239 |
| }, |
| { |
| "epoch": 1.7096018735362999, |
| "grad_norm": 0.18100841343402863, |
| "learning_rate": 8.526907892614986e-06, |
| "loss": 0.32940475940704345, |
| "memory(GiB)": 133.17, |
| "step": 1460, |
| "token_acc": 0.8829538372890485, |
| "train_speed(iter/s)": 0.068234 |
| }, |
| { |
| "epoch": 1.715456674473068, |
| "grad_norm": 0.2313033789396286, |
| "learning_rate": 8.463081260613391e-06, |
| "loss": 0.3310007810592651, |
| "memory(GiB)": 133.17, |
| "step": 1465, |
| "token_acc": 0.8884524843192141, |
| "train_speed(iter/s)": 0.068231 |
| }, |
| { |
| "epoch": 1.721311475409836, |
| "grad_norm": 0.19678162038326263, |
| "learning_rate": 8.399318691266806e-06, |
| "loss": 0.3346008062362671, |
| "memory(GiB)": 133.17, |
| "step": 1470, |
| "token_acc": 0.8785229138209752, |
| "train_speed(iter/s)": 0.068229 |
| }, |
| { |
| "epoch": 1.7271662763466042, |
| "grad_norm": 0.20874732732772827, |
| "learning_rate": 8.335622842360168e-06, |
| "loss": 0.3276866674423218, |
| "memory(GiB)": 133.17, |
| "step": 1475, |
| "token_acc": 0.8830160906179125, |
| "train_speed(iter/s)": 0.068224 |
| }, |
| { |
| "epoch": 1.7330210772833725, |
| "grad_norm": 0.20175132155418396, |
| "learning_rate": 8.271996368897345e-06, |
| "loss": 0.33496603965759275, |
| "memory(GiB)": 133.17, |
| "step": 1480, |
| "token_acc": 0.8852224356801145, |
| "train_speed(iter/s)": 0.06822 |
| }, |
| { |
| "epoch": 1.7388758782201406, |
| "grad_norm": 0.19031141698360443, |
| "learning_rate": 8.208441922990454e-06, |
| "loss": 0.32518749237060546, |
| "memory(GiB)": 133.17, |
| "step": 1485, |
| "token_acc": 0.8788670711802744, |
| "train_speed(iter/s)": 0.068219 |
| }, |
| { |
| "epoch": 1.7447306791569086, |
| "grad_norm": 0.19358490407466888, |
| "learning_rate": 8.144962153749331e-06, |
| "loss": 0.32768878936767576, |
| "memory(GiB)": 133.17, |
| "step": 1490, |
| "token_acc": 0.8753486456636903, |
| "train_speed(iter/s)": 0.068217 |
| }, |
| { |
| "epoch": 1.7505854800936769, |
| "grad_norm": 0.21087020635604858, |
| "learning_rate": 8.081559707171094e-06, |
| "loss": 0.3388930559158325, |
| "memory(GiB)": 133.17, |
| "step": 1495, |
| "token_acc": 0.8764171874364358, |
| "train_speed(iter/s)": 0.06822 |
| }, |
| { |
| "epoch": 1.756440281030445, |
| "grad_norm": 0.1951858252286911, |
| "learning_rate": 8.01823722602986e-06, |
| "loss": 0.3247065544128418, |
| "memory(GiB)": 133.17, |
| "step": 1500, |
| "token_acc": 0.8884904457005652, |
| "train_speed(iter/s)": 0.068221 |
| }, |
| { |
| "epoch": 1.762295081967213, |
| "grad_norm": 0.20260894298553467, |
| "learning_rate": 7.954997349766576e-06, |
| "loss": 0.33308422565460205, |
| "memory(GiB)": 133.17, |
| "step": 1505, |
| "token_acc": 0.8817160406212514, |
| "train_speed(iter/s)": 0.068221 |
| }, |
| { |
| "epoch": 1.7681498829039812, |
| "grad_norm": 0.19411516189575195, |
| "learning_rate": 7.891842714379027e-06, |
| "loss": 0.3207800626754761, |
| "memory(GiB)": 133.17, |
| "step": 1510, |
| "token_acc": 0.8866104646064812, |
| "train_speed(iter/s)": 0.068218 |
| }, |
| { |
| "epoch": 1.7740046838407495, |
| "grad_norm": 0.2132834941148758, |
| "learning_rate": 7.828775952311921e-06, |
| "loss": 0.32387499809265136, |
| "memory(GiB)": 133.17, |
| "step": 1515, |
| "token_acc": 0.8781614519597012, |
| "train_speed(iter/s)": 0.068214 |
| }, |
| { |
| "epoch": 1.7798594847775175, |
| "grad_norm": 0.2175895869731903, |
| "learning_rate": 7.765799692347201e-06, |
| "loss": 0.32644095420837405, |
| "memory(GiB)": 133.17, |
| "step": 1520, |
| "token_acc": 0.878244971440831, |
| "train_speed(iter/s)": 0.068212 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.20511025190353394, |
| "learning_rate": 7.702916559494444e-06, |
| "loss": 0.3338191032409668, |
| "memory(GiB)": 133.17, |
| "step": 1525, |
| "token_acc": 0.8815095165856024, |
| "train_speed(iter/s)": 0.068213 |
| }, |
| { |
| "epoch": 1.7915690866510539, |
| "grad_norm": 0.19504858553409576, |
| "learning_rate": 7.64012917488146e-06, |
| "loss": 0.31484146118164064, |
| "memory(GiB)": 133.17, |
| "step": 1530, |
| "token_acc": 0.8943310386864273, |
| "train_speed(iter/s)": 0.068215 |
| }, |
| { |
| "epoch": 1.7974238875878221, |
| "grad_norm": 0.2018832564353943, |
| "learning_rate": 7.577440155645028e-06, |
| "loss": 0.3253478050231934, |
| "memory(GiB)": 133.17, |
| "step": 1535, |
| "token_acc": 0.883270074462929, |
| "train_speed(iter/s)": 0.068211 |
| }, |
| { |
| "epoch": 1.8032786885245902, |
| "grad_norm": 0.18957826495170593, |
| "learning_rate": 7.514852114821811e-06, |
| "loss": 0.3356925010681152, |
| "memory(GiB)": 133.17, |
| "step": 1540, |
| "token_acc": 0.8806853758108548, |
| "train_speed(iter/s)": 0.06821 |
| }, |
| { |
| "epoch": 1.8091334894613582, |
| "grad_norm": 0.18248967826366425, |
| "learning_rate": 7.452367661239433e-06, |
| "loss": 0.3128045558929443, |
| "memory(GiB)": 133.17, |
| "step": 1545, |
| "token_acc": 0.8822570031516938, |
| "train_speed(iter/s)": 0.068208 |
| }, |
| { |
| "epoch": 1.8149882903981265, |
| "grad_norm": 0.21197733283042908, |
| "learning_rate": 7.389989399407741e-06, |
| "loss": 0.3383420467376709, |
| "memory(GiB)": 133.17, |
| "step": 1550, |
| "token_acc": 0.8810136098103397, |
| "train_speed(iter/s)": 0.068204 |
| }, |
| { |
| "epoch": 1.8208430913348947, |
| "grad_norm": 0.1846388280391693, |
| "learning_rate": 7.3277199294102485e-06, |
| "loss": 0.3210147857666016, |
| "memory(GiB)": 133.17, |
| "step": 1555, |
| "token_acc": 0.8783838996638541, |
| "train_speed(iter/s)": 0.068204 |
| }, |
| { |
| "epoch": 1.8266978922716628, |
| "grad_norm": 0.21333329379558563, |
| "learning_rate": 7.265561846795741e-06, |
| "loss": 0.33364644050598147, |
| "memory(GiB)": 133.17, |
| "step": 1560, |
| "token_acc": 0.8799311976453201, |
| "train_speed(iter/s)": 0.068197 |
| }, |
| { |
| "epoch": 1.8325526932084308, |
| "grad_norm": 0.1916390359401703, |
| "learning_rate": 7.203517742470101e-06, |
| "loss": 0.3300149440765381, |
| "memory(GiB)": 133.17, |
| "step": 1565, |
| "token_acc": 0.8891839280314484, |
| "train_speed(iter/s)": 0.068196 |
| }, |
| { |
| "epoch": 1.838407494145199, |
| "grad_norm": 0.1898123174905777, |
| "learning_rate": 7.141590202588312e-06, |
| "loss": 0.3347996711730957, |
| "memory(GiB)": 133.17, |
| "step": 1570, |
| "token_acc": 0.8836206356563897, |
| "train_speed(iter/s)": 0.068196 |
| }, |
| { |
| "epoch": 1.8442622950819674, |
| "grad_norm": 0.25897353887557983, |
| "learning_rate": 7.079781808446648e-06, |
| "loss": 0.33739614486694336, |
| "memory(GiB)": 133.17, |
| "step": 1575, |
| "token_acc": 0.8688480209111277, |
| "train_speed(iter/s)": 0.068193 |
| }, |
| { |
| "epoch": 1.8501170960187352, |
| "grad_norm": 0.18949347734451294, |
| "learning_rate": 7.018095136375089e-06, |
| "loss": 0.3224343299865723, |
| "memory(GiB)": 133.17, |
| "step": 1580, |
| "token_acc": 0.8803287043737061, |
| "train_speed(iter/s)": 0.068192 |
| }, |
| { |
| "epoch": 1.8559718969555035, |
| "grad_norm": 0.19546827673912048, |
| "learning_rate": 6.956532757629945e-06, |
| "loss": 0.3295243740081787, |
| "memory(GiB)": 133.17, |
| "step": 1585, |
| "token_acc": 0.8802496310563046, |
| "train_speed(iter/s)": 0.068189 |
| }, |
| { |
| "epoch": 1.8618266978922717, |
| "grad_norm": 0.1977819800376892, |
| "learning_rate": 6.89509723828665e-06, |
| "loss": 0.3339688777923584, |
| "memory(GiB)": 133.17, |
| "step": 1590, |
| "token_acc": 0.8794367319992775, |
| "train_speed(iter/s)": 0.068188 |
| }, |
| { |
| "epoch": 1.8676814988290398, |
| "grad_norm": 0.2035733312368393, |
| "learning_rate": 6.833791139132824e-06, |
| "loss": 0.3196906089782715, |
| "memory(GiB)": 133.17, |
| "step": 1595, |
| "token_acc": 0.8819461276705585, |
| "train_speed(iter/s)": 0.068183 |
| }, |
| { |
| "epoch": 1.8735362997658078, |
| "grad_norm": 0.18036054074764252, |
| "learning_rate": 6.772617015561529e-06, |
| "loss": 0.3284833192825317, |
| "memory(GiB)": 133.17, |
| "step": 1600, |
| "token_acc": 0.8721648839682242, |
| "train_speed(iter/s)": 0.068185 |
| }, |
| { |
| "epoch": 1.879391100702576, |
| "grad_norm": 0.19073913991451263, |
| "learning_rate": 6.7115774174647475e-06, |
| "loss": 0.3214848518371582, |
| "memory(GiB)": 133.17, |
| "step": 1605, |
| "token_acc": 0.8888246134782375, |
| "train_speed(iter/s)": 0.068184 |
| }, |
| { |
| "epoch": 1.8852459016393444, |
| "grad_norm": 0.22237442433834076, |
| "learning_rate": 6.6506748891271045e-06, |
| "loss": 0.3328333854675293, |
| "memory(GiB)": 133.17, |
| "step": 1610, |
| "token_acc": 0.8864136225147821, |
| "train_speed(iter/s)": 0.068183 |
| }, |
| { |
| "epoch": 1.8911007025761124, |
| "grad_norm": 0.18580298125743866, |
| "learning_rate": 6.5899119691198025e-06, |
| "loss": 0.3259113073348999, |
| "memory(GiB)": 133.17, |
| "step": 1615, |
| "token_acc": 0.8816001292832858, |
| "train_speed(iter/s)": 0.068182 |
| }, |
| { |
| "epoch": 1.8969555035128804, |
| "grad_norm": 0.19562335312366486, |
| "learning_rate": 6.529291190194829e-06, |
| "loss": 0.3301589012145996, |
| "memory(GiB)": 133.17, |
| "step": 1620, |
| "token_acc": 0.8816063260815503, |
| "train_speed(iter/s)": 0.068183 |
| }, |
| { |
| "epoch": 1.9028103044496487, |
| "grad_norm": 0.19002656638622284, |
| "learning_rate": 6.468815079179364e-06, |
| "loss": 0.32632834911346437, |
| "memory(GiB)": 133.17, |
| "step": 1625, |
| "token_acc": 0.8859821923514176, |
| "train_speed(iter/s)": 0.06818 |
| }, |
| { |
| "epoch": 1.908665105386417, |
| "grad_norm": 0.19892436265945435, |
| "learning_rate": 6.408486156870466e-06, |
| "loss": 0.33937792778015136, |
| "memory(GiB)": 133.17, |
| "step": 1630, |
| "token_acc": 0.862874582417446, |
| "train_speed(iter/s)": 0.068181 |
| }, |
| { |
| "epoch": 1.914519906323185, |
| "grad_norm": 0.19243668019771576, |
| "learning_rate": 6.348306937929991e-06, |
| "loss": 0.3362755537033081, |
| "memory(GiB)": 133.17, |
| "step": 1635, |
| "token_acc": 0.8769627409259633, |
| "train_speed(iter/s)": 0.068182 |
| }, |
| { |
| "epoch": 1.920374707259953, |
| "grad_norm": 0.18101197481155396, |
| "learning_rate": 6.288279930779789e-06, |
| "loss": 0.31793382167816164, |
| "memory(GiB)": 133.17, |
| "step": 1640, |
| "token_acc": 0.890389030411674, |
| "train_speed(iter/s)": 0.068181 |
| }, |
| { |
| "epoch": 1.9262295081967213, |
| "grad_norm": 0.2016856074333191, |
| "learning_rate": 6.228407637497131e-06, |
| "loss": 0.3286017417907715, |
| "memory(GiB)": 133.17, |
| "step": 1645, |
| "token_acc": 0.8691879609602018, |
| "train_speed(iter/s)": 0.068179 |
| }, |
| { |
| "epoch": 1.9320843091334896, |
| "grad_norm": 0.18602800369262695, |
| "learning_rate": 6.1686925537104306e-06, |
| "loss": 0.3186060905456543, |
| "memory(GiB)": 133.17, |
| "step": 1650, |
| "token_acc": 0.8740382186265122, |
| "train_speed(iter/s)": 0.068178 |
| }, |
| { |
| "epoch": 1.9379391100702577, |
| "grad_norm": 0.19921670854091644, |
| "learning_rate": 6.109137168495205e-06, |
| "loss": 0.325826621055603, |
| "memory(GiB)": 133.17, |
| "step": 1655, |
| "token_acc": 0.8942359105977971, |
| "train_speed(iter/s)": 0.068178 |
| }, |
| { |
| "epoch": 1.9437939110070257, |
| "grad_norm": 0.1804487407207489, |
| "learning_rate": 6.049743964270336e-06, |
| "loss": 0.33586409091949465, |
| "memory(GiB)": 133.17, |
| "step": 1660, |
| "token_acc": 0.8788306137094006, |
| "train_speed(iter/s)": 0.068176 |
| }, |
| { |
| "epoch": 1.949648711943794, |
| "grad_norm": 0.20771907269954681, |
| "learning_rate": 5.990515416694591e-06, |
| "loss": 0.3336956024169922, |
| "memory(GiB)": 133.17, |
| "step": 1665, |
| "token_acc": 0.8826585274697895, |
| "train_speed(iter/s)": 0.068173 |
| }, |
| { |
| "epoch": 1.955503512880562, |
| "grad_norm": 0.19965799152851105, |
| "learning_rate": 5.931453994563434e-06, |
| "loss": 0.3285707473754883, |
| "memory(GiB)": 133.17, |
| "step": 1670, |
| "token_acc": 0.8875544099179484, |
| "train_speed(iter/s)": 0.068174 |
| }, |
| { |
| "epoch": 1.96135831381733, |
| "grad_norm": 0.20612315833568573, |
| "learning_rate": 5.872562159706116e-06, |
| "loss": 0.3315183877944946, |
| "memory(GiB)": 133.17, |
| "step": 1675, |
| "token_acc": 0.8774614658697704, |
| "train_speed(iter/s)": 0.068172 |
| }, |
| { |
| "epoch": 1.9672131147540983, |
| "grad_norm": 0.18963313102722168, |
| "learning_rate": 5.8138423668830605e-06, |
| "loss": 0.324364972114563, |
| "memory(GiB)": 133.17, |
| "step": 1680, |
| "token_acc": 0.8801062072294897, |
| "train_speed(iter/s)": 0.068174 |
| }, |
| { |
| "epoch": 1.9730679156908666, |
| "grad_norm": 0.19694305956363678, |
| "learning_rate": 5.755297063683551e-06, |
| "loss": 0.3285407066345215, |
| "memory(GiB)": 133.17, |
| "step": 1685, |
| "token_acc": 0.885107199114613, |
| "train_speed(iter/s)": 0.068174 |
| }, |
| { |
| "epoch": 1.9789227166276346, |
| "grad_norm": 0.18662695586681366, |
| "learning_rate": 5.696928690423693e-06, |
| "loss": 0.32373480796813964, |
| "memory(GiB)": 133.17, |
| "step": 1690, |
| "token_acc": 0.8790801928023776, |
| "train_speed(iter/s)": 0.068176 |
| }, |
| { |
| "epoch": 1.9847775175644027, |
| "grad_norm": 0.19431762397289276, |
| "learning_rate": 5.638739680044718e-06, |
| "loss": 0.3377500057220459, |
| "memory(GiB)": 133.17, |
| "step": 1695, |
| "token_acc": 0.8722363298833375, |
| "train_speed(iter/s)": 0.068175 |
| }, |
| { |
| "epoch": 1.990632318501171, |
| "grad_norm": 0.2024122029542923, |
| "learning_rate": 5.580732458011544e-06, |
| "loss": 0.3272620439529419, |
| "memory(GiB)": 133.17, |
| "step": 1700, |
| "token_acc": 0.8856717266189297, |
| "train_speed(iter/s)": 0.068173 |
| }, |
| { |
| "epoch": 1.9964871194379392, |
| "grad_norm": 0.18394924700260162, |
| "learning_rate": 5.522909442211708e-06, |
| "loss": 0.32718348503112793, |
| "memory(GiB)": 133.17, |
| "step": 1705, |
| "token_acc": 0.876887289049153, |
| "train_speed(iter/s)": 0.06817 |
| }, |
| { |
| "epoch": 2.002341920374707, |
| "grad_norm": 0.2651495337486267, |
| "learning_rate": 5.465273042854551e-06, |
| "loss": 0.31393914222717284, |
| "memory(GiB)": 133.17, |
| "step": 1710, |
| "token_acc": 0.8893703023658244, |
| "train_speed(iter/s)": 0.06798 |
| }, |
| { |
| "epoch": 2.0081967213114753, |
| "grad_norm": 0.21041427552700043, |
| "learning_rate": 5.407825662370778e-06, |
| "loss": 0.299090313911438, |
| "memory(GiB)": 133.17, |
| "step": 1715, |
| "token_acc": 0.8915390401403241, |
| "train_speed(iter/s)": 0.067979 |
| }, |
| { |
| "epoch": 2.0140515222482436, |
| "grad_norm": 0.21380308270454407, |
| "learning_rate": 5.350569695312313e-06, |
| "loss": 0.3101144790649414, |
| "memory(GiB)": 133.17, |
| "step": 1720, |
| "token_acc": 0.8875269739992413, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.019906323185012, |
| "grad_norm": 0.1987718939781189, |
| "learning_rate": 5.293507528252474e-06, |
| "loss": 0.3136857509613037, |
| "memory(GiB)": 133.17, |
| "step": 1725, |
| "token_acc": 0.8871349620144686, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.0257611241217797, |
| "grad_norm": 0.3591626286506653, |
| "learning_rate": 5.236641539686518e-06, |
| "loss": 0.30123333930969237, |
| "memory(GiB)": 133.17, |
| "step": 1730, |
| "token_acc": 0.8860105084502068, |
| "train_speed(iter/s)": 0.067977 |
| }, |
| { |
| "epoch": 2.031615925058548, |
| "grad_norm": 0.19819702208042145, |
| "learning_rate": 5.179974099932472e-06, |
| "loss": 0.29487655162811277, |
| "memory(GiB)": 133.17, |
| "step": 1735, |
| "token_acc": 0.8855569615495446, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.037470725995316, |
| "grad_norm": 0.2023162841796875, |
| "learning_rate": 5.12350757103236e-06, |
| "loss": 0.29470908641815186, |
| "memory(GiB)": 133.17, |
| "step": 1740, |
| "token_acc": 0.8894021747623796, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.0433255269320845, |
| "grad_norm": 0.19459553062915802, |
| "learning_rate": 5.067244306653736e-06, |
| "loss": 0.30195889472961424, |
| "memory(GiB)": 133.17, |
| "step": 1745, |
| "token_acc": 0.8966922700402876, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.0491803278688523, |
| "grad_norm": 0.20582208037376404, |
| "learning_rate": 5.0111866519915575e-06, |
| "loss": 0.2972427845001221, |
| "memory(GiB)": 133.17, |
| "step": 1750, |
| "token_acc": 0.8860103790300714, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.0550351288056206, |
| "grad_norm": 0.21163956820964813, |
| "learning_rate": 4.95533694367047e-06, |
| "loss": 0.2951073408126831, |
| "memory(GiB)": 133.17, |
| "step": 1755, |
| "token_acc": 0.8911898143660713, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.060889929742389, |
| "grad_norm": 0.271316796541214, |
| "learning_rate": 4.899697509647379e-06, |
| "loss": 0.3005206108093262, |
| "memory(GiB)": 133.17, |
| "step": 1760, |
| "token_acc": 0.8827217211398426, |
| "train_speed(iter/s)": 0.067977 |
| }, |
| { |
| "epoch": 2.066744730679157, |
| "grad_norm": 0.1982126086950302, |
| "learning_rate": 4.844270669114424e-06, |
| "loss": 0.30247581005096436, |
| "memory(GiB)": 133.17, |
| "step": 1765, |
| "token_acc": 0.8939350325087765, |
| "train_speed(iter/s)": 0.067979 |
| }, |
| { |
| "epoch": 2.072599531615925, |
| "grad_norm": 0.20624509453773499, |
| "learning_rate": 4.789058732402319e-06, |
| "loss": 0.2944344520568848, |
| "memory(GiB)": 133.17, |
| "step": 1770, |
| "token_acc": 0.8877186400937866, |
| "train_speed(iter/s)": 0.067979 |
| }, |
| { |
| "epoch": 2.078454332552693, |
| "grad_norm": 0.18864554166793823, |
| "learning_rate": 4.734064000884044e-06, |
| "loss": 0.31334614753723145, |
| "memory(GiB)": 133.17, |
| "step": 1775, |
| "token_acc": 0.8753750599625646, |
| "train_speed(iter/s)": 0.067981 |
| }, |
| { |
| "epoch": 2.0843091334894615, |
| "grad_norm": 0.19976413249969482, |
| "learning_rate": 4.679288766878908e-06, |
| "loss": 0.3065293073654175, |
| "memory(GiB)": 133.17, |
| "step": 1780, |
| "token_acc": 0.8893787799945783, |
| "train_speed(iter/s)": 0.067979 |
| }, |
| { |
| "epoch": 2.0901639344262297, |
| "grad_norm": 0.20083464682102203, |
| "learning_rate": 4.624735313557019e-06, |
| "loss": 0.30294094085693357, |
| "memory(GiB)": 133.17, |
| "step": 1785, |
| "token_acc": 0.894106624191886, |
| "train_speed(iter/s)": 0.06798 |
| }, |
| { |
| "epoch": 2.0960187353629975, |
| "grad_norm": 0.19687768816947937, |
| "learning_rate": 4.570405914844105e-06, |
| "loss": 0.29626712799072263, |
| "memory(GiB)": 133.17, |
| "step": 1790, |
| "token_acc": 0.8918388887847958, |
| "train_speed(iter/s)": 0.067977 |
| }, |
| { |
| "epoch": 2.101873536299766, |
| "grad_norm": 0.21042723953723907, |
| "learning_rate": 4.516302835326723e-06, |
| "loss": 0.30143260955810547, |
| "memory(GiB)": 133.17, |
| "step": 1795, |
| "token_acc": 0.8979354142270508, |
| "train_speed(iter/s)": 0.067977 |
| }, |
| { |
| "epoch": 2.107728337236534, |
| "grad_norm": 0.20909157395362854, |
| "learning_rate": 4.462428330157886e-06, |
| "loss": 0.29250779151916506, |
| "memory(GiB)": 133.17, |
| "step": 1800, |
| "token_acc": 0.8972882018187891, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.113583138173302, |
| "grad_norm": 0.18871068954467773, |
| "learning_rate": 4.4087846449630475e-06, |
| "loss": 0.296770715713501, |
| "memory(GiB)": 133.17, |
| "step": 1805, |
| "token_acc": 0.8939121347421645, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.11943793911007, |
| "grad_norm": 0.19644689559936523, |
| "learning_rate": 4.355374015746493e-06, |
| "loss": 0.29331092834472655, |
| "memory(GiB)": 133.17, |
| "step": 1810, |
| "token_acc": 0.8898344723236344, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.1252927400468384, |
| "grad_norm": 0.2067333608865738, |
| "learning_rate": 4.302198668798159e-06, |
| "loss": 0.298096752166748, |
| "memory(GiB)": 133.17, |
| "step": 1815, |
| "token_acc": 0.8860096940702505, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.1311475409836067, |
| "grad_norm": 0.19337214529514313, |
| "learning_rate": 4.249260820600813e-06, |
| "loss": 0.28569879531860354, |
| "memory(GiB)": 133.17, |
| "step": 1820, |
| "token_acc": 0.8937030726309285, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.1370023419203745, |
| "grad_norm": 0.21502645313739777, |
| "learning_rate": 4.1965626777376766e-06, |
| "loss": 0.29423298835754397, |
| "memory(GiB)": 133.17, |
| "step": 1825, |
| "token_acc": 0.8971041975679516, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.18807381391525269, |
| "learning_rate": 4.144106436800453e-06, |
| "loss": 0.30044715404510497, |
| "memory(GiB)": 133.17, |
| "step": 1830, |
| "token_acc": 0.8974527790728444, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.148711943793911, |
| "grad_norm": 0.18506018817424774, |
| "learning_rate": 4.091894284297758e-06, |
| "loss": 0.2915837526321411, |
| "memory(GiB)": 133.17, |
| "step": 1835, |
| "token_acc": 0.8848257422956048, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.1545667447306793, |
| "grad_norm": 0.22477097809314728, |
| "learning_rate": 4.039928396563983e-06, |
| "loss": 0.3101827621459961, |
| "memory(GiB)": 133.17, |
| "step": 1840, |
| "token_acc": 0.8897657467466561, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.160421545667447, |
| "grad_norm": 0.20848192274570465, |
| "learning_rate": 3.9882109396685845e-06, |
| "loss": 0.28560404777526854, |
| "memory(GiB)": 133.17, |
| "step": 1845, |
| "token_acc": 0.8866163430466006, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.1662763466042154, |
| "grad_norm": 0.1790919452905655, |
| "learning_rate": 3.936744069325797e-06, |
| "loss": 0.28580513000488283, |
| "memory(GiB)": 133.17, |
| "step": 1850, |
| "token_acc": 0.8959953003524735, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.1721311475409837, |
| "grad_norm": 0.1946616917848587, |
| "learning_rate": 3.885529930804768e-06, |
| "loss": 0.28203678131103516, |
| "memory(GiB)": 133.17, |
| "step": 1855, |
| "token_acc": 0.8865687290155894, |
| "train_speed(iter/s)": 0.06797 |
| }, |
| { |
| "epoch": 2.177985948477752, |
| "grad_norm": 0.2024662047624588, |
| "learning_rate": 3.834570658840152e-06, |
| "loss": 0.3013646602630615, |
| "memory(GiB)": 133.17, |
| "step": 1860, |
| "token_acc": 0.8853980676749265, |
| "train_speed(iter/s)": 0.06797 |
| }, |
| { |
| "epoch": 2.1838407494145198, |
| "grad_norm": 0.1884947568178177, |
| "learning_rate": 3.7838683775431106e-06, |
| "loss": 0.2940408706665039, |
| "memory(GiB)": 133.17, |
| "step": 1865, |
| "token_acc": 0.8914837094453064, |
| "train_speed(iter/s)": 0.067971 |
| }, |
| { |
| "epoch": 2.189695550351288, |
| "grad_norm": 0.19168955087661743, |
| "learning_rate": 3.733425200312797e-06, |
| "loss": 0.2958191156387329, |
| "memory(GiB)": 133.17, |
| "step": 1870, |
| "token_acc": 0.8822518250452361, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.1955503512880563, |
| "grad_norm": 0.194383904337883, |
| "learning_rate": 3.683243229748249e-06, |
| "loss": 0.28948154449462893, |
| "memory(GiB)": 133.17, |
| "step": 1875, |
| "token_acc": 0.8876668322153558, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.201405152224824, |
| "grad_norm": 0.19730046391487122, |
| "learning_rate": 3.633324557560747e-06, |
| "loss": 0.29555392265319824, |
| "memory(GiB)": 133.17, |
| "step": 1880, |
| "token_acc": 0.8939645340207796, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.2072599531615924, |
| "grad_norm": 0.18545053899288177, |
| "learning_rate": 3.5836712644866277e-06, |
| "loss": 0.28943870067596433, |
| "memory(GiB)": 133.17, |
| "step": 1885, |
| "token_acc": 0.8883624593035462, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.2131147540983607, |
| "grad_norm": 0.20143678784370422, |
| "learning_rate": 3.5342854202005696e-06, |
| "loss": 0.29045825004577636, |
| "memory(GiB)": 133.17, |
| "step": 1890, |
| "token_acc": 0.8931490778817771, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.218969555035129, |
| "grad_norm": 0.1933010071516037, |
| "learning_rate": 3.485169083229293e-06, |
| "loss": 0.2985133409500122, |
| "memory(GiB)": 133.17, |
| "step": 1895, |
| "token_acc": 0.8983116114671417, |
| "train_speed(iter/s)": 0.067977 |
| }, |
| { |
| "epoch": 2.2248243559718968, |
| "grad_norm": 0.2029975950717926, |
| "learning_rate": 3.4363243008657842e-06, |
| "loss": 0.29316296577453616, |
| "memory(GiB)": 133.17, |
| "step": 1900, |
| "token_acc": 0.8817185537873807, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.230679156908665, |
| "grad_norm": 0.1947357952594757, |
| "learning_rate": 3.3877531090839478e-06, |
| "loss": 0.2983538627624512, |
| "memory(GiB)": 133.17, |
| "step": 1905, |
| "token_acc": 0.8836724096943308, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.2365339578454333, |
| "grad_norm": 0.19401586055755615, |
| "learning_rate": 3.3394575324537327e-06, |
| "loss": 0.3019071578979492, |
| "memory(GiB)": 133.17, |
| "step": 1910, |
| "token_acc": 0.8830038763307387, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.2423887587822016, |
| "grad_norm": 0.18747617304325104, |
| "learning_rate": 3.2914395840567605e-06, |
| "loss": 0.2899949312210083, |
| "memory(GiB)": 133.17, |
| "step": 1915, |
| "token_acc": 0.8937491349698655, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.2482435597189694, |
| "grad_norm": 0.18497265875339508, |
| "learning_rate": 3.2437012654024057e-06, |
| "loss": 0.29514849185943604, |
| "memory(GiB)": 133.17, |
| "step": 1920, |
| "token_acc": 0.8952329266162637, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.2540983606557377, |
| "grad_norm": 0.19390814006328583, |
| "learning_rate": 3.1962445663443643e-06, |
| "loss": 0.29795031547546386, |
| "memory(GiB)": 133.17, |
| "step": 1925, |
| "token_acc": 0.8883286157922032, |
| "train_speed(iter/s)": 0.067969 |
| }, |
| { |
| "epoch": 2.259953161592506, |
| "grad_norm": 0.19152696430683136, |
| "learning_rate": 3.1490714649977196e-06, |
| "loss": 0.3013578414916992, |
| "memory(GiB)": 133.17, |
| "step": 1930, |
| "token_acc": 0.8966371415703348, |
| "train_speed(iter/s)": 0.067968 |
| }, |
| { |
| "epoch": 2.265807962529274, |
| "grad_norm": 0.19523312151432037, |
| "learning_rate": 3.102183927656488e-06, |
| "loss": 0.29044888019561765, |
| "memory(GiB)": 133.17, |
| "step": 1935, |
| "token_acc": 0.8816311924321633, |
| "train_speed(iter/s)": 0.067969 |
| }, |
| { |
| "epoch": 2.271662763466042, |
| "grad_norm": 0.19032931327819824, |
| "learning_rate": 3.0555839087116547e-06, |
| "loss": 0.30231542587280275, |
| "memory(GiB)": 133.17, |
| "step": 1940, |
| "token_acc": 0.89288125, |
| "train_speed(iter/s)": 0.06797 |
| }, |
| { |
| "epoch": 2.2775175644028103, |
| "grad_norm": 0.19542452692985535, |
| "learning_rate": 3.009273350569705e-06, |
| "loss": 0.3001267433166504, |
| "memory(GiB)": 133.17, |
| "step": 1945, |
| "token_acc": 0.8971306271312823, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.2833723653395785, |
| "grad_norm": 0.1856907606124878, |
| "learning_rate": 2.963254183571682e-06, |
| "loss": 0.29535422325134275, |
| "memory(GiB)": 133.17, |
| "step": 1950, |
| "token_acc": 0.8930906317907196, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.289227166276347, |
| "grad_norm": 0.1937672644853592, |
| "learning_rate": 2.9175283259126943e-06, |
| "loss": 0.2962016582489014, |
| "memory(GiB)": 133.17, |
| "step": 1955, |
| "token_acc": 0.898554810095657, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.2950819672131146, |
| "grad_norm": 0.18747203052043915, |
| "learning_rate": 2.872097683561986e-06, |
| "loss": 0.2947913885116577, |
| "memory(GiB)": 133.17, |
| "step": 1960, |
| "token_acc": 0.9006477145474906, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.300936768149883, |
| "grad_norm": 0.188889279961586, |
| "learning_rate": 2.8269641501834834e-06, |
| "loss": 0.3037715911865234, |
| "memory(GiB)": 133.17, |
| "step": 1965, |
| "token_acc": 0.8837665048634434, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.306791569086651, |
| "grad_norm": 0.18386943638324738, |
| "learning_rate": 2.782129607056848e-06, |
| "loss": 0.29630954265594484, |
| "memory(GiB)": 133.17, |
| "step": 1970, |
| "token_acc": 0.8854790349100962, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.312646370023419, |
| "grad_norm": 0.1860065758228302, |
| "learning_rate": 2.7375959229990856e-06, |
| "loss": 0.2871407508850098, |
| "memory(GiB)": 133.17, |
| "step": 1975, |
| "token_acc": 0.8921277606269294, |
| "train_speed(iter/s)": 0.067973 |
| }, |
| { |
| "epoch": 2.3185011709601873, |
| "grad_norm": 0.18686528503894806, |
| "learning_rate": 2.6933649542866326e-06, |
| "loss": 0.29081072807312014, |
| "memory(GiB)": 133.17, |
| "step": 1980, |
| "token_acc": 0.8890196371424658, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.3243559718969555, |
| "grad_norm": 0.17976053059101105, |
| "learning_rate": 2.649438544577977e-06, |
| "loss": 0.2809652090072632, |
| "memory(GiB)": 133.17, |
| "step": 1985, |
| "token_acc": 0.8903470664805608, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.330210772833724, |
| "grad_norm": 0.18407879769802094, |
| "learning_rate": 2.6058185248368317e-06, |
| "loss": 0.2934088706970215, |
| "memory(GiB)": 133.17, |
| "step": 1990, |
| "token_acc": 0.896975139427167, |
| "train_speed(iter/s)": 0.067974 |
| }, |
| { |
| "epoch": 2.3360655737704916, |
| "grad_norm": 0.1934359222650528, |
| "learning_rate": 2.562506713255789e-06, |
| "loss": 0.2888351917266846, |
| "memory(GiB)": 133.17, |
| "step": 1995, |
| "token_acc": 0.8959334542575936, |
| "train_speed(iter/s)": 0.067976 |
| }, |
| { |
| "epoch": 2.34192037470726, |
| "grad_norm": 0.19506384432315826, |
| "learning_rate": 2.519504915180555e-06, |
| "loss": 0.29209365844726565, |
| "memory(GiB)": 133.17, |
| "step": 2000, |
| "token_acc": 0.8933135020860227, |
| "train_speed(iter/s)": 0.067975 |
| }, |
| { |
| "epoch": 2.347775175644028, |
| "grad_norm": 0.2316175252199173, |
| "learning_rate": 2.4768149230346917e-06, |
| "loss": 0.30724682807922366, |
| "memory(GiB)": 133.17, |
| "step": 2005, |
| "token_acc": 0.8923097611088004, |
| "train_speed(iter/s)": 0.067972 |
| }, |
| { |
| "epoch": 2.3536299765807964, |
| "grad_norm": 0.19259780645370483, |
| "learning_rate": 2.4344385162448924e-06, |
| "loss": 0.29259405136108396, |
| "memory(GiB)": 133.17, |
| "step": 2010, |
| "token_acc": 0.8962570117930629, |
| "train_speed(iter/s)": 0.067971 |
| }, |
| { |
| "epoch": 2.3594847775175642, |
| "grad_norm": 0.18455654382705688, |
| "learning_rate": 2.392377461166826e-06, |
| "loss": 0.2909110069274902, |
| "memory(GiB)": 133.17, |
| "step": 2015, |
| "token_acc": 0.8950562827510747, |
| "train_speed(iter/s)": 0.067971 |
| }, |
| { |
| "epoch": 2.3653395784543325, |
| "grad_norm": 0.19333600997924805, |
| "learning_rate": 2.350633511011511e-06, |
| "loss": 0.2987373352050781, |
| "memory(GiB)": 133.17, |
| "step": 2020, |
| "token_acc": 0.890084898990847, |
| "train_speed(iter/s)": 0.067969 |
| }, |
| { |
| "epoch": 2.371194379391101, |
| "grad_norm": 0.18590733408927917, |
| "learning_rate": 2.309208405772221e-06, |
| "loss": 0.3060739278793335, |
| "memory(GiB)": 133.17, |
| "step": 2025, |
| "token_acc": 0.894386606817296, |
| "train_speed(iter/s)": 0.06797 |
| }, |
| { |
| "epoch": 2.3770491803278686, |
| "grad_norm": 0.19246318936347961, |
| "learning_rate": 2.2681038721519768e-06, |
| "loss": 0.3093658208847046, |
| "memory(GiB)": 133.17, |
| "step": 2030, |
| "token_acc": 0.8894458411573517, |
| "train_speed(iter/s)": 0.067969 |
| }, |
| { |
| "epoch": 2.382903981264637, |
| "grad_norm": 0.19371892511844635, |
| "learning_rate": 2.227321623491563e-06, |
| "loss": 0.2991630077362061, |
| "memory(GiB)": 133.17, |
| "step": 2035, |
| "token_acc": 0.8873450543890716, |
| "train_speed(iter/s)": 0.067968 |
| }, |
| { |
| "epoch": 2.388758782201405, |
| "grad_norm": 0.17911982536315918, |
| "learning_rate": 2.186863359698108e-06, |
| "loss": 0.29452369213104246, |
| "memory(GiB)": 133.17, |
| "step": 2040, |
| "token_acc": 0.9101415057216162, |
| "train_speed(iter/s)": 0.067967 |
| }, |
| { |
| "epoch": 2.3946135831381734, |
| "grad_norm": 0.19897328317165375, |
| "learning_rate": 2.1467307671742377e-06, |
| "loss": 0.2978281736373901, |
| "memory(GiB)": 133.17, |
| "step": 2045, |
| "token_acc": 0.8880359089210048, |
| "train_speed(iter/s)": 0.067968 |
| }, |
| { |
| "epoch": 2.4004683840749417, |
| "grad_norm": 0.19614428281784058, |
| "learning_rate": 2.106925518747779e-06, |
| "loss": 0.2917934417724609, |
| "memory(GiB)": 133.17, |
| "step": 2050, |
| "token_acc": 0.892301005603362, |
| "train_speed(iter/s)": 0.067968 |
| }, |
| { |
| "epoch": 2.4063231850117095, |
| "grad_norm": 0.18466618657112122, |
| "learning_rate": 2.06744927360202e-06, |
| "loss": 0.2950620651245117, |
| "memory(GiB)": 133.17, |
| "step": 2055, |
| "token_acc": 0.8911625268446858, |
| "train_speed(iter/s)": 0.067964 |
| }, |
| { |
| "epoch": 2.4121779859484778, |
| "grad_norm": 0.19196145236492157, |
| "learning_rate": 2.0283036772065712e-06, |
| "loss": 0.29646165370941163, |
| "memory(GiB)": 133.17, |
| "step": 2060, |
| "token_acc": 0.8943602932370165, |
| "train_speed(iter/s)": 0.067964 |
| }, |
| { |
| "epoch": 2.418032786885246, |
| "grad_norm": 0.20200960338115692, |
| "learning_rate": 1.9894903612487683e-06, |
| "loss": 0.30394654273986815, |
| "memory(GiB)": 133.17, |
| "step": 2065, |
| "token_acc": 0.882068843029542, |
| "train_speed(iter/s)": 0.067964 |
| }, |
| { |
| "epoch": 2.423887587822014, |
| "grad_norm": 0.19281496107578278, |
| "learning_rate": 1.9510109435656457e-06, |
| "loss": 0.30283074378967284, |
| "memory(GiB)": 133.17, |
| "step": 2070, |
| "token_acc": 0.8936107928433829, |
| "train_speed(iter/s)": 0.067965 |
| }, |
| { |
| "epoch": 2.429742388758782, |
| "grad_norm": 0.1977166384458542, |
| "learning_rate": 1.9128670280765283e-06, |
| "loss": 0.30489649772644045, |
| "memory(GiB)": 133.17, |
| "step": 2075, |
| "token_acc": 0.8869975460007921, |
| "train_speed(iter/s)": 0.067962 |
| }, |
| { |
| "epoch": 2.4355971896955504, |
| "grad_norm": 0.185228630900383, |
| "learning_rate": 1.8750602047161603e-06, |
| "loss": 0.29401373863220215, |
| "memory(GiB)": 133.17, |
| "step": 2080, |
| "token_acc": 0.8979298187696017, |
| "train_speed(iter/s)": 0.067961 |
| }, |
| { |
| "epoch": 2.4414519906323187, |
| "grad_norm": 0.19245509803295135, |
| "learning_rate": 1.8375920493684264e-06, |
| "loss": 0.3006903171539307, |
| "memory(GiB)": 133.17, |
| "step": 2085, |
| "token_acc": 0.8867607400439009, |
| "train_speed(iter/s)": 0.067964 |
| }, |
| { |
| "epoch": 2.4473067915690865, |
| "grad_norm": 0.19419154524803162, |
| "learning_rate": 1.8004641238006815e-06, |
| "loss": 0.29811155796051025, |
| "memory(GiB)": 133.17, |
| "step": 2090, |
| "token_acc": 0.8943640794642241, |
| "train_speed(iter/s)": 0.067962 |
| }, |
| { |
| "epoch": 2.4531615925058547, |
| "grad_norm": 0.1823989599943161, |
| "learning_rate": 1.7636779755986443e-06, |
| "loss": 0.3039386749267578, |
| "memory(GiB)": 133.17, |
| "step": 2095, |
| "token_acc": 0.8863327040435527, |
| "train_speed(iter/s)": 0.067959 |
| }, |
| { |
| "epoch": 2.459016393442623, |
| "grad_norm": 0.1855112761259079, |
| "learning_rate": 1.7272351381018792e-06, |
| "loss": 0.3009587287902832, |
| "memory(GiB)": 133.17, |
| "step": 2100, |
| "token_acc": 0.8790456780659275, |
| "train_speed(iter/s)": 0.067959 |
| }, |
| { |
| "epoch": 2.4648711943793913, |
| "grad_norm": 0.18876492977142334, |
| "learning_rate": 1.6911371303399048e-06, |
| "loss": 0.28830153942108155, |
| "memory(GiB)": 133.17, |
| "step": 2105, |
| "token_acc": 0.8928979046201769, |
| "train_speed(iter/s)": 0.067956 |
| }, |
| { |
| "epoch": 2.470725995316159, |
| "grad_norm": 0.19157980382442474, |
| "learning_rate": 1.6553854569688632e-06, |
| "loss": 0.30360212326049807, |
| "memory(GiB)": 133.17, |
| "step": 2110, |
| "token_acc": 0.8886916557875393, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.4765807962529274, |
| "grad_norm": 0.19697441160678864, |
| "learning_rate": 1.619981608208796e-06, |
| "loss": 0.30350236892700194, |
| "memory(GiB)": 133.17, |
| "step": 2115, |
| "token_acc": 0.8755962030416897, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.4824355971896956, |
| "grad_norm": 0.19516149163246155, |
| "learning_rate": 1.584927059781548e-06, |
| "loss": 0.3021031379699707, |
| "memory(GiB)": 133.17, |
| "step": 2120, |
| "token_acc": 0.8850454875188026, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.4882903981264635, |
| "grad_norm": 0.19685259461402893, |
| "learning_rate": 1.5502232728492362e-06, |
| "loss": 0.29403057098388674, |
| "memory(GiB)": 133.17, |
| "step": 2125, |
| "token_acc": 0.8935650598835121, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.4941451990632317, |
| "grad_norm": 0.1944494992494583, |
| "learning_rate": 1.5158716939533524e-06, |
| "loss": 0.303509259223938, |
| "memory(GiB)": 133.17, |
| "step": 2130, |
| "token_acc": 0.8899391835374175, |
| "train_speed(iter/s)": 0.067955 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.18963733315467834, |
| "learning_rate": 1.4818737549544725e-06, |
| "loss": 0.3023875951766968, |
| "memory(GiB)": 133.17, |
| "step": 2135, |
| "token_acc": 0.8820025957494603, |
| "train_speed(iter/s)": 0.067956 |
| }, |
| { |
| "epoch": 2.5058548009367683, |
| "grad_norm": 0.2587365210056305, |
| "learning_rate": 1.448230872972568e-06, |
| "loss": 0.29965691566467284, |
| "memory(GiB)": 133.17, |
| "step": 2140, |
| "token_acc": 0.895712561145832, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.5117096018735365, |
| "grad_norm": 0.1916307806968689, |
| "learning_rate": 1.4149444503279297e-06, |
| "loss": 0.3064573764801025, |
| "memory(GiB)": 133.17, |
| "step": 2145, |
| "token_acc": 0.8827446402570668, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.5175644028103044, |
| "grad_norm": 0.18983621895313263, |
| "learning_rate": 1.382015874482735e-06, |
| "loss": 0.2994706630706787, |
| "memory(GiB)": 133.17, |
| "step": 2150, |
| "token_acc": 0.889184252992907, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.5234192037470726, |
| "grad_norm": 0.19152384996414185, |
| "learning_rate": 1.3494465179831895e-06, |
| "loss": 0.29698777198791504, |
| "memory(GiB)": 133.17, |
| "step": 2155, |
| "token_acc": 0.8862135400891181, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.529274004683841, |
| "grad_norm": 0.17604193091392517, |
| "learning_rate": 1.3172377384023393e-06, |
| "loss": 0.2926321029663086, |
| "memory(GiB)": 133.17, |
| "step": 2160, |
| "token_acc": 0.891226010077476, |
| "train_speed(iter/s)": 0.067956 |
| }, |
| { |
| "epoch": 2.5351288056206087, |
| "grad_norm": 0.18896515667438507, |
| "learning_rate": 1.2853908782834722e-06, |
| "loss": 0.29559669494628904, |
| "memory(GiB)": 133.17, |
| "step": 2165, |
| "token_acc": 0.8984888499945305, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.540983606557377, |
| "grad_norm": 0.18624024093151093, |
| "learning_rate": 1.2539072650841523e-06, |
| "loss": 0.30248537063598635, |
| "memory(GiB)": 133.17, |
| "step": 2170, |
| "token_acc": 0.8883391871864846, |
| "train_speed(iter/s)": 0.067955 |
| }, |
| { |
| "epoch": 2.5468384074941453, |
| "grad_norm": 0.18639948964118958, |
| "learning_rate": 1.2227882111209011e-06, |
| "loss": 0.3061221599578857, |
| "memory(GiB)": 133.17, |
| "step": 2175, |
| "token_acc": 0.8831800956700007, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.552693208430913, |
| "grad_norm": 0.4918629229068756, |
| "learning_rate": 1.1920350135144898e-06, |
| "loss": 0.29971723556518554, |
| "memory(GiB)": 133.17, |
| "step": 2180, |
| "token_acc": 0.894886042214037, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.5585480093676813, |
| "grad_norm": 0.18684136867523193, |
| "learning_rate": 1.1616489541358678e-06, |
| "loss": 0.29734086990356445, |
| "memory(GiB)": 133.17, |
| "step": 2185, |
| "token_acc": 0.8841362452439526, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.5644028103044496, |
| "grad_norm": 0.18606062233448029, |
| "learning_rate": 1.1316312995527424e-06, |
| "loss": 0.3008298873901367, |
| "memory(GiB)": 133.17, |
| "step": 2190, |
| "token_acc": 0.8897950269865535, |
| "train_speed(iter/s)": 0.067955 |
| }, |
| { |
| "epoch": 2.570257611241218, |
| "grad_norm": 0.1816904991865158, |
| "learning_rate": 1.1019833009767744e-06, |
| "loss": 0.29885680675506593, |
| "memory(GiB)": 133.17, |
| "step": 2195, |
| "token_acc": 0.8878981843540634, |
| "train_speed(iter/s)": 0.067954 |
| }, |
| { |
| "epoch": 2.576112412177986, |
| "grad_norm": 0.19094757735729218, |
| "learning_rate": 1.072706194211426e-06, |
| "loss": 0.30043601989746094, |
| "memory(GiB)": 133.17, |
| "step": 2200, |
| "token_acc": 0.8938432293837546, |
| "train_speed(iter/s)": 0.067955 |
| }, |
| { |
| "epoch": 2.581967213114754, |
| "grad_norm": 0.1899169534444809, |
| "learning_rate": 1.0438011996004581e-06, |
| "loss": 0.2995189905166626, |
| "memory(GiB)": 133.17, |
| "step": 2205, |
| "token_acc": 0.8880722202892788, |
| "train_speed(iter/s)": 0.067955 |
| }, |
| { |
| "epoch": 2.5878220140515222, |
| "grad_norm": 0.19649627804756165, |
| "learning_rate": 1.0152695219770558e-06, |
| "loss": 0.2872016429901123, |
| "memory(GiB)": 133.17, |
| "step": 2210, |
| "token_acc": 0.8980766878765166, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.5936768149882905, |
| "grad_norm": 0.19790223240852356, |
| "learning_rate": 9.871123506136037e-07, |
| "loss": 0.29386420249938966, |
| "memory(GiB)": 133.17, |
| "step": 2215, |
| "token_acc": 0.9006561928197363, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.5995316159250583, |
| "grad_norm": 0.18190743029117584, |
| "learning_rate": 9.593308591721274e-07, |
| "loss": 0.2908626079559326, |
| "memory(GiB)": 133.17, |
| "step": 2220, |
| "token_acc": 0.8927072444113778, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.6053864168618266, |
| "grad_norm": 0.1853610724210739, |
| "learning_rate": 9.319262056553602e-07, |
| "loss": 0.300918436050415, |
| "memory(GiB)": 133.17, |
| "step": 2225, |
| "token_acc": 0.8953082310083849, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.611241217798595, |
| "grad_norm": 0.19064903259277344, |
| "learning_rate": 9.048995323584764e-07, |
| "loss": 0.3040909767150879, |
| "memory(GiB)": 133.17, |
| "step": 2230, |
| "token_acc": 0.8990558015887316, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.617096018735363, |
| "grad_norm": 0.18238228559494019, |
| "learning_rate": 8.78251965821485e-07, |
| "loss": 0.2880122184753418, |
| "memory(GiB)": 133.17, |
| "step": 2235, |
| "token_acc": 0.8914627457335544, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.6229508196721314, |
| "grad_norm": 0.18738383054733276, |
| "learning_rate": 8.519846167822665e-07, |
| "loss": 0.2943183422088623, |
| "memory(GiB)": 133.17, |
| "step": 2240, |
| "token_acc": 0.9038425869666715, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.628805620608899, |
| "grad_norm": 0.1841094046831131, |
| "learning_rate": 8.260985801302734e-07, |
| "loss": 0.2850812911987305, |
| "memory(GiB)": 133.17, |
| "step": 2245, |
| "token_acc": 0.8894028305143251, |
| "train_speed(iter/s)": 0.067948 |
| }, |
| { |
| "epoch": 2.6346604215456675, |
| "grad_norm": 0.18611599504947662, |
| "learning_rate": 8.005949348608977e-07, |
| "loss": 0.2972105979919434, |
| "memory(GiB)": 133.17, |
| "step": 2250, |
| "token_acc": 0.8871099881800386, |
| "train_speed(iter/s)": 0.067948 |
| }, |
| { |
| "epoch": 2.6405152224824358, |
| "grad_norm": 0.1857517957687378, |
| "learning_rate": 7.754747440304911e-07, |
| "loss": 0.30115318298339844, |
| "memory(GiB)": 133.17, |
| "step": 2255, |
| "token_acc": 0.8863566925844406, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.6463700234192036, |
| "grad_norm": 0.19214338064193726, |
| "learning_rate": 7.507390547120541e-07, |
| "loss": 0.29389874935150145, |
| "memory(GiB)": 133.17, |
| "step": 2260, |
| "token_acc": 0.8897345572130235, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.652224824355972, |
| "grad_norm": 0.18817630410194397, |
| "learning_rate": 7.263888979515954e-07, |
| "loss": 0.3036650657653809, |
| "memory(GiB)": 133.17, |
| "step": 2265, |
| "token_acc": 0.8849702240287362, |
| "train_speed(iter/s)": 0.067952 |
| }, |
| { |
| "epoch": 2.65807962529274, |
| "grad_norm": 0.18061281740665436, |
| "learning_rate": 7.024252887251548e-07, |
| "loss": 0.29589831829071045, |
| "memory(GiB)": 133.17, |
| "step": 2270, |
| "token_acc": 0.8896930575764528, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.663934426229508, |
| "grad_norm": 0.18534523248672485, |
| "learning_rate": 6.788492258964896e-07, |
| "loss": 0.29939701557159426, |
| "memory(GiB)": 133.17, |
| "step": 2275, |
| "token_acc": 0.8869800488330657, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.669789227166276, |
| "grad_norm": 0.19118830561637878, |
| "learning_rate": 6.556616921754489e-07, |
| "loss": 0.29693875312805174, |
| "memory(GiB)": 133.17, |
| "step": 2280, |
| "token_acc": 0.8911168593654094, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.6756440281030445, |
| "grad_norm": 0.18963268399238586, |
| "learning_rate": 6.328636540770028e-07, |
| "loss": 0.3002347707748413, |
| "memory(GiB)": 133.17, |
| "step": 2285, |
| "token_acc": 0.889527246797438, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.6814988290398127, |
| "grad_norm": 0.18357062339782715, |
| "learning_rate": 6.10456061880963e-07, |
| "loss": 0.304398250579834, |
| "memory(GiB)": 133.17, |
| "step": 2290, |
| "token_acc": 0.8937302240569359, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.687353629976581, |
| "grad_norm": 0.18504291772842407, |
| "learning_rate": 5.884398495923727e-07, |
| "loss": 0.29355425834655763, |
| "memory(GiB)": 133.17, |
| "step": 2295, |
| "token_acc": 0.8842415418528523, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.693208430913349, |
| "grad_norm": 0.1993853747844696, |
| "learning_rate": 5.668159349025649e-07, |
| "loss": 0.3113893985748291, |
| "memory(GiB)": 133.17, |
| "step": 2300, |
| "token_acc": 0.8775886656746031, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.699063231850117, |
| "grad_norm": 0.18858520686626434, |
| "learning_rate": 5.455852191509214e-07, |
| "loss": 0.3054765224456787, |
| "memory(GiB)": 133.17, |
| "step": 2305, |
| "token_acc": 0.8816947533601692, |
| "train_speed(iter/s)": 0.067949 |
| }, |
| { |
| "epoch": 2.7049180327868854, |
| "grad_norm": 0.19768975675106049, |
| "learning_rate": 5.247485872873026e-07, |
| "loss": 0.29274706840515136, |
| "memory(GiB)": 133.17, |
| "step": 2310, |
| "token_acc": 0.8946102350213514, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.710772833723653, |
| "grad_norm": 0.17342238128185272, |
| "learning_rate": 5.043069078351526e-07, |
| "loss": 0.2879345893859863, |
| "memory(GiB)": 133.17, |
| "step": 2315, |
| "token_acc": 0.8956176935229068, |
| "train_speed(iter/s)": 0.067951 |
| }, |
| { |
| "epoch": 2.7166276346604215, |
| "grad_norm": 0.18019071221351624, |
| "learning_rate": 4.842610328552999e-07, |
| "loss": 0.29531962871551515, |
| "memory(GiB)": 133.17, |
| "step": 2320, |
| "token_acc": 0.8930268304142333, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.7224824355971897, |
| "grad_norm": 0.1835058629512787, |
| "learning_rate": 4.6461179791044806e-07, |
| "loss": 0.2953210353851318, |
| "memory(GiB)": 133.21, |
| "step": 2325, |
| "token_acc": 0.8869091207514772, |
| "train_speed(iter/s)": 0.067952 |
| }, |
| { |
| "epoch": 2.728337236533958, |
| "grad_norm": 0.1827324628829956, |
| "learning_rate": 4.453600220303378e-07, |
| "loss": 0.2804730415344238, |
| "memory(GiB)": 133.21, |
| "step": 2330, |
| "token_acc": 0.8874964005358507, |
| "train_speed(iter/s)": 0.067953 |
| }, |
| { |
| "epoch": 2.7341920374707263, |
| "grad_norm": 0.18949875235557556, |
| "learning_rate": 4.2650650767761535e-07, |
| "loss": 0.2842918872833252, |
| "memory(GiB)": 133.21, |
| "step": 2335, |
| "token_acc": 0.8981613154267605, |
| "train_speed(iter/s)": 0.067952 |
| }, |
| { |
| "epoch": 2.740046838407494, |
| "grad_norm": 0.2092583179473877, |
| "learning_rate": 4.0805204071437953e-07, |
| "loss": 0.3071431636810303, |
| "memory(GiB)": 133.21, |
| "step": 2340, |
| "token_acc": 0.886714704322126, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.7459016393442623, |
| "grad_norm": 0.18031486868858337, |
| "learning_rate": 3.899973903694243e-07, |
| "loss": 0.30032360553741455, |
| "memory(GiB)": 133.21, |
| "step": 2345, |
| "token_acc": 0.8907299680407984, |
| "train_speed(iter/s)": 0.06795 |
| }, |
| { |
| "epoch": 2.7517564402810306, |
| "grad_norm": 0.18996600806713104, |
| "learning_rate": 3.72343309206179e-07, |
| "loss": 0.2920222759246826, |
| "memory(GiB)": 133.21, |
| "step": 2350, |
| "token_acc": 0.8913125942460162, |
| "train_speed(iter/s)": 0.067948 |
| }, |
| { |
| "epoch": 2.7576112412177984, |
| "grad_norm": 0.18651233613491058, |
| "learning_rate": 3.55090533091339e-07, |
| "loss": 0.2933474063873291, |
| "memory(GiB)": 133.21, |
| "step": 2355, |
| "token_acc": 0.9025793311463176, |
| "train_speed(iter/s)": 0.067946 |
| }, |
| { |
| "epoch": 2.7634660421545667, |
| "grad_norm": 0.17747479677200317, |
| "learning_rate": 3.382397811641858e-07, |
| "loss": 0.2873265266418457, |
| "memory(GiB)": 133.21, |
| "step": 2360, |
| "token_acc": 0.8948029740479362, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.769320843091335, |
| "grad_norm": 0.17965154349803925, |
| "learning_rate": 3.217917558066241e-07, |
| "loss": 0.2922650337219238, |
| "memory(GiB)": 133.21, |
| "step": 2365, |
| "token_acc": 0.886650070990299, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.775175644028103, |
| "grad_norm": 0.17902247607707977, |
| "learning_rate": 3.057471426138958e-07, |
| "loss": 0.3062438488006592, |
| "memory(GiB)": 133.21, |
| "step": 2370, |
| "token_acc": 0.8800339720197485, |
| "train_speed(iter/s)": 0.067946 |
| }, |
| { |
| "epoch": 2.781030444964871, |
| "grad_norm": 0.17590953409671783, |
| "learning_rate": 2.901066103660033e-07, |
| "loss": 0.29376084804534913, |
| "memory(GiB)": 133.21, |
| "step": 2375, |
| "token_acc": 0.8933526766467255, |
| "train_speed(iter/s)": 0.067946 |
| }, |
| { |
| "epoch": 2.7868852459016393, |
| "grad_norm": 0.19265642762184143, |
| "learning_rate": 2.7487081099983435e-07, |
| "loss": 0.3061210155487061, |
| "memory(GiB)": 133.21, |
| "step": 2380, |
| "token_acc": 0.8897680154530525, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.7927400468384076, |
| "grad_norm": 0.18283043801784515, |
| "learning_rate": 2.6004037958199167e-07, |
| "loss": 0.2898393154144287, |
| "memory(GiB)": 133.21, |
| "step": 2385, |
| "token_acc": 0.9019573328471696, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.798594847775176, |
| "grad_norm": 0.18570415675640106, |
| "learning_rate": 2.4561593428231165e-07, |
| "loss": 0.29611454010009763, |
| "memory(GiB)": 133.21, |
| "step": 2390, |
| "token_acc": 0.9104894052586534, |
| "train_speed(iter/s)": 0.067945 |
| }, |
| { |
| "epoch": 2.8044496487119437, |
| "grad_norm": 0.18174812197685242, |
| "learning_rate": 2.3159807634811182e-07, |
| "loss": 0.28598248958587646, |
| "memory(GiB)": 133.21, |
| "step": 2395, |
| "token_acc": 0.8965343061596744, |
| "train_speed(iter/s)": 0.067945 |
| }, |
| { |
| "epoch": 2.810304449648712, |
| "grad_norm": 0.18527300655841827, |
| "learning_rate": 2.1798739007911517e-07, |
| "loss": 0.3005537986755371, |
| "memory(GiB)": 133.21, |
| "step": 2400, |
| "token_acc": 0.887049760238975, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.8161592505854802, |
| "grad_norm": 0.17769944667816162, |
| "learning_rate": 2.0478444280310206e-07, |
| "loss": 0.2945347785949707, |
| "memory(GiB)": 133.21, |
| "step": 2405, |
| "token_acc": 0.8905158466381549, |
| "train_speed(iter/s)": 0.067944 |
| }, |
| { |
| "epoch": 2.822014051522248, |
| "grad_norm": 0.22059805691242218, |
| "learning_rate": 1.919897848522656e-07, |
| "loss": 0.2934718132019043, |
| "memory(GiB)": 133.21, |
| "step": 2410, |
| "token_acc": 0.8902092807074844, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.8278688524590163, |
| "grad_norm": 0.18694834411144257, |
| "learning_rate": 1.796039495402646e-07, |
| "loss": 0.2984294414520264, |
| "memory(GiB)": 133.21, |
| "step": 2415, |
| "token_acc": 0.8962242022599117, |
| "train_speed(iter/s)": 0.067942 |
| }, |
| { |
| "epoch": 2.8337236533957846, |
| "grad_norm": 0.18271034955978394, |
| "learning_rate": 1.6762745313999795e-07, |
| "loss": 0.3036228895187378, |
| "memory(GiB)": 133.21, |
| "step": 2420, |
| "token_acc": 0.876532044285046, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.839578454332553, |
| "grad_norm": 0.17917729914188385, |
| "learning_rate": 1.5606079486208846e-07, |
| "loss": 0.29344632625579836, |
| "memory(GiB)": 133.21, |
| "step": 2425, |
| "token_acc": 0.8902957800547429, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.845433255269321, |
| "grad_norm": 0.1874186247587204, |
| "learning_rate": 1.449044568340663e-07, |
| "loss": 0.3013723373413086, |
| "memory(GiB)": 133.21, |
| "step": 2430, |
| "token_acc": 0.8944912877684091, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.851288056206089, |
| "grad_norm": 0.18850503861904144, |
| "learning_rate": 1.3415890408027932e-07, |
| "loss": 0.29042725563049315, |
| "memory(GiB)": 133.21, |
| "step": 2435, |
| "token_acc": 0.8900198911125016, |
| "train_speed(iter/s)": 0.067939 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.1918351948261261, |
| "learning_rate": 1.2382458450250657e-07, |
| "loss": 0.30360941886901854, |
| "memory(GiB)": 133.21, |
| "step": 2440, |
| "token_acc": 0.8830958974326918, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.8629976580796255, |
| "grad_norm": 0.17968802154064178, |
| "learning_rate": 1.1390192886129304e-07, |
| "loss": 0.29129633903503416, |
| "memory(GiB)": 133.21, |
| "step": 2445, |
| "token_acc": 0.9028775730901043, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.8688524590163933, |
| "grad_norm": 0.19108013808727264, |
| "learning_rate": 1.0439135075798634e-07, |
| "loss": 0.30793027877807616, |
| "memory(GiB)": 133.21, |
| "step": 2450, |
| "token_acc": 0.8880401770150538, |
| "train_speed(iter/s)": 0.067942 |
| }, |
| { |
| "epoch": 2.8747072599531616, |
| "grad_norm": 0.18335837125778198, |
| "learning_rate": 9.529324661750494e-08, |
| "loss": 0.301357364654541, |
| "memory(GiB)": 133.21, |
| "step": 2455, |
| "token_acc": 0.8833441771706613, |
| "train_speed(iter/s)": 0.067941 |
| }, |
| { |
| "epoch": 2.88056206088993, |
| "grad_norm": 0.18961112201213837, |
| "learning_rate": 8.6607995671808e-08, |
| "loss": 0.29690849781036377, |
| "memory(GiB)": 133.21, |
| "step": 2460, |
| "token_acc": 0.8882351107925328, |
| "train_speed(iter/s)": 0.067941 |
| }, |
| { |
| "epoch": 2.8864168618266977, |
| "grad_norm": 0.22844481468200684, |
| "learning_rate": 7.833595994409248e-08, |
| "loss": 0.2876168727874756, |
| "memory(GiB)": 133.21, |
| "step": 2465, |
| "token_acc": 0.888989677822959, |
| "train_speed(iter/s)": 0.06794 |
| }, |
| { |
| "epoch": 2.892271662763466, |
| "grad_norm": 0.19000564515590668, |
| "learning_rate": 7.047748423370193e-08, |
| "loss": 0.3021047353744507, |
| "memory(GiB)": 133.21, |
| "step": 2470, |
| "token_acc": 0.8849937208945455, |
| "train_speed(iter/s)": 0.067942 |
| }, |
| { |
| "epoch": 2.898126463700234, |
| "grad_norm": 0.18740171194076538, |
| "learning_rate": 6.303289610175233e-08, |
| "loss": 0.29048540592193606, |
| "memory(GiB)": 133.21, |
| "step": 2475, |
| "token_acc": 0.8921316614420063, |
| "train_speed(iter/s)": 0.067942 |
| }, |
| { |
| "epoch": 2.9039812646370025, |
| "grad_norm": 0.18983155488967896, |
| "learning_rate": 5.6002505857480906e-08, |
| "loss": 0.2961090326309204, |
| "memory(GiB)": 133.21, |
| "step": 2480, |
| "token_acc": 0.8925172239013309, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.9098360655737707, |
| "grad_norm": 0.1831265538930893, |
| "learning_rate": 4.938660654530969e-08, |
| "loss": 0.3080202579498291, |
| "memory(GiB)": 133.21, |
| "step": 2485, |
| "token_acc": 0.8858930624281501, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.9156908665105385, |
| "grad_norm": 0.19748179614543915, |
| "learning_rate": 4.318547393263317e-08, |
| "loss": 0.30983719825744627, |
| "memory(GiB)": 133.21, |
| "step": 2490, |
| "token_acc": 0.8897056301087475, |
| "train_speed(iter/s)": 0.067942 |
| }, |
| { |
| "epoch": 2.921545667447307, |
| "grad_norm": 0.18569178879261017, |
| "learning_rate": 3.739936649832188e-08, |
| "loss": 0.29312853813171386, |
| "memory(GiB)": 133.21, |
| "step": 2495, |
| "token_acc": 0.8924126241525105, |
| "train_speed(iter/s)": 0.067945 |
| }, |
| { |
| "epoch": 2.927400468384075, |
| "grad_norm": 0.1950037181377411, |
| "learning_rate": 3.2028525421946563e-08, |
| "loss": 0.2936956167221069, |
| "memory(GiB)": 133.21, |
| "step": 2500, |
| "token_acc": 0.9020274516704794, |
| "train_speed(iter/s)": 0.067945 |
| }, |
| { |
| "epoch": 2.933255269320843, |
| "grad_norm": 0.18167735636234283, |
| "learning_rate": 2.70731745737296e-08, |
| "loss": 0.2973939418792725, |
| "memory(GiB)": 133.21, |
| "step": 2505, |
| "token_acc": 0.8934657981473672, |
| "train_speed(iter/s)": 0.067946 |
| }, |
| { |
| "epoch": 2.939110070257611, |
| "grad_norm": 0.18503886461257935, |
| "learning_rate": 2.2533520505211294e-08, |
| "loss": 0.29192218780517576, |
| "memory(GiB)": 133.21, |
| "step": 2510, |
| "token_acc": 0.8951111388611389, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.9449648711943794, |
| "grad_norm": 0.17936980724334717, |
| "learning_rate": 1.8409752440639027e-08, |
| "loss": 0.28421769142150877, |
| "memory(GiB)": 133.21, |
| "step": 2515, |
| "token_acc": 0.8924425595173032, |
| "train_speed(iter/s)": 0.067947 |
| }, |
| { |
| "epoch": 2.9508196721311473, |
| "grad_norm": 0.18841403722763062, |
| "learning_rate": 1.470204226908134e-08, |
| "loss": 0.30081515312194823, |
| "memory(GiB)": 133.21, |
| "step": 2520, |
| "token_acc": 0.894779086363537, |
| "train_speed(iter/s)": 0.067944 |
| }, |
| { |
| "epoch": 2.9566744730679155, |
| "grad_norm": 0.19020894169807434, |
| "learning_rate": 1.1410544537263645e-08, |
| "loss": 0.3081362247467041, |
| "memory(GiB)": 133.21, |
| "step": 2525, |
| "token_acc": 0.8934638595786859, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.962529274004684, |
| "grad_norm": 0.24749897420406342, |
| "learning_rate": 8.535396443124511e-09, |
| "loss": 0.2878671884536743, |
| "memory(GiB)": 133.21, |
| "step": 2530, |
| "token_acc": 0.8913681995528473, |
| "train_speed(iter/s)": 0.067944 |
| }, |
| { |
| "epoch": 2.968384074941452, |
| "grad_norm": 0.17989581823349, |
| "learning_rate": 6.076717830098e-09, |
| "loss": 0.2899226903915405, |
| "memory(GiB)": 133.21, |
| "step": 2535, |
| "token_acc": 0.8996739041991876, |
| "train_speed(iter/s)": 0.067943 |
| }, |
| { |
| "epoch": 2.9742388758782203, |
| "grad_norm": 0.18506699800491333, |
| "learning_rate": 4.034611182121007e-09, |
| "loss": 0.2908132553100586, |
| "memory(GiB)": 133.21, |
| "step": 2540, |
| "token_acc": 0.8988520352276212, |
| "train_speed(iter/s)": 0.067941 |
| }, |
| { |
| "epoch": 2.980093676814988, |
| "grad_norm": 0.18510298430919647, |
| "learning_rate": 2.40916161935445e-09, |
| "loss": 0.29580187797546387, |
| "memory(GiB)": 133.21, |
| "step": 2545, |
| "token_acc": 0.8895340031302065, |
| "train_speed(iter/s)": 0.067941 |
| }, |
| { |
| "epoch": 2.9859484777517564, |
| "grad_norm": 0.18303260207176208, |
| "learning_rate": 1.2004368946427758e-09, |
| "loss": 0.2922369956970215, |
| "memory(GiB)": 133.21, |
| "step": 2550, |
| "token_acc": 0.8920757330143692, |
| "train_speed(iter/s)": 0.067941 |
| }, |
| { |
| "epoch": 2.9918032786885247, |
| "grad_norm": 0.1823214441537857, |
| "learning_rate": 4.084873906851083e-10, |
| "loss": 0.29749574661254885, |
| "memory(GiB)": 133.21, |
| "step": 2555, |
| "token_acc": 0.8995572920769461, |
| "train_speed(iter/s)": 0.067939 |
| }, |
| { |
| "epoch": 2.9976580796252925, |
| "grad_norm": 0.17787551879882812, |
| "learning_rate": 3.334611793692766e-11, |
| "loss": 0.29738173484802244, |
| "memory(GiB)": 133.21, |
| "step": 2560, |
| "token_acc": 0.903360959533883, |
| "train_speed(iter/s)": 0.067939 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2562, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1.0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2476392970944512.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
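The JSON above is the complete trainer state; everything after `log_history` is the trainer's own bookkeeping (`logging_steps`, `max_steps`, `total_flos`, `train_batch_size`, and the `TrainerControl` callback flags). As a minimal sketch only, assuming this content is saved verbatim as a standard Hugging Face Trainer `trainer_state.json` (the checkpoint path below is a hypothetical placeholder), the logged history can be summarised with the standard library:

```python
# Minimal sketch: summarise the trainer state shown above.
# Assumption: the JSON is stored as a regular trainer_state.json file;
# the path below is a hypothetical placeholder.
import json

with open("output/checkpoint-2562/trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
last = history[-1]

print(f"epochs trained : {state['epoch']}")
print(f"global steps   : {state['global_step']} / {state['max_steps']}")
print(f"final loss     : {last['loss']:.4f} (step {last['step']})")
print(f"final token_acc: {last['token_acc']:.4f}")
print(f"final lr       : {last['learning_rate']:.3e}")

# Rough view of late-training behaviour: mean loss over the last 10 logged points.
tail = history[-10:]
print(f"mean loss, last {len(tail)} logs: {sum(e['loss'] for e in tail) / len(tail):.4f}")
```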