Token Classification
Transformers
TensorBoard
Safetensors
xlm-roberta
Generated from Trainer
language-identification
codeswitching
Instructions to use DerivedFunction/polyglot-tagger-v2.2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DerivedFunction/polyglot-tagger-v2.2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="DerivedFunction/polyglot-tagger-v2.2")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("DerivedFunction/polyglot-tagger-v2.2")
model = AutoModelForTokenClassification.from_pretrained("DerivedFunction/polyglot-tagger-v2.2")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 60000, | |
| "best_metric": 0.9576732752290649, | |
| "best_model_checkpoint": "./polyglot-tagger-v2.2/checkpoint-60000", | |
| "epoch": 2.0, | |
| "eval_steps": 2500, | |
| "global_step": 62154, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003217865589754316, | |
| "grad_norm": 12.599648475646973, | |
| "learning_rate": 4.9920359108022015e-05, | |
| "loss": 8.340531616210937, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.006435731179508632, | |
| "grad_norm": 16.90213966369629, | |
| "learning_rate": 4.9839913762589704e-05, | |
| "loss": 2.2957839965820312, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.009653596769262947, | |
| "grad_norm": 4.964681625366211, | |
| "learning_rate": 4.975946841715739e-05, | |
| "loss": 1.2708330535888672, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.012871462359017264, | |
| "grad_norm": 9.05567455291748, | |
| "learning_rate": 4.9679023071725076e-05, | |
| "loss": 0.8362879180908203, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01608932794877158, | |
| "grad_norm": 6.991761207580566, | |
| "learning_rate": 4.959857772629276e-05, | |
| "loss": 0.7179710388183593, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.019307193538525894, | |
| "grad_norm": 6.725792407989502, | |
| "learning_rate": 4.951813238086044e-05, | |
| "loss": 0.6352015686035156, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.022525059128280213, | |
| "grad_norm": 8.771743774414062, | |
| "learning_rate": 4.943768703542813e-05, | |
| "loss": 0.5740464782714844, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.025742924718034528, | |
| "grad_norm": 9.227128028869629, | |
| "learning_rate": 4.935724168999582e-05, | |
| "loss": 0.5493228149414062, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.028960790307788843, | |
| "grad_norm": 11.525238990783691, | |
| "learning_rate": 4.927679634456351e-05, | |
| "loss": 0.5246843719482421, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.03217865589754316, | |
| "grad_norm": 5.464803695678711, | |
| "learning_rate": 4.919635099913119e-05, | |
| "loss": 0.5209083938598633, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03539652148729747, | |
| "grad_norm": 16.38031005859375, | |
| "learning_rate": 4.911590565369888e-05, | |
| "loss": 0.4483478546142578, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03861438707705179, | |
| "grad_norm": 8.507850646972656, | |
| "learning_rate": 4.903546030826657e-05, | |
| "loss": 0.4467829513549805, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.04183225266680611, | |
| "grad_norm": 10.285760879516602, | |
| "learning_rate": 4.895501496283425e-05, | |
| "loss": 0.44222518920898435, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.045050118256560426, | |
| "grad_norm": 8.84359359741211, | |
| "learning_rate": 4.887456961740194e-05, | |
| "loss": 0.3752718734741211, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04826798384631474, | |
| "grad_norm": 5.227468490600586, | |
| "learning_rate": 4.879412427196963e-05, | |
| "loss": 0.4211923217773437, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.051485849436069056, | |
| "grad_norm": 5.467648983001709, | |
| "learning_rate": 4.871367892653732e-05, | |
| "loss": 0.3770075988769531, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.05470371502582337, | |
| "grad_norm": 7.041491508483887, | |
| "learning_rate": 4.8633233581105e-05, | |
| "loss": 0.39339431762695315, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.057921580615577686, | |
| "grad_norm": 4.243207931518555, | |
| "learning_rate": 4.855278823567269e-05, | |
| "loss": 0.38666561126708987, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.061139446205332, | |
| "grad_norm": 10.47419548034668, | |
| "learning_rate": 4.847234289024038e-05, | |
| "loss": 0.37535915374755857, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.06435731179508632, | |
| "grad_norm": 7.903866291046143, | |
| "learning_rate": 4.839189754480806e-05, | |
| "loss": 0.3448226165771484, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.06757517738484063, | |
| "grad_norm": 4.801281929016113, | |
| "learning_rate": 4.831145219937574e-05, | |
| "loss": 0.3735826873779297, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.07079304297459495, | |
| "grad_norm": 6.289346218109131, | |
| "learning_rate": 4.823100685394343e-05, | |
| "loss": 0.3530204391479492, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.07401090856434926, | |
| "grad_norm": 6.725964546203613, | |
| "learning_rate": 4.815056150851112e-05, | |
| "loss": 0.37652462005615234, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.07722877415410358, | |
| "grad_norm": 14.810294151306152, | |
| "learning_rate": 4.80701161630788e-05, | |
| "loss": 0.3361575698852539, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.08044663974385789, | |
| "grad_norm": 10.04340934753418, | |
| "learning_rate": 4.798967081764649e-05, | |
| "loss": 0.33932579040527344, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08044663974385789, | |
| "eval_accuracy": 0.9722890395064231, | |
| "eval_f1": 0.8388115225531707, | |
| "eval_loss": 0.10779489576816559, | |
| "eval_precision": 0.7998833070774257, | |
| "eval_recall": 0.8817226431355398, | |
| "eval_runtime": 47.993, | |
| "eval_samples_per_second": 520.91, | |
| "eval_steps_per_second": 28.942, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08366450533361222, | |
| "grad_norm": 5.665945529937744, | |
| "learning_rate": 4.790922547221418e-05, | |
| "loss": 0.3147930717468262, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.08688237092336654, | |
| "grad_norm": 5.679959297180176, | |
| "learning_rate": 4.782878012678187e-05, | |
| "loss": 0.363752555847168, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.09010023651312085, | |
| "grad_norm": 5.174692153930664, | |
| "learning_rate": 4.774833478134955e-05, | |
| "loss": 0.3410203552246094, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.09331810210287517, | |
| "grad_norm": 5.585685729980469, | |
| "learning_rate": 4.766788943591724e-05, | |
| "loss": 0.3211274337768555, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.09653596769262948, | |
| "grad_norm": 5.2783684730529785, | |
| "learning_rate": 4.758744409048493e-05, | |
| "loss": 0.3486956787109375, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.0997538332823838, | |
| "grad_norm": 6.57411527633667, | |
| "learning_rate": 4.750699874505261e-05, | |
| "loss": 0.33664627075195314, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.10297169887213811, | |
| "grad_norm": 8.044240951538086, | |
| "learning_rate": 4.74265533996203e-05, | |
| "loss": 0.31327726364135744, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.10618956446189243, | |
| "grad_norm": 6.461747169494629, | |
| "learning_rate": 4.734610805418799e-05, | |
| "loss": 0.32588130950927735, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.10940743005164674, | |
| "grad_norm": 2.9853570461273193, | |
| "learning_rate": 4.726566270875568e-05, | |
| "loss": 0.28595542907714844, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.11262529564140106, | |
| "grad_norm": 3.4314420223236084, | |
| "learning_rate": 4.718521736332336e-05, | |
| "loss": 0.32015865325927734, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.11584316123115537, | |
| "grad_norm": 5.1868205070495605, | |
| "learning_rate": 4.7104772017891044e-05, | |
| "loss": 0.29926179885864257, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.11906102682090969, | |
| "grad_norm": 5.119034767150879, | |
| "learning_rate": 4.702432667245873e-05, | |
| "loss": 0.30344295501708984, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.122278892410664, | |
| "grad_norm": 10.863602638244629, | |
| "learning_rate": 4.694388132702642e-05, | |
| "loss": 0.282437686920166, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.12549675800041832, | |
| "grad_norm": 3.252387046813965, | |
| "learning_rate": 4.6863435981594104e-05, | |
| "loss": 0.2832884407043457, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.12871462359017263, | |
| "grad_norm": 12.616849899291992, | |
| "learning_rate": 4.678299063616179e-05, | |
| "loss": 0.299547119140625, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.13193248917992695, | |
| "grad_norm": 7.093480110168457, | |
| "learning_rate": 4.670254529072948e-05, | |
| "loss": 0.26917409896850586, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.13515035476968126, | |
| "grad_norm": 8.318635940551758, | |
| "learning_rate": 4.6622099945297164e-05, | |
| "loss": 0.28246700286865234, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.13836822035943558, | |
| "grad_norm": 6.450869560241699, | |
| "learning_rate": 4.654165459986485e-05, | |
| "loss": 0.2775975036621094, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.1415860859491899, | |
| "grad_norm": 7.253055572509766, | |
| "learning_rate": 4.646120925443254e-05, | |
| "loss": 0.2734929466247559, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.1448039515389442, | |
| "grad_norm": 2.5073742866516113, | |
| "learning_rate": 4.638076390900023e-05, | |
| "loss": 0.2605240631103516, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.14802181712869852, | |
| "grad_norm": 4.528222560882568, | |
| "learning_rate": 4.6300318563567914e-05, | |
| "loss": 0.2719747543334961, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.15123968271845284, | |
| "grad_norm": 6.474442481994629, | |
| "learning_rate": 4.62198732181356e-05, | |
| "loss": 0.27226806640625, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.15445754830820715, | |
| "grad_norm": 4.291607856750488, | |
| "learning_rate": 4.613942787270329e-05, | |
| "loss": 0.29810441970825197, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.15767541389796147, | |
| "grad_norm": 2.773998737335205, | |
| "learning_rate": 4.6058982527270974e-05, | |
| "loss": 0.2813092041015625, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.16089327948771578, | |
| "grad_norm": 8.771388053894043, | |
| "learning_rate": 4.597853718183866e-05, | |
| "loss": 0.3041937828063965, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.16089327948771578, | |
| "eval_accuracy": 0.9766050453886422, | |
| "eval_f1": 0.8726156897994607, | |
| "eval_loss": 0.09095935523509979, | |
| "eval_precision": 0.8421638308564865, | |
| "eval_recall": 0.9053523880577816, | |
| "eval_runtime": 38.7855, | |
| "eval_samples_per_second": 644.572, | |
| "eval_steps_per_second": 35.812, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.16411114507747013, | |
| "grad_norm": 6.708637237548828, | |
| "learning_rate": 4.5898091836406345e-05, | |
| "loss": 0.25013633728027346, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.16732901066722444, | |
| "grad_norm": 4.836703777313232, | |
| "learning_rate": 4.5817646490974034e-05, | |
| "loss": 0.27049646377563474, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.17054687625697876, | |
| "grad_norm": 2.853039026260376, | |
| "learning_rate": 4.5737201145541717e-05, | |
| "loss": 0.2682499885559082, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.17376474184673307, | |
| "grad_norm": 2.6769561767578125, | |
| "learning_rate": 4.5656755800109406e-05, | |
| "loss": 0.25310728073120115, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.1769826074364874, | |
| "grad_norm": 3.740751028060913, | |
| "learning_rate": 4.5576310454677095e-05, | |
| "loss": 0.2738076400756836, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.1802004730262417, | |
| "grad_norm": 5.003463268280029, | |
| "learning_rate": 4.549586510924478e-05, | |
| "loss": 0.23465911865234376, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.18341833861599602, | |
| "grad_norm": 4.04716682434082, | |
| "learning_rate": 4.5415419763812466e-05, | |
| "loss": 0.2690970230102539, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.18663620420575033, | |
| "grad_norm": 4.878298282623291, | |
| "learning_rate": 4.5334974418380155e-05, | |
| "loss": 0.254027156829834, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.18985406979550465, | |
| "grad_norm": 6.428683757781982, | |
| "learning_rate": 4.5254529072947844e-05, | |
| "loss": 0.2576713562011719, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.19307193538525896, | |
| "grad_norm": 4.079443454742432, | |
| "learning_rate": 4.5174083727515526e-05, | |
| "loss": 0.27297264099121094, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.19628980097501328, | |
| "grad_norm": 1.6320624351501465, | |
| "learning_rate": 4.5093638382083215e-05, | |
| "loss": 0.24345703125, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.1995076665647676, | |
| "grad_norm": 4.895444869995117, | |
| "learning_rate": 4.5013193036650904e-05, | |
| "loss": 0.2668925857543945, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.2027255321545219, | |
| "grad_norm": 15.62102222442627, | |
| "learning_rate": 4.493274769121859e-05, | |
| "loss": 0.25465171813964843, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.20594339774427622, | |
| "grad_norm": 3.3204216957092285, | |
| "learning_rate": 4.4852302345786276e-05, | |
| "loss": 0.2394422721862793, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.20916126333403054, | |
| "grad_norm": 2.538597822189331, | |
| "learning_rate": 4.4771857000353965e-05, | |
| "loss": 0.2686458206176758, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.21237912892378485, | |
| "grad_norm": 3.806776523590088, | |
| "learning_rate": 4.469141165492165e-05, | |
| "loss": 0.2445933151245117, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.21559699451353917, | |
| "grad_norm": 5.519633769989014, | |
| "learning_rate": 4.4610966309489336e-05, | |
| "loss": 0.2316486930847168, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.21881486010329348, | |
| "grad_norm": 2.570289134979248, | |
| "learning_rate": 4.453052096405702e-05, | |
| "loss": 0.25618520736694333, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.2220327256930478, | |
| "grad_norm": 1.9880187511444092, | |
| "learning_rate": 4.445007561862471e-05, | |
| "loss": 0.22628854751586913, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.22525059128280212, | |
| "grad_norm": 5.40452766418457, | |
| "learning_rate": 4.4369630273192396e-05, | |
| "loss": 0.23788238525390626, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.22846845687255643, | |
| "grad_norm": 3.7318766117095947, | |
| "learning_rate": 4.428918492776008e-05, | |
| "loss": 0.23460798263549804, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.23168632246231075, | |
| "grad_norm": 3.2017786502838135, | |
| "learning_rate": 4.420873958232777e-05, | |
| "loss": 0.2477778434753418, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.23490418805206506, | |
| "grad_norm": 5.915706157684326, | |
| "learning_rate": 4.412829423689546e-05, | |
| "loss": 0.2488957405090332, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.23812205364181938, | |
| "grad_norm": 2.817690849304199, | |
| "learning_rate": 4.404784889146314e-05, | |
| "loss": 0.24542184829711913, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.2413399192315737, | |
| "grad_norm": 4.39451789855957, | |
| "learning_rate": 4.396740354603083e-05, | |
| "loss": 0.2323160743713379, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.2413399192315737, | |
| "eval_accuracy": 0.9765879819893235, | |
| "eval_f1": 0.8910072212088652, | |
| "eval_loss": 0.08586511015892029, | |
| "eval_precision": 0.8661088374239462, | |
| "eval_recall": 0.9173795037367669, | |
| "eval_runtime": 38.8792, | |
| "eval_samples_per_second": 643.017, | |
| "eval_steps_per_second": 35.726, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.244557784821328, | |
| "grad_norm": 6.589978218078613, | |
| "learning_rate": 4.388695820059852e-05, | |
| "loss": 0.25242706298828127, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.24777565041108232, | |
| "grad_norm": 8.314870834350586, | |
| "learning_rate": 4.3806512855166206e-05, | |
| "loss": 0.23692258834838867, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.25099351600083664, | |
| "grad_norm": 1.829958200454712, | |
| "learning_rate": 4.372606750973389e-05, | |
| "loss": 0.2302927017211914, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.254211381590591, | |
| "grad_norm": 2.879436492919922, | |
| "learning_rate": 4.364562216430158e-05, | |
| "loss": 0.22530841827392578, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.25742924718034527, | |
| "grad_norm": 4.005130767822266, | |
| "learning_rate": 4.3565176818869266e-05, | |
| "loss": 0.22251163482666014, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.2606471127700996, | |
| "grad_norm": 6.088663578033447, | |
| "learning_rate": 4.348473147343695e-05, | |
| "loss": 0.2417246437072754, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.2638649783598539, | |
| "grad_norm": 2.821535587310791, | |
| "learning_rate": 4.340428612800463e-05, | |
| "loss": 0.2411360549926758, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.26708284394960824, | |
| "grad_norm": 3.160569190979004, | |
| "learning_rate": 4.332384078257232e-05, | |
| "loss": 0.23668193817138672, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.2703007095393625, | |
| "grad_norm": 1.9559069871902466, | |
| "learning_rate": 4.324339543714001e-05, | |
| "loss": 0.20959726333618164, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.27351857512911687, | |
| "grad_norm": 3.570617914199829, | |
| "learning_rate": 4.316295009170769e-05, | |
| "loss": 0.23034614562988281, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.27673644071887116, | |
| "grad_norm": 15.547330856323242, | |
| "learning_rate": 4.308250474627538e-05, | |
| "loss": 0.22180200576782227, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.2799543063086255, | |
| "grad_norm": 1.9708352088928223, | |
| "learning_rate": 4.300205940084307e-05, | |
| "loss": 0.22687753677368164, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.2831721718983798, | |
| "grad_norm": 2.9480574131011963, | |
| "learning_rate": 4.292161405541076e-05, | |
| "loss": 0.22114805221557618, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.28639003748813413, | |
| "grad_norm": 1.7439308166503906, | |
| "learning_rate": 4.284116870997844e-05, | |
| "loss": 0.20100093841552735, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.2896079030778884, | |
| "grad_norm": 8.816022872924805, | |
| "learning_rate": 4.276072336454613e-05, | |
| "loss": 0.21243839263916015, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.29282576866764276, | |
| "grad_norm": 2.90824556350708, | |
| "learning_rate": 4.268027801911382e-05, | |
| "loss": 0.22030885696411132, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.29604363425739705, | |
| "grad_norm": 1.6333738565444946, | |
| "learning_rate": 4.25998326736815e-05, | |
| "loss": 0.20564973831176758, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.2992614998471514, | |
| "grad_norm": 2.048818349838257, | |
| "learning_rate": 4.251938732824919e-05, | |
| "loss": 0.22784378051757812, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.3024793654369057, | |
| "grad_norm": 2.5882511138916016, | |
| "learning_rate": 4.243894198281688e-05, | |
| "loss": 0.2064011001586914, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.30569723102666, | |
| "grad_norm": 4.423720359802246, | |
| "learning_rate": 4.235849663738457e-05, | |
| "loss": 0.22651321411132813, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.3089150966164143, | |
| "grad_norm": 6.1782732009887695, | |
| "learning_rate": 4.227805129195225e-05, | |
| "loss": 0.2322876739501953, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.31213296220616865, | |
| "grad_norm": 6.551365852355957, | |
| "learning_rate": 4.219760594651993e-05, | |
| "loss": 0.19593393325805664, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.31535082779592294, | |
| "grad_norm": 3.024858236312866, | |
| "learning_rate": 4.211716060108762e-05, | |
| "loss": 0.21738645553588867, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.3185686933856773, | |
| "grad_norm": 3.413978338241577, | |
| "learning_rate": 4.203671525565531e-05, | |
| "loss": 0.23522287368774414, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.32178655897543157, | |
| "grad_norm": 5.521674156188965, | |
| "learning_rate": 4.195626991022299e-05, | |
| "loss": 0.2253404998779297, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.32178655897543157, | |
| "eval_accuracy": 0.9818083480354339, | |
| "eval_f1": 0.9029431672892276, | |
| "eval_loss": 0.07069914042949677, | |
| "eval_precision": 0.8804620179673655, | |
| "eval_recall": 0.9266024362948766, | |
| "eval_runtime": 38.4445, | |
| "eval_samples_per_second": 650.288, | |
| "eval_steps_per_second": 36.13, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.3250044245651859, | |
| "grad_norm": 3.399749755859375, | |
| "learning_rate": 4.187582456479068e-05, | |
| "loss": 0.20138219833374024, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.32822229015494025, | |
| "grad_norm": 6.1640753746032715, | |
| "learning_rate": 4.179537921935837e-05, | |
| "loss": 0.2074588966369629, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.33144015574469454, | |
| "grad_norm": 2.652939796447754, | |
| "learning_rate": 4.171493387392605e-05, | |
| "loss": 0.20934364318847656, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.3346580213344489, | |
| "grad_norm": 2.3221635818481445, | |
| "learning_rate": 4.163448852849374e-05, | |
| "loss": 0.20557579040527343, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.3378758869242032, | |
| "grad_norm": 4.995593547821045, | |
| "learning_rate": 4.155404318306143e-05, | |
| "loss": 0.21284431457519531, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.3410937525139575, | |
| "grad_norm": 5.589059829711914, | |
| "learning_rate": 4.147359783762912e-05, | |
| "loss": 0.22939311981201171, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.3443116181037118, | |
| "grad_norm": 3.8397393226623535, | |
| "learning_rate": 4.13931524921968e-05, | |
| "loss": 0.20622701644897462, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.34752948369346615, | |
| "grad_norm": 6.400503158569336, | |
| "learning_rate": 4.131270714676449e-05, | |
| "loss": 0.2185076332092285, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.35074734928322043, | |
| "grad_norm": 8.725358009338379, | |
| "learning_rate": 4.123226180133218e-05, | |
| "loss": 0.22046119689941407, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.3539652148729748, | |
| "grad_norm": 2.3542211055755615, | |
| "learning_rate": 4.115181645589986e-05, | |
| "loss": 0.21081886291503907, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.35718308046272906, | |
| "grad_norm": 2.4933526515960693, | |
| "learning_rate": 4.107137111046755e-05, | |
| "loss": 0.21268159866333008, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.3604009460524834, | |
| "grad_norm": 2.3393359184265137, | |
| "learning_rate": 4.0990925765035234e-05, | |
| "loss": 0.20191749572753906, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.3636188116422377, | |
| "grad_norm": 7.511013507843018, | |
| "learning_rate": 4.091048041960292e-05, | |
| "loss": 0.22198728561401368, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.36683667723199204, | |
| "grad_norm": 2.7252819538116455, | |
| "learning_rate": 4.0830035074170606e-05, | |
| "loss": 0.18651321411132812, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.3700545428217463, | |
| "grad_norm": 1.7606490850448608, | |
| "learning_rate": 4.0749589728738295e-05, | |
| "loss": 0.20632423400878908, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.37327240841150067, | |
| "grad_norm": 1.7523146867752075, | |
| "learning_rate": 4.0669144383305984e-05, | |
| "loss": 0.19867513656616212, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.37649027400125495, | |
| "grad_norm": 1.9296563863754272, | |
| "learning_rate": 4.058869903787367e-05, | |
| "loss": 0.21059492111206055, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.3797081395910093, | |
| "grad_norm": 2.119272232055664, | |
| "learning_rate": 4.0508253692441355e-05, | |
| "loss": 0.20072717666625978, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.3829260051807636, | |
| "grad_norm": 1.8676605224609375, | |
| "learning_rate": 4.0427808347009044e-05, | |
| "loss": 0.2199223518371582, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.3861438707705179, | |
| "grad_norm": 7.326332092285156, | |
| "learning_rate": 4.034736300157673e-05, | |
| "loss": 0.22979413986206054, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.3893617363602722, | |
| "grad_norm": 6.403603553771973, | |
| "learning_rate": 4.0266917656144415e-05, | |
| "loss": 0.20364051818847656, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.39257960195002656, | |
| "grad_norm": 7.924391269683838, | |
| "learning_rate": 4.0186472310712104e-05, | |
| "loss": 0.21068944931030273, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.39579746753978085, | |
| "grad_norm": 1.8581446409225464, | |
| "learning_rate": 4.010602696527979e-05, | |
| "loss": 0.1954026412963867, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.3990153331295352, | |
| "grad_norm": 4.413888931274414, | |
| "learning_rate": 4.002558161984748e-05, | |
| "loss": 0.20379119873046875, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.4022331987192895, | |
| "grad_norm": 7.339420795440674, | |
| "learning_rate": 3.9945136274415165e-05, | |
| "loss": 0.21170831680297852, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4022331987192895, | |
| "eval_accuracy": 0.9805167004142786, | |
| "eval_f1": 0.9110038183596832, | |
| "eval_loss": 0.07153428345918655, | |
| "eval_precision": 0.8942803330689929, | |
| "eval_recall": 0.9283646981644178, | |
| "eval_runtime": 38.6581, | |
| "eval_samples_per_second": 646.695, | |
| "eval_steps_per_second": 35.93, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4054510643090438, | |
| "grad_norm": 6.818169116973877, | |
| "learning_rate": 3.9864690928982854e-05, | |
| "loss": 0.1984395217895508, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.4086689298987981, | |
| "grad_norm": 7.034976959228516, | |
| "learning_rate": 3.9784245583550536e-05, | |
| "loss": 0.20739023208618165, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.41188679548855245, | |
| "grad_norm": 1.944700002670288, | |
| "learning_rate": 3.9703800238118225e-05, | |
| "loss": 0.2069017219543457, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.41510466107830674, | |
| "grad_norm": 4.255757808685303, | |
| "learning_rate": 3.962335489268591e-05, | |
| "loss": 0.20037817001342773, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.4183225266680611, | |
| "grad_norm": 3.274329900741577, | |
| "learning_rate": 3.9542909547253596e-05, | |
| "loss": 0.20747562408447265, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.42154039225781537, | |
| "grad_norm": 8.684778213500977, | |
| "learning_rate": 3.9462464201821285e-05, | |
| "loss": 0.2101104164123535, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.4247582578475697, | |
| "grad_norm": 2.2195351123809814, | |
| "learning_rate": 3.938201885638897e-05, | |
| "loss": 0.2074994659423828, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.427976123437324, | |
| "grad_norm": 5.522794723510742, | |
| "learning_rate": 3.9301573510956657e-05, | |
| "loss": 0.20569501876831053, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.43119398902707834, | |
| "grad_norm": 8.248220443725586, | |
| "learning_rate": 3.9221128165524346e-05, | |
| "loss": 0.20211294174194336, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.4344118546168327, | |
| "grad_norm": 1.3259437084197998, | |
| "learning_rate": 3.9140682820092035e-05, | |
| "loss": 0.19619279861450195, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.43762972020658697, | |
| "grad_norm": 3.264453172683716, | |
| "learning_rate": 3.906023747465972e-05, | |
| "loss": 0.20112485885620118, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.4408475857963413, | |
| "grad_norm": 6.523902893066406, | |
| "learning_rate": 3.8979792129227406e-05, | |
| "loss": 0.17402765274047852, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.4440654513860956, | |
| "grad_norm": 6.627138137817383, | |
| "learning_rate": 3.8899346783795095e-05, | |
| "loss": 0.18946147918701173, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.44728331697584994, | |
| "grad_norm": 1.7494491338729858, | |
| "learning_rate": 3.881890143836278e-05, | |
| "loss": 0.20346969604492188, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.45050118256560423, | |
| "grad_norm": 6.50437068939209, | |
| "learning_rate": 3.8738456092930466e-05, | |
| "loss": 0.19907255172729493, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.4537190481553586, | |
| "grad_norm": 1.948766827583313, | |
| "learning_rate": 3.8658010747498155e-05, | |
| "loss": 0.19304462432861327, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.45693691374511286, | |
| "grad_norm": 2.1899731159210205, | |
| "learning_rate": 3.857756540206584e-05, | |
| "loss": 0.19804704666137696, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.4601547793348672, | |
| "grad_norm": 2.7552740573883057, | |
| "learning_rate": 3.8497120056633527e-05, | |
| "loss": 0.17073585510253905, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.4633726449246215, | |
| "grad_norm": 3.2083916664123535, | |
| "learning_rate": 3.841667471120121e-05, | |
| "loss": 0.19360818862915039, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.46659051051437583, | |
| "grad_norm": 5.908588409423828, | |
| "learning_rate": 3.83362293657689e-05, | |
| "loss": 0.17787866592407225, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.4698083761041301, | |
| "grad_norm": 2.6561052799224854, | |
| "learning_rate": 3.825578402033658e-05, | |
| "loss": 0.20204214096069337, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.47302624169388446, | |
| "grad_norm": 2.6253390312194824, | |
| "learning_rate": 3.817533867490427e-05, | |
| "loss": 0.18690492630004882, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.47624410728363875, | |
| "grad_norm": 1.7459347248077393, | |
| "learning_rate": 3.809489332947196e-05, | |
| "loss": 0.17405952453613283, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.4794619728733931, | |
| "grad_norm": 4.657139301300049, | |
| "learning_rate": 3.801444798403965e-05, | |
| "loss": 0.1884486961364746, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.4826798384631474, | |
| "grad_norm": 3.4389936923980713, | |
| "learning_rate": 3.793400263860733e-05, | |
| "loss": 0.1895262336730957, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.4826798384631474, | |
| "eval_accuracy": 0.9843394189343752, | |
| "eval_f1": 0.9178045953844602, | |
| "eval_loss": 0.05817751958966255, | |
| "eval_precision": 0.8991940957952288, | |
| "eval_recall": 0.9372017339627738, | |
| "eval_runtime": 38.9423, | |
| "eval_samples_per_second": 641.975, | |
| "eval_steps_per_second": 35.668, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.4858977040529017, | |
| "grad_norm": 2.704439640045166, | |
| "learning_rate": 3.785355729317502e-05, | |
| "loss": 0.1874333381652832, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.489115569642656, | |
| "grad_norm": 3.225116729736328, | |
| "learning_rate": 3.777311194774271e-05, | |
| "loss": 0.20594432830810547, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.49233343523241035, | |
| "grad_norm": 6.360146999359131, | |
| "learning_rate": 3.76926666023104e-05, | |
| "loss": 0.18481210708618165, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.49555130082216464, | |
| "grad_norm": 6.072609901428223, | |
| "learning_rate": 3.761222125687808e-05, | |
| "loss": 0.19338289260864258, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.498769166411919, | |
| "grad_norm": 6.828498363494873, | |
| "learning_rate": 3.753177591144577e-05, | |
| "loss": 0.17112209320068358, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.5019870320016733, | |
| "grad_norm": 1.9960297346115112, | |
| "learning_rate": 3.745133056601346e-05, | |
| "loss": 0.177659912109375, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.5052048975914276, | |
| "grad_norm": 2.489583969116211, | |
| "learning_rate": 3.737088522058114e-05, | |
| "loss": 0.1999117088317871, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.508422763181182, | |
| "grad_norm": 1.4370602369308472, | |
| "learning_rate": 3.729043987514882e-05, | |
| "loss": 0.18860567092895508, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.5116406287709362, | |
| "grad_norm": 1.5181185007095337, | |
| "learning_rate": 3.720999452971651e-05, | |
| "loss": 0.17629049301147462, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.5148584943606905, | |
| "grad_norm": 6.286506652832031, | |
| "learning_rate": 3.71295491842842e-05, | |
| "loss": 0.18211414337158202, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.5180763599504449, | |
| "grad_norm": 3.1108760833740234, | |
| "learning_rate": 3.704910383885188e-05, | |
| "loss": 0.18931018829345703, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.5212942255401992, | |
| "grad_norm": 0.5399872064590454, | |
| "learning_rate": 3.696865849341957e-05, | |
| "loss": 0.18857126235961913, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.5245120911299535, | |
| "grad_norm": 1.3765555620193481, | |
| "learning_rate": 3.688821314798726e-05, | |
| "loss": 0.18414220809936524, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.5277299567197078, | |
| "grad_norm": 3.104323148727417, | |
| "learning_rate": 3.680776780255494e-05, | |
| "loss": 0.18788623809814453, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.5309478223094621, | |
| "grad_norm": 1.609277367591858, | |
| "learning_rate": 3.672732245712263e-05, | |
| "loss": 0.17455484390258788, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.5341656878992165, | |
| "grad_norm": 2.4873690605163574, | |
| "learning_rate": 3.664687711169032e-05, | |
| "loss": 0.17273141860961913, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.5373835534889707, | |
| "grad_norm": 3.004922866821289, | |
| "learning_rate": 3.656643176625801e-05, | |
| "loss": 0.17702035903930663, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.540601419078725, | |
| "grad_norm": 4.731108665466309, | |
| "learning_rate": 3.648598642082569e-05, | |
| "loss": 0.1878594970703125, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.5438192846684794, | |
| "grad_norm": 3.8425683975219727, | |
| "learning_rate": 3.640554107539338e-05, | |
| "loss": 0.19668659210205078, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.5470371502582337, | |
| "grad_norm": 1.5426945686340332, | |
| "learning_rate": 3.632509572996107e-05, | |
| "loss": 0.16921035766601564, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.5502550158479881, | |
| "grad_norm": 1.886399507522583, | |
| "learning_rate": 3.624465038452876e-05, | |
| "loss": 0.15211493492126466, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.5534728814377423, | |
| "grad_norm": 5.827595233917236, | |
| "learning_rate": 3.616420503909644e-05, | |
| "loss": 0.1691839599609375, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.5566907470274967, | |
| "grad_norm": 5.728984832763672, | |
| "learning_rate": 3.608375969366412e-05, | |
| "loss": 0.1721731948852539, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.559908612617251, | |
| "grad_norm": 2.2809104919433594, | |
| "learning_rate": 3.600331434823181e-05, | |
| "loss": 0.1917552185058594, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.5631264782070053, | |
| "grad_norm": 2.936466693878174, | |
| "learning_rate": 3.5922869002799494e-05, | |
| "loss": 0.18653520584106445, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.5631264782070053, | |
| "eval_accuracy": 0.9851181304305561, | |
| "eval_f1": 0.9213907594344987, | |
| "eval_loss": 0.055659033358097076, | |
| "eval_precision": 0.9052643338588151, | |
| "eval_recall": 0.9381021597355321, | |
| "eval_runtime": 38.4689, | |
| "eval_samples_per_second": 649.876, | |
| "eval_steps_per_second": 36.107, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.5663443437967596, | |
| "grad_norm": 5.648510456085205, | |
| "learning_rate": 3.5842423657367184e-05, | |
| "loss": 0.17743080139160156, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.5695622093865139, | |
| "grad_norm": 2.0115761756896973, | |
| "learning_rate": 3.576197831193487e-05, | |
| "loss": 0.18602855682373046, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.5727800749762683, | |
| "grad_norm": 5.880187511444092, | |
| "learning_rate": 3.568153296650256e-05, | |
| "loss": 0.18167713165283203, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.5759979405660226, | |
| "grad_norm": 4.690539360046387, | |
| "learning_rate": 3.5601087621070244e-05, | |
| "loss": 0.17695968627929687, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.5792158061557768, | |
| "grad_norm": 0.8806695342063904, | |
| "learning_rate": 3.552064227563793e-05, | |
| "loss": 0.19892288208007813, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.5824336717455312, | |
| "grad_norm": 1.943228006362915, | |
| "learning_rate": 3.544019693020562e-05, | |
| "loss": 0.17242523193359374, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.5856515373352855, | |
| "grad_norm": 3.5915210247039795, | |
| "learning_rate": 3.5359751584773304e-05, | |
| "loss": 0.181878662109375, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.5888694029250399, | |
| "grad_norm": 2.8968303203582764, | |
| "learning_rate": 3.527930623934099e-05, | |
| "loss": 0.18603469848632812, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.5920872685147941, | |
| "grad_norm": 5.2887678146362305, | |
| "learning_rate": 3.519886089390868e-05, | |
| "loss": 0.16267045974731445, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.5953051341045484, | |
| "grad_norm": 1.6592695713043213, | |
| "learning_rate": 3.511841554847637e-05, | |
| "loss": 0.17949144363403322, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.5985229996943028, | |
| "grad_norm": 0.979318380355835, | |
| "learning_rate": 3.5037970203044054e-05, | |
| "loss": 0.1779278564453125, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.6017408652840571, | |
| "grad_norm": 1.6524354219436646, | |
| "learning_rate": 3.495752485761174e-05, | |
| "loss": 0.1590627098083496, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.6049587308738114, | |
| "grad_norm": 3.5658767223358154, | |
| "learning_rate": 3.4877079512179425e-05, | |
| "loss": 0.16901378631591796, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.6081765964635657, | |
| "grad_norm": 3.9737935066223145, | |
| "learning_rate": 3.4796634166747114e-05, | |
| "loss": 0.17097728729248046, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.61139446205332, | |
| "grad_norm": 3.3529911041259766, | |
| "learning_rate": 3.4716188821314796e-05, | |
| "loss": 0.17232301712036133, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.6146123276430744, | |
| "grad_norm": 3.2977654933929443, | |
| "learning_rate": 3.4635743475882485e-05, | |
| "loss": 0.16828372955322266, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.6178301932328286, | |
| "grad_norm": 3.297731876373291, | |
| "learning_rate": 3.4555298130450174e-05, | |
| "loss": 0.17515941619873046, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.621048058822583, | |
| "grad_norm": 7.258795261383057, | |
| "learning_rate": 3.4474852785017856e-05, | |
| "loss": 0.15848381996154784, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.6242659244123373, | |
| "grad_norm": 3.2193796634674072, | |
| "learning_rate": 3.4394407439585546e-05, | |
| "loss": 0.1693815231323242, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.6274837900020916, | |
| "grad_norm": 1.8956769704818726, | |
| "learning_rate": 3.4313962094153235e-05, | |
| "loss": 0.1632681083679199, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.6307016555918459, | |
| "grad_norm": 1.5155613422393799, | |
| "learning_rate": 3.4233516748720924e-05, | |
| "loss": 0.1833029556274414, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.6339195211816002, | |
| "grad_norm": 4.2836079597473145, | |
| "learning_rate": 3.4153071403288606e-05, | |
| "loss": 0.17777244567871095, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.6371373867713546, | |
| "grad_norm": 5.861156463623047, | |
| "learning_rate": 3.4072626057856295e-05, | |
| "loss": 0.17992183685302734, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.6403552523611089, | |
| "grad_norm": 3.4068503379821777, | |
| "learning_rate": 3.3992180712423984e-05, | |
| "loss": 0.17448163986206056, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.6435731179508631, | |
| "grad_norm": 2.3339030742645264, | |
| "learning_rate": 3.3911735366991666e-05, | |
| "loss": 0.16662296295166015, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.6435731179508631, | |
| "eval_accuracy": 0.985229301062481, | |
| "eval_f1": 0.92316609857673, | |
| "eval_loss": 0.056044481694698334, | |
| "eval_precision": 0.9047148608263973, | |
| "eval_recall": 0.9423856137687964, | |
| "eval_runtime": 38.8504, | |
| "eval_samples_per_second": 643.494, | |
| "eval_steps_per_second": 35.753, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.6467909835406175, | |
| "grad_norm": 3.3607404232025146, | |
| "learning_rate": 3.3831290021559355e-05, | |
| "loss": 0.17825605392456054, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.6500088491303718, | |
| "grad_norm": 2.690410852432251, | |
| "learning_rate": 3.3750844676127044e-05, | |
| "loss": 0.1796116065979004, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.6532267147201262, | |
| "grad_norm": 5.667492866516113, | |
| "learning_rate": 3.3670399330694727e-05, | |
| "loss": 0.15698086738586425, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.6564445803098805, | |
| "grad_norm": 2.9585955142974854, | |
| "learning_rate": 3.3589953985262416e-05, | |
| "loss": 0.14914603233337403, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.6596624458996347, | |
| "grad_norm": 3.539393901824951, | |
| "learning_rate": 3.35095086398301e-05, | |
| "loss": 0.15797216415405274, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.6628803114893891, | |
| "grad_norm": 3.268399238586426, | |
| "learning_rate": 3.342906329439779e-05, | |
| "loss": 0.16447280883789062, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.6660981770791434, | |
| "grad_norm": 1.261047601699829, | |
| "learning_rate": 3.3348617948965476e-05, | |
| "loss": 0.18879241943359376, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.6693160426688978, | |
| "grad_norm": 2.1420369148254395, | |
| "learning_rate": 3.326817260353316e-05, | |
| "loss": 0.15382347106933594, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.672533908258652, | |
| "grad_norm": 2.8946328163146973, | |
| "learning_rate": 3.318772725810085e-05, | |
| "loss": 0.15514832496643066, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.6757517738484063, | |
| "grad_norm": 12.205283164978027, | |
| "learning_rate": 3.3107281912668536e-05, | |
| "loss": 0.18069747924804688, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.6789696394381607, | |
| "grad_norm": 1.8224153518676758, | |
| "learning_rate": 3.302683656723622e-05, | |
| "loss": 0.17442039489746095, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.682187505027915, | |
| "grad_norm": 3.392157554626465, | |
| "learning_rate": 3.294639122180391e-05, | |
| "loss": 0.16294483184814454, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.6854053706176693, | |
| "grad_norm": 1.9432575702667236, | |
| "learning_rate": 3.2865945876371597e-05, | |
| "loss": 0.17553016662597656, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.6886232362074236, | |
| "grad_norm": 3.2393903732299805, | |
| "learning_rate": 3.2785500530939286e-05, | |
| "loss": 0.17193687438964844, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.691841101797178, | |
| "grad_norm": 3.1432416439056396, | |
| "learning_rate": 3.270505518550697e-05, | |
| "loss": 0.17834131240844728, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.6950589673869323, | |
| "grad_norm": 1.5839548110961914, | |
| "learning_rate": 3.262460984007466e-05, | |
| "loss": 0.16497987747192383, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.6982768329766865, | |
| "grad_norm": 3.1208765506744385, | |
| "learning_rate": 3.2544164494642346e-05, | |
| "loss": 0.18163761138916015, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.7014946985664409, | |
| "grad_norm": 8.323920249938965, | |
| "learning_rate": 3.246371914921003e-05, | |
| "loss": 0.1704619598388672, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.7047125641561952, | |
| "grad_norm": 0.9279898405075073, | |
| "learning_rate": 3.238327380377772e-05, | |
| "loss": 0.1771290397644043, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.7079304297459496, | |
| "grad_norm": 3.2795073986053467, | |
| "learning_rate": 3.23028284583454e-05, | |
| "loss": 0.1524122428894043, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.7111482953357038, | |
| "grad_norm": 5.583573818206787, | |
| "learning_rate": 3.222238311291309e-05, | |
| "loss": 0.16980670928955077, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.7143661609254581, | |
| "grad_norm": 4.969387531280518, | |
| "learning_rate": 3.214193776748077e-05, | |
| "loss": 0.1560416603088379, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.7175840265152125, | |
| "grad_norm": 3.3920233249664307, | |
| "learning_rate": 3.206149242204846e-05, | |
| "loss": 0.160349063873291, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.7208018921049668, | |
| "grad_norm": 2.8945395946502686, | |
| "learning_rate": 3.198104707661615e-05, | |
| "loss": 0.18149927139282226, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.724019757694721, | |
| "grad_norm": 1.1273301839828491, | |
| "learning_rate": 3.190060173118384e-05, | |
| "loss": 0.16230302810668945, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.724019757694721, | |
| "eval_accuracy": 0.9838156242825616, | |
| "eval_f1": 0.9249598360552048, | |
| "eval_loss": 0.05883582681417465, | |
| "eval_precision": 0.9098816590137007, | |
| "eval_recall": 0.9405461725473045, | |
| "eval_runtime": 38.7455, | |
| "eval_samples_per_second": 645.236, | |
| "eval_steps_per_second": 35.849, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.7272376232844754, | |
| "grad_norm": 2.699639081954956, | |
| "learning_rate": 3.182015638575152e-05, | |
| "loss": 0.17641155242919923, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.7304554888742297, | |
| "grad_norm": 4.614871978759766, | |
| "learning_rate": 3.173971104031921e-05, | |
| "loss": 0.15983717918395995, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.7336733544639841, | |
| "grad_norm": 2.631415843963623, | |
| "learning_rate": 3.16592656948869e-05, | |
| "loss": 0.15378172874450682, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.7368912200537383, | |
| "grad_norm": 1.4634898900985718, | |
| "learning_rate": 3.157882034945458e-05, | |
| "loss": 0.16743200302124023, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.7401090856434926, | |
| "grad_norm": 3.4641151428222656, | |
| "learning_rate": 3.149837500402227e-05, | |
| "loss": 0.15191415786743165, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.743326951233247, | |
| "grad_norm": 1.500380039215088, | |
| "learning_rate": 3.141792965858996e-05, | |
| "loss": 0.15310038566589357, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.7465448168230013, | |
| "grad_norm": 2.184008836746216, | |
| "learning_rate": 3.133748431315765e-05, | |
| "loss": 0.16115007400512696, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.7497626824127556, | |
| "grad_norm": 1.1047471761703491, | |
| "learning_rate": 3.125703896772533e-05, | |
| "loss": 0.153313045501709, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.7529805480025099, | |
| "grad_norm": 3.3007121086120605, | |
| "learning_rate": 3.117659362229301e-05, | |
| "loss": 0.16872770309448243, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.7561984135922643, | |
| "grad_norm": 6.547897815704346, | |
| "learning_rate": 3.10961482768607e-05, | |
| "loss": 0.18551017761230468, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.7594162791820186, | |
| "grad_norm": 2.700106382369995, | |
| "learning_rate": 3.1015702931428383e-05, | |
| "loss": 0.15708372116088867, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.7626341447717729, | |
| "grad_norm": 1.265189290046692, | |
| "learning_rate": 3.093525758599607e-05, | |
| "loss": 0.17149799346923827, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.7658520103615272, | |
| "grad_norm": 6.2682204246521, | |
| "learning_rate": 3.085481224056376e-05, | |
| "loss": 0.15998153686523436, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.7690698759512815, | |
| "grad_norm": 3.071823835372925, | |
| "learning_rate": 3.077436689513145e-05, | |
| "loss": 0.18916090011596678, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.7722877415410359, | |
| "grad_norm": 2.476935625076294, | |
| "learning_rate": 3.069392154969913e-05, | |
| "loss": 0.14498735427856446, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.7755056071307902, | |
| "grad_norm": 4.1790032386779785, | |
| "learning_rate": 3.061347620426682e-05, | |
| "loss": 0.1612476921081543, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.7787234727205444, | |
| "grad_norm": 1.0075314044952393, | |
| "learning_rate": 3.053303085883451e-05, | |
| "loss": 0.14984708786010742, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.7819413383102988, | |
| "grad_norm": 9.008003234863281, | |
| "learning_rate": 3.04525855134022e-05, | |
| "loss": 0.17614728927612305, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.7851592039000531, | |
| "grad_norm": 0.790449321269989, | |
| "learning_rate": 3.0372140167969882e-05, | |
| "loss": 0.1434452724456787, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.7883770694898075, | |
| "grad_norm": 2.6443631649017334, | |
| "learning_rate": 3.029169482253757e-05, | |
| "loss": 0.1625998878479004, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.7915949350795617, | |
| "grad_norm": 1.1716026067733765, | |
| "learning_rate": 3.0211249477105257e-05, | |
| "loss": 0.1629466438293457, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.794812800669316, | |
| "grad_norm": 1.9636585712432861, | |
| "learning_rate": 3.013080413167294e-05, | |
| "loss": 0.1526513195037842, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.7980306662590704, | |
| "grad_norm": 6.442037582397461, | |
| "learning_rate": 3.0050358786240628e-05, | |
| "loss": 0.16380990982055665, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.8012485318488247, | |
| "grad_norm": 2.0020787715911865, | |
| "learning_rate": 2.9969913440808317e-05, | |
| "loss": 0.16642404556274415, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.804466397438579, | |
| "grad_norm": 3.425807476043701, | |
| "learning_rate": 2.9889468095376006e-05, | |
| "loss": 0.15638751029968262, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.804466397438579, | |
| "eval_accuracy": 0.9871683237123339, | |
| "eval_f1": 0.9358503491978916, | |
| "eval_loss": 0.047558050602674484, | |
| "eval_precision": 0.9231722273671564, | |
| "eval_recall": 0.948881542557981, | |
| "eval_runtime": 39.0981, | |
| "eval_samples_per_second": 639.417, | |
| "eval_steps_per_second": 35.526, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.8076842630283333, | |
| "grad_norm": 1.7773306369781494, | |
| "learning_rate": 2.980902274994369e-05, | |
| "loss": 0.16818260192871093, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.8109021286180876, | |
| "grad_norm": 2.457798957824707, | |
| "learning_rate": 2.9728577404511378e-05, | |
| "loss": 0.1736893844604492, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.814119994207842, | |
| "grad_norm": 1.6925740242004395, | |
| "learning_rate": 2.9648132059079063e-05, | |
| "loss": 0.17085908889770507, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.8173378597975962, | |
| "grad_norm": 2.4007983207702637, | |
| "learning_rate": 2.956768671364675e-05, | |
| "loss": 0.16143144607543947, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.8205557253873506, | |
| "grad_norm": 2.328049898147583, | |
| "learning_rate": 2.9487241368214435e-05, | |
| "loss": 0.16922952651977538, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.8237735909771049, | |
| "grad_norm": 2.7495510578155518, | |
| "learning_rate": 2.9406796022782124e-05, | |
| "loss": 0.16985824584960937, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.8269914565668592, | |
| "grad_norm": 18.3980712890625, | |
| "learning_rate": 2.9326350677349813e-05, | |
| "loss": 0.15559816360473633, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.8302093221566135, | |
| "grad_norm": 2.3473148345947266, | |
| "learning_rate": 2.9245905331917495e-05, | |
| "loss": 0.17103708267211915, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.8334271877463678, | |
| "grad_norm": 2.7980849742889404, | |
| "learning_rate": 2.9165459986485184e-05, | |
| "loss": 0.1549097156524658, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.8366450533361222, | |
| "grad_norm": 0.4983047842979431, | |
| "learning_rate": 2.908501464105287e-05, | |
| "loss": 0.15225051879882812, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.8398629189258765, | |
| "grad_norm": 1.1898616552352905, | |
| "learning_rate": 2.900456929562056e-05, | |
| "loss": 0.14232300758361816, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.8430807845156307, | |
| "grad_norm": 1.2203755378723145, | |
| "learning_rate": 2.892412395018824e-05, | |
| "loss": 0.16007034301757814, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.8462986501053851, | |
| "grad_norm": 3.160578489303589, | |
| "learning_rate": 2.884367860475593e-05, | |
| "loss": 0.16384252548217773, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.8495165156951394, | |
| "grad_norm": 5.011970043182373, | |
| "learning_rate": 2.876323325932362e-05, | |
| "loss": 0.15432297706604003, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.8527343812848938, | |
| "grad_norm": 0.8514712452888489, | |
| "learning_rate": 2.86827879138913e-05, | |
| "loss": 0.15506745338439942, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.855952246874648, | |
| "grad_norm": 9.957659721374512, | |
| "learning_rate": 2.860234256845899e-05, | |
| "loss": 0.15732930183410646, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.8591701124644023, | |
| "grad_norm": 8.271772384643555, | |
| "learning_rate": 2.852189722302668e-05, | |
| "loss": 0.1552633285522461, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.8623879780541567, | |
| "grad_norm": 2.02590274810791, | |
| "learning_rate": 2.8441451877594365e-05, | |
| "loss": 0.15745894432067872, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.865605843643911, | |
| "grad_norm": 4.529170989990234, | |
| "learning_rate": 2.836100653216205e-05, | |
| "loss": 0.1467604446411133, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.8688237092336654, | |
| "grad_norm": 5.628500938415527, | |
| "learning_rate": 2.8280561186729736e-05, | |
| "loss": 0.1447477436065674, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.8720415748234196, | |
| "grad_norm": 12.87569522857666, | |
| "learning_rate": 2.8200115841297425e-05, | |
| "loss": 0.14656261444091798, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.8752594404131739, | |
| "grad_norm": 3.9200944900512695, | |
| "learning_rate": 2.8119670495865107e-05, | |
| "loss": 0.16447486877441406, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.8784773060029283, | |
| "grad_norm": 1.2977664470672607, | |
| "learning_rate": 2.8039225150432797e-05, | |
| "loss": 0.15925183296203613, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.8816951715926826, | |
| "grad_norm": 2.786237955093384, | |
| "learning_rate": 2.7958779805000486e-05, | |
| "loss": 0.15786813735961913, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.8849130371824369, | |
| "grad_norm": 4.701409339904785, | |
| "learning_rate": 2.787833445956817e-05, | |
| "loss": 0.14735154151916505, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.8849130371824369, | |
| "eval_accuracy": 0.985545749558937, | |
| "eval_f1": 0.9339101573386709, | |
| "eval_loss": 0.052501607686281204, | |
| "eval_precision": 0.9199600673862857, | |
| "eval_recall": 0.9482898341930256, | |
| "eval_runtime": 39.0393, | |
| "eval_samples_per_second": 640.38, | |
| "eval_steps_per_second": 35.58, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.8881309027721912, | |
| "grad_norm": 1.7882884740829468, | |
| "learning_rate": 2.7797889114135857e-05, | |
| "loss": 0.14846997261047362, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.8913487683619455, | |
| "grad_norm": 1.8413563966751099, | |
| "learning_rate": 2.7717443768703542e-05, | |
| "loss": 0.15191203117370605, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.8945666339516999, | |
| "grad_norm": 15.72754192352295, | |
| "learning_rate": 2.763699842327123e-05, | |
| "loss": 0.1671205520629883, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.8977844995414541, | |
| "grad_norm": 7.281972885131836, | |
| "learning_rate": 2.755655307783892e-05, | |
| "loss": 0.15349125862121582, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.9010023651312085, | |
| "grad_norm": 2.539133071899414, | |
| "learning_rate": 2.7476107732406603e-05, | |
| "loss": 0.1401151466369629, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.9042202307209628, | |
| "grad_norm": 1.8258554935455322, | |
| "learning_rate": 2.7395662386974292e-05, | |
| "loss": 0.14998969078063965, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.9074380963107171, | |
| "grad_norm": 6.016611099243164, | |
| "learning_rate": 2.731521704154198e-05, | |
| "loss": 0.14337385177612305, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.9106559619004714, | |
| "grad_norm": 3.221945285797119, | |
| "learning_rate": 2.7234771696109663e-05, | |
| "loss": 0.15551527023315428, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.9138738274902257, | |
| "grad_norm": 1.4967395067214966, | |
| "learning_rate": 2.715432635067735e-05, | |
| "loss": 0.13511601448059082, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.9170916930799801, | |
| "grad_norm": 2.9666309356689453, | |
| "learning_rate": 2.7073881005245038e-05, | |
| "loss": 0.1585793113708496, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.9203095586697344, | |
| "grad_norm": 0.9828789234161377, | |
| "learning_rate": 2.6993435659812727e-05, | |
| "loss": 0.15394386291503906, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.9235274242594886, | |
| "grad_norm": 0.7447288036346436, | |
| "learning_rate": 2.691299031438041e-05, | |
| "loss": 0.1461707878112793, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.926745289849243, | |
| "grad_norm": 1.8215960264205933, | |
| "learning_rate": 2.6832544968948098e-05, | |
| "loss": 0.14361499786376952, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.9299631554389973, | |
| "grad_norm": 5.320079326629639, | |
| "learning_rate": 2.6752099623515787e-05, | |
| "loss": 0.15090553283691407, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.9331810210287517, | |
| "grad_norm": 1.9044791460037231, | |
| "learning_rate": 2.667165427808347e-05, | |
| "loss": 0.16826240539550782, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.9363988866185059, | |
| "grad_norm": 2.378955602645874, | |
| "learning_rate": 2.659120893265116e-05, | |
| "loss": 0.14869707107543945, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.9396167522082602, | |
| "grad_norm": 3.2531590461730957, | |
| "learning_rate": 2.6510763587218844e-05, | |
| "loss": 0.15361614227294923, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.9428346177980146, | |
| "grad_norm": 3.1207923889160156, | |
| "learning_rate": 2.6430318241786533e-05, | |
| "loss": 0.14827921867370605, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.9460524833877689, | |
| "grad_norm": 8.111202239990234, | |
| "learning_rate": 2.6349872896354215e-05, | |
| "loss": 0.14765314102172852, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.9492703489775232, | |
| "grad_norm": 2.2104053497314453, | |
| "learning_rate": 2.6269427550921905e-05, | |
| "loss": 0.17542333602905275, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.9524882145672775, | |
| "grad_norm": 0.9287620782852173, | |
| "learning_rate": 2.6188982205489594e-05, | |
| "loss": 0.14969840049743652, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.9557060801570318, | |
| "grad_norm": 2.604642868041992, | |
| "learning_rate": 2.6108536860057283e-05, | |
| "loss": 0.16406068801879883, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.9589239457467862, | |
| "grad_norm": 7.544447422027588, | |
| "learning_rate": 2.6028091514624965e-05, | |
| "loss": 0.1531221103668213, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.9621418113365404, | |
| "grad_norm": 3.618058204650879, | |
| "learning_rate": 2.594764616919265e-05, | |
| "loss": 0.16164751052856446, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.9653596769262948, | |
| "grad_norm": 2.8580358028411865, | |
| "learning_rate": 2.586720082376034e-05, | |
| "loss": 0.15803168296813966, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.9653596769262948, | |
| "eval_accuracy": 0.9877764012153277, | |
| "eval_f1": 0.9371598218703612, | |
| "eval_loss": 0.04550632834434509, | |
| "eval_precision": 0.9245153134660876, | |
| "eval_recall": 0.9501550018651677, | |
| "eval_runtime": 39.0526, | |
| "eval_samples_per_second": 640.163, | |
| "eval_steps_per_second": 35.567, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.9685775425160491, | |
| "grad_norm": 1.4846822023391724, | |
| "learning_rate": 2.5786755478328022e-05, | |
| "loss": 0.138107852935791, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.9717954081058034, | |
| "grad_norm": 2.2517590522766113, | |
| "learning_rate": 2.570631013289571e-05, | |
| "loss": 0.13565559387207032, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.9750132736955578, | |
| "grad_norm": 3.6406161785125732, | |
| "learning_rate": 2.56258647874634e-05, | |
| "loss": 0.1619549560546875, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.978231139285312, | |
| "grad_norm": 2.2304012775421143, | |
| "learning_rate": 2.554541944203109e-05, | |
| "loss": 0.13655080795288085, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.9814490048750664, | |
| "grad_norm": 4.99883508682251, | |
| "learning_rate": 2.546497409659877e-05, | |
| "loss": 0.16668659210205078, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.9846668704648207, | |
| "grad_norm": 2.1127796173095703, | |
| "learning_rate": 2.538452875116646e-05, | |
| "loss": 0.14745793342590333, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.987884736054575, | |
| "grad_norm": 0.5811536908149719, | |
| "learning_rate": 2.5304083405734146e-05, | |
| "loss": 0.1281309223175049, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.9911026016443293, | |
| "grad_norm": 3.0965957641601562, | |
| "learning_rate": 2.5223638060301828e-05, | |
| "loss": 0.14012389183044432, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.9943204672340836, | |
| "grad_norm": 3.952867031097412, | |
| "learning_rate": 2.5143192714869517e-05, | |
| "loss": 0.18041374206542968, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.997538332823838, | |
| "grad_norm": 2.5378260612487793, | |
| "learning_rate": 2.5062747369437206e-05, | |
| "loss": 0.14559303283691405, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.0007401090856436, | |
| "grad_norm": 2.2179856300354004, | |
| "learning_rate": 2.4982302024004892e-05, | |
| "loss": 0.13755855560302735, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.0039579746753977, | |
| "grad_norm": 1.619863510131836, | |
| "learning_rate": 2.490185667857258e-05, | |
| "loss": 0.11792629241943359, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.007175840265152, | |
| "grad_norm": 1.596127986907959, | |
| "learning_rate": 2.4821411333140267e-05, | |
| "loss": 0.1253152847290039, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.0103937058549064, | |
| "grad_norm": 5.185761451721191, | |
| "learning_rate": 2.4740965987707952e-05, | |
| "loss": 0.11788433074951171, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.0136115714446607, | |
| "grad_norm": 4.540038585662842, | |
| "learning_rate": 2.4660520642275638e-05, | |
| "loss": 0.10257969856262207, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.016829437034415, | |
| "grad_norm": 0.530720055103302, | |
| "learning_rate": 2.4580075296843323e-05, | |
| "loss": 0.11232701301574707, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.0200473026241694, | |
| "grad_norm": 1.8916836977005005, | |
| "learning_rate": 2.4499629951411013e-05, | |
| "loss": 0.11638439178466797, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.0232651682139238, | |
| "grad_norm": 1.7947441339492798, | |
| "learning_rate": 2.4419184605978698e-05, | |
| "loss": 0.1160141658782959, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.0264830338036781, | |
| "grad_norm": 0.8208370804786682, | |
| "learning_rate": 2.4338739260546387e-05, | |
| "loss": 0.09769786834716797, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.0297008993934322, | |
| "grad_norm": 2.4557855129241943, | |
| "learning_rate": 2.4258293915114073e-05, | |
| "loss": 0.1321820545196533, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.0329187649831866, | |
| "grad_norm": 1.885315179824829, | |
| "learning_rate": 2.4177848569681762e-05, | |
| "loss": 0.10417740821838378, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.036136630572941, | |
| "grad_norm": 0.7314161658287048, | |
| "learning_rate": 2.4097403224249444e-05, | |
| "loss": 0.11431515693664551, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.0393544961626953, | |
| "grad_norm": 1.2714358568191528, | |
| "learning_rate": 2.4016957878817133e-05, | |
| "loss": 0.121004638671875, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.0425723617524496, | |
| "grad_norm": 3.3896045684814453, | |
| "learning_rate": 2.393651253338482e-05, | |
| "loss": 0.11515800476074219, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.045790227342204, | |
| "grad_norm": 1.288989782333374, | |
| "learning_rate": 2.3856067187952504e-05, | |
| "loss": 0.12445229530334473, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.045790227342204, | |
| "eval_accuracy": 0.9881399033280868, | |
| "eval_f1": 0.9421632544130543, | |
| "eval_loss": 0.04471902176737785, | |
| "eval_precision": 0.9309708286765537, | |
| "eval_recall": 0.9536280727029496, | |
| "eval_runtime": 38.6457, | |
| "eval_samples_per_second": 646.903, | |
| "eval_steps_per_second": 35.942, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.0490080929319583, | |
| "grad_norm": 1.3189036846160889, | |
| "learning_rate": 2.3775621842520194e-05, | |
| "loss": 0.11217801094055176, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.0522259585217126, | |
| "grad_norm": 3.2094037532806396, | |
| "learning_rate": 2.369517649708788e-05, | |
| "loss": 0.12271183013916015, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.055443824111467, | |
| "grad_norm": 9.113394737243652, | |
| "learning_rate": 2.3614731151655568e-05, | |
| "loss": 0.11827351570129395, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.058661689701221, | |
| "grad_norm": 1.4682255983352661, | |
| "learning_rate": 2.3534285806223254e-05, | |
| "loss": 0.12056323051452637, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.0618795552909754, | |
| "grad_norm": 1.7021067142486572, | |
| "learning_rate": 2.345384046079094e-05, | |
| "loss": 0.10995281219482422, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.0650974208807298, | |
| "grad_norm": 0.7944279313087463, | |
| "learning_rate": 2.3373395115358625e-05, | |
| "loss": 0.10448121070861817, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.0683152864704841, | |
| "grad_norm": 1.9725573062896729, | |
| "learning_rate": 2.3292949769926314e-05, | |
| "loss": 0.12772043228149413, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.0715331520602385, | |
| "grad_norm": 4.521146297454834, | |
| "learning_rate": 2.3212504424494e-05, | |
| "loss": 0.11528082847595215, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.0747510176499928, | |
| "grad_norm": 3.5572879314422607, | |
| "learning_rate": 2.3132059079061685e-05, | |
| "loss": 0.1119957447052002, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.0779688832397472, | |
| "grad_norm": 4.957686424255371, | |
| "learning_rate": 2.3051613733629375e-05, | |
| "loss": 0.1071746826171875, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.0811867488295013, | |
| "grad_norm": 1.005800724029541, | |
| "learning_rate": 2.297116838819706e-05, | |
| "loss": 0.11798532485961914, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.0844046144192556, | |
| "grad_norm": 1.6847596168518066, | |
| "learning_rate": 2.2890723042764746e-05, | |
| "loss": 0.12464913368225097, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.08762248000901, | |
| "grad_norm": 2.2746946811676025, | |
| "learning_rate": 2.281027769733243e-05, | |
| "loss": 0.11306841850280762, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.0908403455987643, | |
| "grad_norm": 7.0843706130981445, | |
| "learning_rate": 2.272983235190012e-05, | |
| "loss": 0.10698895454406739, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.0940582111885186, | |
| "grad_norm": 1.731086015701294, | |
| "learning_rate": 2.2649387006467806e-05, | |
| "loss": 0.11211064338684082, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.097276076778273, | |
| "grad_norm": 3.32621431350708, | |
| "learning_rate": 2.2568941661035495e-05, | |
| "loss": 0.10381333351135254, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.1004939423680273, | |
| "grad_norm": 1.3450391292572021, | |
| "learning_rate": 2.248849631560318e-05, | |
| "loss": 0.12325675010681153, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.1037118079577817, | |
| "grad_norm": 6.684245586395264, | |
| "learning_rate": 2.2408050970170866e-05, | |
| "loss": 0.12590248107910157, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.106929673547536, | |
| "grad_norm": 1.1298221349716187, | |
| "learning_rate": 2.2327605624738556e-05, | |
| "loss": 0.11126781463623046, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.1101475391372901, | |
| "grad_norm": 1.185738205909729, | |
| "learning_rate": 2.2247160279306238e-05, | |
| "loss": 0.11205435752868652, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.1133654047270445, | |
| "grad_norm": 5.897251605987549, | |
| "learning_rate": 2.2166714933873927e-05, | |
| "loss": 0.10614056587219238, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.1165832703167988, | |
| "grad_norm": 0.8509872555732727, | |
| "learning_rate": 2.2086269588441612e-05, | |
| "loss": 0.11909424781799316, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.1198011359065532, | |
| "grad_norm": 1.7664456367492676, | |
| "learning_rate": 2.20058242430093e-05, | |
| "loss": 0.10704885482788086, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.1230190014963075, | |
| "grad_norm": 4.05808687210083, | |
| "learning_rate": 2.1925378897576987e-05, | |
| "loss": 0.1148582649230957, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.1262368670860619, | |
| "grad_norm": 0.689573347568512, | |
| "learning_rate": 2.1844933552144676e-05, | |
| "loss": 0.11986784934997559, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.1262368670860619, | |
| "eval_accuracy": 0.988106293602156, | |
| "eval_f1": 0.9430045230472126, | |
| "eval_loss": 0.04454037919640541, | |
| "eval_precision": 0.9315594603074993, | |
| "eval_recall": 0.9547343100809097, | |
| "eval_runtime": 38.4818, | |
| "eval_samples_per_second": 649.658, | |
| "eval_steps_per_second": 36.095, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.1294547326758162, | |
| "grad_norm": 1.2704740762710571, | |
| "learning_rate": 2.1764488206712362e-05, | |
| "loss": 0.11099970817565918, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.1326725982655705, | |
| "grad_norm": 5.144682884216309, | |
| "learning_rate": 2.1684042861280047e-05, | |
| "loss": 0.13487217903137208, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.1358904638553247, | |
| "grad_norm": 0.7087175846099854, | |
| "learning_rate": 2.1603597515847733e-05, | |
| "loss": 0.12489970207214356, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.139108329445079, | |
| "grad_norm": 1.559787631034851, | |
| "learning_rate": 2.152315217041542e-05, | |
| "loss": 0.10530885696411132, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.1423261950348333, | |
| "grad_norm": 1.2589160203933716, | |
| "learning_rate": 2.1442706824983108e-05, | |
| "loss": 0.12130918502807617, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.1455440606245877, | |
| "grad_norm": 1.1291875839233398, | |
| "learning_rate": 2.1362261479550793e-05, | |
| "loss": 0.11352409362792969, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.148761926214342, | |
| "grad_norm": 5.766851425170898, | |
| "learning_rate": 2.1281816134118483e-05, | |
| "loss": 0.12505414009094237, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.1519797918040964, | |
| "grad_norm": 4.1735334396362305, | |
| "learning_rate": 2.1201370788686168e-05, | |
| "loss": 0.10489568710327149, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.1551976573938507, | |
| "grad_norm": 0.7965012192726135, | |
| "learning_rate": 2.1120925443253857e-05, | |
| "loss": 0.10613764762878418, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.158415522983605, | |
| "grad_norm": 11.224800109863281, | |
| "learning_rate": 2.104048009782154e-05, | |
| "loss": 0.10370283126831055, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.1616333885733594, | |
| "grad_norm": 2.968989372253418, | |
| "learning_rate": 2.0960034752389225e-05, | |
| "loss": 0.1149828052520752, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.1648512541631135, | |
| "grad_norm": 2.068894863128662, | |
| "learning_rate": 2.0879589406956914e-05, | |
| "loss": 0.10543420791625976, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.1680691197528679, | |
| "grad_norm": 1.2261930704116821, | |
| "learning_rate": 2.07991440615246e-05, | |
| "loss": 0.11141412734985351, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.1712869853426222, | |
| "grad_norm": 3.737278699874878, | |
| "learning_rate": 2.071869871609229e-05, | |
| "loss": 0.11346253395080566, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.1745048509323766, | |
| "grad_norm": 2.6356663703918457, | |
| "learning_rate": 2.0638253370659974e-05, | |
| "loss": 0.12470024108886718, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.177722716522131, | |
| "grad_norm": 1.931261658668518, | |
| "learning_rate": 2.0557808025227664e-05, | |
| "loss": 0.12472087860107423, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.1809405821118852, | |
| "grad_norm": 3.1169793605804443, | |
| "learning_rate": 2.047736267979535e-05, | |
| "loss": 0.10721843719482421, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.1841584477016396, | |
| "grad_norm": 3.3439762592315674, | |
| "learning_rate": 2.0396917334363035e-05, | |
| "loss": 0.115928316116333, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.1873763132913937, | |
| "grad_norm": 5.898672103881836, | |
| "learning_rate": 2.031647198893072e-05, | |
| "loss": 0.1268135643005371, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.190594178881148, | |
| "grad_norm": 1.7046561241149902, | |
| "learning_rate": 2.0236026643498406e-05, | |
| "loss": 0.11421506881713867, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.1938120444709024, | |
| "grad_norm": 1.223960041999817, | |
| "learning_rate": 2.0155581298066095e-05, | |
| "loss": 0.11065722465515136, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.1970299100606567, | |
| "grad_norm": 5.873408794403076, | |
| "learning_rate": 2.007513595263378e-05, | |
| "loss": 0.12281797409057617, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.200247775650411, | |
| "grad_norm": 1.8486683368682861, | |
| "learning_rate": 1.999469060720147e-05, | |
| "loss": 0.1131003189086914, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.2034656412401654, | |
| "grad_norm": 3.4886019229888916, | |
| "learning_rate": 1.9914245261769155e-05, | |
| "loss": 0.11905000686645507, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.2066835068299198, | |
| "grad_norm": 0.7239132523536682, | |
| "learning_rate": 1.983379991633684e-05, | |
| "loss": 0.10931044578552246, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.2066835068299198, | |
| "eval_accuracy": 0.9886145760727707, | |
| "eval_f1": 0.9460371209613769, | |
| "eval_loss": 0.043151482939720154, | |
| "eval_precision": 0.9363463404185619, | |
| "eval_recall": 0.9559305900361457, | |
| "eval_runtime": 38.5909, | |
| "eval_samples_per_second": 647.822, | |
| "eval_steps_per_second": 35.993, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.209901372419674, | |
| "grad_norm": 2.1548306941986084, | |
| "learning_rate": 1.9753354570904527e-05, | |
| "loss": 0.11063767433166503, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.2131192380094284, | |
| "grad_norm": 1.2177164554595947, | |
| "learning_rate": 1.9672909225472216e-05, | |
| "loss": 0.11377819061279297, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.2163371035991826, | |
| "grad_norm": 0.8512117266654968, | |
| "learning_rate": 1.95924638800399e-05, | |
| "loss": 0.1323404121398926, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.219554969188937, | |
| "grad_norm": 1.077217936515808, | |
| "learning_rate": 1.9512018534607587e-05, | |
| "loss": 0.0988365364074707, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.2227728347786913, | |
| "grad_norm": 2.1567108631134033, | |
| "learning_rate": 1.9431573189175276e-05, | |
| "loss": 0.12120766639709472, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.2259907003684456, | |
| "grad_norm": 3.18619966506958, | |
| "learning_rate": 1.9351127843742962e-05, | |
| "loss": 0.11067086219787597, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.2292085659582, | |
| "grad_norm": 1.0777678489685059, | |
| "learning_rate": 1.927068249831065e-05, | |
| "loss": 0.11158087730407715, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.2324264315479543, | |
| "grad_norm": 2.5809645652770996, | |
| "learning_rate": 1.9190237152878333e-05, | |
| "loss": 0.11114447593688964, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.2356442971377086, | |
| "grad_norm": 1.8847070932388306, | |
| "learning_rate": 1.9109791807446022e-05, | |
| "loss": 0.11262718200683594, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.238862162727463, | |
| "grad_norm": 7.437971591949463, | |
| "learning_rate": 1.9029346462013708e-05, | |
| "loss": 0.11268477439880371, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.242080028317217, | |
| "grad_norm": 3.669605255126953, | |
| "learning_rate": 1.8948901116581397e-05, | |
| "loss": 0.11750943183898926, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.2452978939069714, | |
| "grad_norm": 2.447467803955078, | |
| "learning_rate": 1.8868455771149082e-05, | |
| "loss": 0.10881879806518555, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.2485157594967258, | |
| "grad_norm": 4.701931953430176, | |
| "learning_rate": 1.8788010425716768e-05, | |
| "loss": 0.12167869567871094, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.2517336250864801, | |
| "grad_norm": 1.6426016092300415, | |
| "learning_rate": 1.8707565080284457e-05, | |
| "loss": 0.12139597892761231, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.2549514906762345, | |
| "grad_norm": 4.622367858886719, | |
| "learning_rate": 1.8627119734852143e-05, | |
| "loss": 0.10588560104370118, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.2581693562659888, | |
| "grad_norm": 1.7007027864456177, | |
| "learning_rate": 1.854667438941983e-05, | |
| "loss": 0.11511992454528809, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.2613872218557431, | |
| "grad_norm": 2.8813023567199707, | |
| "learning_rate": 1.8466229043987514e-05, | |
| "loss": 0.09704429626464844, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.2646050874454975, | |
| "grad_norm": 8.06043815612793, | |
| "learning_rate": 1.8385783698555203e-05, | |
| "loss": 0.11457429885864258, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.2678229530352518, | |
| "grad_norm": 8.746806144714355, | |
| "learning_rate": 1.830533835312289e-05, | |
| "loss": 0.12408617973327636, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.271040818625006, | |
| "grad_norm": 5.757315158843994, | |
| "learning_rate": 1.8224893007690578e-05, | |
| "loss": 0.10381578445434571, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.2742586842147603, | |
| "grad_norm": 0.5180655121803284, | |
| "learning_rate": 1.8144447662258263e-05, | |
| "loss": 0.10882855415344238, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.2774765498045146, | |
| "grad_norm": 2.3184099197387695, | |
| "learning_rate": 1.806400231682595e-05, | |
| "loss": 0.10384510040283203, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.280694415394269, | |
| "grad_norm": 3.118455648422241, | |
| "learning_rate": 1.7983556971393635e-05, | |
| "loss": 0.1031002426147461, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.2839122809840233, | |
| "grad_norm": 10.587071418762207, | |
| "learning_rate": 1.790311162596132e-05, | |
| "loss": 0.11156161308288574, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.2871301465737777, | |
| "grad_norm": 2.5432746410369873, | |
| "learning_rate": 1.782266628052901e-05, | |
| "loss": 0.11637428283691406, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.2871301465737777, | |
| "eval_accuracy": 0.989512214291476, | |
| "eval_f1": 0.9486766456772123, | |
| "eval_loss": 0.040956247597932816, | |
| "eval_precision": 0.9392224099241068, | |
| "eval_recall": 0.9583231499466176, | |
| "eval_runtime": 38.6339, | |
| "eval_samples_per_second": 647.1, | |
| "eval_steps_per_second": 35.953, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.290348012163532, | |
| "grad_norm": 1.766068458557129, | |
| "learning_rate": 1.7742220935096695e-05, | |
| "loss": 0.11921710968017578, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.2935658777532861, | |
| "grad_norm": 1.7064458131790161, | |
| "learning_rate": 1.7661775589664384e-05, | |
| "loss": 0.12581809997558593, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.2967837433430405, | |
| "grad_norm": 1.2497196197509766, | |
| "learning_rate": 1.758133024423207e-05, | |
| "loss": 0.10997524261474609, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.3000016089327948, | |
| "grad_norm": 1.997997760772705, | |
| "learning_rate": 1.750088489879976e-05, | |
| "loss": 0.11178022384643554, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.3032194745225492, | |
| "grad_norm": 1.8823689222335815, | |
| "learning_rate": 1.7420439553367444e-05, | |
| "loss": 0.09935328483581543, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.3064373401123035, | |
| "grad_norm": 4.478331089019775, | |
| "learning_rate": 1.733999420793513e-05, | |
| "loss": 0.11027010917663574, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.3096552057020578, | |
| "grad_norm": 3.932783365249634, | |
| "learning_rate": 1.7259548862502816e-05, | |
| "loss": 0.11079183578491211, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.3128730712918122, | |
| "grad_norm": 2.1589443683624268, | |
| "learning_rate": 1.71791035170705e-05, | |
| "loss": 0.10328941345214844, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.3160909368815665, | |
| "grad_norm": 2.9713399410247803, | |
| "learning_rate": 1.709865817163819e-05, | |
| "loss": 0.1030450439453125, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.3193088024713209, | |
| "grad_norm": 4.732212066650391, | |
| "learning_rate": 1.7018212826205876e-05, | |
| "loss": 0.114616117477417, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.3225266680610752, | |
| "grad_norm": 1.4628472328186035, | |
| "learning_rate": 1.6937767480773565e-05, | |
| "loss": 0.12563533782958985, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.3257445336508293, | |
| "grad_norm": 3.7251741886138916, | |
| "learning_rate": 1.685732213534125e-05, | |
| "loss": 0.11473335266113281, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.3289623992405837, | |
| "grad_norm": 4.524189472198486, | |
| "learning_rate": 1.6776876789908936e-05, | |
| "loss": 0.12875600814819335, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.332180264830338, | |
| "grad_norm": 28.197296142578125, | |
| "learning_rate": 1.6696431444476622e-05, | |
| "loss": 0.11420975685119629, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.3353981304200924, | |
| "grad_norm": 1.819801926612854, | |
| "learning_rate": 1.6615986099044308e-05, | |
| "loss": 0.09828336715698242, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.3386159960098467, | |
| "grad_norm": 1.7372171878814697, | |
| "learning_rate": 1.6535540753611997e-05, | |
| "loss": 0.1114089298248291, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.341833861599601, | |
| "grad_norm": 4.730764865875244, | |
| "learning_rate": 1.6455095408179682e-05, | |
| "loss": 0.11046221733093262, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.3450517271893552, | |
| "grad_norm": 0.6920406222343445, | |
| "learning_rate": 1.637465006274737e-05, | |
| "loss": 0.1076749610900879, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.3482695927791095, | |
| "grad_norm": 1.7678096294403076, | |
| "learning_rate": 1.6294204717315057e-05, | |
| "loss": 0.10196740150451661, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.3514874583688639, | |
| "grad_norm": 7.219323635101318, | |
| "learning_rate": 1.6213759371882746e-05, | |
| "loss": 0.10450594902038574, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.3547053239586182, | |
| "grad_norm": 3.9280190467834473, | |
| "learning_rate": 1.613331402645043e-05, | |
| "loss": 0.11356738090515137, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.3579231895483725, | |
| "grad_norm": 0.7362053394317627, | |
| "learning_rate": 1.6052868681018117e-05, | |
| "loss": 0.11030459403991699, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.3611410551381269, | |
| "grad_norm": 2.8355274200439453, | |
| "learning_rate": 1.5972423335585803e-05, | |
| "loss": 0.10215305328369141, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.3643589207278812, | |
| "grad_norm": 2.266855001449585, | |
| "learning_rate": 1.589197799015349e-05, | |
| "loss": 0.10216259002685547, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.3675767863176356, | |
| "grad_norm": 1.7423440217971802, | |
| "learning_rate": 1.5811532644721178e-05, | |
| "loss": 0.11463526725769042, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.3675767863176356, | |
| "eval_accuracy": 0.9885556297842152, | |
| "eval_f1": 0.9436256775454764, | |
| "eval_loss": 0.043291687965393066, | |
| "eval_precision": 0.9313770939571248, | |
| "eval_recall": 0.9562007177679731, | |
| "eval_runtime": 39.0115, | |
| "eval_samples_per_second": 640.836, | |
| "eval_steps_per_second": 35.605, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.37079465190739, | |
| "grad_norm": 2.6859209537506104, | |
| "learning_rate": 1.5731087299288863e-05, | |
| "loss": 0.10509669303894043, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.3740125174971443, | |
| "grad_norm": 1.952075481414795, | |
| "learning_rate": 1.5650641953856552e-05, | |
| "loss": 0.10759903907775879, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.3772303830868984, | |
| "grad_norm": 2.712860107421875, | |
| "learning_rate": 1.5570196608424238e-05, | |
| "loss": 0.1039089298248291, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.3804482486766527, | |
| "grad_norm": 0.966740608215332, | |
| "learning_rate": 1.5489751262991924e-05, | |
| "loss": 0.11702409744262696, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.383666114266407, | |
| "grad_norm": 0.4163697063922882, | |
| "learning_rate": 1.540930591755961e-05, | |
| "loss": 0.10730401039123535, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.3868839798561614, | |
| "grad_norm": 1.878928780555725, | |
| "learning_rate": 1.53288605721273e-05, | |
| "loss": 0.10289834022521972, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.3901018454459158, | |
| "grad_norm": 0.8312145471572876, | |
| "learning_rate": 1.5248415226694984e-05, | |
| "loss": 0.0965440845489502, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.39331971103567, | |
| "grad_norm": 2.221240282058716, | |
| "learning_rate": 1.516796988126267e-05, | |
| "loss": 0.11579631805419922, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.3965375766254244, | |
| "grad_norm": 1.7558112144470215, | |
| "learning_rate": 1.5087524535830357e-05, | |
| "loss": 0.09858304977416993, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.3997554422151786, | |
| "grad_norm": 2.686286449432373, | |
| "learning_rate": 1.5007079190398043e-05, | |
| "loss": 0.1057161808013916, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.402973307804933, | |
| "grad_norm": 3.1591944694519043, | |
| "learning_rate": 1.4926633844965732e-05, | |
| "loss": 0.08528660774230958, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.4061911733946872, | |
| "grad_norm": 5.5276994705200195, | |
| "learning_rate": 1.4846188499533417e-05, | |
| "loss": 0.11572813987731934, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.4094090389844416, | |
| "grad_norm": 1.2573060989379883, | |
| "learning_rate": 1.4765743154101105e-05, | |
| "loss": 0.11284799575805664, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.412626904574196, | |
| "grad_norm": 1.1216791868209839, | |
| "learning_rate": 1.468529780866879e-05, | |
| "loss": 0.11373154640197754, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.4158447701639503, | |
| "grad_norm": 1.1175236701965332, | |
| "learning_rate": 1.460485246323648e-05, | |
| "loss": 0.11358193397521972, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.4190626357537046, | |
| "grad_norm": 1.5515943765640259, | |
| "learning_rate": 1.4524407117804165e-05, | |
| "loss": 0.0964575481414795, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.422280501343459, | |
| "grad_norm": 1.3432780504226685, | |
| "learning_rate": 1.444396177237185e-05, | |
| "loss": 0.12204754829406739, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.4254983669332133, | |
| "grad_norm": 5.370590686798096, | |
| "learning_rate": 1.4363516426939538e-05, | |
| "loss": 0.10917285919189453, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.4287162325229676, | |
| "grad_norm": 3.957749366760254, | |
| "learning_rate": 1.4283071081507224e-05, | |
| "loss": 0.1064769458770752, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.4319340981127218, | |
| "grad_norm": 9.636327743530273, | |
| "learning_rate": 1.4202625736074913e-05, | |
| "loss": 0.11022495269775391, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.435151963702476, | |
| "grad_norm": 2.991060495376587, | |
| "learning_rate": 1.4122180390642597e-05, | |
| "loss": 0.10742597579956055, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.4383698292922305, | |
| "grad_norm": 2.648939847946167, | |
| "learning_rate": 1.4041735045210286e-05, | |
| "loss": 0.10721823692321777, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.4415876948819848, | |
| "grad_norm": 0.6414406895637512, | |
| "learning_rate": 1.3961289699777971e-05, | |
| "loss": 0.09946588516235351, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.4448055604717391, | |
| "grad_norm": 0.6889192461967468, | |
| "learning_rate": 1.3880844354345659e-05, | |
| "loss": 0.10531152725219727, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.4480234260614935, | |
| "grad_norm": 9.125837326049805, | |
| "learning_rate": 1.3800399008913344e-05, | |
| "loss": 0.09133255004882812, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.4480234260614935, | |
| "eval_accuracy": 0.9893700192971534, | |
| "eval_f1": 0.9493190302072849, | |
| "eval_loss": 0.043362099677324295, | |
| "eval_precision": 0.9403339180474755, | |
| "eval_recall": 0.9584775086505191, | |
| "eval_runtime": 39.1132, | |
| "eval_samples_per_second": 639.171, | |
| "eval_steps_per_second": 35.512, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.4512412916512476, | |
| "grad_norm": 2.54496431350708, | |
| "learning_rate": 1.371995366348103e-05, | |
| "loss": 0.11927374839782715, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.454459157241002, | |
| "grad_norm": 2.2553350925445557, | |
| "learning_rate": 1.3639508318048719e-05, | |
| "loss": 0.10068492889404297, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.4576770228307563, | |
| "grad_norm": 0.6272743940353394, | |
| "learning_rate": 1.3559062972616405e-05, | |
| "loss": 0.09143063545227051, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.4608948884205106, | |
| "grad_norm": 2.9756674766540527, | |
| "learning_rate": 1.3478617627184092e-05, | |
| "loss": 0.11779884338378906, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.464112754010265, | |
| "grad_norm": 1.4076873064041138, | |
| "learning_rate": 1.3398172281751778e-05, | |
| "loss": 0.09415918350219726, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.4673306196000193, | |
| "grad_norm": 2.004279613494873, | |
| "learning_rate": 1.3317726936319467e-05, | |
| "loss": 0.10082883834838867, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.4705484851897737, | |
| "grad_norm": 12.558138847351074, | |
| "learning_rate": 1.3237281590887152e-05, | |
| "loss": 0.10111323356628418, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.473766350779528, | |
| "grad_norm": 3.433985948562622, | |
| "learning_rate": 1.315683624545484e-05, | |
| "loss": 0.10751867294311523, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.4769842163692823, | |
| "grad_norm": 1.3402138948440552, | |
| "learning_rate": 1.3076390900022525e-05, | |
| "loss": 0.09543782234191894, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.4802020819590367, | |
| "grad_norm": 2.1528687477111816, | |
| "learning_rate": 1.2995945554590211e-05, | |
| "loss": 0.10040821075439453, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.4834199475487908, | |
| "grad_norm": 2.317426919937134, | |
| "learning_rate": 1.2915500209157898e-05, | |
| "loss": 0.10733115196228027, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.4866378131385451, | |
| "grad_norm": 2.752028703689575, | |
| "learning_rate": 1.2835054863725584e-05, | |
| "loss": 0.09164957046508788, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.4898556787282995, | |
| "grad_norm": 1.7984188795089722, | |
| "learning_rate": 1.2754609518293273e-05, | |
| "loss": 0.10080486297607422, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.4930735443180538, | |
| "grad_norm": 2.4552342891693115, | |
| "learning_rate": 1.2674164172860959e-05, | |
| "loss": 0.10242198944091797, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.4962914099078082, | |
| "grad_norm": 2.6899032592773438, | |
| "learning_rate": 1.2593718827428646e-05, | |
| "loss": 0.10129733085632324, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.4995092754975625, | |
| "grad_norm": 1.909561038017273, | |
| "learning_rate": 1.2513273481996332e-05, | |
| "loss": 0.1006275749206543, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.5027271410873166, | |
| "grad_norm": 19.42228889465332, | |
| "learning_rate": 1.2432828136564019e-05, | |
| "loss": 0.10171239852905273, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.505945006677071, | |
| "grad_norm": 1.729035496711731, | |
| "learning_rate": 1.2352382791131706e-05, | |
| "loss": 0.11470888137817382, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.5091628722668253, | |
| "grad_norm": 10.81437873840332, | |
| "learning_rate": 1.2271937445699392e-05, | |
| "loss": 0.09871227264404298, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.5123807378565797, | |
| "grad_norm": 3.496492624282837, | |
| "learning_rate": 1.219149210026708e-05, | |
| "loss": 0.10564603805541992, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.515598603446334, | |
| "grad_norm": 0.7296909093856812, | |
| "learning_rate": 1.2111046754834767e-05, | |
| "loss": 0.09953035354614258, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.5188164690360884, | |
| "grad_norm": 0.6311995983123779, | |
| "learning_rate": 1.2030601409402452e-05, | |
| "loss": 0.10630006790161133, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.5220343346258427, | |
| "grad_norm": 2.436532497406006, | |
| "learning_rate": 1.1950156063970138e-05, | |
| "loss": 0.09258381843566894, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.525252200215597, | |
| "grad_norm": 0.557080864906311, | |
| "learning_rate": 1.1869710718537825e-05, | |
| "loss": 0.1228843879699707, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.5284700658053514, | |
| "grad_norm": 7.565468788146973, | |
| "learning_rate": 1.1789265373105513e-05, | |
| "loss": 0.1068018627166748, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.5284700658053514, | |
| "eval_accuracy": 0.989717492156007, | |
| "eval_f1": 0.9515031847133757, | |
| "eval_loss": 0.03974379226565361, | |
| "eval_precision": 0.942391400345702, | |
| "eval_recall": 0.9607928892090403, | |
| "eval_runtime": 38.7298, | |
| "eval_samples_per_second": 645.498, | |
| "eval_steps_per_second": 35.864, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.5316879313951057, | |
| "grad_norm": 2.948582410812378, | |
| "learning_rate": 1.17088200276732e-05, | |
| "loss": 0.10712558746337891, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.53490579698486, | |
| "grad_norm": 6.355142593383789, | |
| "learning_rate": 1.1628374682240886e-05, | |
| "loss": 0.10939033508300781, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.5381236625746144, | |
| "grad_norm": 0.6847637295722961, | |
| "learning_rate": 1.1547929336808573e-05, | |
| "loss": 0.09593602180480958, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.5413415281643685, | |
| "grad_norm": 2.1787171363830566, | |
| "learning_rate": 1.146748399137626e-05, | |
| "loss": 0.09451790809631348, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.5445593937541229, | |
| "grad_norm": 4.488222122192383, | |
| "learning_rate": 1.1387038645943946e-05, | |
| "loss": 0.1111738109588623, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.5477772593438772, | |
| "grad_norm": 5.803431034088135, | |
| "learning_rate": 1.1306593300511632e-05, | |
| "loss": 0.09694368362426758, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.5509951249336316, | |
| "grad_norm": 1.8297266960144043, | |
| "learning_rate": 1.1226147955079319e-05, | |
| "loss": 0.105281982421875, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.5542129905233857, | |
| "grad_norm": 3.511244058609009, | |
| "learning_rate": 1.1145702609647006e-05, | |
| "loss": 0.10829096794128418, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.55743085611314, | |
| "grad_norm": 7.947854995727539, | |
| "learning_rate": 1.1065257264214692e-05, | |
| "loss": 0.11103793144226075, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.5606487217028944, | |
| "grad_norm": 3.7324254512786865, | |
| "learning_rate": 1.098481191878238e-05, | |
| "loss": 0.09467618942260742, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.5638665872926487, | |
| "grad_norm": 8.261491775512695, | |
| "learning_rate": 1.0904366573350067e-05, | |
| "loss": 0.10849467277526856, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.567084452882403, | |
| "grad_norm": 1.8083908557891846, | |
| "learning_rate": 1.0823921227917754e-05, | |
| "loss": 0.09797250747680664, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.5703023184721574, | |
| "grad_norm": 0.7597082257270813, | |
| "learning_rate": 1.074347588248544e-05, | |
| "loss": 0.08994776725769044, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.5735201840619117, | |
| "grad_norm": 1.0194493532180786, | |
| "learning_rate": 1.0663030537053127e-05, | |
| "loss": 0.1160097885131836, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.576738049651666, | |
| "grad_norm": 3.0627150535583496, | |
| "learning_rate": 1.0582585191620813e-05, | |
| "loss": 0.08039675712585449, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.5799559152414204, | |
| "grad_norm": 11.51146411895752, | |
| "learning_rate": 1.05021398461885e-05, | |
| "loss": 0.10077597618103028, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.5831737808311748, | |
| "grad_norm": 3.410426616668701, | |
| "learning_rate": 1.0421694500756186e-05, | |
| "loss": 0.12183525085449219, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.5863916464209291, | |
| "grad_norm": 4.240739822387695, | |
| "learning_rate": 1.0341249155323873e-05, | |
| "loss": 0.10310389518737793, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.5896095120106835, | |
| "grad_norm": 4.760958194732666, | |
| "learning_rate": 1.026080380989156e-05, | |
| "loss": 0.10938605308532715, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.5928273776004376, | |
| "grad_norm": 1.9922808408737183, | |
| "learning_rate": 1.0180358464459248e-05, | |
| "loss": 0.08123787879943847, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.596045243190192, | |
| "grad_norm": 1.350643277168274, | |
| "learning_rate": 1.0099913119026933e-05, | |
| "loss": 0.12260754585266113, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.5992631087799463, | |
| "grad_norm": 1.4271605014801025, | |
| "learning_rate": 1.001946777359462e-05, | |
| "loss": 0.09629259109497071, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.6024809743697006, | |
| "grad_norm": 0.5193639397621155, | |
| "learning_rate": 9.939022428162308e-06, | |
| "loss": 0.10984975814819337, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.605698839959455, | |
| "grad_norm": 9.31329345703125, | |
| "learning_rate": 9.858577082729994e-06, | |
| "loss": 0.09676812171936035, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.608916705549209, | |
| "grad_norm": 1.29440176486969, | |
| "learning_rate": 9.77813173729768e-06, | |
| "loss": 0.09296704292297363, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.608916705549209, | |
| "eval_accuracy": 0.9898457261872506, | |
| "eval_f1": 0.9511017704750988, | |
| "eval_loss": 0.038785140961408615, | |
| "eval_precision": 0.9418761588819233, | |
| "eval_recall": 0.9605098982518877, | |
| "eval_runtime": 39.1145, | |
| "eval_samples_per_second": 639.149, | |
| "eval_steps_per_second": 35.511, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.6121345711389634, | |
| "grad_norm": 2.5150434970855713, | |
| "learning_rate": 9.697686391865367e-06, | |
| "loss": 0.0893746280670166, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.6153524367287178, | |
| "grad_norm": 3.5938878059387207, | |
| "learning_rate": 9.617241046433054e-06, | |
| "loss": 0.10516523361206055, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.618570302318472, | |
| "grad_norm": 3.323432445526123, | |
| "learning_rate": 9.53679570100074e-06, | |
| "loss": 0.10093666076660156, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.6217881679082264, | |
| "grad_norm": 3.201542854309082, | |
| "learning_rate": 9.456350355568427e-06, | |
| "loss": 0.09994193077087403, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.6250060334979808, | |
| "grad_norm": 1.2016533613204956, | |
| "learning_rate": 9.375905010136114e-06, | |
| "loss": 0.09140503883361817, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.6282238990877351, | |
| "grad_norm": 2.176725149154663, | |
| "learning_rate": 9.295459664703802e-06, | |
| "loss": 0.10795197486877442, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.6314417646774895, | |
| "grad_norm": 2.084689140319824, | |
| "learning_rate": 9.215014319271487e-06, | |
| "loss": 0.10725319862365723, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.6346596302672438, | |
| "grad_norm": 3.9410250186920166, | |
| "learning_rate": 9.134568973839173e-06, | |
| "loss": 0.1040007209777832, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.6378774958569982, | |
| "grad_norm": 7.110863208770752, | |
| "learning_rate": 9.05412362840686e-06, | |
| "loss": 0.09510645866394044, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.6410953614467525, | |
| "grad_norm": 2.1342461109161377, | |
| "learning_rate": 8.973678282974548e-06, | |
| "loss": 0.10462862014770508, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.6443132270365068, | |
| "grad_norm": 2.3921563625335693, | |
| "learning_rate": 8.893232937542233e-06, | |
| "loss": 0.09732514381408691, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.647531092626261, | |
| "grad_norm": 2.4982810020446777, | |
| "learning_rate": 8.81278759210992e-06, | |
| "loss": 0.08822668075561524, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.6507489582160153, | |
| "grad_norm": 1.7426912784576416, | |
| "learning_rate": 8.732342246677608e-06, | |
| "loss": 0.08898674011230469, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.6539668238057696, | |
| "grad_norm": 9.542119026184082, | |
| "learning_rate": 8.651896901245295e-06, | |
| "loss": 0.12028385162353515, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.657184689395524, | |
| "grad_norm": 0.7342370748519897, | |
| "learning_rate": 8.571451555812981e-06, | |
| "loss": 0.10786977767944336, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.6604025549852781, | |
| "grad_norm": 0.8785775303840637, | |
| "learning_rate": 8.491006210380668e-06, | |
| "loss": 0.09443745613098145, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.6636204205750325, | |
| "grad_norm": 9.54134464263916, | |
| "learning_rate": 8.410560864948354e-06, | |
| "loss": 0.11094076156616212, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.6668382861647868, | |
| "grad_norm": 23.422744750976562, | |
| "learning_rate": 8.330115519516041e-06, | |
| "loss": 0.10699220657348633, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.6700561517545411, | |
| "grad_norm": 1.0714454650878906, | |
| "learning_rate": 8.249670174083727e-06, | |
| "loss": 0.10455569267272949, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.6732740173442955, | |
| "grad_norm": 2.6620521545410156, | |
| "learning_rate": 8.169224828651414e-06, | |
| "loss": 0.09597659111022949, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.6764918829340498, | |
| "grad_norm": 1.480083703994751, | |
| "learning_rate": 8.088779483219102e-06, | |
| "loss": 0.10422684669494629, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.6797097485238042, | |
| "grad_norm": 2.0191445350646973, | |
| "learning_rate": 8.008334137786787e-06, | |
| "loss": 0.10290263175964355, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.6829276141135585, | |
| "grad_norm": 1.1020231246948242, | |
| "learning_rate": 7.927888792354475e-06, | |
| "loss": 0.10773059844970703, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.6861454797033129, | |
| "grad_norm": 2.2319722175598145, | |
| "learning_rate": 7.847443446922162e-06, | |
| "loss": 0.10715092658996582, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.6893633452930672, | |
| "grad_norm": 3.737014055252075, | |
| "learning_rate": 7.76699810148985e-06, | |
| "loss": 0.08955263137817383, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.6893633452930672, | |
| "eval_accuracy": 0.9901202917944698, | |
| "eval_f1": 0.9537408288021826, | |
| "eval_loss": 0.03707578778266907, | |
| "eval_precision": 0.945334613829357, | |
| "eval_recall": 0.962297886572079, | |
| "eval_runtime": 38.6863, | |
| "eval_samples_per_second": 646.224, | |
| "eval_steps_per_second": 35.904, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.6925812108828215, | |
| "grad_norm": 1.981602668762207, | |
| "learning_rate": 7.686552756057535e-06, | |
| "loss": 0.09512082099914551, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.6957990764725759, | |
| "grad_norm": 0.9314746856689453, | |
| "learning_rate": 7.606107410625221e-06, | |
| "loss": 0.09555187225341796, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.69901694206233, | |
| "grad_norm": 1.3338748216629028, | |
| "learning_rate": 7.525662065192908e-06, | |
| "loss": 0.08804367065429687, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.7022348076520843, | |
| "grad_norm": 2.233142375946045, | |
| "learning_rate": 7.445216719760595e-06, | |
| "loss": 0.099904146194458, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.7054526732418387, | |
| "grad_norm": 2.9169979095458984, | |
| "learning_rate": 7.364771374328282e-06, | |
| "loss": 0.09450019836425781, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.708670538831593, | |
| "grad_norm": 1.4995619058609009, | |
| "learning_rate": 7.284326028895968e-06, | |
| "loss": 0.10291241645812989, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.7118884044213472, | |
| "grad_norm": 0.7775556445121765, | |
| "learning_rate": 7.203880683463656e-06, | |
| "loss": 0.1126316738128662, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.7151062700111015, | |
| "grad_norm": 4.055330753326416, | |
| "learning_rate": 7.123435338031342e-06, | |
| "loss": 0.10327481269836426, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.7183241356008558, | |
| "grad_norm": 1.895665168762207, | |
| "learning_rate": 7.04298999259903e-06, | |
| "loss": 0.09612360954284668, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.7215420011906102, | |
| "grad_norm": 2.202033758163452, | |
| "learning_rate": 6.962544647166714e-06, | |
| "loss": 0.09727460861206055, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.7247598667803645, | |
| "grad_norm": 1.6725600957870483, | |
| "learning_rate": 6.882099301734402e-06, | |
| "loss": 0.1051039981842041, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.7279777323701189, | |
| "grad_norm": 1.6951491832733154, | |
| "learning_rate": 6.801653956302088e-06, | |
| "loss": 0.09583574295043945, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.7311955979598732, | |
| "grad_norm": 2.0571811199188232, | |
| "learning_rate": 6.721208610869776e-06, | |
| "loss": 0.09057200431823731, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.7344134635496276, | |
| "grad_norm": 1.9436057806015015, | |
| "learning_rate": 6.640763265437462e-06, | |
| "loss": 0.10000760078430176, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.737631329139382, | |
| "grad_norm": 0.3372837007045746, | |
| "learning_rate": 6.560317920005149e-06, | |
| "loss": 0.10574793815612793, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.7408491947291362, | |
| "grad_norm": 2.41835880279541, | |
| "learning_rate": 6.479872574572836e-06, | |
| "loss": 0.10087454795837403, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.7440670603188906, | |
| "grad_norm": 0.6734520792961121, | |
| "learning_rate": 6.399427229140522e-06, | |
| "loss": 0.08962540626525879, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.747284925908645, | |
| "grad_norm": 2.5488386154174805, | |
| "learning_rate": 6.31898188370821e-06, | |
| "loss": 0.1008315372467041, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.7505027914983993, | |
| "grad_norm": 3.4320902824401855, | |
| "learning_rate": 6.238536538275895e-06, | |
| "loss": 0.08948537826538086, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.7537206570881534, | |
| "grad_norm": 3.819451093673706, | |
| "learning_rate": 6.158091192843583e-06, | |
| "loss": 0.08715433120727539, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.7569385226779077, | |
| "grad_norm": 1.9689542055130005, | |
| "learning_rate": 6.077645847411268e-06, | |
| "loss": 0.0976317596435547, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.760156388267662, | |
| "grad_norm": 2.7664942741394043, | |
| "learning_rate": 5.997200501978956e-06, | |
| "loss": 0.11424756050109863, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.7633742538574164, | |
| "grad_norm": 1.188758134841919, | |
| "learning_rate": 5.916755156546642e-06, | |
| "loss": 0.09281901359558105, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.7665921194471705, | |
| "grad_norm": 1.442600965499878, | |
| "learning_rate": 5.83630981111433e-06, | |
| "loss": 0.09055022239685058, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.7698099850369249, | |
| "grad_norm": 2.042584180831909, | |
| "learning_rate": 5.755864465682016e-06, | |
| "loss": 0.08615251541137696, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.7698099850369249, | |
| "eval_accuracy": 0.9903457354945594, | |
| "eval_f1": 0.9534046525260119, | |
| "eval_loss": 0.036240287125110626, | |
| "eval_precision": 0.9445875312476327, | |
| "eval_recall": 0.9623879291493549, | |
| "eval_runtime": 38.9533, | |
| "eval_samples_per_second": 641.795, | |
| "eval_steps_per_second": 35.658, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.7730278506266792, | |
| "grad_norm": 1.0843491554260254, | |
| "learning_rate": 5.675419120249703e-06, | |
| "loss": 0.08899230003356934, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.7762457162164336, | |
| "grad_norm": 12.144911766052246, | |
| "learning_rate": 5.594973774817389e-06, | |
| "loss": 0.08881766319274903, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.779463581806188, | |
| "grad_norm": 2.8250832557678223, | |
| "learning_rate": 5.514528429385076e-06, | |
| "loss": 0.0975367259979248, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.7826814473959423, | |
| "grad_norm": 1.2422140836715698, | |
| "learning_rate": 5.434083083952763e-06, | |
| "loss": 0.10660443305969239, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.7858993129856966, | |
| "grad_norm": 4.103642463684082, | |
| "learning_rate": 5.353637738520449e-06, | |
| "loss": 0.10672403335571289, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.789117178575451, | |
| "grad_norm": 2.0695173740386963, | |
| "learning_rate": 5.273192393088136e-06, | |
| "loss": 0.08749826431274414, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.7923350441652053, | |
| "grad_norm": 2.9659390449523926, | |
| "learning_rate": 5.192747047655823e-06, | |
| "loss": 0.10261861801147461, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.7955529097549596, | |
| "grad_norm": 3.0467967987060547, | |
| "learning_rate": 5.11230170222351e-06, | |
| "loss": 0.1026226806640625, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.798770775344714, | |
| "grad_norm": 2.826698064804077, | |
| "learning_rate": 5.031856356791196e-06, | |
| "loss": 0.08784229278564454, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.8019886409344683, | |
| "grad_norm": 4.862614631652832, | |
| "learning_rate": 4.951411011358883e-06, | |
| "loss": 0.09360240936279297, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.8052065065242224, | |
| "grad_norm": 1.5207270383834839, | |
| "learning_rate": 4.87096566592657e-06, | |
| "loss": 0.10859518051147461, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.8084243721139768, | |
| "grad_norm": 2.692488670349121, | |
| "learning_rate": 4.790520320494257e-06, | |
| "loss": 0.0937327766418457, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.8116422377037311, | |
| "grad_norm": 2.3452610969543457, | |
| "learning_rate": 4.710074975061943e-06, | |
| "loss": 0.0986541748046875, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.8148601032934855, | |
| "grad_norm": 3.4485702514648438, | |
| "learning_rate": 4.6296296296296296e-06, | |
| "loss": 0.11264933586120605, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.8180779688832396, | |
| "grad_norm": 0.7109314203262329, | |
| "learning_rate": 4.549184284197316e-06, | |
| "loss": 0.09030750274658203, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.821295834472994, | |
| "grad_norm": 11.383523941040039, | |
| "learning_rate": 4.468738938765003e-06, | |
| "loss": 0.09089506149291993, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.8245137000627483, | |
| "grad_norm": 2.5693790912628174, | |
| "learning_rate": 4.38829359333269e-06, | |
| "loss": 0.09092802047729492, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.8277315656525026, | |
| "grad_norm": 4.628587245941162, | |
| "learning_rate": 4.307848247900376e-06, | |
| "loss": 0.10240273475646973, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.830949431242257, | |
| "grad_norm": 2.5633656978607178, | |
| "learning_rate": 4.227402902468063e-06, | |
| "loss": 0.10268410682678222, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.8341672968320113, | |
| "grad_norm": 1.9654608964920044, | |
| "learning_rate": 4.14695755703575e-06, | |
| "loss": 0.08537924766540528, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.8373851624217656, | |
| "grad_norm": 5.80186653137207, | |
| "learning_rate": 4.066512211603437e-06, | |
| "loss": 0.09202526092529296, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.84060302801152, | |
| "grad_norm": 1.1093907356262207, | |
| "learning_rate": 3.986066866171124e-06, | |
| "loss": 0.08391679763793945, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.8438208936012743, | |
| "grad_norm": 2.4156336784362793, | |
| "learning_rate": 3.90562152073881e-06, | |
| "loss": 0.08964170455932617, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.8470387591910287, | |
| "grad_norm": 0.8483083844184875, | |
| "learning_rate": 3.825176175306497e-06, | |
| "loss": 0.08196611404418945, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.850256624780783, | |
| "grad_norm": 2.47489070892334, | |
| "learning_rate": 3.7447308298741836e-06, | |
| "loss": 0.09712170600891114, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.850256624780783, | |
| "eval_accuracy": 0.990784730222486, | |
| "eval_f1": 0.9556170744775851, | |
| "eval_loss": 0.03545914590358734, | |
| "eval_precision": 0.9478179448570814, | |
| "eval_recall": 0.9635456194286155, | |
| "eval_runtime": 39.1151, | |
| "eval_samples_per_second": 639.14, | |
| "eval_steps_per_second": 35.511, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.8534744903705374, | |
| "grad_norm": 2.640423536300659, | |
| "learning_rate": 3.6642854844418705e-06, | |
| "loss": 0.09879606246948242, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.8566923559602917, | |
| "grad_norm": 1.9036402702331543, | |
| "learning_rate": 3.5838401390095566e-06, | |
| "loss": 0.08787569999694825, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.8599102215500458, | |
| "grad_norm": 2.5272982120513916, | |
| "learning_rate": 3.5033947935772435e-06, | |
| "loss": 0.0921150016784668, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.8631280871398002, | |
| "grad_norm": 2.6478683948516846, | |
| "learning_rate": 3.4229494481449304e-06, | |
| "loss": 0.10192151069641113, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.8663459527295545, | |
| "grad_norm": 1.6722396612167358, | |
| "learning_rate": 3.3425041027126173e-06, | |
| "loss": 0.08221748352050781, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.8695638183193088, | |
| "grad_norm": 0.9039199948310852, | |
| "learning_rate": 3.2620587572803042e-06, | |
| "loss": 0.09699295043945312, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.872781683909063, | |
| "grad_norm": 4.108741283416748, | |
| "learning_rate": 3.1816134118479903e-06, | |
| "loss": 0.10586893081665039, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.8759995494988173, | |
| "grad_norm": 0.8022117018699646, | |
| "learning_rate": 3.1011680664156772e-06, | |
| "loss": 0.10414183616638184, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.8792174150885717, | |
| "grad_norm": 0.757673978805542, | |
| "learning_rate": 3.020722720983364e-06, | |
| "loss": 0.09568047523498535, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.882435280678326, | |
| "grad_norm": 2.535696506500244, | |
| "learning_rate": 2.9402773755510506e-06, | |
| "loss": 0.10529354095458984, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.8856531462680803, | |
| "grad_norm": 3.3938286304473877, | |
| "learning_rate": 2.8598320301187376e-06, | |
| "loss": 0.09156073570251465, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.8888710118578347, | |
| "grad_norm": 1.427871823310852, | |
| "learning_rate": 2.779386684686424e-06, | |
| "loss": 0.08934795379638671, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.892088877447589, | |
| "grad_norm": 1.7206834554672241, | |
| "learning_rate": 2.698941339254111e-06, | |
| "loss": 0.10297789573669433, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.8953067430373434, | |
| "grad_norm": 2.4081387519836426, | |
| "learning_rate": 2.618495993821798e-06, | |
| "loss": 0.10169391632080078, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.8985246086270977, | |
| "grad_norm": 4.349306583404541, | |
| "learning_rate": 2.5380506483894844e-06, | |
| "loss": 0.09130579948425294, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.901742474216852, | |
| "grad_norm": 1.038880467414856, | |
| "learning_rate": 2.4576053029571713e-06, | |
| "loss": 0.09172413825988769, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.9049603398066064, | |
| "grad_norm": 1.437605619430542, | |
| "learning_rate": 2.377159957524858e-06, | |
| "loss": 0.10029353141784668, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.9081782053963607, | |
| "grad_norm": 1.8541232347488403, | |
| "learning_rate": 2.2967146120925443e-06, | |
| "loss": 0.09666367530822755, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 1.9113960709861149, | |
| "grad_norm": 4.376369953155518, | |
| "learning_rate": 2.2162692666602312e-06, | |
| "loss": 0.08466120719909669, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 1.9146139365758692, | |
| "grad_norm": 1.9385297298431396, | |
| "learning_rate": 2.1358239212279177e-06, | |
| "loss": 0.08599072456359863, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.9178318021656235, | |
| "grad_norm": 1.689815640449524, | |
| "learning_rate": 2.0553785757956042e-06, | |
| "loss": 0.09492490768432617, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 1.9210496677553779, | |
| "grad_norm": 1.3797245025634766, | |
| "learning_rate": 1.974933230363291e-06, | |
| "loss": 0.09027796745300293, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 1.924267533345132, | |
| "grad_norm": 0.4921301007270813, | |
| "learning_rate": 1.894487884930978e-06, | |
| "loss": 0.0897593879699707, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 1.9274853989348864, | |
| "grad_norm": 1.489128828048706, | |
| "learning_rate": 1.8140425394986648e-06, | |
| "loss": 0.09694389343261718, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 1.9307032645246407, | |
| "grad_norm": 0.7353283166885376, | |
| "learning_rate": 1.7335971940663515e-06, | |
| "loss": 0.0947257137298584, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.9307032645246407, | |
| "eval_accuracy": 0.9908240277481897, | |
| "eval_f1": 0.9576732752290649, | |
| "eval_loss": 0.03446565568447113, | |
| "eval_precision": 0.9507949387630905, | |
| "eval_recall": 0.9646518568065757, | |
| "eval_runtime": 38.8968, | |
| "eval_samples_per_second": 642.726, | |
| "eval_steps_per_second": 35.71, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.933921130114395, | |
| "grad_norm": 1.3698302507400513, | |
| "learning_rate": 1.653151848634038e-06, | |
| "loss": 0.0832412052154541, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 1.9371389957041494, | |
| "grad_norm": 4.799550533294678, | |
| "learning_rate": 1.5727065032017249e-06, | |
| "loss": 0.11331718444824218, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 1.9403568612939037, | |
| "grad_norm": 1.4409183263778687, | |
| "learning_rate": 1.4922611577694116e-06, | |
| "loss": 0.09095165252685547, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 1.943574726883658, | |
| "grad_norm": 0.7844113111495972, | |
| "learning_rate": 1.4118158123370983e-06, | |
| "loss": 0.07738580703735351, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 1.9467925924734124, | |
| "grad_norm": 1.4404165744781494, | |
| "learning_rate": 1.331370466904785e-06, | |
| "loss": 0.08979734420776367, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.9500104580631668, | |
| "grad_norm": 12.599478721618652, | |
| "learning_rate": 1.2509251214724717e-06, | |
| "loss": 0.10170120239257813, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 1.953228323652921, | |
| "grad_norm": 16.47142791748047, | |
| "learning_rate": 1.1704797760401584e-06, | |
| "loss": 0.08839907646179199, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 1.9564461892426754, | |
| "grad_norm": 1.4317628145217896, | |
| "learning_rate": 1.0900344306078451e-06, | |
| "loss": 0.10484655380249024, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 1.9596640548324298, | |
| "grad_norm": 1.4019297361373901, | |
| "learning_rate": 1.0095890851755318e-06, | |
| "loss": 0.0975730323791504, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 1.9628819204221841, | |
| "grad_norm": 1.428163766860962, | |
| "learning_rate": 9.291437397432184e-07, | |
| "loss": 0.09934920310974121, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.9660997860119382, | |
| "grad_norm": 0.9141352772712708, | |
| "learning_rate": 8.486983943109052e-07, | |
| "loss": 0.09621753692626953, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.9693176516016926, | |
| "grad_norm": 4.859970569610596, | |
| "learning_rate": 7.682530488785919e-07, | |
| "loss": 0.0880228328704834, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.972535517191447, | |
| "grad_norm": 14.404598236083984, | |
| "learning_rate": 6.878077034462786e-07, | |
| "loss": 0.10370702743530273, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.9757533827812013, | |
| "grad_norm": 1.890513300895691, | |
| "learning_rate": 6.073623580139654e-07, | |
| "loss": 0.08836669921875, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.9789712483709554, | |
| "grad_norm": 3.365849494934082, | |
| "learning_rate": 5.269170125816521e-07, | |
| "loss": 0.0887114429473877, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.9821891139607097, | |
| "grad_norm": 2.7111990451812744, | |
| "learning_rate": 4.4647166714933875e-07, | |
| "loss": 0.10604698181152344, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.985406979550464, | |
| "grad_norm": 1.373115062713623, | |
| "learning_rate": 3.6602632171702546e-07, | |
| "loss": 0.08321118354797363, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.9886248451402184, | |
| "grad_norm": 1.2105165719985962, | |
| "learning_rate": 2.855809762847122e-07, | |
| "loss": 0.09142358779907227, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.9918427107299728, | |
| "grad_norm": 1.9538366794586182, | |
| "learning_rate": 2.0513563085239887e-07, | |
| "loss": 0.07895866394042969, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.995060576319727, | |
| "grad_norm": 0.8726534843444824, | |
| "learning_rate": 1.246902854200856e-07, | |
| "loss": 0.10273164749145508, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.9982784419094815, | |
| "grad_norm": 3.1368095874786377, | |
| "learning_rate": 4.424493998777231e-08, | |
| "loss": 0.08670869827270508, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 62154, | |
| "total_flos": 1.5480486185003853e+18, | |
| "train_loss": 0.1780550116395906, | |
| "train_runtime": 17614.9903, | |
| "train_samples_per_second": 508.09, | |
| "train_steps_per_second": 3.528 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 62154, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 2500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5480486185003853e+18, | |
| "train_batch_size": 18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |