{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5280855941918228, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07642338555598013, "grad_norm": 0.04852772876620293, "learning_rate": 0.00019991780772074993, "loss": 0.631, "step": 100 }, { "epoch": 0.15284677111196027, "grad_norm": 0.06019178777933121, "learning_rate": 0.00019926107993313918, "loss": 0.4967, "step": 200 }, { "epoch": 0.2292701566679404, "grad_norm": 0.06423385441303253, "learning_rate": 0.00019795194081958614, "loss": 0.4377, "step": 300 }, { "epoch": 0.30569354222392053, "grad_norm": 0.05076967179775238, "learning_rate": 0.00019599899493270587, "loss": 0.4121, "step": 400 }, { "epoch": 0.38211692777990064, "grad_norm": 0.06871291249990463, "learning_rate": 0.00019341507836108832, "loss": 0.3974, "step": 500 }, { "epoch": 0.4585403133358808, "grad_norm": 0.062317609786987305, "learning_rate": 0.00019021717436179406, "loss": 0.3826, "step": 600 }, { "epoch": 0.5349636988918609, "grad_norm": 0.054332610219717026, "learning_rate": 0.00018642630173483835, "loss": 0.3695, "step": 700 }, { "epoch": 0.6113870844478411, "grad_norm": 0.07528349757194519, "learning_rate": 0.00018206737667334231, "loss": 0.3589, "step": 800 }, { "epoch": 0.6878104700038211, "grad_norm": 0.07792173326015472, "learning_rate": 0.00017716904899736617, "loss": 0.3527, "step": 900 }, { "epoch": 0.7642338555598013, "grad_norm": 0.08550075441598892, "learning_rate": 0.00017176351384780628, "loss": 0.3407, "step": 1000 }, { "epoch": 0.8406572411157814, "grad_norm": 0.09895286709070206, "learning_rate": 0.00016588630007803324, "loss": 0.3292, "step": 1100 }, { "epoch": 0.9170806266717616, "grad_norm": 0.05582467094063759, "learning_rate": 0.00015957603673410394, "loss": 0.321, "step": 1200 }, { "epoch": 0.9935040122277417, "grad_norm": 0.05442088469862938, "learning_rate": 0.00015287419915839849, "loss": 0.3202, "step": 1300 }, { "epoch": 1.0695452808559418, "grad_norm": 0.07129843533039093, "learning_rate": 0.00014582483638546267, "loss": 0.3075, "step": 1400 }, { "epoch": 1.145968666411922, "grad_norm": 0.06552577763795853, "learning_rate": 0.00013847428162179422, "loss": 0.3008, "step": 1500 }, { "epoch": 1.2223920519679021, "grad_norm": 0.07303871214389801, "learning_rate": 0.00013087084771249831, "loss": 0.2948, "step": 1600 }, { "epoch": 1.2988154375238823, "grad_norm": 0.12717650830745697, "learning_rate": 0.00012306450959641117, "loss": 0.2948, "step": 1700 }, { "epoch": 1.3752388230798624, "grad_norm": 0.05392363294959068, "learning_rate": 0.00011510657583681364, "loss": 0.2887, "step": 1800 }, { "epoch": 1.4516622086358426, "grad_norm": 0.07771366834640503, "learning_rate": 0.00010704935138665758, "loss": 0.286, "step": 1900 }, { "epoch": 1.5280855941918228, "grad_norm": 0.056173793971538544, "learning_rate": 9.894579380484204e-05, "loss": 0.2874, "step": 2000 } ], "logging_steps": 100, "max_steps": 3924, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.400738054418727e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }