+{"forward-backward-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [19.39068, 0.66038, 0.65673, 0.66493, 0.65894, 0.6473, 0.65746, 0.64942, 0.66259, 0.65247, 0.65165, 0.64944, 0.81313, 0.65069, 0.64982, 0.65247, 0.65149, 0.65284, 0.64913, 0.6496]}, "forward-compute-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [3.63253, 0.27412, 0.26777, 0.27338, 0.26922, 0.26445, 0.27043, 0.26308, 0.27178, 0.26246, 0.26565, 0.26691, 0.42095, 0.26741, 0.26653, 0.26546, 0.26547, 0.26403, 0.26266, 0.26606]}, "backward-compute-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [2.0264, 0.24005, 0.23751, 0.24162, 0.24102, 0.23888, 0.24027, 0.23829, 0.24182, 0.24308, 0.24109, 0.23964, 0.23841, 0.24005, 0.23898, 0.23896, 0.24052, 0.23894, 0.24242, 0.23863]}, "forward-recv-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [8.32911, 0.07441, 0.07755, 0.07578, 0.07557, 0.07223, 0.0737, 0.07404, 0.07108, 0.07174, 0.07137, 0.07162, 0.07437, 0.07185, 0.07129, 0.07247, 0.0719, 0.07573, 0.07292, 0.07122]}, "forward-send-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [1.47287, 0.00053, 0.00063, 0.00048, 0.00045, 0.00047, 0.00046, 0.00045, 0.00046, 0.00063, 0.00044, 0.00046, 0.00047, 0.00045, 0.00056, 0.00046, 0.00045, 0.00046, 0.00045, 0.00044]}, "backward-recv-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.1444, 0.13179, 0.12767, 0.13592, 0.1279, 0.12912, 0.13033, 0.1328, 0.13106, 0.13249, 0.12957, 0.12877, 0.13334, 0.12829, 0.12815, 0.13128, 0.12985, 0.13117, 0.12901, 0.1277]}, "backward-send-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.00065, 0.00056, 0.00066, 0.00067, 0.0006, 0.00059, 0.00064, 0.00067, 0.00068, 0.0006, 0.00056, 0.00058, 0.00059, 0.00056, 0.00064, 0.00058, 0.00049, 0.00079, 0.00081, 0.0006]}, "forward-send-backward-recv-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [12.49425, 0.23291, 0.228, 0.22475, 0.22786, 0.22525, 0.22534, 0.22597, 0.23004, 0.22656, 0.22342, 0.22577, 0.38374, 0.22857, 0.22673, 0.22371, 0.22908, 0.23017, 0.23145, 0.23191]}, "backward-send-forward-recv-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [5.02478, 0.00608, 0.00441, 0.00414, 0.0093, 0.00347, 0.00363, 0.00527, 0.0093, 0.00705, 0.00369, 0.00633, 0.00834, 0.00352, 0.0034, 0.00565, 0.00346, 0.00354, 0.00341, 0.0035]}, "layernorm-grads-all-reduce-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [5e-05, 2e-05, 2e-05, 3e-05, 3e-05, 2e-05, 3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05]}, "embedding-grads-all-reduce-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.47745, 0.00052, 0.00064, 0.00053, 0.00052, 0.0006, 0.00052, 0.00062, 0.00052, 0.00056, 0.00065, 0.00056, 0.00054, 0.00053, 0.00058, 0.00052, 0.00052, 0.00052, 0.00055, 0.00053]}, "all-grads-sync-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.43086, 0.00036, 0.00041, 0.00037, 0.00032, 0.00037, 0.00048, 0.00044, 0.00043, 0.00045, 0.00034, 0.00044, 0.00037, 0.00043, 0.00044, 0.00032, 0.00032, 0.00045, 0.00045, 0.00045]}, "optimizer-copy-to-main-grad-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.00053, 0.00034, 0.00032, 0.00033, 0.00034, 0.00031, 0.00033, 0.00035, 0.00032, 0.00033, 0.00036, 0.00035, 0.00033, 0.00033, 0.00034, 0.00035, 0.00033, 0.00034, 0.00032, 0.00035]}, "optimizer-clip-main-grad-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [2.26638, 0.00127, 0.00123, 0.00144, 0.00125, 0.00123, 0.00128, 0.00162, 0.00128, 0.00131, 0.00138, 0.00133, 0.00142, 0.0013, 0.00136, 0.00137, 0.00133, 0.00135, 0.00129, 0.00136]}, "optimizer-count-zeros-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.01282, 0.00738, 0.00728, 0.00736, 0.00738, 0.00733, 0.00738, 0.00735, 0.00731, 0.00727, 0.00897, 0.00755, 0.0073, 0.00721, 0.00734, 0.00746, 0.00736, 0.00734, 0.00737, 0.00726]}, "optimizer-inner-step-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.00984, 0.00108, 0.00105, 0.00108, 0.00105, 0.00105, 0.00107, 0.00104, 0.00105, 0.00106, 0.00106, 0.00105, 0.0012, 0.00106, 0.00105, 0.00105, 0.00105, 0.00106, 0.00104, 0.00106]}, "optimizer-copy-main-to-model-params-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.0011, 0.00101, 0.00102, 0.00102, 0.00101, 0.00102, 0.00101, 0.00101, 0.00101, 0.00101, 0.00101, 0.00101, 0.0015, 0.00102, 0.00101, 0.00101, 0.00102, 0.00268, 0.00101, 0.00101]}, "optimizer-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [2.29197, 0.01172, 0.01152, 0.01191, 0.01165, 0.01156, 0.0117, 0.01199, 0.01159, 0.01161, 0.0134, 0.01194, 0.01269, 0.01155, 0.01172, 0.01186, 0.01173, 0.01343, 0.01172, 0.01165]}, "learning-rate": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.0001, 0.0001, 9e-05, 9e-05, 8e-05, 8e-05, 7e-05, 7e-05, 6e-05, 6e-05, 5e-05, 5e-05, 5e-05, 4e-05, 4e-05, 3e-05, 3e-05, 2e-05, 2e-05, 1e-05]}, "learning-rate vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [0.0001, 0.0001, 9e-05, 9e-05, 8e-05, 8e-05, 7e-05, 7e-05, 6e-05, 6e-05, 5e-05, 5e-05, 5e-05, 4e-05, 4e-05, 3e-05, 3e-05, 2e-05, 2e-05, 1e-05]}, "batch-size": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "batch-size vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0]}, "lm loss": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [10.41489, 9.20451, 8.62156, 8.34435, 8.08472, 7.96931, 7.68116, 7.39495, 7.26108, 7.19145, 7.31028, 7.16653, 7.05979, 6.99436, 6.85568, 6.93225, 6.95525, 7.02522, 6.66561, 6.93924]}, "lm loss vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [10.41489, 9.20451, 8.62156, 8.34435, 8.08472, 7.96931, 7.68116, 7.39495, 7.26108, 7.19145, 7.31028, 7.16653, 7.05979, 6.99436, 6.85568, 6.93225, 6.95525, 7.02522, 6.66561, 6.93924]}, "loss-scale": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "loss-scale vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, "grad-norm": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [31.51239, 2.98952, 3.27663, 2.61225, 2.39588, 1.99758, 1.81287, 1.93167, 1.62175, 1.51416, 1.16291, 1.32388, 1.20328, 1.10814, 1.5007, 2.15295, 1.65903, 1.42013, 2.08526, 1.2754]}, "grad-norm vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [31.51239, 2.98952, 3.27663, 2.61225, 2.39588, 1.99758, 1.81287, 1.93167, 1.62175, 1.51416, 1.16291, 1.32388, 1.20328, 1.10814, 1.5007, 2.15295, 1.65903, 1.42013, 2.08526, 1.2754]}, "num-zeros": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [115745.0, 111070.0, 117081.0, 112381.0, 118700.0, 116957.0, 111399.0, 114013.0, 118460.0, 116959.0, 111499.0, 115613.0, 108489.0, 119947.0, 115772.0, 116922.0, 119841.0, 120380.0, 121396.0, 118455.0]}, "num-zeros vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [115745.0, 111070.0, 117081.0, 112381.0, 118700.0, 116957.0, 111399.0, 114013.0, 118460.0, 116959.0, 111499.0, 115613.0, 108489.0, 119947.0, 115772.0, 116922.0, 119841.0, 120380.0, 121396.0, 118455.0]}, "params-norm": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [309.46707, 309.48447, 309.52603, 309.57944, 309.64523, 309.72018, 309.80231, 309.8884, 309.97391, 310.05591, 310.13483, 310.20755, 310.27094, 310.32535, 310.37161, 310.40887, 310.43597, 310.45648, 310.47238, 310.48444]}, "params-norm vs samples": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [309.46707, 309.48447, 309.52603, 309.57944, 309.64523, 309.72018, 309.80231, 309.8884, 309.97391, 310.05591, 310.13483, 310.20755, 310.27094, 310.32535, 310.37161, 310.40887, 310.43597, 310.45648, 310.47238, 310.48444]}, "iteration-time": {"start_step": 0, "end_step": 100, "step_interval": 5, "values": [21.7057, 0.68569, 0.68236, 0.69077, 0.68415, 0.67238, 0.68288, 0.67481, 0.6874, 0.67748, 0.6785, 0.67478, 0.83941, 0.6755, 0.67503, 0.67787, 0.67668, 0.67904, 0.67443, 0.67541]}, "lm loss validation": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [6.86582]}, "lm loss validation vs samples": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [6.86582]}, "lm loss validation ppl": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [958.93542]}, "lm loss validation ppl vs samples": {"start_step": 0, "end_step": 2, "step_interval": 5, "values": [958.93542]}}
0 commit comments