"""Compare training-loss curves parsed from several training log files.

Each log is parsed with :func:`parse_file`.  The first file in
``file_names`` is the baseline; for every other file the per-record loss
difference against the baseline is printed (mean) and plotted next to the
raw loss curves.
"""
import itertools
import warnings

import numpy as np

# Records whose step is smaller than this are ignored by default.
MIN_STEP = 362801
# Default cap on the number of records kept per file.  The original loop
# stopped once its running index exceeded 10000, i.e. after 10001 records.
MAX_RECORDS = 10001


def parse_file(file_name, min_step=MIN_STEP, max_records=MAX_RECORDS):
    """Extract speed and loss series from one training log.

    Only lines containing both ``'loss:'`` and ``'global_runtime:'`` are
    considered.  Such a line is split on ``':'``; the text before the first
    comma of field 5 is read as the loss, of field 8 as the step and of
    field 16 as the speed.

    Args:
        file_name: Path of the log file to read.
        min_step: Records with ``step < min_step`` are skipped.
        max_records: Parsing stops as soon as this many records have been
            collected (expected to be >= 1).

    Returns:
        A ``(speeds, losses, idxs)`` tuple of equally long lists, where
        ``idxs`` is ``0, 1, 2, ...`` (the position of each kept record).

    Raises:
        IndexError, ValueError: If a matching line does not have the
            expected fields or they are not numeric.
    """
    speeds = []
    losses = []
    step_loss_map = {}
    with open(file_name) as file:
        for line in file:
            line = line.strip()
            if 'loss:' not in line or 'global_runtime:' not in line:
                continue
            fields = line.split(':')
            loss = float(fields[5].split(',')[0].strip())
            step = int(fields[8].split(',')[0].strip())
            if step < min_step:
                continue
            # Deduplicate records that share a step: after a resume some
            # steps may be logged again.  The first occurrence wins; a
            # differing loss is reported but does not abort parsing.
            if step in step_loss_map:
                if step_loss_map[step] != loss:
                    warnings.warn(f'There are two same step {step} with different loss, loss1={step_loss_map[step]}, loss2={loss}, diff is {step_loss_map[step] - loss}, in file {file_name}')
                continue
            step_loss_map[step] = loss
            speeds.append(float(fields[16].split(',')[0].strip()))
            losses.append(loss)
            if len(losses) >= max_records:
                break
    return speeds, losses, list(range(len(losses)))


def loss_difference(loss, baseline_loss):
    """Return ``loss[i] - baseline_loss[i]`` over the common prefix."""
    # NOTE(review): the series are aligned by record position, not by
    # training step; if the two logs skip different steps the curves will
    # be offset against each other - confirm this is acceptable.
    return [cur - ref for cur, ref in zip(loss, baseline_loss)]


file_names = [
    # Other logs that were compared previously; uncomment to include them.
    # 'ec3_gpu_128_long_run',
    # 'ec3_xpu_128_long_run.0',
    # 'eb_lite_gpu_long_run',
    # 'eb_lite_gpu_long_run.0',
    # 'eb_lite_xpu_long_run.0',
    # 'eb_lite_xpu_rc4.1.0',
    # 'eb_lite_xpu_rc4.1-same-ernie.0',
    # 'eb_lite_xpu_rc4.1-fuse-sharding.0',
    'eblite_gpu_from_349000.0',
    'eblite_xpu_from_349000_with_replace.0',
]
# Palette; it is cycled, so more than four files can still be plotted.
color_list = ['b', 'r', 'y', 'g']
# Window of the difference curve that is averaged and plotted.
start_idx = 0
# None means "up to and including the last record"; the previous value -1
# silently dropped the final sample from the mean and from the plot.
end_idx = None
# The difference subplot only makes sense with at least two files.
plot_diff = len(file_names) > 1


def main():
    """Parse every file in ``file_names`` and show the comparison figure."""
    # Imported here so that parse_file can be imported and used on machines
    # without matplotlib or without a display.
    from matplotlib import pyplot as plt

    if plot_diff:
        plt.figure(figsize=(20, 10))
    else:
        plt.figure(figsize=(10, 8))

    baseline = None
    baseline_loss = None
    max_diff_idx = 0
    window = slice(start_idx, end_idx)

    # cycle() instead of a plain zip(): with zip() every file beyond the
    # length of the palette was silently ignored.
    for file, color in zip(file_names, itertools.cycle(color_list)):
        _speed, loss, idx = parse_file(file)
        if baseline_loss is None:
            # The first file is the reference for all later ones.
            baseline_loss = loss
            baseline = file
        elif plot_diff:
            diff = loss_difference(loss, baseline_loss)
            diff_idx = list(range(len(diff)))
            max_diff_idx = max(max_diff_idx, len(diff))
            plt.subplot(1, 2, 2)
            shown = diff[window]
            # Avoid numpy's "mean of empty slice" warning on an empty window.
            mean_diff = np.array(shown).mean() if shown else float('nan')
            print(f'mean {file} - {baseline}: {mean_diff}')
            plt.plot(diff_idx[window], shown, label=f'{file} - {baseline}', c=color)
            plt.legend()
        if plot_diff:
            plt.subplot(1, 2, 1)
        plt.plot(idx, loss, label=f'{file}', c=color)
        plt.legend()

    if plot_diff:
        plt.subplot(1, 2, 2)
        # plt.xlim([1200, 1600])
        plt.ylim([-0.001, 0.001])
        # Horizontal reference line at zero across the difference plot.
        zero_idx = list(range(max_diff_idx))
        zeros = [0] * max_diff_idx
        plt.plot(zero_idx[window], zeros[window], label='zero', color=color_list[0])
        plt.legend()
    plt.show()


if __name__ == '__main__':
    main()