Created
August 13, 2024 05:24
-
-
Save zhiqiu/ad0fbb262020b8f9e2e25cdb4ab29c84 to your computer and use it in GitHub Desktop.
Plot the loss curves of training logs and the loss difference between two logs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from matplotlib import pyplot as plt | |
| import numpy as np | |
| import warnings | |
def parse_file(file_name, min_step=362801, max_points=10001):
    """Parse a training log and collect the speed and loss of each step.

    Only lines containing both ``'loss:'`` and ``'global_runtime:'`` are
    considered. Such a line is split on ``':'`` and the values are read from
    fixed field positions, taking the text before the first comma of the
    field: loss from field 5, step from field 8 and speed from field 16.

    A step that was already seen is skipped (a warning is issued when its
    loss differs from the first occurrence), so every step contributes at
    most one record.

    Args:
        file_name: Path of the log file to read.
        min_step: Records whose step is smaller than this value are skipped.
        max_points: Maximum number of records to collect; reading stops once
            this many were gathered. ``None`` disables the cap.

    Returns:
        A tuple ``(speeds, losses, idxs)`` of three equally long lists.
        ``idxs`` holds the position of each record (0, 1, 2, ...), not its
        step number.

    Raises:
        IndexError: A matching line has fewer than 17 ``':'``-separated
            fields.
        ValueError: A loss, step or speed field is not a valid number.
    """
    speeds = []
    losses = []
    idxs = []
    step_loss_map = {}
    with open(file_name) as file:
        for raw_line in file:
            if max_points is not None and len(losses) >= max_points:
                break
            text = raw_line.strip()
            if 'loss:' not in text or 'global_runtime:' not in text:
                continue
            fields = text.split(':')
            loss = float(fields[5].split(',')[0].strip())
            step = int(fields[8].split(',')[0].strip())
            if step < min_step:
                continue
            # De-duplicate records with the same step: after a resume some
            # steps may show up in the log a second time.
            if step in step_loss_map:
                # Only warn (instead of asserting) so that a mismatch does not
                # abort the whole parse.
                if step_loss_map[step] != loss:
                    warnings.warn(f'There are two same step {step} with different loss, loss1={step_loss_map[step]}, loss2={loss}, diff is {step_loss_map[step] - loss}, in file {file_name}')
                continue
            step_loss_map[step] = loss
            speed = float(fields[16].split(',')[0].strip())
            speeds.append(speed)
            idxs.append(len(losses))
            losses.append(loss)
    return speeds, losses, idxs
# Script section: plot the loss curve of every log in `file_names`. When more
# than one log is listed, also plot the loss difference of each later log
# against the first one (the baseline) in a second subplot.
baseline_loss = None  # losses of the first parsed log
baseline_idx = None   # record positions of the first parsed log
baseline = None       # file name of the first parsed log

file_names = [
    # 'ec3_gpu_128_long_run',
    # 'ec3_xpu_128_long_run.0',
    # 'eb_lite_gpu_long_run',
    # 'eb_lite_gpu_long_run.0',
    # 'eb_lite_xpu_long_run.0',
    # 'eb_lite_xpu_rc4.1.0',
    # 'eb_lite_xpu_rc4.1-same-ernie.0',
    # 'eb_lite_xpu_rc4.1-fuse-sharding.0',
    'eblite_gpu_from_349000.0',
    'eblite_xpu_from_349000_with_replace.0',
]
color_list = ['b', 'r', 'y', 'g']
color_list = color_list[0:len(file_names)]

# Window of the difference curve that is plotted and averaged. Both values
# are used as slice bounds, so `None` means "up to the last record"; an end
# of -1 would silently drop the last record.
start_idx = 0
end_idx = None

# The difference subplot only makes sense with at least two logs.
plot_diff = len(file_names) > 1
if plot_diff:
    plt.figure(figsize=(20, 10))
else:
    plt.figure(figsize=(10, 8))

max_diff_idx = 0  # length of the longest difference curve drawn so far
for file_no, file in enumerate(file_names):
    # Cycle through the colours so that logs beyond the number of available
    # colours are still plotted instead of being skipped.
    color = color_list[file_no % len(color_list)]
    speed, loss, idx = parse_file(file)
    if baseline_loss is None:
        # The first log is the baseline every other log is compared against.
        baseline_loss = loss
        baseline_idx = idx
        baseline = file
    elif plot_diff:
        # Records are compared by position (i-th record of each log), over
        # the part both logs have in common.
        overlap = min(len(baseline_loss), len(loss))
        diff = [loss[i] - baseline_loss[i] for i in range(overlap)]
        diff_idx = list(range(overlap))
        max_diff_idx = max(max_diff_idx, overlap)
        plt.subplot(1, 2, 2)
        diff_window = diff[start_idx:end_idx]
        # Report nan for an empty window rather than letting numpy emit a
        # "mean of empty slice" warning.
        mean_diff = np.array(diff_window).mean() if diff_window else float('nan')
        print(f'mean {file} - {baseline}: {mean_diff}')
        plt.plot(diff_idx[start_idx:end_idx], diff_window, label=f'{file} - {baseline}', c=color)
        plt.legend()
    if plot_diff:
        plt.subplot(1, 2, 1)
    plt.plot(idx, loss, label=f'{file}', c=color)
    plt.legend()

if plot_diff:
    plt.subplot(1, 2, 2)
    # plt.xlim([1200, 1600])
    plt.ylim([-0.001, 0.001])
    # Horizontal reference line at zero difference.
    zeros = [0] * max_diff_idx
    diff_idx = list(range(max_diff_idx))
    plt.plot(diff_idx[start_idx:end_idx], zeros[start_idx:end_idx], label='zero', color=color_list[0])
    # zeros = [0.005 for i in range(max_diff_idx)]
    # diff_idx = [i for i in range(max_diff_idx)]
    # plt.plot(diff_idx[start_idx:end_idx], zeros[start_idx:end_idx], label='0.005')
    plt.legend()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment