Skip to content

Instantly share code, notes, and snippets.

@zhiqiu
Created August 13, 2024 05:24
Show Gist options
  • Save zhiqiu/ad0fbb262020b8f9e2e25cdb4ab29c84 to your computer and use it in GitHub Desktop.
Save zhiqiu/ad0fbb262020b8f9e2e25cdb4ab29c84 to your computer and use it in GitHub Desktop.
Plot the loss curves of two training logs and the difference between them
from matplotlib import pyplot as plt
import numpy as np
import warnings
def parse_file(file_name, min_step=362801, max_records=10001):
    """Extract per-step speed and loss values from a training log.

    Only lines containing both ``'loss:'`` and ``'global_runtime:'`` are
    parsed. Such a line is split on ``':'`` and values are read by position:
    field 5 holds the loss, field 8 the step and field 16 the speed, each
    taken up to the first comma.

    Args:
        file_name: Path of the log file to read.
        min_step: Records whose step is below this value are ignored.
        max_records: Stop reading once this many records have been kept;
            ``None`` disables the limit.

    Returns:
        A ``(speeds, losses, idxs)`` tuple of equally long lists, where
        ``idxs`` is simply ``0 .. len(losses) - 1``.

    Raises:
        IndexError: If a matching line has fewer ``':'``-separated fields
            than the positions read above.
        ValueError: If one of the positional fields is not numeric.
    """
    speeds = []
    losses = []
    step_loss_map = {}
    with open(file_name) as file:
        for line in file:
            if max_records is not None and len(losses) >= max_records:
                break
            line = line.strip()
            if 'loss:' not in line or 'global_runtime:' not in line:
                continue
            fields = line.split(':')
            loss = float(fields[5].split(',')[0].strip())
            step = int(fields[8].split(',')[0].strip())
            if step < min_step:
                continue
            # Deduplicate losses reported for the same step: some steps may
            # be repeated after a run is resumed. Only the first occurrence
            # is kept; a mismatch is reported as a warning rather than
            # aborting the whole parse.
            if step in step_loss_map:
                if step_loss_map[step] != loss:
                    warnings.warn(f'There are two same step {step} with different loss, loss1={step_loss_map[step]}, loss2={loss}, diff is {step_loss_map[step] - loss}, in file {file_name}')
                continue
            step_loss_map[step] = loss
            # The speed is parsed only for records that are actually kept.
            speeds.append(float(fields[16].split(',')[0].strip()))
            losses.append(loss)
    return speeds, losses, list(range(len(losses)))
# Driver script: plot the loss curve of every log listed in `file_names` and,
# when more than one log is given, the per-record loss difference of each
# later log against the first one (the baseline).
baseline_loss = None
baseline_idx = None
baseline = None
file_names = [
    # Other logs that can be enabled for comparison:
    # 'ec3_gpu_128_long_run',
    # 'ec3_xpu_128_long_run.0',
    # 'eb_lite_gpu_long_run',
    # 'eb_lite_gpu_long_run.0',
    # 'eb_lite_xpu_long_run.0',
    # 'eb_lite_xpu_rc4.1.0',
    # 'eb_lite_xpu_rc4.1-same-ernie.0',
    # 'eb_lite_xpu_rc4.1-fuse-sharding.0',
    'eblite_gpu_from_349000.0',
    'eblite_xpu_from_349000_with_replace.0',
]
color_list = ['b', 'r', 'y', 'g']
color_list = color_list[0:len(file_names)]
# Window applied to the difference plot.
# NOTE(review): end_idx = -1 excludes the final point of every slice; use
# None instead if the last record should be included.
start_idx = 0
end_idx = -1
plot_diff = len(file_names) > 1
if plot_diff:
    # Wider figure: loss curves on the left, differences on the right.
    plt.figure(figsize=(20, 10))
else:
    plt.figure(figsize=(10, 8))
max_diff_idx = 0
for file_no, file in enumerate(file_names):
    # Cycle through the palette so that logs beyond its length are still
    # plotted instead of being silently dropped.
    color = color_list[file_no % len(color_list)]
    speed, loss, idx = parse_file(file)
    if baseline_loss is None:
        # The first log is the reference for all difference curves.
        baseline_loss = loss
        baseline_idx = idx
        baseline = file
    elif plot_diff:
        # Records are compared by position, up to the shorter of the two logs.
        diff = [cur - ref for cur, ref in zip(loss, baseline_loss)]
        diff_idx = list(range(len(diff)))
        max_diff_idx = max(max_diff_idx, len(diff_idx))
        plt.subplot(1, 2, 2)
        diff_window = diff[start_idx:end_idx]
        # Guard the empty window so NumPy does not emit a RuntimeWarning.
        mean_diff = np.array(diff_window).mean() if diff_window else float('nan')
        print(f'mean {file} - {baseline}: {mean_diff}')
        plt.plot(diff_idx[start_idx:end_idx], diff_window, label=f'{file} - {baseline}', c=color)
        plt.legend()
    if plot_diff:
        plt.subplot(1, 2, 1)
    plt.plot(idx, loss, label=f'{file}', c=color)
    plt.legend()
if plot_diff:
    plt.subplot(1, 2, 2)
    # Optional zoom on the x axis: plt.xlim([1200, 1600])
    plt.ylim([-0.001, 0.001])
    # Horizontal reference line at zero difference.
    zeros = [0] * max_diff_idx
    diff_idx = list(range(max_diff_idx))
    plt.plot(diff_idx[start_idx:end_idx], zeros[start_idx:end_idx], label='zero', color=color_list[0])
    plt.legend()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment