local function read_logs(filename) local function filterlog(logline) return string.match(logline, "(.-)%..-%[.-%] %[(%w+).- (%d+) (%w+) *(.*)") end local logs = {} local sessions = {} local cluster = {} for line in io.lines(filename) do local node, addr, tag, timestamp, what, info = filterlog(line) if tag then -- 特别处理 session / cluster / sleep 组合 -- 如果遇到 cluster , 那么下一条 sleep 和该条 cluster 绑定。 -- 如果 cluster 的 tag 和 sleep 的 tag 不同,那么找到最近的 session 绑定。 local log = logs[node] if log == nil then log = {} logs[node] = log end table.insert(log, { tag = tag, timestamp = math.tointeger(timestamp), address = addr, what = what, info = info, node = node, }) end end return logs end local function filter_log(logs, node, tag) local result = {} local subtag local cluster local sessions = {} for _, item in ipairs(logs[node]) do if item.tag == tag then local what = item.what if what == "session" then subtag = item.info elseif what == "cluster" then cluster = item.info else if what == "sleep" then item.session = { subtag = subtag or tag, node = cluster } if subtag ~= tag then table.insert(sessions, subtag) end subtag = nil cluster = nil end table.insert(result, item) end else if item.tag == subtag and item.what == "cluster" then cluster = item.info end end end return result, sessions end -- 按 tracecall begin/end 分组 local function filter_group(list) local groups = {} local stack = {} for _, item in ipairs(list) do if item.what == "tracecall" then if item.info == "begin" then local newframe = { item } table.insert(groups, newframe) table.insert(stack, newframe) elseif item.info == "end" then local frame = stack[#stack] if frame then table.insert(frame, item) table.remove(stack) -- pop stack end end else local frame = stack[#stack] if frame then table.insert(frame, item) end end end return groups end local function filter_log_all(logs, node, tag) local result, sessions = filter_log(logs, node, tag) local subs = {} for _, subtag in ipairs(sessions) do for nodename in pairs(logs) do subs[nodename .. "/" .. subtag] = filter_group(filter_log(logs, nodename, subtag)) end end return result, subs end -- 从 logs 中产生调用树,可能会用到 subs 中的子 session local function gen_tree(logs, subs) local result = {} local stack = { { frame = result, address = nil , open = true }, } for _, item in ipairs(logs) do local top while true do top = stack[#stack] if item.address ~= top.address then if top.open then local sublist = {} table.insert(top.frame, sublist) table.insert(stack, { frame = sublist, address = item.address, open = true }) else table.remove(stack) end else break end end if top.open then table.insert(top.frame, item) if item.what == "response" or item.what == "error" or item.what == "end" then -- 当用 skynet.response 回应时,就不会用 response , 但是会 end 。此时,cpu 开销不计算在内,时间开销计算在外部延迟中。 top.open = false elseif item.what == "sleep" and item.session then -- sub session local nodename = item.session.node nodename = nodename:match "s1_(.*)" -- todo: 为 s1_ 专门写的 local tagname = nodename .. "/" .. item.session.subtag local sublist = subs[tagname] if sublist and sublist[1] then local list = table.remove(sublist, 1) table.insert(top.frame, gen_tree(list, subs)) end end end end return result[1] end local function calc_time(tree) local ti = { time = 0, cpu = 0, latency = 0, net = 0, sleep = 0, cluster = 0, } for i, item in ipairs(tree) do if item.address then local next_time for j = i+1, #tree do local next_item = tree[j] if #next_item == 0 then next_time = next_item.timestamp end end if next_time then item.time = next_time - item.timestamp ti.time = ti.time + item.time local subframe = tree[i+1] if subframe.address == nil then local subti = calc_time(subframe) if item.what == "call" then local latency = item.time - subti.time ti.latency = ti.latency + latency ti.cpu = ti.cpu + subti.cpu ti.latency = ti.latency + subti.latency ti.net = ti.net + subti.net ti.sleep = ti.sleep + subti.sleep ti.cluster = ti.cluster + subti.cluster item.detail = subti elseif item.what == "sleep" then local net = item.time - subti.time ti.net = ti.net + net ti.cluster = ti.cluster + subti.time item.detail = subti end else -- todo : 把自己调用自己的时间统计分离为 cpu 消耗和调度消耗 -- 目前,自己调用自己会把当前服务响应其它无关请求需要的时间计入 cpu if item.what == "sleep" then ti.sleep = ti.sleep + item.time else ti.cpu = ti.cpu + item.time end end end end end return ti end ------------------------------ local function print_item(item) print(item.tag, item.address, item.what, item.info) end local function print_groups(groups) for idx, group in ipairs(groups) do print("frame", idx) for _, item in ipairs(group) do print_item(item) end end end local function time_detail(ti) local result = {} for k,v in pairs(ti) do if v > 0 then table.insert(result, string.format("%s=%fms",k, v/1000000)) end end return table.concat(result, ",") end local function timestring(node) if node.time == nil then return "" elseif node.detail then return string.format("%fms(%s)", node.time / 1000000, time_detail(node.detail) ) else return string.format("%fms", node.time / 1000000) end end local function print_tree(tree, level) if not tree.address then for _, subtree in ipairs(tree) do print_tree(subtree, level + 1) end else print(string.format("%s%s:%s %s %s %s", string.rep(" ", level-1), tree.node,tree.address, timestring(tree), tree.what, tree.info)) end end local function main() local logs = assert(read_logs "trace.log") local log, sub = filter_log_all(logs, "login1", ":00000063-7") local tree = gen_tree(log, sub) local ti = calc_time(tree) print(time_detail(ti)) print_tree(tree, 0) end main()