"""Merge timestamped log files into one chronologically ordered stream.

Every input line is prefixed with ``[js_test:foo] <timestamp>+0000`` and,
when the file name is recognised, a short per-file tag. The tagged lines of
all files are then interleaved by timestamp and written to stdout.

Usage: pass the log files to merge as command-line arguments.
"""
import heapq
import re
import sys
from datetime import datetime
from operator import itemgetter
from typing import Iterator, Optional, Tuple

# File names starting with "service_mongo<N>" are tagged "d<N>"; file names
# starting with "mongo<N>_bridge.log" are tagged "b<N>".
_NODE_FILE_RE = re.compile(r'service_mongo(\d+)')
_BRIDGE_FILE_RE = re.compile(r'mongo(\d+)_bridge\.log')

# Like 2020-06-24T12:00:05.133+00:00 or 2020-06-24T12:00:33.887+0000
# or 2020-06-24T12:00:21.985Z. Only zero UTC offsets are recognised, which is
# why the output can always be stamped "+0000".
_TIMESTAMP_RE = re.compile(
    r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3})(Z|\+00:?00)')


def _file_id(file_name: str) -> Optional[str]:
    """Return the short tag for *file_name*, or None if it is not recognised.

    The patterns are anchored at the start of *file_name* exactly as given,
    so a name carrying a directory prefix is not recognised.
    """
    if (match := _NODE_FILE_RE.match(file_name)):
        return f'd{int(match.group(1))}'
    if (match := _BRIDGE_FILE_RE.match(file_name)):
        return f'b{int(match.group(1))}'
    return None


def parse_lines(file_name: str) -> Iterator[Tuple[datetime, str]]:
    """Yield ``(timestamp, annotated_line)`` for every line of *file_name*.

    A line without a recognisable timestamp inherits the timestamp of the
    closest preceding line that had one. Lines before the first timestamp
    get ``datetime.min`` and an empty timestamp string, so they sort first.

    Args:
        file_name: Path of the log file to read.

    Yields:
        Tuples of the (naive) parsed timestamp and the output line, which
        always ends with a newline.

    Raises:
        OSError: If the file cannot be opened.
    """
    ts = datetime.min
    ts_str = ""
    file_id = _file_id(file_name)
    prefix = f'{file_id}| ' if file_id else ''

    # The context manager closes the file once the generator is exhausted or
    # discarded, instead of leaving the handle to the garbage collector.
    with open(file_name) as log_file:
        for line in log_file:
            match = _TIMESTAMP_RE.search(line)
            if match:
                ts_str = match.group(1)
                ts = datetime.fromisoformat(ts_str)

            # A final line without a newline would otherwise be glued to the
            # next line emitted from another file.
            if not line.endswith('\n'):
                line += '\n'

            yield ts, f'[js_test:foo] {ts_str}+0000 {prefix}{line}'


def merge_logs(*file_names: str) -> Iterator[Tuple[datetime, str]]:
    """Lazily merge the annotated lines of several log files by timestamp.

    Each file is assumed to be in chronological order already; ``heapq.merge``
    only interleaves the streams and does not re-sort within a file. Lines
    with equal timestamps keep the order of *file_names*.

    Args:
        *file_names: Paths of the log files to merge.

    Returns:
        An iterator of ``(timestamp, annotated_line)`` tuples.
    """
    generators = (parse_lines(file_name) for file_name in file_names)
    return heapq.merge(*generators, key=itemgetter(0))


def main() -> None:
    """Merge the log files named on the command line and write to stdout."""
    for _, line in merge_logs(*sys.argv[1:]):
        sys.stdout.write(line)


if __name__ == "__main__":
    main()