# -*- coding: utf-8 -*-
"""Print the lines of a log file, skipping lines already seen.

Two lines count as duplicates when they are identical after their
volatile ``key=value`` fields (timestamp and alert count) are removed.
"""
import hashlib
import sys

# Prefixes of the whitespace-separated tokens that are ignored when comparing
# lines: the timestamp (``ts=...``) and the alert counter (``num_alerts=...``).
# NOTE(review): assumes logfmt-style ``key=value`` tokens -- confirm against
# the real log format.
_VOLATILE_PREFIXES = ("ts=", "num_alerts=")


def read_log(path):
    """Return the content of the file at *path* as a list of ``bytes`` lines.

    The file is read in binary mode; line terminators are stripped.
    """
    with open(path, "rb") as fh:
        return fh.read().splitlines()


def refine_log(raw_log):
    """Return *raw_log* normalized for duplicate detection.

    *raw_log* is one log line as ``bytes``.  It is split on whitespace, each
    token is decoded as UTF-8, tokens carrying a volatile field (see
    ``_VOLATILE_PREFIXES``) are dropped, and the rest are joined with single
    spaces into a ``str``.

    Only the field *key* is matched.  A plain substring test such as
    ``"ts" in token`` would also discard unrelated tokens like ``hosts=...``
    or ``attempts=...`` and merge lines that are genuinely different.

    Raises ``UnicodeDecodeError`` if a token is not valid UTF-8.
    """
    tokens = (token.decode("utf-8") for token in raw_log.split())
    return " ".join(
        token for token in tokens if not token.startswith(_VOLATILE_PREFIXES)
    )


def unique_logs(raw_logs):
    """Yield the first occurrence of each distinct line in *raw_logs*.

    *raw_logs* is an iterable of ``bytes`` lines.  Lines are compared by the
    SHA-1 digest of their ``refine_log`` form, so only digests (not whole
    lines) are kept in memory.  The original, unrefined line is yielded.
    """
    seen_digests = set()
    for raw_log in raw_logs:
        refined = refine_log(raw_log)
        digest = hashlib.sha1(refined.encode("utf-8")).hexdigest()
        if digest not in seen_digests:
            seen_digests.add(digest)
            yield raw_log


def main(path):
    """Write every non-duplicate line of the log file at *path* to stdout."""
    # Emit the original bytes unchanged: ``print`` on a bytes object would
    # output its repr (b'...') instead of the log line itself.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for raw_log in unique_logs(read_log(path)):
        out.write(raw_log + b"\n")
    out.flush()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: {} LOG_FILE".format(sys.argv[0]))
    main(sys.argv[1])