import json
import os

import fastavro

# Gather every Avro record from the 24 hourly partitions of a single day and
# dump them all into one pretty-printed JSON file.
ymd = "2021-08-21"

# Template for an hourly partition directory; the placeholder receives
# "<ymd>-<HH>". Despite the name, the directory is listed with os.listdir and
# its files are read with open(), so it must be reachable as a local
# filesystem path.
s3_path = "path/to/avro/fils/with/date={}"

# Decoded records, accumulated hour by hour and, within an hour, in sorted
# file-path order.
data = []
for hour in range(24):
    hour_dir = s3_path.format(f"{ymd}-{hour:02d}")

    # Only files ending in ".avro" are read; sorting makes the output order
    # independent of the order os.listdir happens to return.
    avro_paths = [
        os.path.join(hour_dir, name)
        for name in os.listdir(hour_dir)
        if name.endswith(".avro")
    ]
    avro_paths.sort()

    for avro_path in avro_paths:
        with open(avro_path, "rb") as avro_file:
            # fastavro.reader is iterated once, record by record.
            data.extend(fastavro.reader(avro_file))

# Everything is held in memory and written in a single json.dump call.
with open('out.json', 'w') as sink:
    json.dump(data, sink, sort_keys=True, indent=4)