""" Returns result in the format: { "B": { 2401: "1/6 = 2%", 3101: "1/6 = 2%" }, "C": { "02": "3/6 = 50%" }, "K": { " } } """ import csv from collections import defaultdict columns = defaultdict(list) # each value in each column is appended to a list def read_csv_by_column(file_path): with open(file_path) as f: reader = csv.DictReader(f) # read rows into a dictionary format for row in reader: # read a row as {column1: value1, column2: value2,...} for (k, v) in row.items(): # go over each column name and value columns[k].append(v) # append the value into the appropriate list return columns def grouped_result(columns): result = {} for header, values in columns.items(): # Special case if header.lower() in ['e', 'k']: tmp = [] for v in values: if not v.strip(): continue vs = [i.strip() for i in "{}".format(v).split("-") if i.strip()] tmp.extend(vs) values = tmp result[header] = {} tmp = {} for idx, v in enumerate(values): if v not in tmp: tmp[v] = 0 tmp[v] += 1 # Fix 0 start index idx += 1 for v in values: perc = round((tmp[v]*100.0)/idx, 2) result[header]["{}".format(v)] = "{}/{} = {}".format(tmp[v], idx, perc) return result if __name__ == '__main__': file_path = "data.csv" columns = read_csv_by_column(file_path) import json print(json.dumps(grouped_result(columns)))