""" This script formats a magento product export file, and sets each additional_attribute as a new column in a target file """ import csv # real files source_file = './catalog_product_20210316_044037.csv' target_file = './catalog_product_20210316_044037_formatted.csv' def get_modified_headers(): reader = csv.reader(open(source_file, 'r')) headers = next(reader) # get all headers for additional_attributes additional_attributes_headers = [] i = 0 for row in reader: target_index = headers.index("additional_attributes") attributes = row[target_index].split(",") for attribute in attributes: data = attribute.split("=") if len(data) != 1: if data[0] != 'publish_date': header = data[0] # add attribute as header if header not in additional_attributes_headers: additional_attributes_headers.append(header) # iterator i += 1 headers_modified = headers + additional_attributes_headers return [headers_modified, additional_attributes_headers] def run_with_writer(): """ Main run function """ reader = csv.reader(open(source_file, 'r')) writer = csv.writer(open(target_file, 'w')) # get original headers headers = next(reader) # add additional attributes to headers modified_headers = get_modified_headers()[0] additional_attributes_headers = get_modified_headers()[1] # write headers to file writer.writerow(modified_headers) # extract data and populate rows i = 0 for row in reader: print(f'processing row: {i}') result = [None] * len(additional_attributes_headers) # pre-populate result (to avoid shifting items as we add insert them at the index) target_index = headers.index("additional_attributes") attributes = row[target_index].split(",") for attribute in attributes: data = attribute.split("=") if len(data) != 1: if data[0] != 'publish_date': header = data[0] value = data[1] target_index = additional_attributes_headers.index(header) result[target_index] = value modified_row = row + result # write modified row writer.writerow(modified_row) i += 1 # Main entry point if __name__ == '__main__': run_with_writer()