Last active
March 16, 2021 19:58
-
-
Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.
Revisions
-
WinstonN revised this gist
Mar 16, 2021 . 1 changed file with 0 additions and 37 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,11 +3,6 @@ additional_attribute as a new column in a target file """ import csv # real files source_file = './catalog_product_20210316_044037.csv' @@ -16,14 +11,11 @@ def get_modified_headers(): reader = csv.reader(open(source_file, 'r')) headers = next(reader) # get all headers for additional_attributes additional_attributes_headers = [] i = 0 for row in reader: target_index = headers.index("additional_attributes") attributes = row[target_index].split(",") @@ -39,9 +31,7 @@ def get_modified_headers(): # iterator i += 1 headers_modified = headers + additional_attributes_headers return [headers_modified, additional_attributes_headers] @@ -53,16 +43,10 @@ def run_with_writer(): # get original headers headers = next(reader) # add additional attributes to headers modified_headers = get_modified_headers()[0] additional_attributes_headers = get_modified_headers()[1] # write headers to file writer.writerow(modified_headers) @@ -71,50 +55,29 @@ def run_with_writer(): i = 0 for row in reader: print(f'processing row: {i}') result = [None] * len(additional_attributes_headers) # pre-populate result (to avoid shifting items as we add insert them at the index) target_index = headers.index("additional_attributes") attributes = row[target_index].split(",") for attribute in attributes: data = attribute.split("=") if len(data) != 1: if data[0] != 'publish_date': header = data[0] value = data[1] target_index = additional_attributes_headers.index(header) result[target_index] = value modified_row = row + result # write modified row writer.writerow(modified_row) i += 1 -
WinstonN created this gist
Mar 16, 2021 . There are no files selected for viewing
"""
This script formats a magento product export file, and sets each
additional_attribute as a new column in a target file
"""
import csv
import pandas as pd  # NOTE(review): not referenced in this script; kept so the import surface is unchanged

# testing files
# source_file = './importexport_605048471de17.csv'
# target_file = './importexport_605048471de177_formatted.csv'

# real files
source_file = './catalog_product_20210316_044037.csv'
target_file = './catalog_product_20210316_044037_formatted.csv'

# Column of the export whose cells hold the packed "name=value,name=value" pairs.
ATTRIBUTES_COLUMN = "additional_attributes"

# Attribute names that are never promoted to a column of their own.
SKIPPED_ATTRIBUTES = frozenset({"publish_date"})


def _iter_additional_attributes(cell):
    """Yield ``(name, value)`` pairs parsed from one additional_attributes cell.

    The cell is split on ``,`` into chunks and every chunk is split on its
    FIRST ``=`` only, so a value that itself contains ``=`` (for example a URL
    with a query string) is kept whole. Chunks without any ``=`` and names
    listed in ``SKIPPED_ATTRIBUTES`` are ignored.
    """
    # NOTE(review): a value that itself contains a comma is still cut apart by
    # this split; confirm whether the export can produce such values.
    for chunk in cell.split(","):
        name, separator, value = chunk.partition("=")
        if not separator or name in SKIPPED_ATTRIBUTES:
            continue
        yield name, value


def _attributes_cell(row, column_index):
    """Return the additional_attributes cell of *row*, or ``""`` if the row is too short."""
    return row[column_index] if column_index < len(row) else ""


def get_modified_headers(source_path=None):
    """Collect every attribute name used in the source file.

    Args:
        source_path: CSV file to scan. Defaults to the module-level
            ``source_file``.

    Returns:
        A two-item list ``[headers_modified, additional_attributes_headers]``:
        the original header row followed by the discovered attribute names,
        and the discovered attribute names alone, in first-seen order.

    Raises:
        StopIteration: if the file has no header row.
        ValueError: if the header row has no ``additional_attributes`` column.
    """
    if source_path is None:
        source_path = source_file

    # newline='' is what the csv module expects from the file object.
    with open(source_path, 'r', newline='') as source:
        reader = csv.reader(source)
        headers = next(reader)
        column_index = headers.index(ATTRIBUTES_COLUMN)

        # A dict keeps insertion order and gives O(1) "already seen" checks.
        seen = {}
        for row in reader:
            cell = _attributes_cell(row, column_index)
            for name, _value in _iter_additional_attributes(cell):
                seen.setdefault(name, None)

    additional_attributes_headers = list(seen)
    headers_modified = headers + additional_attributes_headers
    return [headers_modified, additional_attributes_headers]


def run_with_writer(source_path=None, target_path=None):
    """Main run function.

    Copies every row of the source CSV to the target CSV and appends one
    column per discovered attribute; a row that does not carry a given
    attribute gets an empty cell in that column.

    Args:
        source_path: CSV file to read. Defaults to the module-level
            ``source_file``.
        target_path: CSV file to write (overwritten). Defaults to the
            module-level ``target_file``.

    Raises:
        StopIteration: if the source file has no header row.
        ValueError: if the header row has no ``additional_attributes`` column.
    """
    if source_path is None:
        source_path = source_file
    if target_path is None:
        target_path = target_file

    # First pass over the source: discover the extra columns (done once).
    modified_headers, additional_attributes_headers = get_modified_headers(source_path)
    position_of = {name: pos for pos, name in enumerate(additional_attributes_headers)}

    # Second pass: rewrite each row with the extra columns filled in.
    with open(source_path, 'r', newline='') as source, \
            open(target_path, 'w', newline='') as target:
        reader = csv.reader(source)
        writer = csv.writer(target)

        headers = next(reader)
        column_index = headers.index(ATTRIBUTES_COLUMN)
        writer.writerow(modified_headers)

        for i, row in enumerate(reader):
            print(f'processing row: {i}')

            # Pre-sized so each value lands directly under its own header.
            result = [None] * len(additional_attributes_headers)
            cell = _attributes_cell(row, column_index)
            for name, value in _iter_additional_attributes(cell):
                result[position_of[name]] = value

            # Pad short rows so the appended cells stay aligned with the
            # appended headers (no-op for rows of full width).
            padding = [""] * (len(headers) - len(row))
            writer.writerow(row + padding + result)


# Main entry point
if __name__ == '__main__':
    run_with_writer()