Skip to content

Instantly share code, notes, and snippets.

@WinstonN
Last active March 16, 2021 19:58
Show Gist options
  • Select an option

  • Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.

Select an option

Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.

Revisions

  1. WinstonN revised this gist Mar 16, 2021. 1 changed file with 0 additions and 37 deletions.
    37 changes: 0 additions & 37 deletions flatten_additional_attributes.py
    Original file line number Diff line number Diff line change
    @@ -3,11 +3,6 @@
    additional_attribute as a new column in a target file
    """
    import csv
    import pandas as pd

    # testing files
    # source_file = './importexport_605048471de17.csv'
    # target_file = './importexport_605048471de177_formatted.csv'

    # real files
    source_file = './catalog_product_20210316_044037.csv'
    @@ -16,14 +11,11 @@
    def get_modified_headers():
    reader = csv.reader(open(source_file, 'r'))
    headers = next(reader)
    # print('headers original')
    # print(headers)

    # get all headers for additional_attributes
    additional_attributes_headers = []
    i = 0
    for row in reader:
    # print(f'processing row: {i}')
    target_index = headers.index("additional_attributes")

    attributes = row[target_index].split(",")
    @@ -39,9 +31,7 @@ def get_modified_headers():
    # iterator
    i += 1

    # print('headers modified')
    headers_modified = headers + additional_attributes_headers
    # print(headers_modified)

    return [headers_modified, additional_attributes_headers]

    @@ -53,16 +43,10 @@ def run_with_writer():

    # get original headers
    headers = next(reader)
    # print('original headers')
    # print(headers)

    # add additional attributes to headers
    modified_headers = get_modified_headers()[0]
    additional_attributes_headers = get_modified_headers()[1]
    # print('modified headers')
    # print(modified_headers)
    # print('additional_attribute headers')
    # print(additional_attributes_headers)

    # write headers to file
    writer.writerow(modified_headers)
    @@ -71,50 +55,29 @@ def run_with_writer():
    i = 0
    for row in reader:
    print(f'processing row: {i}')
    # print('original row')
    # print(row)

    result = [None] * len(additional_attributes_headers)
    # print(result)
    # pre-populate result (to avoid shifting items as we add insert them at the index)
    target_index = headers.index("additional_attributes")

    # print(row[target_index])
    attributes = row[target_index].split(",")
    # print(attributes)

    for attribute in attributes:
    # print(attribute)
    data = attribute.split("=")

    if len(data) != 1:
    if data[0] != 'publish_date':
    header = data[0]
    value = data[1]

    # print('get target index for value')
    target_index = additional_attributes_headers.index(header)
    # print(f'insert {value} at index {target_index}')
    result[target_index] = value


    modified_row = row + result
    # print('modified headers')
    # print(modified_headers)
    # print('modified row')
    # print(modified_row)

    # write data
    # write original row
    # writer.writerow(row)

    # write modified row
    writer.writerow(modified_row)

    # break loop
    # if i == 10:
    # break

    i += 1


  2. WinstonN created this gist Mar 16, 2021.
    123 changes: 123 additions & 0 deletions flatten_additional_attributes.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,123 @@
    """
    Flatten a Magento product export CSV: every key found in the
    "additional_attributes" column is written as its own column in a target file.
    """
    import csv
    import pandas as pd

    # testing files
    # source_file = './importexport_605048471de17.csv'
    # target_file = './importexport_605048471de177_formatted.csv'

    # real files: source_file is the CSV that gets read, target_file is the CSV that gets written
    source_file = './catalog_product_20210316_044037.csv'
    target_file = './catalog_product_20210316_044037_formatted.csv'

    def get_modified_headers(path=None):
        """Collect the extra column names hidden in "additional_attributes".

        Reads the CSV at ``path`` (the module-level ``source_file`` when
        ``path`` is None), splits every "additional_attributes" cell on ","
        and takes the text before the first "=" of each piece as a column
        name. Pieces without "=" and the ``publish_date`` key are ignored.

        Returns a two-element list ``[headers_modified, additional_headers]``:
        the original header row with the new names appended, and the new
        names alone, both in order of first appearance.

        Raises ValueError when the header row has no "additional_attributes"
        column, and StopIteration when the file has no header row at all.
        """
        if path is None:
            path = source_file

        # newline='' is what the csv module expects; the `with` block closes
        # the handle that the bare open() call used to leak.
        with open(path, 'r', newline='') as handle:
            reader = csv.reader(handle)
            headers = next(reader)

            # The column position never changes, so look it up once.
            attributes_index = headers.index("additional_attributes")

            # A dict keeps insertion order and gives O(1) duplicate checks.
            seen = {}
            for row in reader:
                # NOTE(review): a plain split(",") also cuts values that
                # themselves contain commas; confirm the export never does.
                for attribute in row[attributes_index].split(","):
                    name, separator, _ = attribute.partition("=")
                    if separator and name != 'publish_date':
                        seen.setdefault(name, None)

        additional_attributes_headers = list(seen)
        headers_modified = headers + additional_attributes_headers

        return [headers_modified, additional_attributes_headers]


    def run_with_writer():
        """Main run function.

        Copies every row of ``source_file`` into ``target_file`` and appends
        one column per key found in the "additional_attributes" cells. A row
        that lacks a given key gets an empty cell in that column. The
        ``publish_date`` key and pieces without "=" are skipped, matching
        get_modified_headers().

        Prints a progress line for each data row. Raises ValueError when the
        header row has no "additional_attributes" column.
        """
        # One call only: each call scans the whole source file.
        modified_headers, additional_attributes_headers = get_modified_headers()

        # name -> offset inside the appended columns, built once instead of
        # running list.index() for every attribute of every row.
        column_of = {
            name: offset
            for offset, name in enumerate(additional_attributes_headers)
        }

        # newline='' is required by the csv module (without it the writer
        # emits "\r\r\n" on Windows); `with` guarantees the output is flushed
        # and both handles are closed.
        with open(source_file, 'r', newline='') as source, \
                open(target_file, 'w', newline='') as target:
            reader = csv.reader(source)
            writer = csv.writer(target)

            # get original headers
            headers = next(reader)
            attributes_index = headers.index("additional_attributes")

            # write headers to file
            writer.writerow(modified_headers)

            # extract data and populate rows
            for i, row in enumerate(reader):
                print(f'processing row: {i}')

                # Pre-sized so each value lands under its own header.
                result = [None] * len(additional_attributes_headers)

                for attribute in row[attributes_index].split(","):
                    # Split on the first "=" only, so a value that itself
                    # contains "=" (e.g. a URL query string) stays whole.
                    name, separator, value = attribute.partition("=")
                    if separator and name != 'publish_date':
                        result[column_of[name]] = value

                # write modified row
                writer.writerow(row + result)


    # Main entry point: run the conversion only when this file is executed
    # directly, not when it is imported.
    if __name__ == '__main__':
        run_with_writer()