WinstonN · March 16, 2021 19:58 · Mar 16, 2021 · Mar 16, 2021
diff --git a/flatten_additional_attributes.py b/flatten_additional_attributes.py
@@ -3,11 +3,6 @@
 additional_attribute as a new column in a target file
 """
 import csv
-import pandas as pd
-
-# testing files
-# source_file = './importexport_605048471de17.csv'
-# target_file = './importexport_605048471de177_formatted.csv'
 
 # real files
 source_file = './catalog_product_20210316_044037.csv'
@@ -16,14 +11,11 @@
 def get_modified_headers():
     reader = csv.reader(open(source_file, 'r'))
     headers = next(reader)
-    # print('headers original')
-    # print(headers)
 
     # get all headers for additional_attributes
     additional_attributes_headers = []
     i = 0
     for row in reader:
-        # print(f'processing row: {i}')
         target_index = headers.index("additional_attributes")
 
         attributes = row[target_index].split(",")
@@ -39,9 +31,7 @@ def get_modified_headers():
         # iterator
         i += 1
 
-    # print('headers modified')
     headers_modified = headers + additional_attributes_headers
-    # print(headers_modified)
 
     return [headers_modified, additional_attributes_headers]
 
@@ -53,16 +43,10 @@ def run_with_writer():
 
     # get original headers
     headers = next(reader)
-    # print('original headers')
-    # print(headers)
 
     # add additional attributes to headers
     modified_headers = get_modified_headers()[0]
     additional_attributes_headers = get_modified_headers()[1]
-    # print('modified headers')
-    # print(modified_headers)
-    # print('additional_attribute headers')
-    # print(additional_attributes_headers)
 
     # write headers to file
     writer.writerow(modified_headers)
@@ -71,50 +55,29 @@ def run_with_writer():
     i = 0
     for row in reader:
         print(f'processing row: {i}')
-        # print('original row')
-        # print(row)
 
         result = [None] * len(additional_attributes_headers)
-        # print(result)
         # pre-populate result (to avoid shifting items as we add insert them at the index)
         target_index = headers.index("additional_attributes")
-
-        # print(row[target_index])
         attributes = row[target_index].split(",")
-        # print(attributes)
 
         for attribute in attributes:
-            # print(attribute)
             data = attribute.split("=")
 
             if len(data) != 1:
                 if data[0] != 'publish_date':
                     header = data[0]
                     value = data[1]
 
-                    # print('get target index for value')
                     target_index = additional_attributes_headers.index(header)
-                    # print(f'insert {value} at index {target_index}')
                     result[target_index] = value
 
 
         modified_row = row + result
-        # print('modified headers')
-        # print(modified_headers)
-        # print('modified row')
-        # print(modified_row)
-
-        # write data
-        # write original row
-        # writer.writerow(row)
 
         # write modified row
         writer.writerow(modified_row)
 
-        # break loop
-        # if i == 10:
-        #     break
-
         i += 1
 
 

diff --git a/flatten_additional_attributes.py b/flatten_additional_attributes.py
@@ -0,0 +1,123 @@
+"""
+This script formats a magento product export file, and sets each
+additional_attribute as a new column in a target file
+"""
+import csv
+import pandas as pd
+
+# testing files: alternative input/output pair, left commented out
+# source_file = './importexport_605048471de17.csv'
+# target_file = './importexport_605048471de177_formatted.csv'
+
+# real files: the export that is read and the flattened copy that is written
+source_file = './catalog_product_20210316_044037.csv'
+target_file = './catalog_product_20210316_044037_formatted.csv'
+
+def get_modified_headers():
+    """
+    Build the header row of the flattened export.
+
+    Reads every data row of ``source_file`` and collects the distinct keys
+    of the ``key=value`` pairs in its ``additional_attributes`` column, in
+    order of first appearance. The ``publish_date`` key is skipped. Pairs
+    are split on every comma, even one that sits inside a value.
+
+    Returns a two-item list: the original headers followed by the
+    collected keys, and the collected keys on their own.
+
+    Raises ValueError if there is no ``additional_attributes`` column and
+    StopIteration if the file has no header row.
+    """
+    additional_attributes_headers = []
+    seen = set()  # O(1) duplicate check; the list keeps first-seen order
+    # newline='' is what the csv module asks for; `with` closes the file
+    with open(source_file, 'r', newline='') as source:
+        reader = csv.reader(source)
+        headers = next(reader)
+        # the column position is the same for every row: look it up once
+        target_index = headers.index("additional_attributes")
+        for row in reader:
+            for attribute in row[target_index].split(","):
+                key, separator, _ = attribute.partition("=")
+                if separator and key != 'publish_date' and key not in seen:
+                    seen.add(key)
+                    additional_attributes_headers.append(key)
+    headers_modified = headers + additional_attributes_headers
+    return [headers_modified, additional_attributes_headers]
+
+
+def run_with_writer():
+    """
+    Main run function: write the flattened copy of the export.
+
+    Copies every row of ``source_file`` to ``target_file`` and appends one
+    column per key returned by ``get_modified_headers()``. Each
+    ``key=value`` pair in a row's ``additional_attributes`` column is
+    placed under the column named after its key; columns the row has no
+    pair for stay empty. ``publish_date`` pairs are skipped.
+
+    The original columns, ``additional_attributes`` included, are written
+    through unchanged. One progress line is printed per row.
+
+    Limitation: pairs are split on every comma, so a value that itself
+    contains a comma is not read as one pair.
+
+    Raises:
+        ValueError: if the source has no ``additional_attributes`` column.
+        StopIteration: if the source file has no header row.
+    """
+    # add additional attributes to headers: one call yields both lists
+    # (calling it once per list re-read the whole source file each time)
+    modified_headers, additional_attributes_headers = get_modified_headers()
+
+    # attribute key -> offset inside the appended block of columns, so a
+    # pair is placed with a dict lookup instead of a list.index() scan
+    offsets = {
+        header: offset
+        for offset, header in enumerate(additional_attributes_headers)
+    }
+
+    # newline='' is what the csv module asks for on both files (without it
+    # the writer can add an extra \r to every row on Windows); the `with`
+    # block flushes and closes both files even if a row fails
+    with open(source_file, 'r', newline='') as source, \
+            open(target_file, 'w', newline='') as target:
+        reader = csv.reader(source)
+        writer = csv.writer(target)
+
+        # get original headers
+        headers = next(reader)
+
+        # the column position is the same for every row: look it up once
+        source_index = headers.index("additional_attributes")
+
+        # write headers to file
+        writer.writerow(modified_headers)
+
+        # extract data and populate rows
+        for i, row in enumerate(reader):
+            print(f'processing row: {i}')
+
+            # pre-populate the appended cells so each value can be stored
+            # at its column's offset; None is written as an empty field
+            result = [None] * len(additional_attributes_headers)
+
+            for attribute in row[source_index].split(","):
+                # split on the first '=' only, so a value that contains
+                # '=' is kept whole instead of being cut at the second one
+                header, separator, value = attribute.partition("=")
+
+                # fragments without '=' carry no pair; publish_date has
+                # no column because get_modified_headers() leaves it out
+                if separator and header != 'publish_date':
+                    result[offsets[header]] = value
+
+            modified_row = row + result
+
+            # write modified row
+            writer.writerow(modified_row)
+
+
+# Main entry point: run the conversion only when executed as a script
+if __name__ == '__main__':
+    run_with_writer()
No results found