naqushab · May 6, 2020 11:37 · Dec 7, 2015 · May 23, 2015 · Apr 18, 2015
diff --git a/separator.py b/separator.py
@@ -1,16 +1,18 @@
-def separator(df,col,sep):
-    untouched_columns = [c for c in df.columns if c is not col]
-    divided_column = map(lambda x: x.split(sep),df[col].values)
-    divided_column = [item for sublist in divided_column for item in sublist]
-    repeats = [len(x.split(",")) for x in df[col]]
-    repeated_rows=[divided_column]
-    for c in untouched_columns:
-        working = zip(df[c].values,repeats)
-        working_accum=[]
-        for v,r in working:
-            working_accum.append([v]*r)
-        working_accum = [item for sublist in working_accum for item in sublist]
-        repeated_rows.append(working_accum)
-    new_names = [col]+untouched_columns
-    series = [pandas.Series(data=d,name=n) for d,n in zip(repeated_rows,new_names)]
-    return pandas.DataFrame(series).transpose()
+def splitDataFrameList(df,target_column,separator):
+    ''' df = dataframe to split,
+    target_column = the column containing the values to split
+    separator = the symbol used to perform the split
+
+    returns: a dataframe with each entry for the target column separated, with each element moved into a new row.
+    The values in the other columns are duplicated across the newly divided rows.
+    '''
+    new_rows = []
+    # Loop explicitly instead of df.apply: apply is not meant for side effects and may call the function extra times.
+    for _, row in df.iterrows():
+        for s in row[target_column].split(separator):
+            new_row = row.to_dict()
+            new_row[target_column] = s
+            new_rows.append(new_row)
+    # Passing columns keeps the original column order, and keeps the header when df has no rows.
+    new_df = pandas.DataFrame(new_rows, columns=df.columns)
+    return new_df
diff --git a/separator.py b/separator.py
@@ -0,0 +1,16 @@
+def separator(df,col,sep):
+    untouched_columns = [c for c in df.columns if c is not col]
+    divided_column = map(lambda x: x.split(sep),df[col].values)
+    divided_column = [item for sublist in divided_column for item in sublist]
+    repeats = [len(x.split(",")) for x in df[col]]
+    repeated_rows=[divided_column]
+    for c in untouched_columns:
+        working = zip(df[c].values,repeats)
+        working_accum=[]
+        for v,r in working:
+            working_accum.append([v]*r)
+        working_accum = [item for sublist in working_accum for item in sublist]
+        repeated_rows.append(working_accum)
+    new_names = [col]+untouched_columns
+    series = [pandas.Series(data=d,name=n) for d,n in zip(repeated_rows,new_names)]
+    return pandas.DataFrame(series).transpose()
No results found