Skip to content

Instantly share code, notes, and snippets.

@DrOctogon
Forked from jrivero/csv_splitter.py
Last active August 29, 2015 14:11
Show Gist options
  • Save DrOctogon/3bedea7efd4d468d3c4f to your computer and use it in GitHub Desktop.
Save DrOctogon/3bedea7efd4d468d3c4f to your computer and use it in GitHub Desktop.

Revisions

  1. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -27,7 +27,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    output_path,
    output_name_template % current_piece
    )
    current_out_writer = csv.writer(open(current_out_path, 'w'))
    current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
    current_limit = row_limit
    if keep_headers:
    headers = reader.next()
    @@ -40,7 +40,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    output_path,
    output_name_template % current_piece
    )
    current_out_writer = csv.writer(open(current_out_path, 'w'))
    current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
    if keep_headers:
    current_out_writer.writerow(headers)
    current_out_writer.writerow(row)
  2. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -24,6 +24,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    reader = csv.reader(filehandler, delimiter=delimiter)
    current_piece = 1
    current_out_path = os.path.join(
    output_path,
    output_name_template % current_piece
    )
    current_out_writer = csv.writer(open(current_out_path, 'w'))
    @@ -36,6 +37,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    current_piece += 1
    current_limit = row_limit * current_piece
    current_out_path = os.path.join(
    output_path,
    output_name_template % current_piece
    )
    current_out_writer = csv.writer(open(current_out_path, 'w'))
  3. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    `row_limit`: The number of rows you want in each output file. 10,000 by default.
    `output_name_template`: A %s-style template for the numbered output files.
    `output_path`: Where to stick the output files
    `output_path`: Where to stick the output files.
    `keep_headers`: Whether or not to print the headers in each output file.
    Example usage:
  4. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -9,7 +9,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    Arguments:
    `row_limit`: The number of rows you want in each output file
    `row_limit`: The number of rows you want in each output file. 10,000 by default.
    `output_name_template`: A %s-style template for the numbered output files.
    `output_path`: Where to stick the output files
    `keep_headers`: Whether or not to print the headers in each output file.
  5. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -17,7 +17,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    Example usage:
    >> from toolbox import csv_splitter;
    >> csv_splitter.split(csv.splitter(open('/home/ben/input.csv', 'r')));
    >> csv_splitter.split(open('/home/ben/input.csv', 'r'));
    """
    import csv
  6. @palewire palewire revised this gist Sep 24, 2010. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion csv_splitter.py
    Original file line number Diff line number Diff line change
    @@ -17,7 +17,7 @@ def split(filehandler, delimiter=',', row_limit=10000,
    Example usage:
    >> from toolbox import csv_splitter;
    >> csv_splitter.split(csv.splitter(open('/home/ben/Desktop/lasd/2009-01-02 [00.00.00].csv', 'r')));
    >> csv_splitter.split(csv.splitter(open('/home/ben/input.csv', 'r')));
    """
    import csv
  7. @palewire palewire renamed this gist Sep 24, 2010. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  8. @palewire palewire created this gist Sep 24, 2010.
    44 changes: 44 additions & 0 deletions Python CSV splitter
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,44 @@
    import os

    def split(filehandler, delimiter=',', row_limit=10000,
              output_name_template='output_%s.csv', output_path='.',
              keep_headers=True):
        """
        Split a CSV file into multiple numbered pieces.

        Rows are read from `filehandler` with the standard `csv` module and
        written, `row_limit` rows at a time, to files named from
        `output_name_template` inside `output_path`. The first output file is
        always created, even when the input is empty.

        Arguments:

            `filehandler`: An open, readable file object (or any iterable of
                lines) containing the CSV data. It is not closed here.
            `delimiter`: The field separator used for both reading the input
                and writing the output files. A comma by default.
            `row_limit`: The number of data rows you want in each output file.
                10,000 by default. Must be at least 1.
            `output_name_template`: A %s-style template for the numbered
                output files. Numbering starts at 1.
            `output_path`: Where to stick the output files.
            `keep_headers`: Whether or not to print the headers in each output
                file. When true, the first input row is treated as the header
                and does not count towards `row_limit`; when false, every
                input row is treated as data.

        Returns:

            None. The results are the files written to `output_path`.

        Raises:

            ValueError: If `row_limit` is smaller than 1.

        Example usage:

            >> from toolbox import csv_splitter;
            >> csv_splitter.split(open('/home/ben/input.csv', 'r'));

        """
        import csv
        import sys

        if row_limit < 1:
            raise ValueError('row_limit must be at least 1, got %r' % (row_limit,))

        def open_piece(piece_number):
            # Build the path inside output_path and open it the way the csv
            # module expects on the running interpreter, so the writer's own
            # '\r\n' terminators are not translated a second time.
            piece_path = os.path.join(
                output_path,
                output_name_template % piece_number
            )
            if sys.version_info[0] >= 3:
                return open(piece_path, 'w', newline='')
            return open(piece_path, 'wb')

        reader = csv.reader(filehandler, delimiter=delimiter)
        current_piece = 1
        current_out_file = open_piece(current_piece)
        try:
            current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
            headers = None
            if keep_headers:
                # next() with a default avoids leaking StopIteration to the
                # caller when the input has no rows at all.
                headers = next(reader, None)
                if headers is None:
                    return
                current_out_writer.writerow(headers)
            for i, row in enumerate(reader):
                # `i` counts data rows only; once it reaches the cumulative
                # limit for the current piece, roll over to a new file.
                if i >= row_limit * current_piece:
                    current_out_file.close()
                    current_piece += 1
                    current_out_file = open_piece(current_piece)
                    current_out_writer = csv.writer(
                        current_out_file, delimiter=delimiter
                    )
                    if keep_headers:
                        current_out_writer.writerow(headers)
                current_out_writer.writerow(row)
        finally:
            # Closing an already-closed file is a no-op, so this is safe even
            # if opening the next piece failed right after a rollover.
            current_out_file.close()