Skip to content

Instantly share code, notes, and snippets.

@scionoftech
Last active February 17, 2025 04:27
Show Gist options
  • Save scionoftech/5a635a0fe39aa5e226476545da0f406a to your computer and use it in GitHub Desktop.
Save scionoftech/5a635a0fe39aa5e226476545da0f406a to your computer and use it in GitHub Desktop.

Revisions

  1. scionoftech revised this gist Jun 14, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pdf2excel.py
    Original file line number Diff line number Diff line change
    @@ -22,7 +22,7 @@ def to_excel(path, output_path):

    @click.command()
    @click.option('--input_path', '-ip', help='input file path')
    -@click.option('--output_path', '-op', help='output file path')
    +@click.option('--output_path', '-op', help='output folder path')
    def convert(input_path, output_path):
    """This program generates xlsx files from pdf."""
    to_excel(input_path, output_path)
  2. scionoftech created this gist Jun 14, 2019.
    32 changes: 32 additions & 0 deletions pdf2excel.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,32 @@
    import click
    from pathlib import Path
    import pdfplumber
    import pandas as pd
    from pandas import ExcelWriter


    def to_excel(path, output_path):
        """Extract the tables of a PDF and write them to one Excel sheet.

        For every page, the rows returned by pdfplumber's
        ``page.extract_table()`` are appended to a single list, which is
        turned into a DataFrame and written to ``<output_path>/<name>.xlsx``
        on sheet ``Sheet1`` without the index column, where ``<name>`` is
        the input file name minus its final extension.

        Args:
            path: Path of the input PDF file.
            output_path: Folder in which the .xlsx file is written; it is
                not created by this function.
        """
        rows = []
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                table = page.extract_table()
                # extract_table() yields None for a page without a table;
                # skip such pages instead of failing on ``rows += None``.
                if table:
                    rows.extend(table)

        # Build the target with pathlib: the original relied on ``os.sep``
        # although ``os`` is never imported.  ``stem`` drops only the final
        # suffix, so "report.v2.pdf" becomes "report.v2.xlsx".
        target = Path(output_path) / (Path(path).stem + '.xlsx')

        # ExcelWriter.save() was removed in pandas 2.0; the context manager
        # saves and closes the workbook on exit, also when an error occurs.
        with ExcelWriter(str(target)) as writer:
            df = pd.DataFrame(rows)
            df.to_excel(writer, sheet_name='Sheet1', index=False)


    # Command-line entry point.  Both options are required: without them
    # click would pass None on to to_excel().  --output_path is used as a
    # folder (the .xlsx name is derived from the input file), so the help
    # text says "folder" rather than "file".
    @click.command()
    @click.option('--input_path', '-ip', required=True,
                  help='input file path')
    @click.option('--output_path', '-op', required=True,
                  help='output folder path')
    def convert(input_path, output_path):
        """This program generates xlsx files from pdf."""
        to_excel(input_path, output_path)


    # Run the click command only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        convert()