#!/usr/bin/env ruby
# frozen_string_literal: true

# chasepdf2csv -- Convert Chase credit card statements from PDF to CSV. Written
# to easily import older statements into QuickBooks Online/Self-Employed. Chase
# unfortunately only offers statements up to 6 months in the past, making it a
# huge chore to synchronize past transactions.
#
# How to Use
# ----------
# This script requires Ruby >2.0.0 and pdftotext. Copy this script somewhere and
# make it executable. Run it like any other command.
#
# ISC License
# -----------
# Copyright (c) 2018-2020 Ivy Evans
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

require 'csv'
require 'optparse'

# Print +msg+ to standard error, prefixed with "error: ".
def error(msg)
  STDERR.puts("error: #{msg}")
end

# Report +msg+ via #error and terminate the process with exit status 1.
# Callers must not add their own "error: " prefix; #error already does.
def fatal(msg)
  error(msg)
  exit 1
end

# The transactions extracted from one statement PDF.
class Statement
  # Matches the "Payment Due Date: MM/DD/YY" field. The two-digit year is the
  # only year information used to date transactions, which are printed on the
  # statement as MM/DD only.
  DUE_DATE_PATTERN = %r{
    Payment\s+Due\s+Date:?
    \s+
    (?<month>\d{2})/(?<day>\d{2})/(?<year>\d{2})
  }x

  # A single line item (purchase, payment, credit, ...) on a statement.
  class Transaction
    # Regex for matching transactions in a Chase credit statement.
    #
    # Edge Case: Amazon orders
    #
    #   01/23 AMAZON MKTPLACE PMTS AMZN.COM/BILL WA 12.34\n
    #   Order Number 123-4567890-1234567\n
    #
    # Edge Case: Rewards points
    #
    #   01/23 AMAZON MARKETPLACE AMZN.COM/BILLWA 4.56 7,890
    #
    LINE_ITEM_PATTERN = %r{
      (?<date>\d{2}/\d{2})
      \s+
      (?<description>.+)
      \s+
      (?<amount>-?[\d,]+\.\d{2})
      (
        [ ]
        (?<points>[1-9][\d,]+)?
        |
        \s*
        Order\s+Number\s+
        (?<order_num>[^\s]+)
      )?
    }x

    # Build a Transaction for every line item found in +output+.
    #
    # output    - String of text extracted from the statement.
    # year      - String year of the statement's payment due date.
    # due_month - Optional String/Integer month of the payment due date. When
    #             given, a transaction whose month is later than the due month
    #             is dated in the previous year: a transaction always precedes
    #             the due date, so e.g. a 12/20 purchase on a statement due in
    #             January belongs to the year before. When omitted, every
    #             transaction gets +year+ unchanged.
    #
    # Returns an Array of Transaction.
    def self.scan(output, year, due_month: nil)
      output.to_enum(:scan, LINE_ITEM_PATTERN).map do
        # Grab the MatchData first; any later regex use would replace it.
        match = Regexp.last_match
        new(match, year_for(match[:date], year, due_month))
      end
    end

    # Pick the year for a transaction dated +date+ ("MM/DD") on a statement due
    # in +due_month+ of +due_year+. The result keeps the digit width of
    # +due_year+ and wraps around (e.g. "00" rolls back to "99").
    def self.year_for(date, due_year, due_month)
      return due_year unless due_month && date[0, 2].to_i > due_month.to_i

      width = due_year.to_s.length
      format("%0#{width}d", (due_year.to_i - 1) % (10**width))
    end
    private_class_method :year_for

    # data - MatchData (or Hash-like) exposing :date, :description, :amount,
    #        :points and :order_num.
    # year - year appended to the MM/DD date, yielding "MM/DD/<year>".
    def initialize(data, year)
      @date = "#{data[:date]}/#{year}"
      @description = data[:description]
      @amount = data[:amount]
      @points = data[:points]
      @order_num = data[:order_num]
    end

    attr_reader :date, :amount, :points, :order_num

    # Truthy (the captured text) when present, nil otherwise.
    alias rewards? points
    alias order_num? order_num

    # The description text, suffixed with " #<order number>" when the line
    # item carried an order number.
    def description
      order_num? ? "#{@description} ##{order_num}" : @description
    end

    def to_hash
      {
        date: date,
        description: description,
        amount: amount,
        points: points,
        order_num: order_num,
      }
    end
    alias to_h to_hash
  end

  attr_reader :line_items

  # Extract the text of the PDF at +path+ with pdftotext and parse it.
  #
  # Terminates the process (via #fatal) when pdftotext cannot be run, exits
  # unsuccessfully, or when no payment due date can be found in its output.
  #
  # Returns a Statement.
  def self.parse(path)
    begin
      # Argv-array form: no shell is involved, so paths containing spaces or
      # shell metacharacters are passed to pdftotext verbatim.
      output = IO.popen(['pdftotext', '-raw', path.to_s, '-'], &:read)
    rescue SystemCallError => e
      fatal "pdftotext: #{e.message}"
    end

    # IO.popen with a block sets $? once the child has been reaped.
    unless $?.success?
      fatal "pdftotext: failed to parse #{path} (exit code #{$?.exitstatus})"
    end

    due = output.match(DUE_DATE_PATTERN)
    fatal "parse error: could not match due date in #{path}" unless due

    new(Transaction.scan(output, due[:year], due_month: due[:month]))
  end

  def initialize(line_items)
    @line_items = line_items
  end

  # Yield each Transaction in statement order.
  def each_line_item(&block)
    line_items.each(&block)
  end
end

# Command-line entry point.
#
# args - Array of command-line arguments: options followed by one or more
#        statement PDF paths. Option switches are removed from it in place.
#
# Writes a CSV with Date, Description and Amount columns to standard output,
# or to the file named by -o/--output. Line items carrying a rewards-points
# figure are skipped.
def main(args = ARGV)
  unless system('command -v pdftotext >/dev/null 2>&1')
    fatal 'pdftotext not found!'
  end

  outpath = nil
  options = OptionParser.new do |opts|
    opts.banner = "Usage: #{$0} [options] FILE..."
    opts.on('-o', '--output=FILE', 'Output to file') do |path|
      outpath = path
    end
    opts.on('-h', '--help', 'Show this message') do
      puts opts
      exit
    end
  end

  begin
    options.parse!(args)
  rescue OptionParser::ParseError => e
    fatal e.message
  end

  fatal 'no files specified' if args.empty?

  # Open the output only after the arguments have been validated, so a usage
  # error does not truncate an existing file.
  begin
    outfile = outpath ? File.open(outpath, 'w') : STDOUT
  rescue SystemCallError => e
    fatal "cannot open #{outpath}: #{e.message}"
  end

  begin
    csv = CSV.new(
      outfile,
      headers: %w[Date Description Amount],
      write_headers: true,
    )
    args.each do |file|
      Statement.parse(file).each_line_item do |line_item|
        next if line_item.rewards?

        csv << [line_item.date, line_item.description, line_item.amount]
      end
    end
  ensure
    # Also runs when #fatal exits part-way through, flushing what was written.
    outfile.close unless outfile.equal?(STDOUT)
  end
end

if $0 == __FILE__
  main
end