lorenrogers · March 8, 2016 17:35
diff --git a/ofx-to-ledger.py b/ofx-to-ledger.py
 # Originally from https://gist.github.com/jeffrichards/4694710

 # TODO make matching with existing transactions happen.  That is if I make a payment to
 # my credit card and then use this tool to load both my credit card statement and my bank
 # account statement into my ledger file, I don't want to import this transaction twice.

 # TODO improve the category selection to have some ability to fuzzy match, particularly for
 # payees that have numbers in their name is RONA #8868 where the number is (probably) the
 # store number, I would like any future RONA purchases even from new store numbers the same way

 # TODO to use memo fields to help category selction based on switches within the account
 # my bank keeps all the usable information for category matching in the memo field, so I need
 # to be looking there on occasion, my credit card keeps all the good info in the payee field.
 # I used to preprocess the download from the bank when I used gnucash, but now that I'm writing
 # the script that hsould just be built in as an option.

 from __future__ import print_function
 from ofxparse import OfxParser
 import os
 import re
 import sys
 from collections import Counter

 if len(sys.argv) != 1:
    print ('This utility does not take command-line arguments')
    exit()

 if not 'LEDGER_FILE' in os.environ:
    print ('Please set the environment variable LEDGER_FILE to point to the ledger file')
    exit()

 known_ids = set()
 already_imported = set()
 ###### (SET THIS TO MATCH YOUR ACCOUNTS WHEN YOU RUN THE SCRIPT) ######
 account_id_to_account_name = {'12345': 'Assets:ExampleSavings'}
 transactions=0
 account_listing = []
 with open(os.environ['LEDGER_FILE'],'r') as ledger_scan:
    for line in ledger_scan:
        id_match = re.search("@id +(\S+.*)", line)
        import_match = re.search("@imported +(\S+.*)", line)
        account_map_match = re.search("@account +(\S*) +(\S+.*)", line)
        if line[0].isdigit():
            transactions = transactions + 1
            transaction_line = line.strip()
            transaction_line = re.sub('[0-9]*/[0-9]*/[0-9]*\s*','',transaction_line)
        else:
            if transactions>=1:
                if not line.startswith(';'):
                    account_line =line.strip();
                    account_line = re.sub('[\s\-]*[0-9]*\.[0-9]*','',line)
                    account_line = re.sub('[(\!\*)\s]*','',account_line,1)

                    account_listing.append((transaction_line, account_line))


        if id_match != None:
            known_ids.add(id_match.group(1))
        if import_match != None:
            already_imported.add(import_match.group(1))
        elif account_map_match != None:
            account_id_to_account_name[account_map_match.group(1)] = account_map_match.group(2)

 d = {}

 if transactions>1:
    for x, y in account_listing:
        d.setdefault(x.strip(), []).append(y.strip())

 with open(os.environ['LEDGER_FILE'],'ab') as ledger_output:
    for (dirpath,dirnames,filenames) in os.walk('.',False):
        for filename in filenames:
            if (filename.endswith('.ofx') or filename.endswith('.qfx')) and not filename in already_imported:
                print ("Importing {0}".format(filename),end='')
                with open(os.path.join(dirpath, filename),'r') as ofx_file:
                    ofx = OfxParser.parse(ofx_file)
                    account_name = account_id_to_account_name[ofx.account.number.encode('ascii')]
                    print (" ({0})".format(account_name))

                    ledger_output.write('\n\n\n\n\n;;;; ######################################################################\n')
                    ledger_output.write(';;;; @imported {0}\n'.format(filename))
                    ledger_output.write(';;;; ######################################################################\n\n')

                    def transaction_sort_key(t):
                        try:
                            return (t.date, t.payee)
                        except AttributeError:
                            return (t.date, "UNSPECIFIED PAYEE")

                    for t in sorted(ofx.account.statement.transactions, key=transaction_sort_key):
                        if len(t.id) > 10:
                            unique_id = t.id
                        else:
                            unique_id = ofx.account.number.encode('ascii') + "." + t.id

                        if unique_id in known_ids:
                            continue

                        print ("    {0}".format(unique_id))
                        date = t.date.date()
                        ledger_output.write ('; @id {0}\n'.format(unique_id))

                        try:
                            payee = t.payee
                        except AttributeError:
                            payee = "UNSPECIFIED PAYEE"

                        ledger_output.write ('{0}/{1}/{2} {3}\n'.format(date.year,date.month,date.day,payee))

                        t.amount = float(t.amount)

                        try:
                            categories = Counter(d[payee.strip()])
                            written = 0
                            found_account = 'Expenses:unknown'
                            for acc in categories:
                                if re.search('Income|Expenses.*',acc):
                                    found_account = acc

                        except KeyError:
                            found_account='Expenses:unknown'

                        if len(t.memo) > 0:
                            ledger_output.write ('    {0}    ${1:0.2f}   ; {2}\n'.format(
                                account_name, t.amount, t.memo))
                        else:
                                ledger_output.write ('    {0}    ${1:0.2f}\n'.format(
                                    account_name, t.amount))


                        ledger_output.write ('    {0}\n\n'.format(found_account))
	# Originally from https://gist.github.com/jeffrichards/4694710

	# TODO make matching with existing transactions happen. That is if I make a payment to
	# my credit card and then use this tool to load both my credit card statement and my bank
	# account statement into my ledger file, I don't want to import this transaction twice.

	# TODO improve the category selection to have some ability to fuzzy match, particularly for
	# payees that have numbers in their name is RONA #8868 where the number is (probably) the
	# store number, I would like any future RONA purchases even from new store numbers the same way

	# TODO to use memo fields to help category selction based on switches within the account
	# my bank keeps all the usable information for category matching in the memo field, so I need
	# to be looking there on occasion, my credit card keeps all the good info in the payee field.
	# I used to preprocess the download from the bank when I used gnucash, but now that I'm writing
	# the script that hsould just be built in as an option.

	from __future__ import print_function
	from ofxparse import OfxParser
	import os
	import re
	import sys
	from collections import Counter

	if len(sys.argv) != 1:
	print ('This utility does not take command-line arguments')
	exit()

	if not 'LEDGER_FILE' in os.environ:
	print ('Please set the environment variable LEDGER_FILE to point to the ledger file')
	exit()

	known_ids = set()
	already_imported = set()
	###### (SET THIS TO MATCH YOUR ACCOUNTS WHEN YOU RUN THE SCRIPT) ######
	account_id_to_account_name = {'12345': 'Assets:ExampleSavings'}
	transactions=0
	account_listing = []
	with open(os.environ['LEDGER_FILE'],'r') as ledger_scan:
	for line in ledger_scan:
	id_match = re.search("@id +(\S+.*)", line)
	import_match = re.search("@imported +(\S+.*)", line)
	account_map_match = re.search("@account +(\S) +(\S+.)", line)
	if line[0].isdigit():
	transactions = transactions + 1
	transaction_line = line.strip()
	transaction_line = re.sub('[0-9]/[0-9]/[0-9]\s','',transaction_line)
	else:
	if transactions>=1:
	if not line.startswith(';'):
	account_line =line.strip();
	account_line = re.sub('[\s\-][0-9]\.[0-9]*','',line)
	account_line = re.sub('[(\!\)\s]','',account_line,1)

	account_listing.append((transaction_line, account_line))


	if id_match != None:
	known_ids.add(id_match.group(1))
	if import_match != None:
	already_imported.add(import_match.group(1))
	elif account_map_match != None:
	account_id_to_account_name[account_map_match.group(1)] = account_map_match.group(2)

	d = {}

	if transactions>1:
	for x, y in account_listing:
	d.setdefault(x.strip(), []).append(y.strip())

	with open(os.environ['LEDGER_FILE'],'ab') as ledger_output:
	for (dirpath,dirnames,filenames) in os.walk('.',False):
	for filename in filenames:
	if (filename.endswith('.ofx') or filename.endswith('.qfx')) and not filename in already_imported:
	print ("Importing {0}".format(filename),end='')
	with open(os.path.join(dirpath, filename),'r') as ofx_file:
	ofx = OfxParser.parse(ofx_file)
	account_name = account_id_to_account_name[ofx.account.number.encode('ascii')]
	print (" ({0})".format(account_name))

	ledger_output.write('\n\n\n\n\n;;;; ######################################################################\n')
	ledger_output.write(';;;; @imported {0}\n'.format(filename))
	ledger_output.write(';;;; ######################################################################\n\n')

	def transaction_sort_key(t):
	try:
	return (t.date, t.payee)
	except AttributeError:
	return (t.date, "UNSPECIFIED PAYEE")

	for t in sorted(ofx.account.statement.transactions, key=transaction_sort_key):
	if len(t.id) > 10:
	unique_id = t.id
	else:
	unique_id = ofx.account.number.encode('ascii') + "." + t.id

	if unique_id in known_ids:
	continue

	print (" {0}".format(unique_id))
	date = t.date.date()
	ledger_output.write ('; @id {0}\n'.format(unique_id))

	try:
	payee = t.payee
	except AttributeError:
	payee = "UNSPECIFIED PAYEE"

	ledger_output.write ('{0}/{1}/{2} {3}\n'.format(date.year,date.month,date.day,payee))

	t.amount = float(t.amount)

	try:
	categories = Counter(d[payee.strip()])
	written = 0
	found_account = 'Expenses:unknown'
	for acc in categories:
	if re.search('Income\|Expenses.*',acc):
	found_account = acc

	except KeyError:
	found_account='Expenses:unknown'

	if len(t.memo) > 0:
	ledger_output.write (' {0} ${1:0.2f} ; {2}\n'.format(
	account_name, t.amount, t.memo))
	else:
	ledger_output.write (' {0} ${1:0.2f}\n'.format(
	account_name, t.amount))


	ledger_output.write (' {0}\n\n'.format(found_account))