# Originally from https://gist.github.com/jeffrichards/4694710 # TODO make matching with existing transactions happen. That is if I make a payment to # my credit card and then use this tool to load both my credit card statement and my bank # account statement into my ledger file, I don't want to import this transaction twice. # TODO improve the category selection to have some ability to fuzzy match, particularly for # payees that have numbers in their name is RONA #8868 where the number is (probably) the # store number, I would like any future RONA purchases even from new store numbers the same way # TODO to use memo fields to help category selction based on switches within the account # my bank keeps all the usable information for category matching in the memo field, so I need # to be looking there on occasion, my credit card keeps all the good info in the payee field. # I used to preprocess the download from the bank when I used gnucash, but now that I'm writing # the script that hsould just be built in as an option. from __future__ import print_function from ofxparse import OfxParser import os import re import sys from collections import Counter if len(sys.argv) != 1: print ('This utility does not take command-line arguments') exit() if not 'LEDGER_FILE' in os.environ: print ('Please set the environment variable LEDGER_FILE to point to the ledger file') exit() known_ids = set() already_imported = set() ###### (SET THIS TO MATCH YOUR ACCOUNTS WHEN YOU RUN THE SCRIPT) ###### account_id_to_account_name = {'12345': 'Assets:ExampleSavings'} transactions=0 account_listing = [] with open(os.environ['LEDGER_FILE'],'r') as ledger_scan: for line in ledger_scan: id_match = re.search("@id +(\S+.*)", line) import_match = re.search("@imported +(\S+.*)", line) account_map_match = re.search("@account +(\S*) +(\S+.*)", line) if line[0].isdigit(): transactions = transactions + 1 transaction_line = line.strip() transaction_line = re.sub('[0-9]*/[0-9]*/[0-9]*\s*','',transaction_line) else: if transactions>=1: if not line.startswith(';'): account_line =line.strip(); account_line = re.sub('[\s\-]*[0-9]*\.[0-9]*','',line) account_line = re.sub('[(\!\*)\s]*','',account_line,1) account_listing.append((transaction_line, account_line)) if id_match != None: known_ids.add(id_match.group(1)) if import_match != None: already_imported.add(import_match.group(1)) elif account_map_match != None: account_id_to_account_name[account_map_match.group(1)] = account_map_match.group(2) d = {} if transactions>1: for x, y in account_listing: d.setdefault(x.strip(), []).append(y.strip()) with open(os.environ['LEDGER_FILE'],'ab') as ledger_output: for (dirpath,dirnames,filenames) in os.walk('.',False): for filename in filenames: if (filename.endswith('.ofx') or filename.endswith('.qfx')) and not filename in already_imported: print ("Importing {0}".format(filename),end='') with open(os.path.join(dirpath, filename),'r') as ofx_file: ofx = OfxParser.parse(ofx_file) account_name = account_id_to_account_name[ofx.account.number.encode('ascii')] print (" ({0})".format(account_name)) ledger_output.write('\n\n\n\n\n;;;; ######################################################################\n') ledger_output.write(';;;; @imported {0}\n'.format(filename)) ledger_output.write(';;;; ######################################################################\n\n') def transaction_sort_key(t): try: return (t.date, t.payee) except AttributeError: return (t.date, "UNSPECIFIED PAYEE") for t in sorted(ofx.account.statement.transactions, key=transaction_sort_key): if len(t.id) > 10: unique_id = t.id else: unique_id = ofx.account.number.encode('ascii') + "." + t.id if unique_id in known_ids: continue print (" {0}".format(unique_id)) date = t.date.date() ledger_output.write ('; @id {0}\n'.format(unique_id)) try: payee = t.payee except AttributeError: payee = "UNSPECIFIED PAYEE" ledger_output.write ('{0}/{1}/{2} {3}\n'.format(date.year,date.month,date.day,payee)) t.amount = float(t.amount) try: categories = Counter(d[payee.strip()]) written = 0 found_account = 'Expenses:unknown' for acc in categories: if re.search('Income|Expenses.*',acc): found_account = acc except KeyError: found_account='Expenses:unknown' if len(t.memo) > 0: ledger_output.write (' {0} ${1:0.2f} ; {2}\n'.format( account_name, t.amount, t.memo)) else: ledger_output.write (' {0} ${1:0.2f}\n'.format( account_name, t.amount)) ledger_output.write (' {0}\n\n'.format(found_account))