#!/usr/bin/env python3

import re
from pprint import pprint

def main():
    begin = "cat"
    end = "bar"
    word_len = len(begin)
    graph = build_graph(get_words(word_len))
    pprint(graph)
    
    
def build_graph(wordlist):
    graph = {}
    for word in wordlist:
        graph[word] = set()
        for x in wordlist:
            d = editdist(word, x)
            if d < 3:
                print(d, word, x)
            if d == 1:
                graph[word].add(x)
    return graph
    print("END")

    
def get_words(length):
    return [w for w in 
            get_all_word_iterator()
            if len(w) == length]
    
    
def get_all_word_iterator():
    invalid_word = r"[^a-z]"   # regex for any non lowercase-ascii.  We will use re.search
    
    dict_file = "/usr/share/dict/words"
    with open(dict_file) as newfile:
        # with ... as essentially creates a closure to ensure the filehandles are cleaned up
        # open returns an iterator overlines.  So we can do iterable stuff over it
        # it also means that if you return it directly and store multiple references, each
        # iteration of any of them would advance the iteration.  But anyways...
        
        # we use [ ... for ... in .. ] to create a list comprehension which reads all the 
        # lines before returning.  
        
        # In a different application we could return a generator expresion using the fact
        # that newfile is a line-by-line iterator to avoid reading all the file at once.
        # we don't want that here as we are iterating over the list many times, and all those
        # disk reads would be silly (yeah yeah, the page would be in cache...)
        
        # secret: pythonistas don't make nearly enough use of regular expresions, but they
        # often don't need them.  this case seems like a perfect fit.
        
        # edit, we're going to return a generator expression (iterator) here
        return (word.strip() 
                for word in newfile 
                if not re.search(invalid_word, word))
                 

def editdist(s,t):
    if s == t:
        return 0
    if s == "":
        return len(t)
    if t == "":
        return len(s)
    if s[-1] == t[-1]:
        cost = 0
    else:
        cost = 1
    res = min([editdist(s[:-1], t)+1,
               editdist(s, t[:-1])+1,
               editdist(s[:-1], t[:-1]) + cost])
    return res


if __name__ == '__main__':
    main()