#!/usr/bin/env python3 import re from pprint import pprint def main(): begin = "cat" end = "bar" word_len = len(begin) graph = build_graph(get_words(word_len)) pprint(graph) def build_graph(wordlist): graph = {} for word in wordlist: graph[word] = set() for x in wordlist: d = editdist(word, x) if d < 3: print(d, word, x) if d == 1: graph[word].add(x) return graph print("END") def get_words(length): return [w for w in get_all_word_iterator() if len(w) == length] def get_all_word_iterator(): invalid_word = r"[^a-z]" # regex for any non lowercase-ascii. We will use re.search dict_file = "/usr/share/dict/words" with open(dict_file) as newfile: # with ... as essentially creates a closure to ensure the filehandles are cleaned up # open returns an iterator overlines. So we can do iterable stuff over it # it also means that if you return it directly and store multiple references, each # iteration of any of them would advance the iteration. But anyways... # we use [ ... for ... in .. ] to create a list comprehension which reads all the # lines before returning. # In a different application we could return a generator expresion using the fact # that newfile is a line-by-line iterator to avoid reading all the file at once. # we don't want that here as we are iterating over the list many times, and all those # disk reads would be silly (yeah yeah, the page would be in cache...) # secret: pythonistas don't make nearly enough use of regular expresions, but they # often don't need them. this case seems like a perfect fit. # edit, we're going to return a generator expression (iterator) here return (word.strip() for word in newfile if not re.search(invalid_word, word)) def editdist(s,t): if s == t: return 0 if s == "": return len(t) if t == "": return len(s) if s[-1] == t[-1]: cost = 0 else: cost = 1 res = min([editdist(s[:-1], t)+1, editdist(s, t[:-1])+1, editdist(s[:-1], t[:-1]) + cost]) return res if __name__ == '__main__': main()