Created
April 9, 2013 17:35
-
-
Save linusyang/5347699 to your computer and use it in GitHub Desktop.
Revisions
-
linusyang created this gist
Apr 9, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,229 @@ #!/usr/bin/env python # # By Linus Yang <[email protected]> # Written on iPad using Textastic Code Editor # Licensed under GPL v3 # ''' Function: Transform an NFA to DFA then simplify it Limitation: - input file should be named as 'nfa_*.txt' - only one start node named x and end node named y in NFA - only one literal on the arrow is allowed in NFA - use 'e' to represent epsilon Input Sample: (nfa_0.txt) x 5 e 5 5 a 5 5 b 5 1 e 1 3 a 1 4 b 3 2 a 4 2 b 2 6 e 6 6 a 6 6 b 6 y e Output Sample: (dfa_0.txt) {X, 1, 5} {1, 3, 5} {1, 4, 5} {1, 3, 5} {1, 2, 3, 5, 6, Y} {1, 4, 5} {1, 4, 5} {1, 3, 5} {1, 2, 4, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y} {1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y} {1, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y} s a b 0 1 2 1 3 2 2 1 4 3* 3 5 4* 6 4 5* 6 4 6* 3 5 {{0}, {1}, {2}, {3, 4, 5, 6}} s a b 0 1 2 1 3 2 2 1 3 3* 3 3 ''' EPS = 'e' START = 'x' END = 'y' fin = None fout = None def write(s): if fout: fout.write(s) else: print(s), def str_set(s): x = list(s) if x: y = [] res = '{' c = [] for i in x: if i.isdigit(): y.append(int(i)) else: c.append(i) y.sort() if START in c: res += START.upper() + ', ' for i in y: res += '%d' % i + ', ' if END in c: res += END.upper() + '}' else: res = res[:-2] + '}' return res return '{}' def eps_closure(nfa, node_set): if node_set == set([]): return node_set res = node_set.copy() for node in node_set: next_list = nfa.get(node) if next_list: for next in next_list: if next[1] == EPS: res.add(next[0]) if next[0] != node: res |= eps_closure(nfa, set([next[0]])) return res def next_set(nfa, now_set, c): res = set([]) for node in now_set: next_list = nfa.get(node) if next_list: for next in next_list: if next[1] == c: res.add(next[0]) return res def main(): nfa = {} lit = set([]) for s in fin: e = s.lower().split() if nfa.get(e[0]): nfa[e[0]].append((e[1], e[2])) else: nfa[e[0]] = [(e[1], e[2])] lit.add(e[2]) lit.remove(EPS) liter = list(lit) liter.sort() q = [eps_closure(nfa, set([START]))] status = [q[0]] dfa_str = '' dfa = {} end_node = [] mid_node = [] while q: now = q.pop(0) i = status.index(now) now_index = '%d' % i end_str = '' if END in now: end_str = '*' end_node.append(i) else: mid_node.append(i) write(str_set(now) + ' ') dfa_str += now_index + end_str + ' ' next_dict = {} for c in liter: next = eps_closure(nfa, next_set(nfa, now, c)) if not next in status and next: q.append(next) status.append(next) j = status.index(next) if next else -1 next_index = '%d' % j write(str_set(next) + ' ') dfa_str += next_index + ' ' next_dict[c] = j write('\n') dfa_str += '\n' dfa[i] = next_dict write('\ns %s\n%s\n' % (' '.join(liter), dfa_str)) q = [[end_node, True], [mid_node, True]] fresh = True while fresh: now = q[0] for c in liter: next = {} for i in now[0]: if dfa[i][c] == -1: if next.get(-1): next[-1].append(i) else: next[-1] = [i] else: j = 0 for x in q: if dfa[i][c] in x[0]: if next.get(j): next[j].append(i) else: next[j] = [i] j += 1 splited = True now_split = next.values() if now[0] in now_split: splited = False else: for x in now_split: q.append([x, True]) break q.pop(0) if not splited: q.append([now[0], False]) fresh = False for x in q: if x[1] == True: fresh = True break split = [x for x, y in q] split.sort() write(str(split).replace('[', '{').replace(']', '}') + '\n') for x in split: if len(x) > 1: rep = x[0] for i in xrange(1, len(x)): for j in dfa: for c in liter: if dfa[j][c] == x[i]: dfa[j][c] = rep del dfa[x[i]] write('\ns %s\n' % (' '.join(liter))) for i in dfa: write('%d%s ' % (i, '*' if i in end_node else '')) for c in liter: write('%d ' % dfa[i][c]) write('\n') fin.close() fout.close() if __name__ == '__main__': import os now_dir = os.path.dirname(os.path.realpath(__file__)) files = [x for x in os.listdir(now_dir) if os.path.isfile(x) and x.endswith('txt') and x.startswith('nfa_')] for x in files: fin = open(x, 'r') fout = open(x.replace('nfa_', 'dfa_'), 'w') main()