linusyang · April 9, 2013 17:35 · Apr 9, 2013
diff --git a/dfa.py b/dfa.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+#
+# By Linus Yang <[email protected]>
+# Written on iPad using Textastic Code Editor
+# Licensed under GPL v3
+#
+
+'''
+Function:
+    Transform an NFA to DFA then simplify it
+
+Limitation:
+    - input file should be named as 'nfa_*.txt'
+    - only one start node named x and end node named y in NFA
+    - only one literal on the arrow is allowed in NFA
+    - use 'e' to represent epsilon
+
+Input Sample: (nfa_0.txt)
+    x 5 e
+    5 5 a
+    5 5 b
+    5 1 e
+    1 3 a
+    1 4 b
+    3 2 a
+    4 2 b
+    2 6 e
+    6 6 a
+    6 6 b
+    6 y e
+
+Output Sample: (dfa_0.txt)
+    {X, 1, 5} {1, 3, 5} {1, 4, 5} 
+    {1, 3, 5} {1, 2, 3, 5, 6, Y} {1, 4, 5} 
+    {1, 4, 5} {1, 3, 5} {1, 2, 4, 5, 6, Y} 
+    {1, 2, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y} 
+    {1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y} 
+    {1, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y} 
+    {1, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y} 
+
+    s a b
+    0 1 2 
+    1 3 2 
+    2 1 4 
+    3* 3 5 
+    4* 6 4 
+    5* 6 4 
+    6* 3 5 
+
+    {{0}, {1}, {2}, {3, 4, 5, 6}}
+
+    s a b
+    0 1 2 
+    1 3 2 
+    2 1 3 
+    3* 3 3 
+'''
+
+EPS = 'e'
+START = 'x'
+END = 'y'
+fin = None
+fout = None
+
+def write(s):
+    if fout:
+        fout.write(s)
+    else:
+        print(s),
+
+def str_set(s):
+    x = list(s)
+    if x:
+        y = []
+        res = '{'
+        c = []
+        for i in x:
+            if i.isdigit():
+                y.append(int(i))
+            else:
+                c.append(i)
+        y.sort()
+        if START in c:
+            res += START.upper() + ', '
+        for i in y:
+            res += '%d' % i + ', '
+        if END in c:
+            res += END.upper() + '}'
+        else:
+            res = res[:-2] + '}'
+        return res
+    return '{}'
+
+def eps_closure(nfa, node_set):
+    if node_set == set([]):
+        return node_set
+    res = node_set.copy()
+    for node in node_set:
+        next_list = nfa.get(node)
+        if next_list:
+            for next in next_list:
+                if next[1] == EPS:
+                    res.add(next[0])
+                    if next[0] != node:
+                        res |= eps_closure(nfa, set([next[0]]))
+    return res
+
+def next_set(nfa, now_set, c):
+    res = set([])
+    for node in now_set:
+        next_list = nfa.get(node)
+        if next_list:
+            for next in next_list:
+                if next[1] == c:
+                    res.add(next[0])
+    return res
+
+def main():
+    nfa = {}
+    lit = set([])
+    for s in fin:
+        e = s.lower().split()
+        if nfa.get(e[0]):
+            nfa[e[0]].append((e[1], e[2]))
+        else:
+            nfa[e[0]] = [(e[1], e[2])]
+        lit.add(e[2])
+    lit.remove(EPS)
+    liter = list(lit)
+    liter.sort()
+    q = [eps_closure(nfa, set([START]))]
+    status = [q[0]]
+    dfa_str = ''
+    dfa = {}
+    end_node = []
+    mid_node = []
+    while q:
+        now = q.pop(0)
+        i = status.index(now)
+        now_index = '%d' % i
+        end_str = ''
+        if END in now:
+            end_str = '*'
+            end_node.append(i)
+        else:
+            mid_node.append(i)
+        write(str_set(now) + ' ')
+        dfa_str += now_index + end_str + ' '
+        next_dict = {}
+        for c in liter:
+            next = eps_closure(nfa, next_set(nfa, now, c))
+            if not next in status and next:
+                q.append(next)
+                status.append(next)
+            j = status.index(next) if next else -1
+            next_index = '%d' % j
+            write(str_set(next) + ' ')
+            dfa_str += next_index + ' '
+            next_dict[c] = j
+        write('\n')
+        dfa_str += '\n'
+        dfa[i] = next_dict
+    write('\ns %s\n%s\n' % (' '.join(liter), dfa_str))
+    q = [[end_node, True], [mid_node, True]]
+    fresh = True
+    while fresh:
+        now = q[0]
+        for c in liter:
+            next = {}
+            for i in now[0]:
+                if dfa[i][c] == -1:
+                    if next.get(-1):
+                        next[-1].append(i)
+                    else:
+                        next[-1] = [i]
+                else:
+                    j = 0
+                    for x in q:
+                        if dfa[i][c] in x[0]:
+                            if next.get(j):
+                                next[j].append(i)
+                            else:
+                                next[j] = [i]
+                        j += 1
+            splited = True
+            now_split = next.values()
+            if now[0] in now_split:
+                splited = False
+            else:
+                for x in now_split:
+                    q.append([x, True])
+                break
+        q.pop(0)
+        if not splited:
+            q.append([now[0], False])
+        fresh = False
+        for x in q:
+            if x[1] == True:
+                fresh = True
+                break
+    split = [x for x, y in q]
+    split.sort()
+    write(str(split).replace('[', '{').replace(']', '}') + '\n')
+    for x in split:
+        if len(x) > 1:
+            rep = x[0]
+            for i in xrange(1, len(x)):
+                for j in dfa:
+                    for c in liter:
+                        if dfa[j][c] == x[i]:
+                            dfa[j][c] = rep
+                del dfa[x[i]]
+    write('\ns %s\n' % (' '.join(liter)))
+    for i in dfa:
+        write('%d%s ' % (i, '*' if i in end_node else ''))
+        for c in liter:
+            write('%d ' % dfa[i][c])
+        write('\n')
+    fin.close()
+    fout.close()
+
+if __name__ == '__main__':
+    import os
+    now_dir = os.path.dirname(os.path.realpath(__file__))
+    files = [x for x in os.listdir(now_dir) if os.path.isfile(x) and x.endswith('txt') and x.startswith('nfa_')]
+    for x in files:
+        fin = open(x, 'r')
+        fout = open(x.replace('nfa_', 'dfa_'), 'w')
+        main()