Skip to content

Instantly share code, notes, and snippets.

@linusyang
Created April 9, 2013 17:35
Show Gist options
  • Save linusyang/5347699 to your computer and use it in GitHub Desktop.
Save linusyang/5347699 to your computer and use it in GitHub Desktop.

Revisions

  1. linusyang created this gist Apr 9, 2013.
    229 changes: 229 additions & 0 deletions dfa.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,229 @@
    #!/usr/bin/env python
    #
    # By Linus Yang <[email protected]>
    # Written on iPad using Textastic Code Editor
    # Licensed under GPL v3
    #

    '''
    Function:
    Transform an NFA to DFA then simplify it
    Limitation:
    - input file should be named as 'nfa_*.txt'
    - only one start node named x and end node named y in NFA
    - only one literal on the arrow is allowed in NFA
    - use 'e' to represent epsilon
    Input Sample: (nfa_0.txt)
    x 5 e
    5 5 a
    5 5 b
    5 1 e
    1 3 a
    1 4 b
    3 2 a
    4 2 b
    2 6 e
    6 6 a
    6 6 b
    6 y e
    Output Sample: (dfa_0.txt)
    {X, 1, 5} {1, 3, 5} {1, 4, 5}
    {1, 3, 5} {1, 2, 3, 5, 6, Y} {1, 4, 5}
    {1, 4, 5} {1, 3, 5} {1, 2, 4, 5, 6, Y}
    {1, 2, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}
    {1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
    {1, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
    {1, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}
    s a b
    0 1 2
    1 3 2
    2 1 4
    3* 3 5
    4* 6 4
    5* 6 4
    6* 3 5
    {{0}, {1}, {2}, {3, 4, 5, 6}}
    s a b
    0 1 2
    1 3 2
    2 1 3
    3* 3 3
    '''

    EPS = 'e'
    START = 'x'
    END = 'y'
    fin = None
    fout = None

    def write(s):
    if fout:
    fout.write(s)
    else:
    print(s),

    def str_set(s):
    x = list(s)
    if x:
    y = []
    res = '{'
    c = []
    for i in x:
    if i.isdigit():
    y.append(int(i))
    else:
    c.append(i)
    y.sort()
    if START in c:
    res += START.upper() + ', '
    for i in y:
    res += '%d' % i + ', '
    if END in c:
    res += END.upper() + '}'
    else:
    res = res[:-2] + '}'
    return res
    return '{}'

    def eps_closure(nfa, node_set):
    if node_set == set([]):
    return node_set
    res = node_set.copy()
    for node in node_set:
    next_list = nfa.get(node)
    if next_list:
    for next in next_list:
    if next[1] == EPS:
    res.add(next[0])
    if next[0] != node:
    res |= eps_closure(nfa, set([next[0]]))
    return res

    def next_set(nfa, now_set, c):
    res = set([])
    for node in now_set:
    next_list = nfa.get(node)
    if next_list:
    for next in next_list:
    if next[1] == c:
    res.add(next[0])
    return res

    def main():
    nfa = {}
    lit = set([])
    for s in fin:
    e = s.lower().split()
    if nfa.get(e[0]):
    nfa[e[0]].append((e[1], e[2]))
    else:
    nfa[e[0]] = [(e[1], e[2])]
    lit.add(e[2])
    lit.remove(EPS)
    liter = list(lit)
    liter.sort()
    q = [eps_closure(nfa, set([START]))]
    status = [q[0]]
    dfa_str = ''
    dfa = {}
    end_node = []
    mid_node = []
    while q:
    now = q.pop(0)
    i = status.index(now)
    now_index = '%d' % i
    end_str = ''
    if END in now:
    end_str = '*'
    end_node.append(i)
    else:
    mid_node.append(i)
    write(str_set(now) + ' ')
    dfa_str += now_index + end_str + ' '
    next_dict = {}
    for c in liter:
    next = eps_closure(nfa, next_set(nfa, now, c))
    if not next in status and next:
    q.append(next)
    status.append(next)
    j = status.index(next) if next else -1
    next_index = '%d' % j
    write(str_set(next) + ' ')
    dfa_str += next_index + ' '
    next_dict[c] = j
    write('\n')
    dfa_str += '\n'
    dfa[i] = next_dict
    write('\ns %s\n%s\n' % (' '.join(liter), dfa_str))
    q = [[end_node, True], [mid_node, True]]
    fresh = True
    while fresh:
    now = q[0]
    for c in liter:
    next = {}
    for i in now[0]:
    if dfa[i][c] == -1:
    if next.get(-1):
    next[-1].append(i)
    else:
    next[-1] = [i]
    else:
    j = 0
    for x in q:
    if dfa[i][c] in x[0]:
    if next.get(j):
    next[j].append(i)
    else:
    next[j] = [i]
    j += 1
    splited = True
    now_split = next.values()
    if now[0] in now_split:
    splited = False
    else:
    for x in now_split:
    q.append([x, True])
    break
    q.pop(0)
    if not splited:
    q.append([now[0], False])
    fresh = False
    for x in q:
    if x[1] == True:
    fresh = True
    break
    split = [x for x, y in q]
    split.sort()
    write(str(split).replace('[', '{').replace(']', '}') + '\n')
    for x in split:
    if len(x) > 1:
    rep = x[0]
    for i in xrange(1, len(x)):
    for j in dfa:
    for c in liter:
    if dfa[j][c] == x[i]:
    dfa[j][c] = rep
    del dfa[x[i]]
    write('\ns %s\n' % (' '.join(liter)))
    for i in dfa:
    write('%d%s ' % (i, '*' if i in end_node else ''))
    for c in liter:
    write('%d ' % dfa[i][c])
    write('\n')
    fin.close()
    fout.close()

    if __name__ == '__main__':
    import os
    now_dir = os.path.dirname(os.path.realpath(__file__))
    files = [x for x in os.listdir(now_dir) if os.path.isfile(x) and x.endswith('txt') and x.startswith('nfa_')]
    for x in files:
    fin = open(x, 'r')
    fout = open(x.replace('nfa_', 'dfa_'), 'w')
    main()