# The final version. class Node: def __init__(self, L, j, children): self.L, self.i, self.children = L, j, children def __repr__(self): return str((self.L, self.i, self.children)) class GSS: def __init__(self): self.gss, self.P = {}, {} def get(self, L, i, children): my_label = (L, i) if my_label not in self.gss: self.gss[my_label] = Node(L, i, children) assert my_label not in self.P self.P[my_label] = [] return self.gss[my_label] def add_to_P(self, u, j): label = (u.L, u.i) self.P[label].append(j) def __repr__(self): return str(self.gss) class GLLParser: def create(self, L, u, j): v = self.gss.get(L, j, [u]) if u not in v.children: v.children.append(u) label = (L, j) for k in self.gss.P[label]: self.add(L, u, k) return v def add(self, L, u, j): if (L, u) not in self.U[j]: self.U[j].append((L, u)) assert (L,u,j) not in self.R self.R.append((L, u, j)) def pop(self, u, j): if u != self.u0: self.gss.add_to_P(u, j) for v in u.children: self.add(u.L, v, j) return u def __init__(self, input_str): self.R = [] self.gss = GSS() self.I = input_str self.m = len(self.I) # |I| + 1 self.u1 = self.gss.get('L0', 0, []) self.u0 = self.gss.get('$', self.m, []) self.u1.children.append(self.u0) self.U = [] for j in range(self.m): # 0<=j<=m self.U.append([]) # U_j = empty def compile_terminal(key, n_alt, r_pos, r_len, token): if r_len == r_pos: Lnxt = '_' else: Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1) return ''' elif L == '%s[%d]_%d': if parser.I[i] == '%s': i = i+1 L = '%s' else: L = 'L0' continue ''' % (key, n_alt, r_pos, token, Lnxt) def compile_nonterminal(key, n_alt, r_pos, r_len, token): if r_len == r_pos: Lnxt = '_' else: Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1) return ''' elif L == '%s[%d]_%d': c_u = parser.create('%s', c_u, i) L = '%s' continue ''' % (key, n_alt, r_pos, Lnxt, token) def compile_rule(key, n_alt, rule): res = [] for i, t in enumerate(rule): if (t[0],t[-1]) == ('<', '>'): r = compile_nonterminal(key, n_alt, i, len(rule), t) else: r = compile_terminal(key, n_alt, i, len(rule), t) res.append(r) res.append(''' elif L == '%s[%d]_%d': L = 'L_' continue ''' % (key, n_alt, len(rule))) return '\n'.join(res) def compile_def(key, definition): res = [] res.append(''' elif L == '%s': ''' % key) for n_alt,rule in enumerate(definition): res.append(''' parser.add( '%s[%d]_0', c_u, i)''' % (key, n_alt)) res.append(''' # def L = 'L0' continue''') for n_alt,rule in enumerate(definition): r = compile_rule(key, n_alt, rule) res.append(r) return '\n'.join(res) def compile_grammar(g, start): res = [''' def parse_string(parser): c_u = parser.u1 i = 0 L = '%s' # starting state while True: if L == 'L0': if parser.R: (L, c_u, i), *parser.R = parser.R continue else: if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success' else: return 'failed' elif L == 'L_': c_u = parser.pop(c_u, i) L = 'L0' continue ''' % start] for k in g: r = compile_def(k, g[k]) res.append(r) res.append(''' else: assert False if __name__ == '__main__': import simplefuzzer G = { '': [ ['', '', 'd'], ['', ''], ['g', 'p', ''], []], '': [['a'], ['c']], '': [['a'], ['b']], '': ['c'] } import sys import gll mystr = sys.argv[1] g = gll.GLLParser(mystr) assert parse_string(g) == 'success' gf = simplefuzzer.LimitFuzzer(G) for i in range(1000): s = gf.iter_fuzz(key='', max_depth=100) print(s) g = gll.GLLParser(s+'$') assert parse_string(g) == 'success' print(g.gss) ''') return '\n'.join(res) if __name__ == '__main__': import simplefuzzer G = { '': [ ['', '', 'd'], ['', ''], ['g', 'p', ''], []], '': [['a'], ['c']], '': [['a'], ['b']], '': ['c'] } res = compile_grammar(G, '') print(res)