Created
August 21, 2019 11:40
-
-
Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.
Revisions
-
Islidius created this gist
Aug 21, 2019 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,278 @@ // 'ε' symbol for empty symbol const EPSILON = 'ε'; // this is a set that tracks the fact it contains 'ε' seperatly class CustomSet { constructor() { this.set = new Set(); this.eps = false; } addTerminal(terminal) { this.set.add(terminal); } union(other) { for(let k of other.set) { this.set.add(k); } } setEpsilon() { this.eps = true; } hasEpsilon() { return this.eps; } asArray() { if(this.hasEpsilon()) return [...this.set, EPSILON]; return [...this.set]; } } // simple graph wrapper class Graph { constructor() { this.graph = {}; } addLink(from, to) { if(this.graph[from] === undefined) { this.graph[from] = []; } this.graph[from].push(to); } getLink(from) { return this.graph[from] ? this.graph[from] : []; } } const isTerminal = (symbol) => !/[A-Z]/.test(symbol); const isNonTerminal = (symbol) => /[A-Z]/.test(symbol); const isEpsilon = (symbol) => symbol === EPSILON; const getLHS = (production) => production.split('->')[0].replace(/\s+/g, ''); const getRHS = (production) => production.split('->')[1].replace(/\s+/g, ''); const getProductionsForSymbol = (symbol, grammar) => grammar.filter((p) => getLHS(p) === symbol); const getProductionsWithSymbol = (symbol, grammar) => grammar.filter((p) => getRHS(p).indexOf(symbol) !== -1); const generateFirst = (symbol, firstSets, grammar) => { if(firstSets[symbol]) return; let first = firstSets[symbol] = new CustomSet; // the first set of a terminal symbol is just the terminal if(isTerminal(symbol)) { first.addTerminal(symbol); return; } for(let production of getProductionsForSymbol(symbol, grammar)) { let rhs = getRHS(production); let hasAtLeastOneTerminal = false; for(let prodSymbol of rhs) { // if production is 'ε' then the first also contains it if(prodSymbol === EPSILON) { first.setEpsilon(); break; } // get the first of the current symbol generateFirst(prodSymbol, firstSets, grammar); let firstOfNonTerminal = firstSets[prodSymbol]; first.union(firstOfNonTerminal); // if the first of the current symbol does not contain 'ε' break if(!firstOfNonTerminal.hasEpsilon()) { hasAtLeastOneTerminal = true; break; } } // remember that if all non-terminals contain 'ε' first also contains 'ε' if(!hasAtLeastOneTerminal) { first.setEpsilon(); } } }; const buildFirst = (grammar) => { let firstSets = {}; for(let prod of grammar) { generateFirst(getLHS(prod), firstSets, grammar); } return firstSets; }; // this follow pass does only copy in the first sets and builds the follow graph const generateFollow = (symbol, followSets, firstSets, graph, grammar) => { if(followSets[symbol]) return; let follow = followSets[symbol] = new CustomSet; for(let production of getProductionsWithSymbol(symbol, grammar)) { let rhs = getRHS(production); let symbolIndex = 0; while(true) { // find the location of the symbol in question symbolIndex = rhs.indexOf(symbol, symbolIndex); // was not found if(symbolIndex === -1) break; // symbol after the symbol symbolIndex += 1; while(true) { // symbol is the last one. This means FOLLOW(symbol) = FOLLOW(getLHS(production)) // if we copy here we run in caching problems, so just add it // to the graph and later propagate it if(symbolIndex === rhs.length) { graph.addLink(symbol, getLHS(production)); break; } let followSymbol = rhs[symbolIndex]; generateFirst(followSymbol, firstSets, grammar); let firstOfFollow = firstSets[followSymbol]; follow.union(firstOfFollow); // break if the FIRST does not contain 'ε' if(!firstOfFollow.hasEpsilon()) break; symbolIndex += 1; } } } }; // propagate the FOLLOW sets using the graph previously build // this employs a simple breath first search of the graph and // unions the sets. const propagateFollow = (symbol, followSets, graph) => { let visited = [symbol]; let current = [symbol]; while(current.length !== 0) { let next = []; for(let c of current) { for(let n of graph.getLink(c)) { if(!visited.includes(n)) { visited.push(n); next.push(n); followSets[symbol].union(followSets[n]); } } } current = next; } }; // first build the follow sets and graph, the propagate the follow // set using the graph. const buildFollow = (grammar, firstSets, startingSymbol) => { let followSets = {}; let graph = new Graph; for(let prod of grammar) { generateFollow(getLHS(prod), followSets, firstSets, graph, grammar); } // add the end of input to the starting symbol followSets[startingSymbol].addTerminal('$'); for(let prod of grammar) { propagateFollow(getLHS(prod), followSets, graph); } return followSets; }; const printGrammar = (grammar) => { for(let k of grammar) { console.log(' ', k); } }; const printSet = (set) => { for(let k in set) { console.log(' ', k, ':', set[k].asArray()); } }; const buildAndPrint = (grammar, startingSymbol) => { let firstSets = buildFirst(grammar); let followSets = buildFollow(grammar, firstSets, startingSymbol); console.log('Grammar:\n'); printGrammar(grammar); console.log(''); console.log('First sets:\n'); printSet(firstSets); console.log(''); console.log('Follow sets:\n'); printSet(followSets); console.log(''); } let grammar = [ 'S -> F', 'S -> (S + F)', 'F -> a' ]; buildAndPrint(grammar, 'S'); grammar = [ 'E -> TX', 'X -> +TX', 'X -> ε', 'T -> FY', 'Y -> *FY', 'Y -> ε', 'F -> a', 'F -> (E)', ]; buildAndPrint(grammar, 'E'); grammar = [ 'S -> AB', 'A -> ε', 'B -> ε' ]; buildAndPrint(grammar, 'S'); grammar = [ 'E -> aT', 'E -> ε', 'T -> bE', 'T -> ε', 'S -> Ec' ]; buildAndPrint(grammar, 'S'); grammar = [ 'E -> EE', 'E -> a' ]; buildAndPrint(grammar, 'E');