Skip to content

Instantly share code, notes, and snippets.

@Islidius
Created August 21, 2019 11:40
Show Gist options
  • Select an option

  • Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.

Select an option

Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.

Revisions

  1. Islidius created this gist Aug 21, 2019.
    278 changes: 278 additions & 0 deletions firstandfollowsets.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,278 @@
    // 'ε' symbol for empty symbol
    const EPSILON = 'ε';

    // this is a set that tracks the fact it contains 'ε' seperatly
    class CustomSet {
    constructor() {
    this.set = new Set();
    this.eps = false;
    }

    addTerminal(terminal) {
    this.set.add(terminal);
    }

    union(other) {
    for(let k of other.set) {
    this.set.add(k);
    }
    }

    setEpsilon() {
    this.eps = true;
    }

    hasEpsilon() {
    return this.eps;
    }

    asArray() {
    if(this.hasEpsilon()) return [...this.set, EPSILON];
    return [...this.set];
    }
    }

    // simple graph wrapper
    class Graph {
    constructor() {
    this.graph = {};
    }

    addLink(from, to) {
    if(this.graph[from] === undefined) {
    this.graph[from] = [];
    }
    this.graph[from].push(to);
    }

    getLink(from) {
    return this.graph[from] ? this.graph[from] : [];
    }
    }

    const isTerminal = (symbol) => !/[A-Z]/.test(symbol);
    const isNonTerminal = (symbol) => /[A-Z]/.test(symbol);
    const isEpsilon = (symbol) => symbol === EPSILON;

    const getLHS = (production) =>
    production.split('->')[0].replace(/\s+/g, '');

    const getRHS = (production) =>
    production.split('->')[1].replace(/\s+/g, '');

    const getProductionsForSymbol = (symbol, grammar) =>
    grammar.filter((p) => getLHS(p) === symbol);

    const getProductionsWithSymbol = (symbol, grammar) =>
    grammar.filter((p) => getRHS(p).indexOf(symbol) !== -1);

    const generateFirst = (symbol, firstSets, grammar) => {
    if(firstSets[symbol]) return;

    let first = firstSets[symbol] = new CustomSet;

    // the first set of a terminal symbol is just the terminal
    if(isTerminal(symbol)) {
    first.addTerminal(symbol);
    return;
    }

    for(let production of getProductionsForSymbol(symbol, grammar)) {
    let rhs = getRHS(production);

    let hasAtLeastOneTerminal = false;

    for(let prodSymbol of rhs) {

    // if production is 'ε' then the first also contains it
    if(prodSymbol === EPSILON) {
    first.setEpsilon();
    break;
    }

    // get the first of the current symbol
    generateFirst(prodSymbol, firstSets, grammar);

    let firstOfNonTerminal = firstSets[prodSymbol];
    first.union(firstOfNonTerminal);

    // if the first of the current symbol does not contain 'ε' break
    if(!firstOfNonTerminal.hasEpsilon()) {
    hasAtLeastOneTerminal = true;
    break;
    }
    }

    // remember that if all non-terminals contain 'ε' first also contains 'ε'
    if(!hasAtLeastOneTerminal) {
    first.setEpsilon();
    }
    }
    };

    const buildFirst = (grammar) => {
    let firstSets = {};
    for(let prod of grammar) {
    generateFirst(getLHS(prod), firstSets, grammar);
    }
    return firstSets;
    };

    // this follow pass does only copy in the first sets and builds the follow graph
    const generateFollow = (symbol, followSets, firstSets, graph, grammar) => {
    if(followSets[symbol]) return;

    let follow = followSets[symbol] = new CustomSet;

    for(let production of getProductionsWithSymbol(symbol, grammar)) {
    let rhs = getRHS(production);

    let symbolIndex = 0;
    while(true) {
    // find the location of the symbol in question
    symbolIndex = rhs.indexOf(symbol, symbolIndex);

    // was not found
    if(symbolIndex === -1) break;

    // symbol after the symbol
    symbolIndex += 1;

    while(true) {
    // symbol is the last one. This means FOLLOW(symbol) = FOLLOW(getLHS(production))
    // if we copy here we run in caching problems, so just add it
    // to the graph and later propagate it
    if(symbolIndex === rhs.length) {
    graph.addLink(symbol, getLHS(production));
    break;
    }

    let followSymbol = rhs[symbolIndex];
    generateFirst(followSymbol, firstSets, grammar);
    let firstOfFollow = firstSets[followSymbol];
    follow.union(firstOfFollow);

    // break if the FIRST does not contain 'ε'
    if(!firstOfFollow.hasEpsilon()) break;

    symbolIndex += 1;
    }
    }
    }
    };

    // propagate the FOLLOW sets using the graph previously build
    // this employs a simple breath first search of the graph and
    // unions the sets.
    const propagateFollow = (symbol, followSets, graph) => {
    let visited = [symbol];
    let current = [symbol];
    while(current.length !== 0) {
    let next = [];
    for(let c of current) {
    for(let n of graph.getLink(c)) {
    if(!visited.includes(n)) {
    visited.push(n);
    next.push(n);

    followSets[symbol].union(followSets[n]);
    }
    }
    }
    current = next;
    }
    };

    // first build the follow sets and graph, the propagate the follow
    // set using the graph.
    const buildFollow = (grammar, firstSets, startingSymbol) => {
    let followSets = {};
    let graph = new Graph;
    for(let prod of grammar) {
    generateFollow(getLHS(prod), followSets, firstSets, graph, grammar);
    }

    // add the end of input to the starting symbol
    followSets[startingSymbol].addTerminal('$');

    for(let prod of grammar) {
    propagateFollow(getLHS(prod), followSets, graph);
    }

    return followSets;
    };

    const printGrammar = (grammar) => {
    for(let k of grammar) {
    console.log(' ', k);
    }
    };

    const printSet = (set) => {
    for(let k in set) {
    console.log(' ', k, ':', set[k].asArray());
    }
    };

    const buildAndPrint = (grammar, startingSymbol) => {
    let firstSets = buildFirst(grammar);
    let followSets = buildFollow(grammar, firstSets, startingSymbol);

    console.log('Grammar:\n');
    printGrammar(grammar);
    console.log('');

    console.log('First sets:\n');
    printSet(firstSets);
    console.log('');

    console.log('Follow sets:\n');
    printSet(followSets);
    console.log('');
    }

    let grammar = [
    'S -> F',
    'S -> (S + F)',
    'F -> a'
    ];

    buildAndPrint(grammar, 'S');

    grammar = [
    'E -> TX',
    'X -> +TX',
    'X -> ε',
    'T -> FY',
    'Y -> *FY',
    'Y -> ε',
    'F -> a',
    'F -> (E)',
    ];

    buildAndPrint(grammar, 'E');

    grammar = [
    'S -> AB',
    'A -> ε',
    'B -> ε'
    ];

    buildAndPrint(grammar, 'S');

    grammar = [
    'E -> aT',
    'E -> ε',
    'T -> bE',
    'T -> ε',
    'S -> Ec'
    ];

    buildAndPrint(grammar, 'S');

    grammar = [
    'E -> EE',
    'E -> a'
    ];

    buildAndPrint(grammar, 'E');