Skip to content

Instantly share code, notes, and snippets.

@pconerly
Created August 2, 2020 00:06
Show Gist options
  • Select an option

  • Save pconerly/60d26db96414422c00b6f95107e1e537 to your computer and use it in GitHub Desktop.

Select an option

Save pconerly/60d26db96414422c00b6f95107e1e537 to your computer and use it in GitHub Desktop.

Revisions

  1. pconerly created this gist Aug 2, 2020.
    66 changes: 66 additions & 0 deletions parser.h
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    #pragma once

    #include "defs.h"



    /// Minimal cursor over the half-open character range [pos, end).
    ///
    /// The accept* members consume input only when they match, and they never
    /// match at or beyond `end`. That keeps a parser built over a sub-span from
    /// wandering into the text that follows the span.
    struct StringParser {
        const char *pos, *end;

        /// Returns the current character without consuming it, or '\0' at the end.
        char peek() const { return atEnd() ? '\0' : *pos; }
        /// True once the cursor has reached (or been moved past) `end`.
        bool atEnd() const { return pos >= end; }
        /// Moves the cursor forward by `skip` characters. Not bounds-checked.
        void skip(int skip = 1) { pos += skip; }
        /// Moves the cursor backward by `skip` characters. Not bounds-checked.
        void rewind(int skip = 1) { pos -= skip; }

        /// Consumes the current character if it equals `c`.
        /// @return true if a character was consumed.
        bool accept(char c) {
            if (atEnd() || *pos != c) {
                return false;
            }
            ++pos;
            return true;
        }

        /// Consumes the NUL-terminated sequence `str` if the input continues with
        /// exactly those characters; otherwise the cursor is left where it was.
        /// An empty `str` always succeeds.
        bool accept(const char* str) {
            const char* const start = pos;
            for (; *str; ++str) {
                if (!accept(*str)) {
                    pos = start; // roll back the partial match
                    return false;
                }
            }
            return true;
        }

        /// Consumes the current character if it is one of the characters in the
        /// NUL-terminated set `str`.
        /// @param cout optional; receives the consumed character on success.
        bool acceptAnyOf(const char *str, char *cout = nullptr) {
            for (; *str; ++str) {
                if (accept(*str)) {
                    if (cout) {
                        *cout = *str;
                    }
                    return true;
                }
            }
            return false;
        }

        /// Consumes the current character if it matches any entry of `ranges`.
        /// A one-element entry `{c}` matches exactly `c`; a two-element entry
        /// `{lo, hi}` matches any character in the inclusive range [lo, hi].
        /// Entries of any other size are ignored.
        /// @param cout optional; receives the consumed character on success.
        bool acceptInRanges(std::initializer_list<std::initializer_list<char>> ranges, char *cout = nullptr) {
            if (atEnd()) {
                return false; // nothing left to match; do not read past the span
            }

            const char current = *pos;
            for (auto&& range : ranges) {
                const char* const bounds = range.begin();
                bool matched = false;
                if (range.size() == 1) {
                    matched = (current == bounds[0]);
                }
                else if (range.size() == 2) {
                    matched = (current >= bounds[0] && current <= bounds[1]);
                }

                if (matched) {
                    if (cout) { *cout = current; }
                    ++pos;
                    return true;
                }
            }

            return false;
        }
    };
    189 changes: 189 additions & 0 deletions vex.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,189 @@

    // Advances p until an '@' sigil has been consumed.
    // Returns true with p positioned just after the sigil, or false once the
    // end of input is reached without finding one.
    static bool _acceptSigil(StringParser& p) {
        for (; !p.atEnd(); p.skip()) {
            if (p.accept('@')) {
                return true;
            }
        }
        return false;
    }

    // Consumes one tag character (ASCII letter, digit, '_' or '.') if present.
    // Returns whether a character was consumed.
    static bool _acceptTagCharacter(StringParser& p) {
        const bool consumed = p.acceptInRanges({
            { 'a', 'z' },
            { 'A', 'Z' },
            { '0', '9' },
            { '_' },
            { '.' }
        });
        return consumed;
    }

    // Consumes the longest run of tag characters at the cursor.
    // Returns true if at least one character was consumed.
    static bool _acceptTag(StringParser& p) {
        const auto before = p.pos;
        while (!p.atEnd()) {
            if (!_acceptTagCharacter(p)) {
                break;
            }
        }
        return before != p.pos;
    }

    static void _parseSpan(VexSpan span, VexNode* parent);

    static void _parseNode(VexNode* node, StringParser &p) {
    node->tag.begin = p.pos;
    _acceptTag(p);
    node->tag.end = p.pos;

    bool parseBody = false;

    if (p.atEnd()) {
    node->body = { p.pos, p.pos };
    }
    else {
    if (p.accept(':')) { // body is until line ending
    node->body.begin = p.pos;
    while (!p.atEnd() && p.peek() != '\n') p.skip();
    node->body.end = p.pos;
    parseBody = true;
    }
    else if (p.accept('=')) { // body is next tag
    node->body.begin = p.pos;
    _acceptTag(p);
    node->body.end = p.pos;
    }
    else if (p.accept('{')) { // start scope
    int bracketCount = 1;
    while (p.accept('{')) { ++bracketCount; }

    node->body.begin = p.pos;

    // find end
    int scopeStack = 1;
    while (true) {
    if (p.atEnd()) {
    node->body.end = p.pos;
    break;
    }

    if (p.accept('}')) {
    int newbracketCount = 1;
    while (newbracketCount < bracketCount && p.accept('}')) { ++newbracketCount; }
    if (newbracketCount == bracketCount) {
    --scopeStack;
    }

    if (!scopeStack) {
    node->body.end = p.pos - bracketCount;
    break;
    }
    }
    else if (p.accept('{')) {
    int newbracketCount = 1;
    while (newbracketCount < bracketCount && p.accept('{')) { ++newbracketCount; }
    if (newbracketCount == bracketCount) {
    ++scopeStack;
    }
    }
    else {
    p.skip();
    }
    }
    parseBody = true;
    }
    else if (p.accept('(')) { // everythings a node
    int bracketCount = 1;
    while (p.accept('(')) { ++bracketCount; }

    node->body.begin = p.pos;
    int scopeStack = 1;

    // find end parens
    while (true) {
    if (p.atEnd()) {
    node->body.end = p.pos;
    break;
    }

    if (p.accept(')')) {
    int newbracketCount = 1;
    while (newbracketCount < bracketCount && p.accept(')')) { ++newbracketCount; }
    if (newbracketCount == bracketCount) {
    --scopeStack;
    }
    if (!scopeStack) {
    node->body.end = p.pos - bracketCount;
    break;
    }
    }
    else if (p.accept('(')) {
    int newbracketCount = 1;
    while (newbracketCount < bracketCount && p.accept('(')) { ++newbracketCount; }
    if (newbracketCount == bracketCount) {
    ++scopeStack;
    }
    }
    else {
    p.skip();
    }
    }

    // now parse the inner span for nodes
    StringParser subp = { node->body.begin, node->body.end };
    VexNode* lastChild = nullptr;

    while (!subp.atEnd()) {
    VexNode child;
    child.span.begin = subp.pos;
    _parseNode(&child, subp);

    // if the parse found a valid node, alloc it and link it up
    if (child.span.end > child.span.begin) {
    auto childPtr = new VexNode(child);
    if (!lastChild) { node->children = childPtr; }
    else { lastChild->next = childPtr; }
    lastChild = childPtr;
    }

    // skip to next tag
    while (!subp.atEnd()) { // the sad vex mustache man
    if (_acceptTagCharacter(subp) || subp.acceptAnyOf(":={(")) {
    subp.rewind();
    break;
    }
    subp.skip();
    }
    }
    }
    else { // this node is done
    node->body = { p.pos, p.pos };
    }
    }

    node->span.end = p.pos;
    if (parseBody) {
    _parseSpan(node->body, node);
    }
    }


    // Scans `span` for '@' sigils and parses the node following each one.
    // Every node found is heap-allocated and appended, in document order, to
    // parent's child list (parent->children, chained through ->next).
    static void _parseSpan(VexSpan span, VexNode* parent) {
        StringParser p = { span.begin, span.end };
        VexNode* tail = nullptr; // most recently appended child, if any

        for (;;) {
            if (!_acceptSigil(p)) {
                return;
            }

            VexNode* const fresh = new VexNode();
            if (tail) {
                tail->next = fresh;
            }
            else {
                parent->children = fresh; // first child
            }
            tail = fresh;

            // the sigil was just consumed; back up one so the span includes it
            fresh->span.begin = p.pos - 1;

            _parseNode(fresh, p);
        }
    }

    // Builds the node tree for the document in [docStart, docEnd).
    // Returns a newly allocated root node whose span and body cover the whole
    // document, whose tag is empty, and whose children are the nodes found by
    // scanning the document for sigils.
    VexNode* vexCreate(const char* docStart, const char* docEnd) {
        VexNode* root = new VexNode();

        root->body = { docStart, docEnd };
        root->span = root->body;
        root->next = nullptr;
        root->children = nullptr;
        root->tag = { docStart, docStart };

        _parseSpan(root->body, root);
        return root;
    }