Skip to content

Instantly share code, notes, and snippets.

@mrcrilly
Created September 4, 2024 01:57
Show Gist options
  • Select an option

  • Save mrcrilly/65d48c9f4a6db204cf25628d82f8d3aa to your computer and use it in GitHub Desktop.

Select an option

Save mrcrilly/65d48c9f4a6db204cf25628d82f8d3aa to your computer and use it in GitHub Desktop.

Revisions

  1. mrcrilly created this gist Sep 4, 2024.
    204 changes: 204 additions & 0 deletions ml_lexer.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,204 @@
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <ctype.h>

    /* Number of entries in the global `tokens` table. */
    #define MAX_TOKENS 1000
    /* Size in bytes of each token's `value` buffer (including the NUL terminator). */
    #define MAX_TOKEN_LENGTH 100

    typedef enum {
    TOKEN_IDENTIFIER,
    TOKEN_NUMBER,
    TOKEN_ASSIGN,
    TOKEN_PRINT,
    TOKEN_RETURN,
    TOKEN_MULTIPLY,
    TOKEN_ADD,
    TOKEN_SUBTRACT,
    TOKEN_DIVIDE,
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_FUNCTION,
    TOKEN_EOF
    } TokenType;

    /* A single lexical token: its kind plus the text it was built from. */
    typedef struct {
    TokenType type;               /* which kind of token this is */
    char value[MAX_TOKEN_LENGTH]; /* NUL-terminated token text */
    } Token;

    /* Token table filled by tokenize() and read by the parse* functions. */
    Token tokens[MAX_TOKENS];
    /* Index of the next slot tokenize() writes; after tokenize() it is the TOKEN_EOF slot. */
    int tokenIndex = 0;
    /* Parser cursor: index into `tokens` of the token currently being examined. */
    int currentToken = 0;

    void tokenize(const char *input) {
    const char *p = input;
    while (*p) {
    if (isspace(*p)) {
    p++;
    } else if (isdigit(*p)) {
    tokens[tokenIndex].type = TOKEN_NUMBER;
    int len = 0;
    while (isdigit(*p)) {
    tokens[tokenIndex].value[len++] = *p++;
    }
    tokens[tokenIndex].value[len] = '\0';
    tokenIndex++;
    } else if (isalpha(*p)) {
    int len = 0;
    while (isalnum(*p)) {
    tokens[tokenIndex].value[len++] = *p++;
    }
    tokens[tokenIndex].value[len] = '\0';
    if (strcmp(tokens[tokenIndex].value, "print") == 0) {
    tokens[tokenIndex].type = TOKEN_PRINT;
    } else if (strcmp(tokens[tokenIndex].value, "return") == 0) {
    tokens[tokenIndex].type = TOKEN_RETURN;
    } else if (strcmp(tokens[tokenIndex].value, "function") == 0) {
    tokens[tokenIndex].type = TOKEN_FUNCTION;
    } else {
    tokens[tokenIndex].type = TOKEN_IDENTIFIER;
    }
    tokenIndex++;
    } else if (*p == '<' && *(p+1) == '-') {
    tokens[tokenIndex].type = TOKEN_ASSIGN;
    strcpy(tokens[tokenIndex].value, "<-");
    tokenIndex++;
    p += 2;
    } else if (*p == '+') {
    tokens[tokenIndex].type = TOKEN_ADD;
    strcpy(tokens[tokenIndex].value, "+");
    tokenIndex++;
    p++;
    } else if (*p == '-') {
    tokens[tokenIndex].type = TOKEN_SUBTRACT;
    strcpy(tokens[tokenIndex].value, "-");
    tokenIndex++;
    p++;
    } else if (*p == '*') {
    tokens[tokenIndex].type = TOKEN_MULTIPLY;
    strcpy(tokens[tokenIndex].value, "*");
    tokenIndex++;
    p++;
    } else if (*p == '/') {
    tokens[tokenIndex].type = TOKEN_DIVIDE;
    strcpy(tokens[tokenIndex].value, "/");
    tokenIndex++;
    p++;
    } else if (*p == '(') {
    tokens[tokenIndex].type = TOKEN_LPAREN;
    strcpy(tokens[tokenIndex].value, "(");
    tokenIndex++;
    p++;
    } else if (*p == ')') {
    tokens[tokenIndex].type = TOKEN_RPAREN;
    strcpy(tokens[tokenIndex].value, ")");
    tokenIndex++;
    p++;
    } else {
    printf("Unexpected character: %c\n", *p);
    exit(1);
    }
    }
    tokens[tokenIndex].type = TOKEN_EOF;
    }

    #include <stdio.h>

    /*
     * Forward declarations for the recursive-descent parser. Every parse
     * function is declared here because they call one another before their
     * definitions appear (e.g. parseExpression calls parseTerm, and
     * parseFactor calls back into parseExpression).
     */
    void parseProgram(void);
    void parseStatement(void);
    void parseExpression(void);
    void parseTerm(void);
    void parseFactor(void);

    /*
     * Parse statements one after another, starting at `currentToken`,
     * until the TOKEN_EOF entry is reached.
     */
    void parseProgram() {
        for (;;) {
            if (tokens[currentToken].type == TOKEN_EOF) {
                return;
            }
            parseStatement();
        }
    }

    /*
     * Parse one statement at `currentToken` and print its C translation to
     * stdout:
     *   identifier <- expr   ->  identifier = expr;
     *   print expr           ->  printf("%d\n", expr);
     *   return expr          ->  return expr;
     *
     * Prints a message and calls exit(1) if the statement starts with any
     * other token, or if an identifier is not followed by "<-".
     */
    void parseStatement() {
        switch (tokens[currentToken].type) {
        case TOKEN_IDENTIFIER: {
            const char *name = tokens[currentToken].value;
            currentToken++; // consume identifier
            /* Check for "<-" before emitting anything, so a malformed
             * statement never leaves a dangling "name = " in the output. */
            if (tokens[currentToken].type != TOKEN_ASSIGN) {
                printf("Expected '<-' after identifier '%s'.\n", name);
                exit(1);
            }
            currentToken++; // consume <-
            printf("%s = ", name);
            parseExpression();
            printf(";\n");
            break;
        }
        case TOKEN_PRINT:
            currentToken++; // consume print
            printf("printf(\"%%d\\n\", ");
            parseExpression();
            printf(");\n");
            break;
        case TOKEN_RETURN:
            currentToken++; // consume return
            printf("return ");
            parseExpression();
            printf(";\n");
            break;
        default:
            printf("Unexpected statement.\n");
            exit(1);
        }
    }

    /*
     * Parse a sequence of terms joined by '+' or '-', printing each operator
     * (surrounded by spaces) between the terms' own output.
     */
    void parseExpression() {
        parseTerm();
        for (;;) {
            switch (tokens[currentToken].type) {
            case TOKEN_ADD:
                printf(" + ");
                break;
            case TOKEN_SUBTRACT:
                printf(" - ");
                break;
            default:
                /* Not an additive operator: the expression ends here. */
                return;
            }
            currentToken++; // step past the operator
            parseTerm();
        }
    }

    /*
     * Parse a sequence of factors joined by '*' or '/', printing each
     * operator (surrounded by spaces) between the factors' own output.
     */
    void parseTerm() {
        parseFactor();
        for (;;) {
            switch (tokens[currentToken].type) {
            case TOKEN_MULTIPLY:
                printf(" * ");
                break;
            case TOKEN_DIVIDE:
                printf(" / ");
                break;
            default:
                /* Not a multiplicative operator: the term ends here. */
                return;
            }
            currentToken++; // step past the operator
            parseFactor();
        }
    }

    void parseFactor() {
    if (tokens[currentToken].type == TOKEN_NUMBER) {
    printf("%s", tokens[currentToken].value);
    currentToken++; // consume number
    } else if (tokens[currentToken].type == TOKEN_IDENTIFIER) {
    printf("%s", tokens[currentToken].value);
    currentToken++; // consume identifier
    } else if (tokens[currentToken].type == TOKEN_LPAREN) {
    currentToken++; // consume (
    printf("(");
    parseExpression();
    if (tokens[currentToken].type == TOKEN_RPAREN) {
    printf(")");
    currentToken++; // consume )
    } else {
    printf("Expected closing parenthesis.\n");
    exit(1);
    }
    } else {
    printf("Unexpected factor.\n");
    exit(1);
    }
    }


    /*
     * Demo driver: tokenizes a small hard-coded program and runs the parser
     * over it, which prints the translated statements to stdout.
     */
    int main() {
        static const char sampleSource[] =
            "x <- 8\n"
            "y <- 3\n"
            "print x * y\n";

        tokenize(sampleSource);
        parseProgram();
        return 0;
    }