mrcrilly · September 4, 2024 01:57 · Sep 4, 2024
diff --git a/ml_lexer.c b/ml_lexer.c
@@ -0,0 +1,204 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Number of elements in the global token array; the TOKEN_EOF marker
+ * written by tokenize() occupies one of them. */
+#define MAX_TOKENS 1000
+/* Size in bytes of each token's text buffer, terminating NUL included. */
+#define MAX_TOKEN_LENGTH 100
+
+/* Kinds of token produced by tokenize(). */
+typedef enum {
+    TOKEN_IDENTIFIER, /* alphanumeric word starting with a letter, not a keyword */
+    TOKEN_NUMBER,     /* run of decimal digits */
+    TOKEN_ASSIGN,     /* the two-character operator "<-" */
+    TOKEN_PRINT,      /* keyword "print" */
+    TOKEN_RETURN,     /* keyword "return" */
+    TOKEN_MULTIPLY,   /* "*" */
+    TOKEN_ADD,        /* "+" */
+    TOKEN_SUBTRACT,   /* "-" */
+    TOKEN_DIVIDE,     /* "/" */
+    TOKEN_LPAREN,     /* "(" */
+    TOKEN_RPAREN,     /* ")" */
+    TOKEN_FUNCTION,   /* keyword "function"; recognised by the lexer only */
+    TOKEN_EOF         /* end-of-input marker stored after the last real token */
+} TokenType;
+
+/* One lexical token: its kind plus the NUL-terminated source text. */
+typedef struct {
+    TokenType type;                /* token category */
+    char value[MAX_TOKEN_LENGTH];  /* token text as it appeared in the input */
+} Token;
+
+/* Token stream shared by the lexer and the parser. */
+Token tokens[MAX_TOKENS];
+/* Index of the next free slot in tokens[]; advanced by tokenize(). */
+int tokenIndex = 0;
+/* Parser cursor: index of the token currently being examined. */
+int currentToken = 0;
+
+/* Print a fatal lexer diagnostic on stderr and exit with status 1. */
+static void lexerFail(const char *message) {
+    fprintf(stderr, "%s\n", message);
+    exit(1);
+}
+
+/*
+ * Append one token to tokens[] and advance tokenIndex.
+ *
+ * text   - start of the token's characters (need not be NUL-terminated)
+ * length - number of characters to copy
+ *
+ * Exits with an error instead of writing past the token array or past the
+ * per-token text buffer.
+ */
+static void pushToken(TokenType type, const char *text, size_t length) {
+    /* Keep one slot in reserve so the TOKEN_EOF marker always fits. */
+    if (tokenIndex < 0 || tokenIndex >= MAX_TOKENS - 1) {
+        lexerFail("Too many tokens.");
+    }
+    if (length >= MAX_TOKEN_LENGTH) {
+        lexerFail("Token too long.");
+    }
+    tokens[tokenIndex].type = type;
+    memcpy(tokens[tokenIndex].value, text, length);
+    tokens[tokenIndex].value[length] = '\0';
+    tokenIndex++;
+}
+
+/*
+ * Split `input` into tokens, appending them to the global tokens[] array
+ * starting at tokenIndex, and finish with a TOKEN_EOF marker (tokenIndex is
+ * left pointing at that marker).
+ *
+ * Recognised: whitespace (skipped), decimal numbers, identifiers and the
+ * keywords print/return/function, "<-", and the single characters
+ * + - * / ( ).  Any other character is reported on stderr and the process
+ * exits with status 1, as does overflowing the token array or a token's
+ * text buffer.
+ */
+void tokenize(const char *input) {
+    const char *p = input;
+    while (*p) {
+        /* <ctype.h> classifiers are only defined for unsigned char values. */
+        unsigned char c = (unsigned char)*p;
+
+        if (isspace(c)) {
+            p++;
+        } else if (isdigit(c)) {
+            const char *start = p;
+            while (isdigit((unsigned char)*p)) {
+                p++;
+            }
+            pushToken(TOKEN_NUMBER, start, (size_t)(p - start));
+        } else if (isalpha(c)) {
+            const char *start = p;
+            while (isalnum((unsigned char)*p)) {
+                p++;
+            }
+            pushToken(TOKEN_IDENTIFIER, start, (size_t)(p - start));
+
+            /* Promote the word to a keyword token when it matches one. */
+            Token *word = &tokens[tokenIndex - 1];
+            if (strcmp(word->value, "print") == 0) {
+                word->type = TOKEN_PRINT;
+            } else if (strcmp(word->value, "return") == 0) {
+                word->type = TOKEN_RETURN;
+            } else if (strcmp(word->value, "function") == 0) {
+                word->type = TOKEN_FUNCTION;
+            }
+        } else if (c == '<' && p[1] == '-') {
+            /* Must be tested before the lone '-' case below. */
+            pushToken(TOKEN_ASSIGN, p, 2);
+            p += 2;
+        } else {
+            TokenType type;
+            switch (c) {
+            case '+': type = TOKEN_ADD;      break;
+            case '-': type = TOKEN_SUBTRACT; break;
+            case '*': type = TOKEN_MULTIPLY; break;
+            case '/': type = TOKEN_DIVIDE;   break;
+            case '(': type = TOKEN_LPAREN;   break;
+            case ')': type = TOKEN_RPAREN;   break;
+            default:
+                fprintf(stderr, "Unexpected character: %c\n", *p);
+                exit(1);
+            }
+            pushToken(type, p, 1);
+            p++;
+        }
+    }
+
+    if (tokenIndex < 0 || tokenIndex >= MAX_TOKENS) {
+        lexerFail("Too many tokens.");
+    }
+    tokens[tokenIndex].type = TOKEN_EOF;
+    tokens[tokenIndex].value[0] = '\0';
+}
+
+#include <stdio.h>
+
+/*
+ * Forward declarations for the recursive-descent parser.  The functions are
+ * mutually recursive (parseFactor calls back into parseExpression), so each
+ * one must be declared before its first use; calling an undeclared function
+ * is not valid in C99 and later.
+ */
+void parseProgram(void);
+void parseStatement(void);
+void parseExpression(void);
+void parseTerm(void);
+void parseFactor(void);
+
+/* Translate statements one after another until the cursor sits on the
+ * TOKEN_EOF marker. */
+void parseProgram() {
+    for (;;) {
+        if (tokens[currentToken].type == TOKEN_EOF) {
+            return;
+        }
+        parseStatement();
+    }
+}
+
+/*
+ * Translate one statement starting at tokens[currentToken] and write the
+ * equivalent C statement to stdout:
+ *
+ *   name <- expr   ->  name = expr;
+ *   print expr     ->  printf("%d\n", expr);
+ *   return expr    ->  return expr;
+ *
+ * Any other leading token, or an identifier that is not followed by "<-",
+ * is reported on stderr and the process exits with status 1.  The check
+ * happens before anything is emitted, so a malformed assignment no longer
+ * leaves a dangling "name = " in the generated output.
+ */
+void parseStatement() {
+    switch (tokens[currentToken].type) {
+    case TOKEN_IDENTIFIER: {
+        const char *target = tokens[currentToken].value;
+        currentToken++; // consume identifier
+        if (tokens[currentToken].type != TOKEN_ASSIGN) {
+            fprintf(stderr, "Expected '<-' after identifier '%s'.\n", target);
+            exit(1);
+        }
+        currentToken++; // consume <-
+        printf("%s = ", target);
+        parseExpression();
+        printf(";\n");
+        break;
+    }
+    case TOKEN_PRINT:
+        currentToken++; // consume print
+        printf("printf(\"%%d\\n\", ");
+        parseExpression();
+        printf(");\n");
+        break;
+    case TOKEN_RETURN:
+        currentToken++; // consume return
+        printf("return ");
+        parseExpression();
+        printf(";\n");
+        break;
+    default:
+        fprintf(stderr, "Unexpected statement.\n");
+        exit(1);
+    }
+}
+
+/*
+ * expression := term { ("+" | "-") term }
+ *
+ * Emits the first term, then for every following additive operator writes
+ * the operator padded with spaces and the term after it.
+ */
+void parseExpression() {
+    parseTerm();
+    for (;;) {
+        switch (tokens[currentToken].type) {
+        case TOKEN_ADD:
+            printf(" + ");
+            break;
+        case TOKEN_SUBTRACT:
+            printf(" - ");
+            break;
+        default:
+            return; // not an additive operator: the expression ends here
+        }
+        currentToken++; // consume + or -
+        parseTerm();
+    }
+}
+
+/*
+ * term := factor { ("*" | "/") factor }
+ *
+ * Emits the first factor, then for every following multiplicative operator
+ * writes the operator padded with spaces and the factor after it.
+ */
+void parseTerm() {
+    parseFactor();
+    for (;;) {
+        switch (tokens[currentToken].type) {
+        case TOKEN_MULTIPLY:
+            printf(" * ");
+            break;
+        case TOKEN_DIVIDE:
+            printf(" / ");
+            break;
+        default:
+            return; // not a multiplicative operator: the term ends here
+        }
+        currentToken++; // consume * or /
+        parseFactor();
+    }
+}
+
+/*
+ * factor := NUMBER | IDENTIFIER | "(" expression ")"
+ *
+ * Writes the factor's C text to stdout and advances the cursor past it.
+ * A missing ")" or any other token is reported on stderr (so the message
+ * does not end up inside the generated code on stdout) and the process
+ * exits with status 1.
+ */
+void parseFactor() {
+    const Token *tok = &tokens[currentToken];
+
+    switch (tok->type) {
+    case TOKEN_NUMBER:
+    case TOKEN_IDENTIFIER:
+        /* Both are copied to the output verbatim. */
+        printf("%s", tok->value);
+        currentToken++; // consume number or identifier
+        break;
+    case TOKEN_LPAREN:
+        currentToken++; // consume (
+        printf("(");
+        parseExpression();
+        if (tokens[currentToken].type != TOKEN_RPAREN) {
+            fprintf(stderr, "Expected closing parenthesis.\n");
+            exit(1);
+        }
+        printf(")");
+        currentToken++; // consume )
+        break;
+    default:
+        fprintf(stderr, "Unexpected factor.\n");
+        exit(1);
+    }
+}
+
+
+/* Demo driver: tokenizes a fixed three-line program and writes the
+ * translated C statements to stdout. */
+int main() {
+    static const char mlProgram[] =
+        "x <- 8\n"
+        "y <- 3\n"
+        "print x * y\n";
+
+    tokenize(mlProgram);
+    parseProgram();
+    return 0;
+}
No results found