Skip to content

Instantly share code, notes, and snippets.

@RuslanUC
Last active March 18, 2025 12:34
Show Gist options
  • Select an option

  • Save RuslanUC/032c3f916adaff0e794e643fa08df1c0 to your computer and use it in GitHub Desktop.

Select an option

Save RuslanUC/032c3f916adaff0e794e643fa08df1c0 to your computer and use it in GitHub Desktop.
JSON parser in C (Warning: it is very simple and probably does not comply with the JSON format described in RFC 8259)
#include "json.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
/**
 * Return the next significant character of the input and advance *pos past it.
 *
 * Outside a string (in_string == false) every byte that is neither
 * alphanumeric nor one of  : . , ' " { } [ ]  is skipped, so whitespace and
 * any other unrecognised byte are silently ignored.
 *
 * Inside a string (in_string == true) nothing is skipped: the byte at *pos is
 * consumed and returned as-is, except that a line feed is not consumed and
 * makes the function return 0.
 *
 * @param data      input buffer (does not need to be NUL-terminated)
 * @param pos       in/out read offset into data; must not be NULL
 * @param size      number of valid bytes in data
 * @param in_string selects the scanning mode described above
 * @return the character found, or 0 when no further character is available
 */
char next_subtoken(const char* data, size_t* pos, const size_t size, const bool in_string) {
    while (*pos < size) {
        const char c = data[*pos];

        if (in_string) {
            if (c == '\n') {
                return 0;
            }
            (*pos)++;
            return c;
        }

        bool significant;
        switch (c) {
        case ':': case '.': case ',':
        case '\'': case '"':
        case '{': case '}':
        case '[': case ']':
            significant = true;
            break;
        default:
            /* <ctype.h> classifiers take an unsigned char value (or EOF);
             * passing a negative plain char is undefined behaviour. */
            significant = isalnum((unsigned char)c) != 0;
            break;
        }

        (*pos)++;
        if (significant) {
            return c;
        }
    }
    return 0;
}
JsonValue parse_whatever(const char* data, size_t* pos, const size_t size) {
char subtoken;
JsonValue ret = { .type = JSON_VALUE_INVALID };
while(*pos < size && (subtoken = next_subtoken(data, pos, size, false)) > 0) {
--(*pos);
switch(subtoken) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
ret.type = JSON_VALUE_NUMBER;
ret.number = json_parse_number(data, size, pos);
return ret;
}
case '{': {
ret.type = JSON_VALUE_OBJECT;
ret.object = json_parse_object(data, size, pos);
return ret;
}
case '[': {
ret.type = JSON_VALUE_ARRAY;
ret.array = json_parse_array(data, size, pos);
return ret;
}
case '\'':
case '"': {
ret.type = JSON_VALUE_STRING;
ret.string = json_parse_string(data, size, pos);
return ret;
}
case 'n': {
if(size - *pos >= 4 && memcmp(data + *pos, "null", 4) == 0) {
ret.type = JSON_VALUE_NULL;
ret.null = NULL;
*pos += 4;
return ret;
}
}
case 't': {
if(size - *pos >= 4 && memcmp(data + *pos, "true", 4) == 0) {
ret.type = JSON_VALUE_BOOL;
ret.boolean.value = true;
*pos += 4;
return ret;
}
}
case 'f': {
if(size - *pos >= 5 && memcmp(data + *pos, "false", 5) == 0) {
ret.type = JSON_VALUE_BOOL;
ret.boolean.value = false;
*pos += 5;
return ret;
}
}
default: {
fprintf(stderr, "parse_whatever() failed at pos %zu, unexpected character.\n", *pos);
return ret;
}
}
}
return ret;
}
JsonString json_parse_string(const char* data, const size_t size, size_t* pos) {
char subtoken;
size_t lpos = 0;
if(pos == NULL) pos = &lpos;
const char quote = next_subtoken(data, pos, size, false);
const size_t str_start = *pos;
while(*pos < size && (subtoken = next_subtoken(data, pos, size, true)) > 0) {
if(subtoken == quote) {
const size_t str_size = *pos - str_start - 1;
char* string = malloc(str_size + 1);
memcpy(string, data + str_start, str_size);
string[str_size] = '\0';
return (JsonString){
.size = str_size,
.value = string,
};
}
}
// TODO: unexpected eof
fprintf(stderr, "json_parse_string() failed at pos %zu, unexpected eof.\n", *pos);
return (JsonString){
.size = 0,
.value = NULL,
};
}
JsonNumber json_parse_number(const char* data, const size_t size, size_t* pos) {
char subtoken;
size_t lpos = 0;
if(pos == NULL) pos = &lpos;
bool is_fractional = false;
const size_t start_pos = *pos;
JsonNumber ret = (JsonNumber){
.type = JSON_NUMBER_INT,
.int_ = 0,
};
while(*pos < size && (subtoken = next_subtoken(data, pos, size, false)) > 0) {
if(subtoken == '.') {
if(is_fractional) {
fprintf(stderr, "json_parse_number() failed at pos %zu, fractional number has multiple dots.\n", *pos);
return ret; // TODO: invalid number
}
is_fractional = true;
continue;
}
if(!isdigit(subtoken)) {
--(*pos);
if(is_fractional) {
ret.type = JSON_NUMBER_DOUBLE;
ret.double_.value = strtod(data + start_pos, NULL);
} else {
ret.type = JSON_NUMBER_INT;
ret.int_.value = strtoll(data + start_pos, NULL, 10);
}
return ret;
}
}
// TODO: unexpected eof
fprintf(stderr, "json_parse_number() failed at pos %zu, unexpected eof.\n", *pos);
return ret;
}
/**
 * Parse the literal true or false located exactly at *pos (nothing is
 * skipped beforehand).
 *
 * @param data input buffer
 * @param size number of valid bytes in data
 * @param pos  in/out read offset, or NULL to start at offset 0; advanced past
 *             the literal only when it matched.
 * @return the parsed boolean. Every failure prints a message to stderr and
 *         also returns false, so a failure cannot be told apart from a
 *         parsed "false" by the return value alone.
 */
JsonBool json_parse_bool(const char* data, const size_t size, size_t* pos) {
    size_t local_pos = 0;
    if (pos == NULL) {
        pos = &local_pos;
    }

    const size_t remaining = size - *pos;
    /* The first byte only counts as a candidate when the whole literal fits. */
    const bool may_be_true = remaining >= 4 && data[*pos] == 't';
    const bool may_be_false = remaining >= 5 && data[*pos] == 'f';

    if (!may_be_true && !may_be_false) {
        fprintf(stderr, "json_parse_bool() failed at pos %zu, invalid first character or unexpected eof.\n", *pos);
        return (JsonBool){ .value = false }; // TODO: invalid bool
    }

    const char* literal;
    size_t literal_len;
    bool parsed;
    switch (data[*pos]) {
    case 't':
        literal = "true";
        literal_len = 4;
        parsed = true;
        break;
    case 'f':
        literal = "false";
        literal_len = 5;
        parsed = false;
        break;
    default:
        fprintf(stderr, "json_parse_bool() failed at pos %zu, this should be unreachable.\n", *pos);
        return (JsonBool){ .value = false }; // TODO: invalid bool
    }

    if (memcmp(data + *pos, literal, literal_len) != 0) {
        fprintf(stderr, "json_parse_bool() failed at pos %zu, invalid bool.\n", *pos);
        return (JsonBool){ .value = false }; // TODO: invalid bool
    }

    *pos += literal_len;
    return (JsonBool){ .value = parsed };
}
JsonObject json_parse_object(const char* data, const size_t size, size_t* pos) {
char subtoken;
size_t lpos = 0;
if(pos == NULL) pos = &lpos;
JsonObject ret = {0};
size_t capacity = 0;
while(*pos < size && (subtoken = next_subtoken(data, pos, size, false)) > 0) {
if(subtoken == '{')
break;
fprintf(stderr, "json_parse_object() failed at pos %zu, expected \"{\", got \"%c\".\n", *pos, subtoken);
goto error;
}
while(*pos < size && (subtoken = next_subtoken(data, pos, size, false)) > 0) {
if(subtoken == '}') {
if(ret.items)
ret.items = realloc(ret.items, ret.size * sizeof(JsonObjectItem));
return ret;
}
if(ret.size > 0) {
if(subtoken != ',') {
fprintf(stderr, "json_parse_object() failed at pos %zu, expected \",\", got \"%c\".\n", *pos, subtoken);
goto error;
}
subtoken = next_subtoken(data, pos, size, false);
if(subtoken == '\0') {
fprintf(stderr, "json_parse_object() failed at pos %zu, unexpected eof.\n", *pos);
goto error;
}
}
--(*pos);
const JsonValue key = parse_whatever(data, pos, size);
if(key.type != JSON_VALUE_STRING) {
fprintf(stderr, "json_parse_object() failed at pos %zu, expected string, got type %d.\n", *pos, key.type);
goto error;
}
subtoken = next_subtoken(data, pos, size, false);
if(subtoken != ':') {
fprintf(stderr, "json_parse_object() failed at pos %zu, expected \":\", got \"%c\".\n", *pos, subtoken);
goto error;
}
const JsonValue value = parse_whatever(data, pos, size);
if(value.type == JSON_VALUE_INVALID) {
fprintf(stderr, "json_parse_object() failed at pos %zu, invalid value.\n", *pos);
goto error;
}
if(ret.size >= capacity) {
if(capacity == 0)
capacity = 64;
else
capacity *= 2;
ret.items = realloc(ret.items, capacity * sizeof(JsonObjectItem));
}
ret.items[ret.size++] = (JsonObjectItem){
.key = key.string,
.value = value,
};
}
error:
// TODO: return error "unexpected eof"
json_free_object(ret);
ret.items = NULL;
ret.size = 0;
return ret;
}
JsonArray json_parse_array(const char* data, const size_t size, size_t* pos) {
char subtoken;
size_t lpos = 0;
if(pos == NULL) pos = &lpos;
(*pos)++;
JsonArray ret = {0};
size_t capacity = 0;
while(*pos < size && (subtoken = next_subtoken(data, pos, size, false)) > 0) {
if(subtoken == ']') {
if(ret.items)
ret.items = realloc(ret.items, ret.size * sizeof(JsonValue));
return ret;
}
if(ret.size > 0) {
if(subtoken != ',')
goto error;
subtoken = next_subtoken(data, pos, size, false);
if(subtoken == '\0')
goto error;
}
--(*pos);
const JsonValue value = parse_whatever(data, pos, size);
if(value.type == JSON_VALUE_INVALID)
goto error;
if(ret.size >= capacity) {
if(capacity == 0)
capacity = 64;
else
capacity *= 2;
ret.items = realloc(ret.items, capacity * sizeof(JsonValue));
}
ret.items[ret.size++] = value;
}
error:
// TODO: return error "unexpected eof"
fprintf(stderr, "json_parse_array() failed at pos %zu, unexpected eof.\n", *pos);
json_free_array(ret);
ret.items = NULL;
ret.size = 0;
return ret;
}
/**
 * Release the heap memory owned by a value of any type.
 *
 * Strings, objects and arrays are forwarded to their json_free_*() function;
 * every other type owns no heap memory and is ignored.
 *
 * @param value the value to release; it must not be used afterwards
 */
void free_whatever(const JsonValue value) {
    switch (value.type) {
    case JSON_VALUE_STRING:
        json_free_string(value.string);
        break;
    case JSON_VALUE_OBJECT:
        json_free_object(value.object);
        break;
    case JSON_VALUE_ARRAY:
        json_free_array(value.array);
        break;
    default:
        /* A label must be followed by a statement before C23, so the empty
         * default needs an explicit break. */
        break;
    }
}
/**
 * Release the character buffer of a JsonString.
 *
 * @param string the string to release; a NULL value is accepted
 */
void json_free_string(const JsonString string) {
    /* free(NULL) is a no-op, so no explicit NULL check is required. */
    free(string.value);
}
void json_free_object(const JsonObject object) {
for(size_t i = 0; i < object.size; ++i) {
json_free_string(object.items[i].key);
free_whatever(object.items[i].value);
}
if(object.items != NULL)
free(object.items);
}
/**
 * Release an array: every element (recursively, through free_whatever()) and
 * finally the element array itself.
 *
 * @param array the array to release; items may be NULL when size is 0
 */
void json_free_array(const JsonArray array) {
    size_t idx = 0;
    while (idx < array.size) {
        free_whatever(array.items[idx]);
        idx += 1;
    }
    /* free(NULL) is a no-op, so an array without items needs no special case. */
    free(array.items);
}
/**
 * Make sure the output buffer has room for min_size more bytes at offset pos.
 *
 * The buffer is left alone while pos + min_size is strictly below *cap.
 * Otherwise it is reallocated: to min_size * 2 when the capacity is unknown
 * (*cap == 0), else to 1.5 * (*cap + min_size). The new capacity is never
 * smaller than pos + min_size + 1, so a caller that passes an unknown
 * capacity together with a non-zero pos cannot end up with a buffer that is
 * too small for the write that follows.
 *
 * @param out      in/out buffer pointer; *out may be NULL and may change
 * @param pos      offset at which the caller is about to write
 * @param cap      in/out capacity of *out in bytes; 0 means "unknown/none"
 * @param min_size number of bytes the caller is about to write
 *
 * The function returns nothing and callers write to *out unconditionally, so
 * an allocation failure is reported on stderr and terminates the program via
 * abort() rather than continuing with an invalid buffer.
 */
void out_buf_realloc_maybe(char** out, size_t pos, size_t* cap, size_t min_size) {
    const size_t needed = pos + min_size;
    if (*cap != 0 && needed < *cap) {
        return;
    }

    size_t new_cap;
    if (*cap == 0) {
        new_cap = min_size * 2;
    } else {
        /* Integer form of (cap + min_size) * 1.5. */
        const size_t base = *cap + min_size;
        new_cap = base + base / 2;
    }
    if (new_cap <= needed) {
        new_cap = needed + 1;
    }

    char* grown = realloc(*out, new_cap);
    if (grown == NULL) {
        fprintf(stderr, "out_buf_realloc_maybe() failed, out of memory (requested %zu bytes).\n", new_cap);
        abort();
    }
    *out = grown;
    *cap = new_cap;
}
/**
 * Serialise a value of any type into the output buffer at out_pos.
 *
 * null is written here; every other type is delegated to its
 * json_dump_*() function. A value of unknown type writes nothing and only
 * prints a warning.
 *
 * @param value   the value to serialise
 * @param out     in/out buffer pointer; may be reallocated
 * @param out_pos offset in *out at which writing starts
 * @param out_cap in/out buffer capacity, or NULL to start from a capacity of 0
 * @return the number of bytes written
 */
size_t json_dump_whatever(const JsonValue value, char** out, size_t out_pos, size_t* out_cap) {
    size_t fallback_cap = 0;
    if (out_cap == NULL) {
        out_cap = &fallback_cap;
    }

    size_t written = 0;
    switch (value.type) {
    case JSON_VALUE_STRING:
        written = json_dump_string(value.string, out, out_pos, out_cap);
        break;
    case JSON_VALUE_NUMBER:
        written = json_dump_number(value.number, out, out_pos, out_cap);
        break;
    case JSON_VALUE_BOOL:
        written = json_dump_bool(value.boolean, out, out_pos, out_cap);
        break;
    case JSON_VALUE_OBJECT:
        written = json_dump_object(value.object, out, out_pos, out_cap);
        break;
    case JSON_VALUE_ARRAY:
        written = json_dump_array(value.array, out, out_pos, out_cap);
        break;
    case JSON_VALUE_NULL:
        out_buf_realloc_maybe(out, out_pos, out_cap, 4);
        memcpy(*out + out_pos, "null", 4);
        written = 4;
        break;
    default:
        fprintf(stderr, "json_dump_whatever() warning: got invalid value.\n");
        break;
    }
    return written;
}
/**
 * Write a string, wrapped in double quotes, into the output buffer at out_pos.
 *
 * The bytes are copied as-is: nothing is escaped and no terminating NUL is
 * written. A string whose value is NULL is written as an empty string ("").
 *
 * @param string  the string to write
 * @param out     in/out buffer pointer; may be reallocated
 * @param out_pos offset in *out at which writing starts
 * @param out_cap in/out buffer capacity, or NULL to start from a capacity of 0
 * @return the number of bytes written (string length plus the two quotes)
 */
size_t json_dump_string(const JsonString string, char** out, size_t out_pos, size_t* out_cap) {
    size_t lcap = 0;
    if (out_cap == NULL) out_cap = &lcap;

    /* Passing a NULL source to memcpy() is undefined even for length 0. */
    const size_t length = (string.value != NULL) ? string.size : 0;
    const size_t start_pos = out_pos;

    out_buf_realloc_maybe(out, out_pos, out_cap, length + 2);
    (*out)[out_pos++] = '"';
    if (length > 0) {
        memcpy(*out + out_pos, string.value, length);
        out_pos += length;
    }
    (*out)[out_pos++] = '"';
    return out_pos - start_pos;
}
/**
 * Write a number into the output buffer at out_pos.
 *
 * Integers are formatted with "%lld", doubles with "%f". The text is
 * measured first and then formatted straight into the output buffer, so
 * arbitrarily long "%f" output (very large doubles) is never truncated.
 * snprintf() also stores a NUL right after the digits; that byte is not part
 * of the returned count and is overwritten by whatever is written next.
 *
 * @param number  the number to write
 * @param out     in/out buffer pointer; may be reallocated
 * @param out_pos offset in *out at which writing starts
 * @param out_cap in/out buffer capacity, or NULL to start from a capacity of 0
 * @return the number of bytes written, or 0 if formatting failed
 */
size_t json_dump_number(const JsonNumber number, char** out, size_t out_pos, size_t* out_cap) {
    size_t lcap = 0;
    if (out_cap == NULL) out_cap = &lcap;

    const bool is_int = number.type == JSON_NUMBER_INT;
    /* "%lld" expects long long; int64_t may be a different type (e.g. long). */
    const long long int_value = is_int ? (long long)number.int_.value : 0;
    const double double_value = is_int ? 0.0 : number.double_.value;

    /* First pass: a NULL buffer of size 0 only computes the required length. */
    const int length = is_int
        ? snprintf(NULL, 0, "%lld", int_value)
        : snprintf(NULL, 0, "%f", double_value);
    if (length < 0) {
        fprintf(stderr, "json_dump_number() failed, could not format number.\n");
        return 0;
    }

    const size_t str_len = (size_t)length;
    out_buf_realloc_maybe(out, out_pos, out_cap, str_len + 1);
    if (is_int) {
        snprintf(*out + out_pos, str_len + 1, "%lld", int_value);
    } else {
        snprintf(*out + out_pos, str_len + 1, "%f", double_value);
    }
    return str_len;
}
/**
 * Write the literal true or false into the output buffer at out_pos.
 *
 * @param boolean the value to write
 * @param out     in/out buffer pointer; may be reallocated
 * @param out_pos offset in *out at which writing starts
 * @param out_cap in/out buffer capacity, or NULL to start from a capacity of 0
 * @return the number of bytes written: 4 for true, 5 for false
 */
size_t json_dump_bool(const JsonBool boolean, char** out, size_t out_pos, size_t* out_cap) {
    size_t fallback_cap = 0;
    if (out_cap == NULL) {
        out_cap = &fallback_cap;
    }

    const char* literal = boolean.value ? "true" : "false";
    const size_t literal_len = boolean.value ? 4 : 5;

    out_buf_realloc_maybe(out, out_pos, out_cap, literal_len);
    memcpy(*out + out_pos, literal, literal_len);
    return literal_len;
}
size_t json_dump_object(const JsonObject object, char** out, size_t out_pos, size_t* out_cap) {
size_t lcap = 0;
if(out_cap == NULL) out_cap = &lcap;
if(object.size == 0) {
out_buf_realloc_maybe(out, out_pos, out_cap, 2);
memcpy(*out + out_pos, "{}", 2);
return 2;
}
const size_t start_pos = out_pos;
out_buf_realloc_maybe(out, out_pos, out_cap, 1);
(*out)[out_pos++] = '{';
for(size_t i = 0; i < object.size; ++i) {
if(i > 0) {
out_buf_realloc_maybe(out, out_pos, out_cap, 1);
(*out)[out_pos++] = ',';
}
out_pos += json_dump_string(object.items[i].key, out, out_pos, out_cap);
out_buf_realloc_maybe(out, out_pos, out_cap, 1);
(*out)[out_pos++] = ':';
out_pos += json_dump_whatever(object.items[i].value, out, out_pos, out_cap);
}
out_buf_realloc_maybe(out, out_pos, out_cap, 1);
(*out)[out_pos++] = '}';
return out_pos - start_pos;
}
/**
 * Write an array as [value,...] into the output buffer at out_pos, without
 * any whitespace. Elements go through json_dump_whatever().
 *
 * @param array   the array to write
 * @param out     in/out buffer pointer; may be reallocated
 * @param out_pos offset in *out at which writing starts
 * @param out_cap in/out buffer capacity, or NULL to start from a capacity of 0
 * @return the number of bytes written
 */
size_t json_dump_array(const JsonArray array, char** out, size_t out_pos, size_t* out_cap) {
    size_t fallback_cap = 0;
    if (out_cap == NULL) {
        out_cap = &fallback_cap;
    }

    /* An empty array is emitted in one go. */
    if (array.size == 0) {
        out_buf_realloc_maybe(out, out_pos, out_cap, 2);
        memcpy(*out + out_pos, "[]", 2);
        return 2;
    }

    size_t cursor = out_pos;

    out_buf_realloc_maybe(out, cursor, out_cap, 1);
    (*out)[cursor] = '[';
    cursor += 1;

    size_t idx = 0;
    while (idx < array.size) {
        /* Separator before every element except the first. */
        if (idx != 0) {
            out_buf_realloc_maybe(out, cursor, out_cap, 1);
            (*out)[cursor] = ',';
            cursor += 1;
        }
        cursor += json_dump_whatever(array.items[idx], out, cursor, out_cap);
        idx += 1;
    }

    out_buf_realloc_maybe(out, cursor, out_cap, 1);
    (*out)[cursor] = ']';
    cursor += 1;

    return cursor - out_pos;
}
#ifdef JSON_TEST_FUNC
#include <assert.h>
/*
 * Self-test, only compiled when JSON_TEST_FUNC is defined.
 * Parses one object whose single key "a" holds an array with one element of
 * every supported kind, checks the resulting tree with assert(), and frees it.
 * The dump functions are not exercised here.
 */
void json_simple_test() {
const char* json_string = "{\"a\": [123, 123.456, \"some string\", null, true, false, [1, 2], {\"test\": \"asd\"}]}";
const size_t json_len = strlen(json_string);
// pos == NULL: parsing starts at offset 0
JsonObject obj = json_parse_object(json_string, json_len, NULL);
// root object: exactly one item, key "a", value is an array of 8 elements
assert(obj.size == 1);
assert(obj.items != NULL);
assert(obj.items[0].key.size == 1);
assert(strcmp(obj.items[0].key.value, "a") == 0);
assert(obj.items[0].value.type == JSON_VALUE_ARRAY);
assert(obj.items[0].value.array.size == 8);
assert(obj.items[0].value.array.items != NULL);
// [0] integer 123
assert(obj.items[0].value.array.items[0].type == JSON_VALUE_NUMBER);
assert(obj.items[0].value.array.items[0].number.type == JSON_NUMBER_INT);
assert(obj.items[0].value.array.items[0].number.int_.value == 123);
// [1] double 123.456
assert(obj.items[0].value.array.items[1].type == JSON_VALUE_NUMBER);
assert(obj.items[0].value.array.items[1].number.type == JSON_NUMBER_DOUBLE);
assert(obj.items[0].value.array.items[1].number.double_.value == 123.456);
// [2] string "some string"
assert(obj.items[0].value.array.items[2].type == JSON_VALUE_STRING);
assert(obj.items[0].value.array.items[2].string.size == 11);
assert(strcmp(obj.items[0].value.array.items[2].string.value, "some string") == 0);
// [3] null
assert(obj.items[0].value.array.items[3].type == JSON_VALUE_NULL);
assert(obj.items[0].value.array.items[3].null == NULL);
// [4] true, [5] false
assert(obj.items[0].value.array.items[4].type == JSON_VALUE_BOOL);
assert(obj.items[0].value.array.items[4].boolean.value == true);
assert(obj.items[0].value.array.items[5].type == JSON_VALUE_BOOL);
assert(obj.items[0].value.array.items[5].boolean.value == false);
// [6] nested array [1, 2]
assert(obj.items[0].value.array.items[6].type == JSON_VALUE_ARRAY);
assert(obj.items[0].value.array.items[6].array.size == 2);
assert(obj.items[0].value.array.items[6].array.items[0].type == JSON_VALUE_NUMBER);
assert(obj.items[0].value.array.items[6].array.items[0].number.type == JSON_NUMBER_INT);
assert(obj.items[0].value.array.items[6].array.items[0].number.int_.value == 1);
assert(obj.items[0].value.array.items[6].array.items[1].type == JSON_VALUE_NUMBER);
assert(obj.items[0].value.array.items[6].array.items[1].number.type == JSON_NUMBER_INT);
assert(obj.items[0].value.array.items[6].array.items[1].number.int_.value == 2);
// [7] nested object {"test": "asd"}
assert(obj.items[0].value.array.items[7].type == JSON_VALUE_OBJECT);
assert(obj.items[0].value.array.items[7].object.size == 1);
assert(obj.items[0].value.array.items[7].object.items[0].key.size == 4);
assert(strcmp(obj.items[0].value.array.items[7].object.items[0].key.value, "test") == 0);
assert(obj.items[0].value.array.items[7].object.items[0].value.type == JSON_VALUE_STRING);
assert(obj.items[0].value.array.items[7].object.items[0].value.string.size == 3);
assert(strcmp(obj.items[0].value.array.items[7].object.items[0].value.string.value, "asd") == 0);
// releases the whole tree, including nested values
json_free_object(obj);
}
#endif
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
/*
 * A string with an explicit length.
 * json_parse_string() fills `value` with a malloc'ed buffer holding `size`
 * bytes followed by a terminating NUL (not counted in `size`); on a parse
 * failure it returns size 0 and value NULL.
 */
typedef struct JsonString {
size_t size;
char* value;
} JsonString;
/* A boolean value (the JSON literals true / false). */
typedef struct JsonBool {
bool value;
} JsonBool;
/* Integer payload of a JsonNumber whose type is JSON_NUMBER_INT. */
typedef struct JsonInt {
int64_t value;
} JsonInt;
/* Floating-point payload of a JsonNumber whose type is JSON_NUMBER_DOUBLE. */
typedef struct JsonDouble {
double value;
} JsonDouble;
/* Selects which member of the union inside JsonNumber is valid. */
typedef enum JsonNumberType {
/* the number is stored in JsonNumber.int_ */
JSON_NUMBER_INT = 0,
/* the number is stored in JsonNumber.double_ */
JSON_NUMBER_DOUBLE,
} JsonNumberType;
/*
 * A number that is either an integer or a double.
 * `type` tells which member of the anonymous union holds the value.
 */
typedef struct JsonNumber {
JsonNumberType type;
union {
JsonInt int_;
JsonDouble double_;
};
} JsonNumber;
/* Selects which member of the union inside JsonValue is valid. */
typedef enum JsonValueType {
/* no value could be parsed; no union member is valid */
JSON_VALUE_INVALID = -1,
/* JsonValue.null */
JSON_VALUE_NULL,
/* JsonValue.string */
JSON_VALUE_STRING,
/* JsonValue.number */
JSON_VALUE_NUMBER,
/* JsonValue.boolean */
JSON_VALUE_BOOL,
/* JsonValue.object */
JSON_VALUE_OBJECT,
/* JsonValue.array */
JSON_VALUE_ARRAY,
} JsonValueType;
/* Forward declarations: JsonObject and JsonArray only store pointers to these. */
struct JsonValue;
struct JsonObjectItem;
/*
 * An object: `size` key/value items stored in the heap array `items`.
 * An empty object has size 0 and items NULL. Release with json_free_object().
 */
typedef struct JsonObject {
size_t size;
struct JsonObjectItem* items;
} JsonObject;
/*
 * An array: `size` values stored in the heap array `items`.
 * An empty array has size 0 and items NULL. Release with json_free_array().
 */
typedef struct JsonArray {
size_t size;
struct JsonValue* items;
} JsonArray;
/*
 * A value of any JSON type.
 * `type` tells which member of the anonymous union is valid; for
 * JSON_VALUE_NULL the parser sets `null` to NULL.
 */
typedef struct JsonValue {
JsonValueType type;
union {
JsonString string;
JsonNumber number;
JsonBool boolean;
JsonObject object;
JsonArray array;
void* null;
};
} JsonValue;
/* One key/value pair of a JsonObject. */
typedef struct JsonObjectItem {
JsonString key;
JsonValue value;
} JsonObjectItem;
/*
 * Parsers. Common parameters:
 *   data - input buffer
 *   size - number of bytes in data
 *   pos  - in/out read offset into data; may be NULL, in which case parsing
 *          starts at offset 0 and the end offset is not reported
 * Errors are reported on stderr. The return types carry no error flag: a
 * failed string has value NULL, and a failed object or array is returned
 * with size 0 and items NULL.
 * Strings, objects and arrays returned by these functions own heap memory and
 * must be released with the matching json_free_*() function.
 */
JsonString json_parse_string(const char* data, const size_t size, size_t* pos);
JsonNumber json_parse_number(const char* data, const size_t size, size_t* pos);
JsonBool json_parse_bool(const char* data, const size_t size, size_t* pos);
JsonObject json_parse_object(const char* data, const size_t size, size_t* pos);
JsonArray json_parse_array(const char* data, const size_t size, size_t* pos);
/*
 * Serialisers. Common parameters:
 *   out     - in/out pointer to the output buffer; the buffer is grown with
 *             realloc() as needed, so *out may change
 *   out_pos - offset in *out at which writing starts
 *   out_cap - in/out capacity of *out in bytes; may be NULL, in which case a
 *             capacity of 0 is assumed
 * Each function returns the number of bytes it wrote.
 */
/*
WARNING: json_dump_string() writes string "as-is", i.e. without escaping quotes, null-termination checks, etc.
*/
size_t json_dump_string(const JsonString string, char** out, size_t out_pos, size_t* out_cap);
size_t json_dump_number(const JsonNumber number, char** out, size_t out_pos, size_t* out_cap);
size_t json_dump_bool(const JsonBool boolean, char** out, size_t out_pos, size_t* out_cap);
size_t json_dump_object(const JsonObject object, char** out, size_t out_pos, size_t* out_cap);
size_t json_dump_array(const JsonArray array, char** out, size_t out_pos, size_t* out_cap);
/*
 * Release the heap memory owned by a parsed value. Objects and arrays are
 * released recursively, including all keys and nested values. The argument
 * is passed by value, so the caller's copy still holds the stale pointers
 * afterwards and must not be used again.
 */
void json_free_string(const JsonString string);
void json_free_object(const JsonObject object);
void json_free_array(const JsonArray array);
#ifdef JSON_TEST_FUNC
/* Assert-based self-test of the parser; only available when JSON_TEST_FUNC is defined. */
void json_simple_test();
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment