~exec64/freekdl

ee4e69de27d8809747351d65cc57f18be6f20819 — Harry Jeffery 10 months ago
FreeKDL initial public release
7 files changed, 5438 insertions(+), 0 deletions(-)

A .gitignore
A CMakeLists.txt
A LICENSE
A README.md
A freekdl.c
A freekdl.h
A test.c
A  => .gitignore +1 -0
@@ 1,1 @@
build/
\ No newline at end of file

A  => CMakeLists.txt +11 -0
@@ 1,11 @@
# Build script for the FreeKDL library and its test executable.
cmake_minimum_required(VERSION 3.20)
project(freekdl)
# Turn on CTest so the add_test() call below is registered.
enable_testing()

# The library is built from the single implementation file.
add_library(freekdl freekdl.c)
# Request the C90 language standard for the library.
set_property(TARGET freekdl PROPERTY C_STANDARD 90)

# The test binary is built from test.c alone; it is not linked against the
# freekdl target here.
# NOTE(review): presumably test.c pulls in the implementation itself - confirm.
add_executable(freekdl_test test.c)
set_property(TARGET freekdl_test PROPERTY C_STANDARD 90)

# Register the test binary with CTest.
add_test(freekdl_test freekdl_test)

A  => LICENSE +18 -0
@@ 1,18 @@
Copyright 2021 Harry Jeffery

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file

A  => README.md +86 -0
@@ 1,86 @@
FreeKDL - KDL parsing library for C
===================================

FreeKDL is an ANSI C compliant library for reading and writing
[KDL](https://kdl.dev/) files. It has minimal dependencies,
and configurable memory allocation.

Features:

- Portable, ANSI C compliant
- No dependencies other than `stddef.h` and `stdint.h`
- Full UTF-8 support
- No recursion while parsing
- All memory allocation user controllable
- All `malloc` and `realloc` failures handled gracefully
- Permissive MIT licensing

## Documentation

The [freekdl.h](freekdl.h) header is thoroughly commented and is
intended as the primary source of documentation.

```c
const char *kdlStr = "..."; // UTF-8 string containing the KDL document. May contain NULL bytes.
size_t kdlStrLen = 1234; // Length of kdlStr in bytes, if 0 FreeKDL will strlen() the input
struct fkdl_document doc = {0}; // In memory representation of a KDL document
struct fkdl_allocator allocator = {
  // specify malloc, realloc, free here
  // Or pass NULL into readDocument to default to the libc implementations
};
struct fkdl_error error; // Receives information about errors encountered
if (!fkdl_readDocument(kdlStr, kdlStrLen, &doc, &allocator, &error)) {
  // Handle error
}
printf("first node's identifier: %s\n", doc.nodes[0].identifier.data);
```

## Portability

FreeKDL consists of a single C source file and a single header file, both of
which are trivial to add to your project. It depends only on `stddef.h` and
`stdint.h`, so it will compile and run almost anywhere.

## No recursion while parsing

Recursion is not used while parsing. This prevents a malicious KDL file
with 1,000,000 levels of nesting from overflowing the stack. Instead
memory is allocated as needed through the `fkdl_allocator` passed in.

## Memory allocation

FreeKDL allows users to pass in a `fkdl_allocator` struct that defines
`malloc`, `realloc`, and `free` operations, as well as a custom `void *context`,
that will be passed to the aforementioned functions. This allows users
of this library to use any custom allocator desired, or to set hard
memory limits on FreeKDL.

In addition, FreeKDL checks the result of all `malloc` and `realloc` calls,
and will fail gracefully if either fails.

## Null byte tolerant

FreeKDL handles NUL bytes in strings gracefully. All internal string logic uses
the explicit `len` field of `fkdl_string`. However, for user convenience a NUL terminator
is appended to every `fkdl_string` (it is not counted in `len`).

## License

Copyright 2021 Harry Jeffery

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file

A  => freekdl.c +2997 -0
@@ 1,2997 @@
/*
Copyright 2021 Harry Jeffery

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "freekdl.h"

/* If we're in a hosted environment with a stdlib, provide a default
 * allocator implementation that'll be used when NULL is passed.
 *
 * __STDC_HOSTED__ is defined as 0 (not left undefined) by freestanding
 * C99-and-later implementations, so its value has to be tested; a bare
 * #ifdef would pull in <stdlib.h> on freestanding targets as well.
 */
#if defined(__STDC_HOSTED__) && __STDC_HOSTED__
#include <stdlib.h>

/* malloc adapter; the allocator context is not needed by libc. */
static void *fkdl_malloc(void *context, size_t len)
{
  (void)context;
  return malloc(len);
}

/* realloc adapter; the allocator context is not needed by libc. */
static void *fkdl_realloc(void *context, void *ptr, size_t len)
{
  (void)context;
  return realloc(ptr, len);
}

/* free adapter; the allocator context is not needed by libc. */
static void fkdl_free(void *context, void *ptr)
{
  (void)context;
  free(ptr);
}

/* Default allocator backed by the C library. Members that are not
 * named here are zero-initialised. */
static struct fkdl_allocator fkdl_defaultAllocator = {
  .malloc = fkdl_malloc,
  .realloc = fkdl_realloc,
  .free = fkdl_free,
};

#else
/* No C library available: every member of the default allocator is zero,
 * so helpers that check for NULL callbacks will refuse to allocate. */
static struct fkdl_allocator fkdl_defaultAllocator = {0};
#endif

/* Sets the first len bytes starting at dst to value.
 * Local replacement for memset so the library does not need <string.h>. */
static void fkdl_memset(void *dst, char value, size_t len)
{
  char *out = dst;
  while (len-- > 0) {
    *out++ = value;
  }
}

/* Copies len bytes from src to dst, front to back.
 * Local replacement for memcpy so the library does not need <string.h>. */
static void fkdl_memcpy(void *dst, const void *src, size_t len)
{
  char *out = dst;
  const char *in = src;
  while (len-- > 0) {
    *out++ = *in++;
  }
}

/* Compares the first len bytes of s1 and s2.
 *
 * Returns 0 when they are equal, a negative value when the first
 * differing byte of s1 is smaller, and a positive value when it is larger.
 * Bytes are compared as unsigned char, as the standard memcmp does, so
 * the ordering of bytes >= 0x80 does not depend on whether plain char is
 * signed on the target.
 */
static int fkdl_memcmp(const void *s1, const void *s2, size_t len)
{
  const unsigned char *lhs = s1;
  const unsigned char *rhs = s2;
  size_t i;
  for (i = 0; i < len; ++i) {
    if (lhs[i] != rhs[i]) {
      return (int)lhs[i] - (int)rhs[i];
    }
  }
  return 0;
}

/* Returns the number of bytes before the terminating '\0' of str.
 * A NULL pointer is treated as an empty string and yields 0. */
static size_t fkdl_strlen(const char *str)
{
  const char *end = str;
  if (!str) {
    return 0;
  }
  while (*end != '\0') {
    ++end;
  }
  return (size_t)(end - str);
}

enum fkdl_token_type
{
  FKDL_TOK_NULL,
  FKDL_TOK_BOOL,
  FKDL_TOK_IDENTIFIER,
  FKDL_TOK_STRING,
  FKDL_TOK_RAW_STRING,
  FKDL_TOK_NUMBER,
  FKDL_TOK_TYPE_ANNOTATION,
  FKDL_TOK_PROPERTY_EQUALS,
  FKDL_TOK_NODE_TERMINATOR,
  FKDL_TOK_OPEN_BRACE,
  FKDL_TOK_CLOSE_BRACE,
  FKDL_TOK_SLASHDASH
};

/* A single token: its kind plus the span of input bytes it refers to.
 * The token does not own any text; index/length refer to the input buffer.
 */
struct fkdl_token
{
  enum fkdl_token_type type;
  /* Byte offset into the input. For strings this is the first byte of the
   * body (after the opening quote); for numbers the first byte of the
   * literal. */
  size_t index;
  /* Span length in bytes */
  size_t length;
  /* Cursor line/column captured where the token started */
  size_t line;
  size_t column;
};

/* Read position within the input buffer, plus the bookkeeping needed for
 * error reporting.
 */
struct fkdl_cursor
{
  const char *input; /* Buffer being read; not modified through the cursor */
  size_t inputLen;   /* Size of input in bytes */
  size_t index;      /* Byte offset of the next unread byte; EOF when >= inputLen */

  size_t line;   /* Incremented by consumeNewline */
  size_t column; /* Advanced by the byte size of each consumed char; reset to 0 by consumeNewline */
  fkdl_bool prevWhitespace; /* Previous char was whitespace */
};

/* Write position within an output buffer.
 * When output is NULL the write helpers store nothing and only advance
 * index, which lets a caller measure how many bytes would be written.
 */
struct fkdl_writeCursor
{
  char *output;     /* Destination buffer, or NULL to only count bytes */
  size_t outputLen; /* Size of output in bytes */
  size_t index;     /* Number of bytes written (or counted) so far */
};

/* Internal dynamic array of fixed-size items, used through arrayPush,
 * arrayPop and arrayCleanup. Initialise zeroed out, except for itemSize.
 * Storage is allocated lazily by the first arrayPush.
 */
struct fkdl_array
{
  void *data; /* Item storage; NULL until the first push and after cleanup */
  size_t itemSize; /* Must be initialised. Size of each item in bytes */
  size_t len; /* len in items not bytes */
  size_t cap; /* capacity in items not bytes */
};

/* Appends a copy of the itemSize bytes at item to the end of array.
 *
 * Storage is allocated on the first push (8 items) and doubled whenever it
 * is full. Returns FKDL_FALSE, leaving the array unchanged, when an
 * argument is missing, the allocator lacks malloc/realloc, an allocation
 * fails, or the required byte size would not fit in a size_t.
 */
static fkdl_bool arrayPush(struct fkdl_array *array, void *item, struct fkdl_allocator *allocator)
{
  const size_t maxSize = (size_t)-1;
  const size_t initialCap = 8;
  char *dst;

  if (!array || !item || !array->itemSize || !allocator || !allocator->malloc || !allocator->realloc) {
    return FKDL_FALSE;
  }

  if (!array->data) {
    void *data;
    /* Guard the multiplication below against wrapping */
    if (initialCap > maxSize / array->itemSize) {
      return FKDL_FALSE;
    }
    data = allocator->malloc(allocator->context, initialCap * array->itemSize);
    if (!data) {
      return FKDL_FALSE;
    }
    array->data = data;
    array->len = 0;
    array->cap = initialCap;
  }

  if (array->len == array->cap) {
    size_t newCap;
    void *newData;

    if (array->cap > maxSize / 2) {
      return FKDL_FALSE;
    }
    /* A zero capacity with live storage would otherwise never grow */
    newCap = array->cap ? array->cap * 2 : initialCap;
    if (newCap > maxSize / array->itemSize) {
      return FKDL_FALSE;
    }
    /* Keep the old pointer until realloc succeeds so nothing is lost on failure */
    newData = allocator->realloc(
      allocator->context,
      array->data,
      newCap * array->itemSize);
    if (!newData) {
      return FKDL_FALSE;
    }
    array->cap = newCap;
    array->data = newData;
  }

  dst = array->data;
  fkdl_memcpy(dst + (array->len * array->itemSize), item, array->itemSize);
  array->len += 1;
  return FKDL_TRUE;
}

/* Removes the last item of array. When outItem is non-NULL the removed
 * item (itemSize bytes) is copied into it. Returns FKDL_FALSE when array is
 * NULL, has no itemSize, or is empty. Storage is never shrunk. */
static fkdl_bool arrayPop(struct fkdl_array *array, void *outItem)
{
  size_t last;

  if (!array || !array->itemSize || array->len < 1) {
    return FKDL_FALSE;
  }

  last = array->len - 1;
  array->len = last;
  if (outItem) {
    const char *base = array->data;
    fkdl_memcpy(outItem, base + (last * array->itemSize), array->itemSize);
  }
  return FKDL_TRUE;
}

/* Releases the storage of array through allocator->free and resets it to
 * the empty state (itemSize is left alone). Does nothing when array or
 * allocator is NULL or the allocator has no free callback. */
static void arrayCleanup(struct fkdl_array *array, struct fkdl_allocator *allocator)
{
  if (array && allocator && allocator->free) {
    if (array->data) {
      allocator->free(allocator->context, array->data);
    }
    array->cap = 0;
    array->len = 0;
    array->data = NULL;
  }
}

static void errorFromToken(struct fkdl_error *error, const struct fkdl_token *token, const char *message)
{
  if (!error) {
    return;
  }
  if (token) {
    error->index = token->index;
    error->line = token->line;
    error->column = token->column;
  } else {
    error->index = 0;
    error->line = 0;
    error->column = 0;
  }
  error->message = message ? message : "";
}

static void errorFromCursor(struct fkdl_error *error, const struct fkdl_cursor *cursor, const char *message)
{
  if (!error) {
    return;
  }
  if (cursor) {
    error->index = cursor->index;
    error->line = cursor->line;
    error->column = cursor->column;
  } else {
    error->index = 0;
    error->line = 0;
    error->column = 0;
  }
  error->message = message ? message : "";
}

/* Writes one byte to cursor and advances it.
 * With a NULL output buffer nothing is stored and the byte is only counted.
 * Fails when cursor is NULL or when index + 1 would reach outputLen (the
 * last byte of the buffer is never written by this helper). */
static fkdl_bool writeChar(struct fkdl_writeCursor *cursor, char value)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  if (cursor->output) {
    if (cursor->index + 1 >= cursor->outputLen) {
      return FKDL_FALSE;
    }
    cursor->output[cursor->index] = value;
  }
  cursor->index += 1;
  return FKDL_TRUE;
}

/* Writes the bytes of a null-terminated string to cursor, without the
 * terminator. With a NULL output buffer the bytes are only counted.
 * Fails, writing nothing, when an argument is NULL or when index + length
 * would reach outputLen. */
static fkdl_bool writeCString(struct fkdl_writeCursor *cursor, const char *str)
{
  size_t count;

  if (!cursor || !str) {
    return FKDL_FALSE;
  }
  count = fkdl_strlen(str);
  if (cursor->output) {
    if (cursor->index + count >= cursor->outputLen) {
      return FKDL_FALSE;
    }
    fkdl_memcpy(cursor->output + cursor->index, str, count);
  }
  cursor->index += count;
  return FKDL_TRUE;
}

/* Writes the str->len bytes of a fkdl_string to cursor, unescaped.
 * With a NULL output buffer the bytes are only counted.
 * Fails, writing nothing, when an argument is NULL or when index + len
 * would reach outputLen. */
static fkdl_bool writeString(struct fkdl_writeCursor *cursor, const struct fkdl_string *str)
{
  if (!cursor || !str) {
    return FKDL_FALSE;
  }
  if (cursor->output) {
    if (cursor->index + str->len >= cursor->outputLen) {
      return FKDL_FALSE;
    }
    fkdl_memcpy(cursor->output + cursor->index, str->data, str->len);
  }
  cursor->index += str->len;
  return FKDL_TRUE;
}

/* Writes len copies of value to cursor.
 * With a NULL output buffer the bytes are only counted.
 * Fails, writing nothing, when cursor is NULL or when index + len would
 * reach outputLen. */
static fkdl_bool cursorMemset(struct fkdl_writeCursor *cursor, char value, size_t len)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  if (cursor->output) {
    if (cursor->index + len >= cursor->outputLen) {
      return FKDL_FALSE;
    }
    fkdl_memset(cursor->output + cursor->index, value, len);
  }
  cursor->index += len;
  return FKDL_TRUE;
}

/* True when cursor has no unread bytes left. A NULL cursor counts as EOF. */
static fkdl_bool atEof(const struct fkdl_cursor *cursor)
{
  if (!cursor) {
    return FKDL_TRUE;
  }
  return cursor->inputLen <= cursor->index;
}

/* Decodes a single UTF-8 encoded code point from the start of input.
 *
 * input/inputLen: bytes available for decoding.
 * out:     if non-NULL, receives the decoded code point.
 * outSize: if non-NULL, receives the number of bytes read (1-4).
 *
 * Returns FKDL_FALSE, leaving out/outSize untouched, when input is NULL or
 * empty, the sequence is truncated or malformed, the encoding is overlong
 * (a value stored in more bytes than necessary), or the value is above
 * U+10FFFF.
 *
 * Surrogate values (U+D800..U+DFFF) are not rejected here, which keeps
 * this decoder symmetric with writeUtf8Char.
 */
static fkdl_bool readUtf8Char(const char *input, size_t inputLen, uint32_t *out, size_t *outSize)
{
  const unsigned char *bytes = (const unsigned char *)input;
  uint32_t value;
  uint32_t minValue; /* Smallest value that legitimately needs `size` bytes */
  size_t size;
  size_t i;

  if (!input || inputLen < 1) {
    return FKDL_FALSE;
  }

  /* The lead byte determines the sequence length and its payload bits */
  if ((bytes[0] & 0x80) == 0x00) {
    value = bytes[0];
    size = 1;
    minValue = 0;
  } else if ((bytes[0] & 0xE0) == 0xC0) {
    value = bytes[0] & 0x1F;
    size = 2;
    minValue = 0x80;
  } else if ((bytes[0] & 0xF0) == 0xE0) {
    value = bytes[0] & 0x0F;
    size = 3;
    minValue = 0x800;
  } else if ((bytes[0] & 0xF8) == 0xF0) {
    value = bytes[0] & 0x07;
    size = 4;
    minValue = 0x10000;
  } else {
    return FKDL_FALSE;
  }

  if (inputLen < size) {
    return FKDL_FALSE;
  }

  /* Each continuation byte must be 10xxxxxx and contributes 6 bits */
  for (i = 1; i < size; ++i) {
    if ((bytes[i] & 0xC0) != 0x80) {
      return FKDL_FALSE;
    }
    value = (value << 6) | (uint32_t)(bytes[i] & 0x3F);
  }

  if (value < minValue || value > 0x10FFFF) {
    return FKDL_FALSE;
  }

  if (out) {
    *out = value;
  }
  if (outSize) {
    *outSize = size;
  }
  return FKDL_TRUE;
}

/* Returns how many bytes the UTF-8 encoding of codePoint occupies (1-4),
 * or 0 when the value is 0x110000 or above and cannot be encoded. */
static size_t utf8Width(uint32_t codePoint)
{
  /* limits[i] is the first code point that no longer fits in i + 1 bytes */
  static const uint32_t limits[] = { 0x80, 0x800, 0x10000, 0x110000 };
  size_t i;

  for (i = 0; i < sizeof(limits) / sizeof(limits[0]); ++i) {
    if (codePoint < limits[i]) {
      return i + 1;
    }
  }
  return 0;
}

/* Encodes codePoint as UTF-8 into output.
 *
 * output/outputLen: destination and the number of bytes available there.
 * outSize: if non-NULL, receives the number of bytes written (1-4).
 *
 * Returns FKDL_FALSE, writing nothing, when output is NULL, the code point
 * is 0x110000 or above, or the encoding does not fit in outputLen bytes.
 */
static fkdl_bool writeUtf8Char(
  char *output,
  size_t outputLen,
  uint32_t codePoint,
  size_t *outSize)
{
  size_t width;

  if (!output) {
    return FKDL_FALSE;
  }

  width = utf8Width(codePoint);
  if (width == 0 || outputLen < width) {
    return FKDL_FALSE;
  }

  switch (width) {
    case 1:
      output[0] = 0x7F & codePoint;
      break;
    case 2:
      output[0] = 0xC0 | ((codePoint >> 6) & 0x1F);
      output[1] = 0x80 | (codePoint & 0x3F);
      break;
    case 3:
      output[0] = 0xE0 | ((codePoint >> 12) & 0x0F);
      output[1] = 0x80 | ((codePoint >> 6) & 0x3F);
      output[2] = 0x80 | (codePoint & 0x3F);
      break;
    default:
      output[0] = 0xF0 | ((codePoint >> 18) & 0x07);
      output[1] = 0x80 | ((codePoint >> 12) & 0x3F);
      output[2] = 0x80 | ((codePoint >> 6) & 0x3F);
      output[3] = 0x80 | (codePoint & 0x3F);
      break;
  }

  if (outSize) {
    *outSize = width;
  }
  return FKDL_TRUE;
}

/* Returns the escape sequence used for codePoint inside a quoted string,
 * or NULL when the code point is written as-is. The returned string is a
 * static literal and must not be freed. */
static const char *toEscapeCode(uint32_t codePoint)
{
  static const struct {
    uint32_t codePoint;
    const char *escape;
  } escapes[] = {
    { 0x0008, "\\b" },       /* Backspace */
    { 0x0009, "\\t" },       /* Tab */
    { 0x000A, "\\n" },       /* Line Feed */
    { 0x000C, "\\f" },       /* Form Feed */
    { 0x000D, "\\r" },       /* Carriage return */
    { 0x0022, "\\\"" },      /* Quotation Mark */
    { 0x002F, "\\/" },       /* Forwardslash */
    { 0x005C, "\\\\" },      /* Backslash */
    { 0x0085, "\\u{85}" },   /* Next Line */
    { 0x2028, "\\u{2028}" }, /* Line Separator */
    { 0x2029, "\\u{2029}" }  /* Paragraph Separator */
  };
  size_t i;

  for (i = 0; i < sizeof(escapes) / sizeof(escapes[0]); ++i) {
    if (escapes[i].codePoint == codePoint) {
      return escapes[i].escape;
    }
  }
  return NULL;
}

/* Writes a fkdl_string to cursor, escaping every code point that
 * toEscapeCode has an escape sequence for, so the result is valid as the
 * body of a quoted string. The surrounding quotes are not written.
 *
 * Like the other write helpers, a NULL cursor->output means "count only":
 * nothing is stored and cursor->index is advanced by the number of bytes
 * that would have been written. One byte of the buffer is always left
 * unused, matching writeChar/writeCString/writeString.
 *
 * Returns FKDL_FALSE when an argument is NULL, str is not valid UTF-8,
 * a code point cannot be encoded, or the output buffer is too small.
 */
static fkdl_bool writeEscapedString(struct fkdl_writeCursor *cursor, const struct fkdl_string *str)
{
  size_t i;

  if (!cursor || !str) {
    return FKDL_FALSE;
  }

  for (i = 0; i < str->len; ) {
    uint32_t codePoint;
    size_t step;
    const char *escape;

    if (!readUtf8Char(str->data + i, str->len - i, &codePoint, &step)) {
      return FKDL_FALSE;
    }

    escape = toEscapeCode(codePoint);
    if (escape) {
      if (!writeCString(cursor, escape)) {
        return FKDL_FALSE;
      }
    } else {
      size_t width = utf8Width(codePoint);
      if (width == 0) {
        /* Not encodable as UTF-8 */
        return FKDL_FALSE;
      }
      if (cursor->output) {
        /* Only touch the buffer when there is one; in count-only mode
         * output is NULL and must not be offset or dereferenced. */
        if (cursor->index + width >= cursor->outputLen) {
          return FKDL_FALSE;
        }
        if (!writeUtf8Char(
          cursor->output + cursor->index,
          cursor->outputLen - cursor->index,
          codePoint,
          NULL)) {
          return FKDL_FALSE;
        }
      }
      cursor->index += width;
    }
    i += step;
  }
  return FKDL_TRUE;
}

/* Decodes the code point under the cursor without consuming it.
 *
 * out:     if non-NULL, receives the code point.
 * outSize: if non-NULL, receives its encoded size in bytes.
 * error:   if non-NULL, is filled in on failure.
 *
 * Returns FKDL_FALSE when cursor is NULL (error untouched), at end of
 * input ("Unexpected EOF"), or when the bytes under the cursor are not
 * valid UTF-8 ("Invalid UTF-8 character").
 */
static fkdl_bool peekChar(
  const struct fkdl_cursor *cursor,
  uint32_t *out,
  size_t *outSize,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  if (atEof(cursor)) {
    errorFromCursor(error, cursor, "Unexpected EOF");
    return FKDL_FALSE;
  }
  if (readUtf8Char(
    cursor->input + cursor->index,
    cursor->inputLen - cursor->index,
    out,
    outSize)) {
    return FKDL_TRUE;
  }
  errorFromCursor(error, cursor, "Invalid UTF-8 character");
  return FKDL_FALSE;
}

/* True if codePoint is treated as (non-newline) whitespace by the parser.
 * Newline characters are classified separately by isNewline. */
static fkdl_bool isWhitespace(uint32_t codePoint)
{
  /* En Quad through Hair Space form one contiguous run */
  if (codePoint >= 0x2000 && codePoint <= 0x200A) {
    return FKDL_TRUE;
  }
  switch (codePoint) {
    case 0x0009: /* Character Tabulation */
    case 0x0020: /* Space */
    case 0x00A0: /* No-Break Space */
    case 0x1680: /* Ogham Space Mark */
    case 0x202F: /* Narrow No-Break Space */
    case 0x205F: /* Medium Mathematical Space */
    case 0x3000: /* Ideographic Space */
    case 0xFEFF: /* Byte order marker */
      return FKDL_TRUE;
    default:
      break;
  }
  return FKDL_FALSE;
}

/* True if the bytes under the cursor start with the null-terminated
 * string str. Nothing is consumed. An empty str always matches; a NULL
 * argument never does. */
static fkdl_bool peekStr(
    const struct fkdl_cursor *cursor,
    const char *str)
{
  size_t offset;

  if (!cursor || !str) {
    return FKDL_FALSE;
  }
  for (offset = 0; str[offset] != '\0'; ++offset) {
    size_t pos = cursor->index + offset;
    if (pos >= cursor->inputLen || cursor->input[pos] != str[offset]) {
      return FKDL_FALSE;
    }
  }
  return FKDL_TRUE;
}

/* True if codePoint is one of the characters the parser treats as a line
 * break. CRLF pairing is handled by consumeNewline, not here. */
static fkdl_bool isNewline(uint32_t codePoint)
{
  if (codePoint == 0x000A ||  /* Line Feed */
      codePoint == 0x000C ||  /* Form Feed */
      codePoint == 0x000D ||  /* Carriage return */
      codePoint == 0x0085 ||  /* Next Line */
      codePoint == 0x2028 ||  /* Line Separator */
      codePoint == 0x2029) {  /* Paragraph Separator */
    return FKDL_TRUE;
  }
  return FKDL_FALSE;
}

/* True if codePoint may appear inside a bare identifier: it must be above
 * 0x20, below 0x110000, not one of the reserved punctuation characters,
 * and not whitespace. */
static fkdl_bool isPartOfIdentifier(uint32_t codePoint)
{
  /* Punctuation that always ends an identifier */
  static const char reserved[] = "\\/(){}<>[];=,\"";
  size_t i;

  if (codePoint <= 0x20 || codePoint >= 0x110000) {
    return FKDL_FALSE;
  }
  for (i = 0; reserved[i] != '\0'; ++i) {
    if (codePoint == (uint32_t)reserved[i]) {
      return FKDL_FALSE;
    }
  }
  if (isWhitespace(codePoint)) {
    return FKDL_FALSE;
  }
  return FKDL_TRUE;
}

/* Check if the given string matches what's under the cursor, and */
/* is then followed by whitespace or a newline */
static fkdl_bool peekWord(
    const struct fkdl_cursor *cursor,
    const char *str)
{
  if (!cursor || !str) {
    return FKDL_FALSE;
  }
  size_t len = fkdl_strlen(str);
  size_t i;
  for (i = 0; i < len; ++i) {
    if (cursor->index + i >= cursor->inputLen) {
      return FKDL_FALSE;
    }
    if (cursor->input[cursor->index + i] != str[i]) {
      return FKDL_FALSE;
    }
  }
  struct fkdl_cursor end = *cursor;
  end.index += len;
  uint32_t codePoint;
  if (!peekChar(&end, &codePoint, NULL, NULL)) {
    /* EOF can count as end of a word */
    return FKDL_TRUE;
  }
  return !isPartOfIdentifier(codePoint);
}

/* Consume a single codepoint from the cursor */
static fkdl_bool consumeChar(
  struct fkdl_cursor *cursor,
  uint32_t *out,
  size_t *outSize,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  uint32_t localOut;
  size_t localOutSize;
  if (!peekChar(cursor, &localOut, &localOutSize, error)) {
    return FKDL_FALSE;
  }
  cursor->index += localOutSize;
  cursor->column += localOutSize;
  cursor->prevWhitespace = isWhitespace(localOut);
  if (out) {
    *out = localOut;
  }
  if (outSize) {
    *outSize = localOutSize;
  }
  return FKDL_TRUE;
}

/* True if codePoint is an ASCII hexadecimal digit (0-9, a-f, A-F). */
static fkdl_bool isHexDigit(uint32_t codePoint)
{
  /* Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'; no other value lands in that range */
  uint32_t folded = codePoint | 0x20;
  return ((codePoint >= '0' && codePoint <= '9') ||
          (folded >= 'a' && folded <= 'f'));
}

/* Consumes 1-6 hex characters */
/* writes their value to out */
/* writes how many chars consumed to outSize */
static fkdl_bool consumeHexCode(
  struct fkdl_cursor *cursor,
  uint32_t *out,
  size_t *outSize,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  uint32_t value = 0;
  size_t len = 0;
  while (1) {
    uint32_t codePoint;
    size_t step;
    if (!peekChar(cursor, &codePoint, &step, error)) {
      break;
    }

    if (!isHexDigit(codePoint)) {
      break;
    }

    /* Valid hex digit. Add it to the current value */
    value <<= 4;
    if (codePoint >= '0' && codePoint <= '9') {
      value += codePoint - '0';
    } else if (codePoint >= 'a' && codePoint <= 'f') {
      value += 10 + codePoint - 'a';
    } else {
      value += 10 + codePoint - 'A';
    }
    len += 1;
    if (!consumeChar(cursor, NULL, NULL, error)) {
      return FKDL_FALSE;
    }
  }

  if (len == 0) {
    errorFromCursor(error, cursor, "Hex code cannot be empty");
    return FKDL_FALSE;
  }

  if (out) {
    *out = value;
  }
  if (outSize) {
    *outSize = len;
  }
  return FKDL_TRUE;
}

/* True if codePoint can begin a number: a sign or a decimal digit. */
static fkdl_bool isStartOfNumber(uint32_t codePoint)
{
  switch (codePoint) {
    case '+':
    case '-':
      return FKDL_TRUE;
    default:
      break;
  }
  if (codePoint < '0' || codePoint > '9') {
    return FKDL_FALSE;
  }
  return FKDL_TRUE;
}

static fkdl_bool consumeNewline(
  struct fkdl_cursor *cursor,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  struct fkdl_cursor start = *cursor;
  uint32_t codePoint;
  if (!consumeChar(cursor, &codePoint, NULL, error)) {
    return FKDL_FALSE;
  }
  if (!isNewline(codePoint)) {
    /* We didn't consume a newline. Abort. */
    errorFromCursor(error, &start, "Expected new line");
    return FKDL_FALSE;
  }
  if (codePoint == 0xD) {
    /* This was a CR, peek the next char and if it's an LF, consume */
    /* that too. CRLF counts as a single newline. */
    if (peekChar(cursor, &codePoint, NULL, NULL) && codePoint == 0xA) {
      consumeChar(cursor, NULL, NULL, NULL);
    }
  }
  cursor->column = 0;
  cursor->line += 1;
  return FKDL_TRUE;
}

/* Consumes the rest of the current line, including the newline that ends
 * it. Used for // comments. Stops early, without error, at the end of
 * input or at a byte sequence that cannot be decoded. */
static void consumeSingleComment(struct fkdl_cursor *cursor)
{
  uint32_t codePoint;

  if (!cursor) {
    return;
  }
  while (peekChar(cursor, &codePoint, NULL, NULL)) {
    if (isNewline(codePoint)) {
      consumeNewline(cursor, NULL);
      return;
    }
    consumeChar(cursor, NULL, NULL, NULL);
  }
}

/* Consumes whitespace, including mutli-line comments, doesn't read newline */
static fkdl_bool consumeWhitespace(
  struct fkdl_cursor *cursor,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  size_t commentDepth = 0;
  struct fkdl_cursor start = *cursor;

  while (1) {
    if (atEof(cursor)) {
      /* Reached EOF */
      break;
    }

    uint32_t codePoint;
    size_t step;
    if (!peekChar(cursor, &codePoint, &step, error)) {
      return FKDL_FALSE;
    }

    if (isWhitespace(codePoint)) {
      consumeChar(cursor, NULL, NULL, NULL);
      continue;
    }

    if (peekStr(cursor, "/*")) {
      commentDepth += 1;
      consumeChar(cursor, NULL, NULL, NULL);
      consumeChar(cursor, NULL, NULL, NULL);
      continue;
    }

    if (peekStr(cursor, "*/")) {
      if (commentDepth > 0) {
        commentDepth -= 1;
        consumeChar(cursor, NULL, NULL, NULL);
        consumeChar(cursor, NULL, NULL, NULL);
      } else {
        errorFromCursor(error, cursor, "Invalid end of block comment. No block comment open.");
        return FKDL_FALSE;
      }
      continue;
    }

    /* If non whitespace but in comment, consume anyway */
    if (commentDepth > 0) {
      if (isNewline(codePoint)) {
        consumeNewline(cursor, error);
        continue;
      } else {
        consumeChar(cursor, NULL, NULL, error);
        continue;
      }
    }

    /* Non whitespace, and not in comment, stop consuming. */
    break;
  }

  if (start.index != cursor->index) {
    cursor->prevWhitespace = FKDL_TRUE;
  }
  return FKDL_TRUE;
}

/* Counts the run of consecutive '#' bytes at the start of ptr, looking at
 * no more than maxLen bytes. Used for finding the end of a raw string.
 * A NULL ptr yields 0.
 */
static size_t countHashes(const char *ptr, size_t maxLen)
{
  size_t count = 0;

  if (!ptr) {
    return 0;
  }
  while (count < maxLen && ptr[count] == '#') {
    ++count;
  }
  return count;
}

/* Given a cursor pointing to the opening " of a regular string */
/* or r#" of a raw string, return the index of its start, */
/* how many bytes long it is (in the input). Also consume */
/* the string while doing this. */
/* */
/* outStart: if non-NULL, receives the index of the first byte of the body */
/*           (just after the opening quote). */
/* outLen:   if non-NULL, receives the body length in input bytes; escape */
/*           sequences are counted as written, not decoded. */
/* outRaw:   if non-NULL, receives whether the string was a raw string. */
/* */
/* On success the cursor is left just past the closing quote (and closing */
/* #s of a raw string). Fails if the string does not start with " or r, */
/* or if the input cannot be read before the string is closed. */
/* NOTE(review): characters are consumed with consumeChar only, so a line */
/* break inside the string does not advance cursor->line - confirm whether */
/* that is intended. */
static fkdl_bool measureAndConsumeString(
  struct fkdl_cursor *cursor,
  size_t *outStart,
  size_t *outLen,
  fkdl_bool *outRaw,
  struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  uint32_t codePoint;

  if (!consumeChar(cursor, &codePoint, NULL, error)) {
    return FKDL_FALSE;
  }

  fkdl_bool isRaw;

  /* The first character decides between a quoted and a raw string */
  if (codePoint == '"') {
    isRaw = FKDL_FALSE;
  } else if (codePoint == 'r') {
    isRaw = FKDL_TRUE;
  } else {
    /* Reported at the position after the offending character */
    errorFromCursor(error, cursor, "Invalid start of string");
    return FKDL_FALSE;
  }

  size_t hashLen = 0; /* How many #s are in the raw start/end quote */
  if (isRaw) {
    /* Consume the #s of the opening delimiter, up to and including the " */
    while (1) {
      if (!consumeChar(cursor, &codePoint, NULL, error)) {
        return FKDL_FALSE;
      }
      if (codePoint == '#') {
        hashLen += 1;
      } else if (codePoint == '"') {
        break;
      } else {
        errorFromCursor(error, cursor, "Expected # or \"");
        return FKDL_FALSE;
      }
    }
  }

  /* We're now at the first byte of the string's body */
  size_t start = cursor->index;

  size_t len = 0;
  /* True when the previous character was an unescaped backslash; only */
  /* ever set for non-raw strings */
  fkdl_bool isEscape = FKDL_FALSE;
  while (1) {
    size_t step;
    if (!consumeChar(cursor, &codePoint, &step, error)) {
      return FKDL_FALSE;
    }
    if (!isRaw && codePoint == '\\' && !isEscape) {
      isEscape = FKDL_TRUE;
    } else if (codePoint == '"' && !isEscape) {
      if (!isRaw) {
        break;
      }
      /* Raw string, check for the right number of #s to close out */
      /* The run of #s after the quote must be exactly hashLen long; */
      /* otherwise the quote is treated as part of the body */
      if (hashLen == countHashes(cursor->input + cursor->index,
        cursor->inputLen - cursor->index)) {
        break;
      }
    } else {
      isEscape = FKDL_FALSE;
    }
    /* Not reached for the closing quote, so it is excluded from len */
    len += step;
  }

  /* We've consumed the final ", just need to consume any hashes */
  while (hashLen > 0) {
    if (!consumeChar(cursor, NULL, NULL, error)) {
      return FKDL_FALSE;
    }
    hashLen -= 1;
  }

  if (outStart) {
    *outStart = start;
  }
  if (outLen) {
    *outLen = len;
  }
  if (outRaw) {
    *outRaw = isRaw;
  }
  return FKDL_TRUE;
}

/* Consumes both raw/quoted strings, populating token with their contents */
static fkdl_bool consumeString(
  struct fkdl_cursor *cursor,
  struct fkdl_token *token,
  struct fkdl_error *error)
{
  if (!cursor || !token) {
    return FKDL_FALSE;
  }
  struct fkdl_cursor startCursor = *cursor;

  size_t start;
  size_t len;
  fkdl_bool isRaw;
  if (!measureAndConsumeString(cursor, &start, &len, &isRaw, error)) {
    return FKDL_FALSE;
  }
  token->type = isRaw ? FKDL_TOK_RAW_STRING : FKDL_TOK_STRING;
  token->index = start;
  token->length = len;
  token->line = startCursor.line;
  token->column = startCursor.column;
  return FKDL_TRUE;
}

/* True if codePoint may appear somewhere inside a number token: a hex
 * digit, or one of '_', 'x', 'o', '+', '-', '.'. This only delimits the
 * token; it does not check that the number is well formed. */
static fkdl_bool isPartOfNumber(uint32_t codePoint)
{
  static const char extras[] = "_xo+-.";
  size_t i;

  if ((codePoint >= '0' && codePoint <= '9') ||
      (codePoint >= 'a' && codePoint <= 'f') ||
      (codePoint >= 'A' && codePoint <= 'F')) {
    return FKDL_TRUE;
  }
  for (i = 0; extras[i] != '\0'; ++i) {
    if (codePoint == (uint32_t)extras[i]) {
      return FKDL_TRUE;
    }
  }
  return FKDL_FALSE;
}

/* Consumes a number from cursor, populating token with it */
static fkdl_bool consumeNumber(
  struct fkdl_cursor *cursor,
  struct fkdl_token *token,
  struct fkdl_error *error)
{
  if (!cursor || !token) {
    return FKDL_FALSE;
  }
  struct fkdl_cursor start = *cursor;
  uint32_t codePoint;
  while (1) {
    if (atEof(cursor)) {
      break;
    }
    if (!peekChar(cursor, &codePoint, NULL, error)) {
      errorFromCursor(error, &start, "Invalid number");
      return FKDL_FALSE;
    }
    if (!isPartOfNumber(codePoint)) {
      break;
    }
    consumeChar(cursor, NULL, NULL, NULL);
  }
  /* Reached the end of the number. */
  token->type = FKDL_TOK_NUMBER;
  token->index = start.index;
  token->length = cursor->index - start.index;
  token->line = start.line;
  token->column = start.column;
  return FKDL_TRUE;
}

/* True if the cursor points to the start of a raw string */
static fkdl_bool isRawStringStart(const struct fkdl_cursor *startCursor)
{
  if (!startCursor) {
    return FKDL_FALSE;
  }
  struct fkdl_cursor cursor = *startCursor;
  
  uint32_t codePoint;
  if (!consumeChar(&cursor, &codePoint, NULL, NULL)) {
    return FKDL_FALSE;
  }
  if (codePoint != 'r') {
    return FKDL_FALSE;
  }
  while (1) {
    if (!consumeChar(&cursor, &codePoint, NULL, NULL)) {
      break;
    }
    if (codePoint == '#') {
      continue;
    } else if (codePoint == '"') {
      return FKDL_TRUE;
    } else {
      break;
    }
  }
  return FKDL_FALSE;
}

/* True if the codepoint is a valid first codepoint in an identifier:
 * anything isPartOfIdentifier accepts, except a decimal digit. */
static fkdl_bool isStartOfIdentifier(uint32_t codePoint)
{
  if (codePoint < '0' || codePoint > '9') {
    return isPartOfIdentifier(codePoint);
  }
  return FKDL_FALSE;
}

/* Consumes an identifier from cursor */
static fkdl_bool consumeIdentifier(struct fkdl_cursor *cursor, struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  uint32_t codePoint;

  if (!consumeChar(cursor, &codePoint, NULL, error)) {
    return FKDL_FALSE;
  }

  if (!isStartOfIdentifier(codePoint)) {
    if (error) {
      error->index = cursor->index - 1;
      error->line = cursor->line;
      error->column = cursor->column - 1;
      error->message = "Invalid character at start of identifier";
    }
    return FKDL_FALSE;
  }

  while (1) {
    if (!peekChar(cursor, &codePoint, NULL, error)) {
      break;
    }
    if (!isPartOfIdentifier(codePoint)) {
      break;
    }
    if (!consumeChar(cursor, NULL, NULL, error)) {
      return FKDL_FALSE;
    }
  }

  return FKDL_TRUE;
}

/* consumes a type annotation from cursor */
static fkdl_bool consumeTypeAnnotation(struct fkdl_cursor *cursor, struct fkdl_error *error)
{
  if (!cursor) {
    return FKDL_FALSE;
  }
  uint32_t codePoint;
  if (!consumeChar(cursor, &codePoint, NULL, error)) {
    return FKDL_FALSE;
  }
  if (codePoint != '(') {
    if (error) {
      error->index = cursor->index - 1;
      error->line = cursor->line;
      error->column = cursor->column - 1;
      error->message = "Expected (";
    }
    return FKDL_FALSE;
  }

  if (!consumeIdentifier(cursor, error)) {
    return FKDL_FALSE;
  }

  if (!consumeChar(cursor, &codePoint, NULL, error)) {
    return FKDL_FALSE;
  }
  if (codePoint != ')') {
    if (error) {
      error->index = cursor->index - 1;
      error->line = cursor->line;
      error->column = cursor->column - 1;
      error->message = "Expected )";
    }
    return FKDL_FALSE;
  }

  return FKDL_TRUE;
}

static fkdl_bool isLineContinuation(uint32_t codePoint, struct fkdl_cursor *cursor)
{
  return codePoint == '\\' && cursor->prevWhitespace;
}

/* Reads input string, filling tokens with the tokens identified. */
/* Returns true on success */
static fkdl_bool tokenise(
  const char *input, /* utf-8 string. May contain null bytes. */
  size_t inputLen, /* length of utf-8 string in bytes. */
  struct fkdl_array *tokens, /* array of token structs */
  struct fkdl_error *error, /* Optional parse error info */
  struct fkdl_allocator *allocator) /* the allocator to use */
{
  struct fkdl_cursor cursor = {
    .input = input,
    .inputLen = inputLen,
    .index = 0,
    .line = 1,
    .column = 0,
    .prevWhitespace = FKDL_FALSE,
  };

  if (!input) {
    errorFromCursor(error, &cursor, "NULL input");
    return FKDL_FALSE;
  }

  while (1) {
    uint32_t codePoint;
    size_t step;

    if (atEof(&cursor)) {
      /* Reached EOF, throw down a node terminator */
      struct fkdl_token token = {
        .type = FKDL_TOK_NODE_TERMINATOR,
        .index = cursor.index,
        .length = 0,
        .line = cursor.line,
        .column = cursor.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      break;
    }

    if (!peekChar(&cursor, &codePoint, &step, error)) {
      return FKDL_FALSE;
    }

    if (isWhitespace(codePoint) || peekStr(&cursor, "/*")) {
      if (!consumeWhitespace(&cursor, error)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (isNewline(codePoint)) {
      struct fkdl_cursor start = cursor;

      if (!consumeNewline(&cursor, error)) {
        return FKDL_FALSE;
      }

      struct fkdl_token token = {
        .type = FKDL_TOK_NODE_TERMINATOR,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    /* Look for line continuation after whitespace */
    if (isLineContinuation(codePoint, &cursor)) {
      /* Consume '\\' then until end of whitespace, including block comments */
      if (!consumeChar(&cursor, NULL, NULL, error)) {
        return FKDL_FALSE;
      }
      if (!consumeWhitespace(&cursor, error)) {
        return FKDL_FALSE;
      }
      if (peekStr(&cursor, "//")) {
        consumeSingleComment(&cursor);
      } else if (!consumeNewline(&cursor, error)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (peekStr(&cursor, "//")) {
      consumeSingleComment(&cursor);
      continue;
    }

    if (peekStr(&cursor, "/-")) {
      struct fkdl_cursor start = cursor;
      consumeChar(&cursor, NULL, NULL, NULL);
      consumeChar(&cursor, NULL, NULL, NULL);

      struct fkdl_token token = {
        .type = FKDL_TOK_SLASHDASH,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == '(') {
      struct fkdl_cursor start = cursor;
      if (!consumeTypeAnnotation(&cursor, error)) {
        return FKDL_FALSE;
      }

      struct fkdl_token token = {
        .type = FKDL_TOK_TYPE_ANNOTATION,
        .index = start.index + 1, /* +1 to skip the '(' */
        .length = cursor.index - start.index - 2, /* same reason as above */
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == '"' || isRawStringStart(&cursor)) {
      /* Look for escaped string */
      struct fkdl_token token;
      if (!consumeString(&cursor, &token, error)) {
        return FKDL_FALSE;
      }
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (isStartOfNumber(codePoint)) {
      struct fkdl_token token;
      if (!consumeNumber(&cursor, &token, error)) {
        return FKDL_FALSE;
      }
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (peekWord(&cursor, "null")) {
      struct fkdl_cursor start = cursor;
      size_t i;
      for (i = 0; i < 4; ++i) {
        consumeChar(&cursor, NULL, NULL, NULL);
      }
      struct fkdl_token token = {
        .type = FKDL_TOK_NULL,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (peekWord(&cursor, "true")) {
      struct fkdl_cursor start = cursor;
      size_t i;
      for (i = 0; i < 4; ++i) {
        consumeChar(&cursor, NULL, NULL, NULL);
      }
      struct fkdl_token token = {
        .type = FKDL_TOK_BOOL,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (peekWord(&cursor, "false")) {
      struct fkdl_cursor start = cursor;
      size_t i;
      for (i = 0; i < 5; ++i) {
        consumeChar(&cursor, NULL, NULL, NULL);
      }
      struct fkdl_token token = {
        .type = FKDL_TOK_BOOL,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (isStartOfIdentifier(codePoint)) {
      struct fkdl_cursor start = cursor;
      if (!consumeIdentifier(&cursor, error)) {
        return FKDL_FALSE;
      }
      struct fkdl_token token = {
        .type = FKDL_TOK_IDENTIFIER,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == '{') {
      struct fkdl_cursor start = cursor;
      consumeChar(&cursor, NULL, NULL, NULL);
      struct fkdl_token token = {
        .type = FKDL_TOK_OPEN_BRACE,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == '}') {
      struct fkdl_cursor start = cursor;
      consumeChar(&cursor, NULL, NULL, NULL);
      struct fkdl_token termToken = {
        .type = FKDL_TOK_NODE_TERMINATOR,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &termToken, allocator)) {
        return FKDL_FALSE;
      }
      struct fkdl_token braceToken = {
        .type = FKDL_TOK_CLOSE_BRACE,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &braceToken, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == ';') {
      struct fkdl_cursor start = cursor;
      consumeChar(&cursor, NULL, NULL, NULL);
      struct fkdl_token token = {
        .type = FKDL_TOK_NODE_TERMINATOR,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    if (codePoint == '=' && !cursor.prevWhitespace) {
      struct fkdl_cursor start = cursor;
      consumeChar(&cursor, NULL, NULL, NULL);
      struct fkdl_token token = {
        .type = FKDL_TOK_PROPERTY_EQUALS,
        .index = start.index,
        .length = cursor.index - start.index,
        .line = start.line,
        .column = start.column,
      };
      if (!arrayPush(tokens, &token, allocator)) {
        return FKDL_FALSE;
      }
      continue;
    }

    errorFromCursor(error, &cursor, "Unexpected character");
    return FKDL_FALSE;
  }
  return FKDL_TRUE;
}

/* Creates a new fkdl_string populated with a cleaned up representation of the number
 * found in str. That means stripping underscores, etc.
 */
static struct fkdl_string copyNumber(struct fkdl_allocator *allocator, const char *str, size_t strLen)
{
  /* TODO: standardise exponent notation */
  /* TODO: drop leading zeroes */
  struct fkdl_string out = {0};
  if (!allocator || !allocator->malloc) {
    return out;
  }
  /* technically wasting any bytes used by underscores */
  out.data = allocator->malloc(allocator->context, strLen + 1);
  out.len = 0;
  if (out.data) {
    size_t i;
    for (i = 0; i < strLen; ++i) {
      if (str[i] == '_') {
        continue;
      }
      out.data[out.len++] = str[i];
    }
    /* Null terminate the strings as a favour to the end user, even if we */
    /* don't rely on this ourselves. */
    out.data[out.len] = 0;
  }
  return out;
}

/* Constructs a new fkdl_string with a copy of the strLen bytes pointed to by
 * str. The copy is null terminated as a favour to the end user.
 *
 * On failure (no usable allocator, or allocation failed) the returned string
 * has data == NULL and len == 0, so callers can test data and never see a
 * length that describes a buffer which does not exist.
 */
static struct fkdl_string copyString(struct fkdl_allocator *allocator, const char *str, size_t strLen)
{
  struct fkdl_string out = {0};
  size_t i;

  if (!allocator || !allocator->malloc) {
    return out;
  }
  out.data = allocator->malloc(allocator->context, strLen + 1);
  if (!out.data) {
    /* out.len stays 0 to match the NULL buffer */
    return out;
  }
  for (i = 0; i < strLen; ++i) {
    out.data[i] = str[i];
  }
  /* Null terminate the strings as a favour to the end user, even if we */
  /* don't rely on this ourselves. */
  out.data[strLen] = 0;
  out.len = strLen;
  return out;
}

/* Computes how many bytes the first strLen bytes of str occupy once escape
 * sequences are applied, storing the answer in *newLen (if non-NULL).
 * Returns FKDL_FALSE for a NULL str or a truncated/unknown/malformed escape.
 */
static fkdl_bool unescapedStringLength(const char *str, size_t strLen, size_t *newLen)
{
  size_t pos = 0;
  size_t total = 0;

  if (!str) {
    return FKDL_FALSE;
  }

  while (pos < strLen) {
    int isSimple = 0;

    /* Ordinary byte: copied through unchanged */
    if (str[pos] != '\\') {
      total += 1;
      pos += 1;
      continue;
    }

    /* A backslash must be followed by at least one more byte */
    if (pos + 1 >= strLen) {
      return FKDL_FALSE;
    }

    switch (str[pos + 1]) {
      case 'n':
      case 'r':
      case 't':
      case '\\':
      case '/':
      case '"':
      case 'b':
      case 'f':
        isSimple = 1;
        break;
      default:
        break;
    }
    if (isSimple) {
      /* Two input bytes collapse into one output byte */
      total += 1;
      pos += 2;
      continue;
    }

    if (str[pos + 1] != 'u') {
      /* Unknown escape if it's not \\u{123456} */
      return FKDL_FALSE;
    }

    /* Unicode escape: needs room for at least \u{1} */
    if (pos + 4 >= strLen) {
      return FKDL_FALSE;
    }
    if (str[pos + 2] != '{') {
      return FKDL_FALSE;
    }

    {
      /* Temporary cursor positioned just after the "\u{" */
      struct fkdl_cursor cursor = {
        .input = str + pos + 3,
        .inputLen = strLen - pos - 3,
        .line = 1,
      };
      uint32_t codePoint;
      size_t bytesRead;
      if (!consumeHexCode(&cursor, &codePoint, &bytesRead, NULL)) {
        return FKDL_FALSE;
      }
      total += utf8Width(codePoint);
      /* Skip the hex code plus four more bytes of escape syntax */
      pos += bytesRead + 4;
    }
  }

  if (newLen) {
    *newLen = total;
  }
  return FKDL_TRUE;
}

/* Copies inLen bytes from in to out, replacing each escape sequence with the
 * bytes it stands for. No terminator is written. Returns FKDL_FALSE for NULL
 * arguments or a truncated/unknown/malformed escape; out may then hold a
 * partial result.
 */
static fkdl_bool unescapeString(char *out, const char *in, size_t inLen)
{
  size_t pos = 0;

  if (!in || !out) {
    return FKDL_FALSE;
  }

  while (pos < inLen) {
    char plain = 0;
    int isSimple = 1;

    /* Ordinary byte: copy through */
    if (in[pos] != '\\') {
      *out++ = in[pos++];
      continue;
    }

    /* A backslash must be followed by at least one more byte */
    if (pos + 1 >= inLen) {
      return FKDL_FALSE;
    }

    /* Single-character escapes map to exactly one output byte */
    switch (in[pos + 1]) {
      case 'n':  plain = '\n'; break;
      case 'r':  plain = '\r'; break;
      case 't':  plain = '\t'; break;
      case '\\': plain = '\\'; break;
      case '/':  plain = '/';  break;
      case '"':  plain = '"';  break;
      case 'b':  plain = '\b'; break;
      case 'f':  plain = '\f'; break;
      default:   isSimple = 0; break;
    }
    if (isSimple) {
      *out++ = plain;
      pos += 2;
      continue;
    }

    if (in[pos + 1] != 'u') {
      /* Unknown escape if it's not \\u{123456} */
      return FKDL_FALSE;
    }

    /* Unicode escape: needs room for at least \u{1} */
    if (pos + 4 >= inLen) {
      return FKDL_FALSE;
    }
    if (in[pos + 2] != '{') {
      return FKDL_FALSE;
    }

    {
      /* Temporary cursor positioned just after the "\u{" */
      struct fkdl_cursor cursor = {
        .input = in + pos + 3,
        .inputLen = inLen - pos - 3,
        .line = 1,
      };
      uint32_t codePoint;
      size_t bytesRead;
      size_t bytesWritten;
      if (!consumeHexCode(&cursor, &codePoint, &bytesRead, NULL)) {
        return FKDL_FALSE;
      }
      /* The encoder is told that up to 4 bytes of room are available */
      if (!writeUtf8Char(out, 4, codePoint, &bytesWritten)) {
        return FKDL_FALSE;
      }
      out += bytesWritten;
      /* Skip the hex code plus four more bytes of escape syntax */
      pos += bytesRead + 4;
    }
  }
  return FKDL_TRUE;
}

/* Construct a fkdl_string with the contents of the string pointed to by str,
 * but with escape sequences applied. The result is null terminated.
 *
 * Returns a string with data == NULL and len == 0 when the allocator is
 * unusable, str is NULL, the escapes are invalid, or allocation fails. A
 * buffer is never returned unless it was completely written.
 */
static struct fkdl_string unescapeAndCopyString(struct fkdl_allocator *allocator, const char *str, size_t strLen)
{
  struct fkdl_string out = {0};
  if (!allocator || !allocator->malloc || !str) {
    return out;
  }
  /* First pass validates the escapes and measures the output */
  size_t rawLen;
  if (!unescapedStringLength(str, strLen, &rawLen)) {
    return out;
  }
  out.data = allocator->malloc(allocator->context, rawLen + 1);
  if (!out.data) {
    return out;
  }
  if (!unescapeString(out.data, str, strLen)) {
    /* The buffer is only partially written and unterminated; do not hand */
    /* it back as if it were a valid (empty) string. */
    if (allocator->free) {
      allocator->free(allocator->context, out.data);
    }
    out.data = NULL;
    out.len = 0;
    return out;
  }
  /* Null terminate it as a kindness */
  out.len = rawLen;
  out.data[out.len] = 0;
  return out;
}

static struct fkdl_string copyIdentifierOrStringToString(
  struct fkdl_allocator *allocator,
  const char *str,
  const struct fkdl_token *token,
  struct fkdl_error *error)
{
  struct fkdl_string string = {0};
  if (token->type != FKDL_TOK_STRING && token->type != FKDL_TOK_IDENTIFIER) {
    errorFromToken(error, token, "Unexpected string or identifier");
    return string;
  }
  return copyString(allocator, str + token->index, token->length);
}

static struct fkdl_value copyTokenToValue(
  struct fkdl_allocator *allocator,
  const char *str,
  const struct fkdl_token *typeToken,
  const struct fkdl_token *valueToken,
  struct fkdl_error *error)
{
  struct fkdl_value value = {0};
  if (!allocator || !str || !valueToken) {
    return value;
  }
  switch(valueToken->type) {
    case FKDL_TOK_NULL:
      value.type = FKDL_NULL;
      break;
    case FKDL_TOK_STRING:
    case FKDL_TOK_RAW_STRING:
      value.type = FKDL_STRING;
      break;
    case FKDL_TOK_NUMBER:
      value.type = FKDL_NUMBER;
      break;
    case FKDL_TOK_BOOL:
      value.type = FKDL_BOOL;
      break;
    default:
      return value;
  }

  if (typeToken) {
    value.annotation = copyString(allocator, str + typeToken->index, typeToken->length);
    if (!value.annotation.data) {
      errorFromToken(error, valueToken, "Out of memory");
      return value;
    }
  }

  if (value.type != FKDL_NULL) {
    if (valueToken->type == FKDL_TOK_STRING) {
      value.as_string = unescapeAndCopyString(allocator, str + valueToken->index, valueToken->length);
    } else if (valueToken->type == FKDL_TOK_NUMBER) {
      value.as_string = copyNumber(allocator, str + valueToken->index, valueToken->length);
    } else {
      value.as_string = copyString(allocator, str + valueToken->index, valueToken->length);
    }
    if (!value.as_string.data) {
      errorFromToken(error, valueToken, "Out of memory");
      return value;
    }
  }

  return value;
}

/* True for the token types listed here: null, bool, identifier, string,
 * raw string and number.
 */
static fkdl_bool tokenTypeIsValue(enum fkdl_token_type type)
{
  if (type == FKDL_TOK_NULL ||
      type == FKDL_TOK_BOOL ||
      type == FKDL_TOK_IDENTIFIER ||
      type == FKDL_TOK_STRING ||
      type == FKDL_TOK_RAW_STRING ||
      type == FKDL_TOK_NUMBER) {
    return FKDL_TRUE;
  }
  return FKDL_FALSE;
}

/* Frees the buffer owned by string (if any) and resets it to empty.
 * A NULL allocator selects fkdl_defaultAllocator. Nothing happens if string
 * is NULL or the allocator has no free function.
 */
void fkdl_cleanupString(struct fkdl_string *string, struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;

  if (string && alloc->free) {
    if (string->data) {
      alloc->free(alloc->context, string->data);
      string->data = NULL;
    }
    string->len = 0;
  }
}

/* Releases the strings owned by value (its type annotation and its text)
 * and marks it as FKDL_INVALID_VALUE. A NULL value is ignored. The allocator
 * is passed through to fkdl_cleanupString.
 */
void fkdl_cleanupValue(struct fkdl_value *value, struct fkdl_allocator *allocator)
{
  if (!value) {
    return;
  }
  /* The annotation is allocated separately from the text, so free both */
  fkdl_cleanupString(&value->annotation, allocator);
  fkdl_cleanupString(&value->as_string, allocator);
  value->type = FKDL_INVALID_VALUE;
}

/* Releases a property's key and value. A NULL property is ignored. */
void fkdl_cleanupProperty(struct fkdl_property *property, struct fkdl_allocator *allocator)
{
  if (property) {
    fkdl_cleanupString(&property->key, allocator);
    fkdl_cleanupValue(&property->value, allocator);
  }
}

/* Releases a node's arguments, properties and children (recursively), then
 * frees the three arrays and resets their pointers, lengths and capacities.
 * A NULL allocator selects fkdl_defaultAllocator. Nothing happens if node is
 * NULL or the allocator has no free function.
 */
void fkdl_cleanupNode(struct fkdl_node *node, struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;
  size_t idx;

  if (!node || !alloc->free) {
    return;
  }

  if (node->arguments) {
    for (idx = 0; idx != node->argumentsLen; idx++) {
      fkdl_cleanupValue(node->arguments + idx, alloc);
    }
    alloc->free(alloc->context, node->arguments);
    node->argumentsCap = 0;
    node->argumentsLen = 0;
    node->arguments = NULL;
  }

  if (node->properties) {
    for (idx = 0; idx != node->propertiesLen; idx++) {
      fkdl_cleanupProperty(node->properties + idx, alloc);
    }
    alloc->free(alloc->context, node->properties);
    node->propertiesCap = 0;
    node->propertiesLen = 0;
    node->properties = NULL;
  }

  if (node->children) {
    /* Children are nodes themselves, so recurse */
    for (idx = 0; idx != node->childrenLen; idx++) {
      fkdl_cleanupNode(node->children + idx, alloc);
    }
    alloc->free(alloc->context, node->children);
    node->childrenCap = 0;
    node->childrenLen = 0;
    node->children = NULL;
  }
}

/* Releases every top-level node of document and the array that holds them,
 * leaving the document empty. A NULL allocator selects
 * fkdl_defaultAllocator. Nothing happens if document is NULL or the
 * allocator has no free function.
 */
void fkdl_cleanupDocument(struct fkdl_document *document, struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!document || !allocator->free) {
    return;
  }

  if (document->nodes) {
    size_t i;
    for (i = 0; i < document->nodesLen; ++i) {
      fkdl_cleanupNode(&document->nodes[i], allocator);
    }
    allocator->free(allocator->context, document->nodes);
    /* Reset the bookkeeping so a second cleanup, or reuse of the document */
    /* with fkdl_documentAddNode, never touches the freed array. */
    document->nodes = NULL;
    document->nodesLen = 0;
    document->nodesCap = 0;
  }
}

/* Appends a shallow copy of *value to node's argument list. The list starts
 * with room for 8 values and doubles whenever it is full. A NULL allocator
 * selects fkdl_defaultAllocator. Returns FKDL_FALSE on bad arguments or
 * allocation failure.
 */
fkdl_bool fkdl_nodeAddArgument(
  struct fkdl_node *node,
  const struct fkdl_value *value,
  struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;

  if (!node || !value || !alloc->malloc || !alloc->realloc) {
    return FKDL_FALSE;
  }

  if (node->arguments == NULL) {
    /* First argument: create the initial storage */
    node->argumentsLen = 0;
    node->argumentsCap = 8;
    node->arguments = alloc->malloc(
      alloc->context,
      node->argumentsCap * sizeof *node->arguments);
    if (node->arguments == NULL) {
      return FKDL_FALSE;
    }
  } else if (node->argumentsCap == node->argumentsLen) {
    /* Full: grow to twice the size, keeping the old array on failure */
    const size_t grownCap = 2 * node->argumentsCap;
    struct fkdl_value *grown = alloc->realloc(
      alloc->context,
      node->arguments,
      grownCap * sizeof *node->arguments);
    if (grown == NULL) {
      return FKDL_FALSE;
    }
    node->argumentsCap = grownCap;
    node->arguments = grown;
  }

  node->arguments[node->argumentsLen] = *value;
  node->argumentsLen += 1;
  return FKDL_TRUE;
}

/* Removes the argument at argumentIndex from node: the value is cleaned up
 * and the later arguments move down one slot. A NULL allocator selects
 * fkdl_defaultAllocator. Returns FKDL_FALSE if node has no arguments, the
 * index is out of range, or the allocator has no free function.
 */
fkdl_bool fkdl_nodeRemoveArgument(
  struct fkdl_node *node,
  size_t argumentIndex,
  struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;
  size_t next;

  if (!node || !alloc->free || !node->arguments) {
    return FKDL_FALSE;
  }
  if (argumentIndex >= node->argumentsLen) {
    return FKDL_FALSE;
  }

  fkdl_cleanupValue(node->arguments + argumentIndex, alloc);

  /* Close the gap left by the removed value */
  for (next = argumentIndex + 1; next < node->argumentsLen; ++next) {
    node->arguments[next - 1] = node->arguments[next];
  }
  node->argumentsLen -= 1;

  return FKDL_TRUE;
}

/* Reserves one more slot at the end of node's property list and returns a
 * pointer to it; the slot's contents are not initialised here. The list
 * starts with room for 8 properties and doubles whenever it is full.
 * Returns NULL on bad arguments or allocation failure.
 */
static struct fkdl_property *nodeAllocProperty(
  struct fkdl_node *node,
  struct fkdl_allocator *allocator)
{
  struct fkdl_property *slot;

  if (!node || !allocator || !allocator->malloc || !allocator->realloc) {
    return NULL;
  }

  if (node->properties == NULL) {
    /* First property: create the initial storage */
    node->propertiesLen = 0;
    node->propertiesCap = 8;
    node->properties = allocator->malloc(
      allocator->context,
      node->propertiesCap * sizeof *node->properties);
    if (node->properties == NULL) {
      return NULL;
    }
  } else if (node->propertiesCap == node->propertiesLen) {
    /* Full: grow to twice the size, keeping the old array on failure */
    const size_t grownCap = 2 * node->propertiesCap;
    struct fkdl_property *grown = allocator->realloc(
      allocator->context,
      node->properties,
      grownCap * sizeof *node->properties);
    if (grown == NULL) {
      return NULL;
    }
    node->propertiesCap = grownCap;
    node->properties = grown;
  }

  slot = node->properties + node->propertiesLen;
  node->propertiesLen += 1;
  return slot;
}

/* Takes the last property of a node and shifts it left until sorted */
static void nodeSortProperty(struct fkdl_node *node)
{
  if (!node) {
    return;
  }
  size_t i;
  for (i = node->propertiesLen; i >= 2; --i) {
    struct fkdl_property *rhs = &node->properties[i-1];
    struct fkdl_property *lhs = &node->properties[i-2];
    size_t len = lhs->key.len < rhs->key.len ? lhs->key.len : rhs->key.len;
    int cmp = fkdl_memcmp(lhs->key.data, rhs->key.data, len);
    if (cmp < 0) {
      break;
    }
    if (cmp == 0 && lhs->key.len <= rhs->key.len) {
      break;
    }
    struct fkdl_property tmp = *lhs;
    *lhs = *rhs;
    *rhs = tmp;
  }
}

/* Stores a shallow copy of *property on node. If node already has a
 * property with the same key, that one is cleaned up and overwritten in
 * place; otherwise a new slot is allocated and sorted into position.
 * A NULL allocator selects fkdl_defaultAllocator. Returns FKDL_FALSE on bad
 * arguments or when a new slot cannot be allocated.
 */
fkdl_bool fkdl_nodeAddProperty(
  struct fkdl_node *node,
  const struct fkdl_property *property,
  struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;
  struct fkdl_property *existing = NULL;
  struct fkdl_property *slot;
  size_t idx;

  if (!node || !property) {
    return FKDL_FALSE;
  }

  /* Look for a property with exactly the same key */
  for (idx = 0; idx < node->propertiesLen; ++idx) {
    struct fkdl_property *candidate = &node->properties[idx];
    if (candidate->key.len == property->key.len &&
        !fkdl_memcmp(candidate->key.data, property->key.data, property->key.len)) {
      existing = candidate;
      break;
    }
  }

  if (existing) {
    /* Same key: release the old contents and replace them */
    fkdl_cleanupProperty(existing, alloc);
    *existing = *property;
    return FKDL_TRUE;
  }

  /* New key: append, then move into sorted position */
  slot = nodeAllocProperty(node, alloc);
  if (!slot) {
    return FKDL_FALSE;
  }
  *slot = *property;
  nodeSortProperty(node);
  return FKDL_TRUE;
}

/* Removes the property whose key equals *key from node: the property is
 * cleaned up and the later properties move down one slot, which keeps the
 * list in its existing order. A NULL allocator selects
 * fkdl_defaultAllocator. Returns FKDL_FALSE on bad arguments or when no
 * property has that key.
 */
fkdl_bool fkdl_nodeRemoveProperty(
  struct fkdl_node *node,
  const struct fkdl_string *key,
  struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!node || !key) {
    return FKDL_FALSE;
  }

  if (!node->properties) {
    return FKDL_FALSE;
  }
  fkdl_bool foundProperty = FKDL_FALSE;
  size_t propertyIndex = 0;

  size_t i;
  for (i = 0; i < node->propertiesLen; ++i) {
    /* Keys match only if they have the same length and the same bytes */
    if (node->properties[i].key.len != key->len) {
      continue;
    }
    if (fkdl_memcmp(node->properties[i].key.data,
                    key->data,
                    key->len)) {
      continue;
    }
    foundProperty = FKDL_TRUE;
    propertyIndex = i;
    break;
  }

  if (!foundProperty) {
    return FKDL_FALSE;
  }

  fkdl_cleanupProperty(&node->properties[propertyIndex], allocator);
  /* Close the gap left by the removed property */
  for (i = propertyIndex; i + 1 < node->propertiesLen; ++i) {
    node->properties[i] = node->properties[i+1];
  }
  node->propertiesLen -= 1;
  return FKDL_TRUE;
}

/* Constructs a property by copying the null-terminated key and value strings */
static fkdl_bool buildProperty(
  struct fkdl_property *property,
  struct fkdl_allocator *allocator,
  enum fkdl_value_type type,
  const char *key,
  const char *value)
{
  fkdl_cleanupProperty(property, allocator);
  struct fkdl_string keyStr = copyString(allocator, key, fkdl_strlen(key));
  if (!keyStr.data) {
    return FKDL_FALSE;
  }
  struct fkdl_string valueStr = {0};
  if (type != FKDL_BOOL) {
    valueStr = copyString(allocator, value, fkdl_strlen(value));
    if (!valueStr.data) {
      fkdl_cleanupProperty(property, allocator);
      return FKDL_FALSE;
    }
  }
  property->key = keyStr;
  property->value.type = type;
  property->value.as_string = valueStr;
  return FKDL_TRUE;
}

fkdl_bool fkdl_nodeSetStringProperty(
  struct fkdl_node *node,
  const char *key,
  const char *value,
  struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!node || !key || !value) {
    return FKDL_FALSE;
  }

  struct fkdl_property property;
  if (!buildProperty(&property, allocator, FKDL_STRING, key, value)) {
    return FKDL_FALSE;
  }

  if (!fkdl_nodeAddProperty(node, &property, allocator)) {
    fkdl_cleanupProperty(&property, allocator);
    return FKDL_FALSE;
  }

  return FKDL_TRUE;
}

/* Sets the property named key on node to a boolean value whose text is
 * "true" or "false". An existing property with the same key is replaced.
 * A NULL allocator selects fkdl_defaultAllocator. Returns FKDL_FALSE on bad
 * arguments or allocation failure.
 */
fkdl_bool fkdl_nodeSetBoolProperty(
  struct fkdl_node *node,
  const char *key,
  fkdl_bool value,
  struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!node || !key) {
    return FKDL_FALSE;
  }

  const char *valueStr = value ? "true" : "false";
  /* Zeroed so that any cleanup of it never sees indeterminate pointers */
  struct fkdl_property property = {0};
  if (!buildProperty(&property, allocator, FKDL_BOOL, key, valueStr)) {
    return FKDL_FALSE;
  }

  /* Make sure the boolean's text is actually stored with the value, in */
  /* case the builder did not copy it. */
  if (!property.value.as_string.data) {
    property.value.as_string = copyString(allocator, valueStr, fkdl_strlen(valueStr));
    if (!property.value.as_string.data) {
      fkdl_cleanupProperty(&property, allocator);
      return FKDL_FALSE;
    }
  }

  if (!fkdl_nodeAddProperty(node, &property, allocator)) {
    /* The node did not take ownership, so release the copies */
    fkdl_cleanupProperty(&property, allocator);
    return FKDL_FALSE;
  }

  return FKDL_TRUE;
}

fkdl_bool fkdl_nodeSetNullProperty(
  struct fkdl_node *node,
  const char *key,
  struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!node || !key) {
    return FKDL_FALSE;
  }

  struct fkdl_property property;
  if (!buildProperty(&property, allocator, FKDL_NULL, key, NULL)) {
    return FKDL_FALSE;
  }

  if (!fkdl_nodeAddProperty(node, &property, allocator)) {
    fkdl_cleanupProperty(&property, allocator);
    return FKDL_FALSE;
  }

  return FKDL_TRUE;
}

fkdl_bool fkdl_nodeSetNumberProperty(
  struct fkdl_node *node,
  const char *key,
  const char *value,
  struct fkdl_allocator *allocator)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }
  if (!node || !key || !value) {
    return FKDL_FALSE;
  }

  struct fkdl_property property;
  if (!buildProperty(&property, allocator, FKDL_NUMBER, key, value)) {
    return FKDL_FALSE;
  }

  if (!fkdl_nodeAddProperty(node, &property, allocator)) {
    fkdl_cleanupProperty(&property, allocator);
    return FKDL_FALSE;
  }

  return FKDL_TRUE;
}

/* Appends a shallow copy of *child to node's list of children. The list
 * starts with room for 8 nodes and doubles whenever it is full. A NULL
 * allocator selects fkdl_defaultAllocator. Returns FKDL_FALSE on bad
 * arguments or allocation failure.
 */
fkdl_bool fkdl_nodeAddChild(
  struct fkdl_node *node,
  const struct fkdl_node *child,
  struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;

  if (!node || !child || !alloc->malloc || !alloc->realloc) {
    return FKDL_FALSE;
  }

  if (node->children == NULL) {
    /* First child: create the initial storage */
    node->childrenLen = 0;
    node->childrenCap = 8;
    node->children = alloc->malloc(
      alloc->context,
      node->childrenCap * sizeof *node->children);
    if (node->children == NULL) {
      return FKDL_FALSE;
    }
  } else if (node->childrenCap == node->childrenLen) {
    /* Full: grow to twice the size, keeping the old array on failure */
    const size_t grownCap = 2 * node->childrenCap;
    struct fkdl_node *grown = alloc->realloc(
      alloc->context,
      node->children,
      grownCap * sizeof *node->children);
    if (grown == NULL) {
      return FKDL_FALSE;
    }
    node->childrenCap = grownCap;
    node->children = grown;
  }

  node->children[node->childrenLen] = *child;
  node->childrenLen += 1;
  return FKDL_TRUE;
}

/* Appends a shallow copy of *node to document's list of top-level nodes.
 * The list starts with room for 8 nodes and doubles whenever it is full.
 * A NULL allocator selects fkdl_defaultAllocator. Returns FKDL_FALSE on bad
 * arguments or allocation failure.
 */
fkdl_bool fkdl_documentAddNode(
  struct fkdl_document *document,
  const struct fkdl_node *node,
  struct fkdl_allocator *allocator)
{
  struct fkdl_allocator *alloc = allocator ? allocator : &fkdl_defaultAllocator;

  if (!document || !node || !alloc->malloc || !alloc->realloc) {
    return FKDL_FALSE;
  }

  if (document->nodes == NULL) {
    /* First node: create the initial storage */
    document->nodesLen = 0;
    document->nodesCap = 8;
    document->nodes = alloc->malloc(
      alloc->context,
      document->nodesCap * sizeof *document->nodes);
    if (document->nodes == NULL) {
      return FKDL_FALSE;
    }
  } else if (document->nodesCap == document->nodesLen) {
    /* Full: grow to twice the size, keeping the old array on failure */
    const size_t grownCap = 2 * document->nodesCap;
    struct fkdl_node *grown = alloc->realloc(
      alloc->context,
      document->nodes,
      grownCap * sizeof *document->nodes);
    if (grown == NULL) {
      return FKDL_FALSE;
    }
    document->nodesCap = grownCap;
    document->nodes = grown;
  }

  document->nodes[document->nodesLen] = *node;
  document->nodesLen += 1;

  return FKDL_TRUE;
}

/* Handles a node terminator token. With no node in progress this is a
 * no-op. If the node was marked to be skipped it is cleaned up and both
 * flags are cleared. Otherwise the node is handed to parentNode (when there
 * is one) or to the document, after which curNode is zeroed and
 * *haveCurNode cleared. A failed hand-over is reported through error as
 * "Out of memory".
 */
static fkdl_bool terminateNode(
  struct fkdl_document *document,
  const struct fkdl_token *token,
  struct fkdl_allocator *allocator,
  struct fkdl_node *curNode,
  struct fkdl_node *parentNode,
  struct fkdl_error *error,
  fkdl_bool *haveCurNode,
  fkdl_bool *skipNextNode)
{
  fkdl_bool stored;

  if (!*haveCurNode) {
    return FKDL_TRUE;
  }

  if (*skipNextNode) {
    /* Discard the node instead of attaching it anywhere */
    fkdl_cleanupNode(curNode, allocator);
    *skipNextNode = FKDL_FALSE;
    *haveCurNode = FKDL_FALSE;
    return FKDL_TRUE;
  }

  stored = parentNode
    ? fkdl_nodeAddChild(parentNode, curNode, allocator)
    : fkdl_documentAddNode(document, curNode, allocator);
  if (!stored) {
    errorFromToken(error, token, "Out of memory");
    return FKDL_FALSE;
  }

  /* The new owner holds a shallow copy, so just forget ours */
  fkdl_memset(curNode, 0, sizeof *curNode);
  *haveCurNode = FKDL_FALSE;
  return FKDL_TRUE;
}

/* Process a list of tokens, building a document out of them */
static fkdl_bool processTokens(
  const char *input,
  const struct fkdl_token *tokens,
  size_t tokensLen,
  struct fkdl_document *document, 
  struct fkdl_allocator *allocator,
  struct fkdl_error *error)
{
  fkdl_bool failed = FKDL_FALSE;

  /* The current node we're building, i.e. we're going through its properties */
  fkdl_bool haveCurNode = FKDL_FALSE;
  struct fkdl_node curNode = {0};

  /* Parent of the current node we're handling, i.e. we're going through its children */
  struct fkdl_node *parentNode = NULL;
  /* Stack of parental nodes excluding the current parentNode. For tracking */
  /* recursion through children. */
  struct fkdl_array parents = { .itemSize = sizeof(struct fkdl_node*) };

  /* slashdash support */
  fkdl_bool skipNextNode = FKDL_FALSE;
  fkdl_bool skipNextPropOrArg = FKDL_FALSE;

  /* Skipdepth tracks how far into a hierarchy we are when slashdash tells us to ignore */
  /* a node and its children. If >=1 then nothing matters other than { and }, which each */
  /* increment or decrement skipDepth respectively. */
  size_t skipDepth = 0;

  size_t i;
  for (i = 0; i < tokensLen; ++i) {

    /* If skipDepth is >0, we need to fast forward through everything except braces */
    /* until the current brace pair ends. */
    if (skipDepth > 0) {
      if (tokens[i].type == FKDL_TOK_OPEN_BRACE) {
        skipDepth += 1;
      } else if (tokens[i].type == FKDL_TOK_CLOSE_BRACE) {
        skipDepth -= 1;
      }
      if (skipDepth == 0 && skipNextPropOrArg) {
        /* we were handling a /-{ case, so the final } terminates the node */
        skipNextPropOrArg = FKDL_FALSE;
        if (!terminateNode(document, &tokens[i], allocator, &curNode, parentNode, error,
          &haveCurNode, &skipNextNode)) {
            failed = FKDL_TRUE;
            break;
        }
      }
      continue;
    }

    if (!haveCurNode &&
        (tokens[i].type == FKDL_TOK_IDENTIFIER ||
         tokens[i].type == FKDL_TOK_STRING ||
         tokens[i].type == FKDL_TOK_RAW_STRING ||
         tokens[i].type == FKDL_TOK_TYPE_ANNOTATION)) {
      /* Start of a new node */
      size_t typeIndex = i;
      size_t identifierIndex = i;
      fkdl_bool hasType = i + 1 < tokensLen && tokens[i].type == FKDL_TOK_TYPE_ANNOTATION &&
        (tokens[i+1].type == FKDL_TOK_IDENTIFIER 
        || tokens[i+1].type == FKDL_TOK_STRING
        || tokens[i+1].type == FKDL_TOK_RAW_STRING);
      haveCurNode = FKDL_TRUE;
      if (hasType) {
        identifierIndex = i + 1;
        if (tokens[identifierIndex].type == FKDL_TOK_STRING) {
          curNode.annotation = copyString(allocator, input + tokens[typeIndex].index, tokens[typeIndex].length);
        } else {
          curNode.annotation = unescapeAndCopyString(allocator, input + tokens[typeIndex].index, tokens[typeIndex].length);
        }
        if (!curNode.annotation.data) {
          errorFromToken(error, &tokens[i], "Out of memory");
          failed = FKDL_TRUE;
          break;
        }
      }
      curNode.identifier = copyString(
        allocator,
        input + tokens[identifierIndex].index,
        tokens[identifierIndex].length);
      if (!curNode.identifier.data) {
        errorFromToken(error, &tokens[i], "Out of memory");
        failed = FKDL_TRUE;
        break;
      }
      if (hasType) {
        /* skip over the identifier if we just consumed type + identifier */
        i += 1;
      }
      continue;
    }

    if (tokens[i].type == FKDL_TOK_NODE_TERMINATOR) {
      if (!terminateNode(document, &tokens[i], allocator, &curNode, parentNode, error,
        &haveCurNode, &skipNextNode)) {
          failed = FKDL_TRUE;
          break;
      }
      continue;
    }

    if (tokens[i].type == FKDL_TOK_OPEN_BRACE) {
      /* Current node is terminated, and becomes the parent node. */
      if (!haveCurNode) {
        errorFromToken(error, &tokens[i], "Unexpected {. No active node.");
        failed = FKDL_TRUE;
        break;
      }
      if (skipNextNode) {
        fkdl_cleanupNode(&curNode, allocator);
        haveCurNode = FKDL_FALSE;
        skipNextNode = FKDL_FALSE;
        /* Engage skipDepth so everything until the matching close brace */
        /* is also ignored. */
        skipDepth += 1;
        continue;
      }
      if (skipNextPropOrArg) {
        /* Engage skipDepth so everything until the matching close brace */
        /* is also ignored. */
        skipDepth += 1;
        continue;
      }

      if (parentNode) {
        /* Current node is added to the old parent, */
        /* then the pointer to the old parent is pushed into the parents stack, */
        /* then finally the last node in the old parent becomes our new parent */
        if (!fkdl_nodeAddChild(parentNode, &curNode, allocator)) {
          errorFromToken(error, &tokens[i], "Out of memory");
          failed = FKDL_TRUE;
          break;
        }
        if (!arrayPush(&parents, &parentNode, allocator)) {
          errorFromToken(error, &tokens[i], "Out of memory");
          failed = FKDL_TRUE;
          break;
        }
        parentNode = &parentNode->children[parentNode->childrenLen - 1];
      } else {
        /* No parent, so document just gains a new node, and parent becomes that. */
        if (!fkdl_documentAddNode(document, &curNode, allocator)) {
          errorFromToken(error, &tokens[i], "Out of memory");
          failed = FKDL_TRUE;
          break;
        }
        parentNode = &document->nodes[document->nodesLen - 1];
      }
      fkdl_memset(&curNode, 0, sizeof curNode);
      haveCurNode = FKDL_FALSE;
      continue;
    }

    if (tokens[i].type == FKDL_TOK_CLOSE_BRACE) {
      if (!parentNode) {
        errorFromToken(error, &tokens[i], "Unexpected }");
        failed = FKDL_TRUE;
        break;
      }

      if (parents.len > 0) {
        if (!arrayPop(&parents, &parentNode)) {
          errorFromToken(error, &tokens[i], "Internal error");
          failed = FKDL_TRUE;
          break;
        }
      } else {
        /* Nothing else on the stack, parent is now NULL */
        parentNode = NULL;
      }
      continue;
    }

    if (tokens[i].type == FKDL_TOK_SLASHDASH) {
      if (haveCurNode && !skipNextPropOrArg) {
        skipNextPropOrArg = FKDL_TRUE;
      } else if (!haveCurNode && !skipNextNode) {
        skipNextNode = FKDL_TRUE;
      } else {
        errorFromToken(error, &tokens[i], "Unexpected /-");
        failed = FKDL_TRUE;
        break;
      }
      continue;
    }

    if (haveCurNode && 
         (tokens[i].type == FKDL_TOK_TYPE_ANNOTATION ||
          tokenTypeIsValue(tokens[i].type))) {
      /* Already in a node, so this is a property or argument */
      fkdl_bool isProperty = i + 2 < tokensLen
        && tokens[i+1].type == FKDL_TOK_PROPERTY_EQUALS;

      if (!isProperty && tokens[i].type == FKDL_TOK_IDENTIFIER) {
        errorFromToken(error, &tokens[i], "Identifiers cannot be arguments");
        failed = FKDL_TRUE;
        break;
      }

      fkdl_bool propertyHasType = i + 3 < tokensLen
        && tokens[i+2].type == FKDL_TOK_TYPE_ANNOTATION;

      fkdl_bool argumentHasType = !isProperty && i + 1 < tokensLen
        && tokens[i].type == FKDL_TOK_TYPE_ANNOTATION
        && tokenTypeIsValue(tokens[i+1].type);

      if (skipNextPropOrArg) {
        skipNextPropOrArg = FKDL_FALSE;
        if (isProperty && propertyHasType) {
          i += 3; /* Skip '=', type annotation, and value. */
        } else if (isProperty) {
          i += 2; /* Skip '=' and value. */
        } else if (argumentHasType) {
          i += 1; /* Skip the type annotation */
        }
        continue;
      }

      if (isProperty) {
        struct fkdl_string key = copyIdentifierOrStringToString(
          allocator,
          input,
          &tokens[i],
          error);
        if (!key.data) {
          failed = FKDL_TRUE;
          break;
        }
        size_t valueIndex;
        size_t typeIndex;
        if (propertyHasType) {
          typeIndex = i + 2;
          valueIndex = i + 3;
        } else {
          typeIndex = 0;
          valueIndex = i + 2;
        }
        if (!tokenTypeIsValue(tokens[valueIndex].type)) {
          errorFromToken(error, &tokens[valueIndex], "Expected value after '='");
          fkdl_cleanupString(&key, allocator);
          failed = FKDL_TRUE;
          break;
        }
        struct fkdl_value value = copyTokenToValue(
          allocator,
          input,
          propertyHasType ? &tokens[typeIndex] : NULL,
          &tokens[valueIndex],
          error);
        if (value.type == FKDL_INVALID_VALUE) {
          fkdl_cleanupString(&key, allocator);
          failed = FKDL_TRUE;
          break;
        }

        struct fkdl_property property = {
          .key = key,
          .value = value,
        };
        if (!fkdl_nodeAddProperty(&curNode, &property, allocator)) {
          errorFromToken(error, &tokens[i], "Out of memory");
          fkdl_cleanupProperty(&property, allocator);
          failed = FKDL_TRUE;
          break;
        }
        if (propertyHasType) {
          i += 1; /* Skip the type annotation */
        }
        i += 2; /* Skip '=' and value too. */
      } else {
        /* Just a plain argument */
        size_t typeIndex;
        size_t valueIndex;
        if (argumentHasType) {
          typeIndex = i;
          valueIndex = i + 1;
        } else {
          valueIndex = i;
        }
        struct fkdl_value value = copyTokenToValue(
          allocator,
          input,
          argumentHasType ? &tokens[typeIndex] : NULL,
          &tokens[valueIndex],
          error);
        if (!fkdl_nodeAddArgument(&curNode, &value, allocator)) {
          fkdl_cleanupValue(&value, allocator);
          fkdl_cleanupNode(&curNode, allocator);
          errorFromToken(error, &tokens[i], "Out of memory");
          failed = FKDL_TRUE;
          break;
        }
        if (argumentHasType) {
          i += 1; /* skip the value after the type annotation */
        }
      }
      continue;
    }

    /* Didn't handle this token. */
    switch (tokens[i].type) {
      case FKDL_TOK_NULL:
        errorFromToken(error, &tokens[i], "Unexpected null");
        break;
      case FKDL_TOK_BOOL:
        errorFromToken(error, &tokens[i], "Unexpected bool");
        break;
      case FKDL_TOK_IDENTIFIER:
        errorFromToken(error, &tokens[i], "Unexpected identifier");
        break;
      case FKDL_TOK_STRING:
        errorFromToken(error, &tokens[i], "Unexpected string");
        break;
      case FKDL_TOK_RAW_STRING:
        errorFromToken(error, &tokens[i], "Unexpected raw string");
        break;
      case FKDL_TOK_NUMBER:
        errorFromToken(error, &tokens[i], "Unexpected number");
        break;
      case FKDL_TOK_TYPE_ANNOTATION:
        errorFromToken(error, &tokens[i], "Unexpected type annotation");
        break;
      case FKDL_TOK_PROPERTY_EQUALS:
        errorFromToken(error, &tokens[i], "Unexpected =");
        break;
      case FKDL_TOK_NODE_TERMINATOR:
        errorFromToken(error, &tokens[i], "Unexpected end of node");
        break;
      case FKDL_TOK_OPEN_BRACE:
        errorFromToken(error, &tokens[i], "Unexpected {");
        break;
      case FKDL_TOK_CLOSE_BRACE:
        errorFromToken(error, &tokens[i], "Unexpected }");
        break;
      case FKDL_TOK_SLASHDASH:
        errorFromToken(error, &tokens[i], "Unexpected /-");
        break;
    }

    failed = FKDL_TRUE;
    break;
  }
  
  fkdl_cleanupNode(&curNode, allocator);
  arrayCleanup(&parents, allocator);
  return !failed;
}

fkdl_bool fkdl_readDocument(
  const char *input,
  size_t inputLen,
  struct fkdl_document *output,
  struct fkdl_allocator *allocator,
  struct fkdl_error *error
)
{
  if (!allocator) {
    allocator = &fkdl_defaultAllocator;
  }

  if (inputLen == 0) {
    inputLen = fkdl_strlen(input);
  }

  struct fkdl_array tokens = {
    .itemSize = sizeof(struct fkdl_token),
  };
  struct fkdl_error tokenError;

  if (!tokenise(
        input,
        inputLen,
        &tokens,
        error,
        allocator)) {
    return FKDL_FALSE;
  }

  struct fkdl_document document = {0};
  if (!processTokens(input, (void*)tokens.data, tokens.len, &document, allocator, error)) {
    return FKDL_FALSE;
  }

  *output = document;
  return FKDL_TRUE;
}

/* Reports whether a codepoint has to be written as an escape sequence when it
 * appears inside a quoted string.
 */
static fkdl_bool isEscaped(uint32_t codePoint)
{
  static const uint32_t escapedCodePoints[] = {
    0x0008, /* Backspace */
    0x0009, /* Tab */
    0x000A, /* Line Feed */
    0x000C, /* Form Feed */
    0x000D, /* Carriage return */
    0x0022, /* Quotation Mark */
    0x002F, /* Forwardslash */
    0x005C, /* Backslash */
    0x0085, /* Next Line */
    0x2028, /* Line Separator */
    0x2029  /* Paragraph Separator */
  };
  const size_t count = sizeof escapedCodePoints / sizeof escapedCodePoints[0];
  size_t k;

  for (k = 0; k < count; ++k) {
    if (escapedCodePoints[k] == codePoint) {
      return FKDL_TRUE;
    }
  }
  return FKDL_FALSE;
}

/* True if str is exactly the ASCII keyword `word` (a NUL-terminated literal) */
static fkdl_bool identifierMatchesKeyword(const struct fkdl_string *str, const char *word)
{
  size_t i;
  for (i = 0; i < str->len; ++i) {
    if (word[i] == '\0' || str->data[i] != word[i]) {
      return FKDL_FALSE;
    }
  }
  return word[i] == '\0';
}

/* True if the string would need quotes to be used as an identifier.
 *
 * A string can only be written bare if it is non-empty, is not one of the
 * keywords true/false/null, cannot be mistaken for a number (leading digit,
 * or sign followed by a digit), and contains no whitespace, newline, control
 * character, escaped character, or KDL punctuation: ( ) { } < > ; [ ] = ,
 *
 * Returns FKDL_FALSE for a NULL str and for input that is not valid UTF-8.
 */
static fkdl_bool identifierNeedsQuotes(const struct fkdl_string *str)
{
  size_t i;
  size_t firstDigitPos = 0;

  if (!str) {
    return FKDL_FALSE;
  }
  if (str->len == 0) {
    return FKDL_TRUE;
  }

  /* Bare true/false/null would be read back as a value, not an identifier */
  if (identifierMatchesKeyword(str, "true")
      || identifierMatchesKeyword(str, "false")
      || identifierMatchesKeyword(str, "null")) {
    return FKDL_TRUE;
  }

  /* Anything that starts like a number would be read back as a number */
  if (str->data[0] == '+' || str->data[0] == '-') {
    firstDigitPos = 1;
  }
  if (firstDigitPos < str->len
      && str->data[firstDigitPos] >= '0' && str->data[firstDigitPos] <= '9') {
    return FKDL_TRUE;
  }

  for (i = 0; i < str->len;) {
    uint32_t codePoint;
    size_t step;
    if (!readUtf8Char(str->data + i, str->len - i, &codePoint, &step)) {
      return FKDL_FALSE;
    }
    if (codePoint <= 0x20
        || isWhitespace(codePoint) || isNewline(codePoint) || isEscaped(codePoint)) {
      return FKDL_TRUE;
    }
    /* Punctuation that is not allowed inside a bare identifier */
    switch (codePoint) {
      case '(': case ')':
      case '{': case '}':
      case '<': case '>':
      case '[': case ']':
      case ';': case '=': case ',':
        return FKDL_TRUE;
      default:
        break;
    }
    i += step;
  }
  return FKDL_FALSE;
}

/* Writes an identifier to cursor, complete with quotes if needed.
 *
 * The identifier text always goes through writeEscapedString(); the
 * surrounding quotes are added only when identifierNeedsQuotes() says so.
 *
 * Returns FKDL_FALSE as soon as any individual write fails.
 */
static fkdl_bool writeIdentifier(
  struct fkdl_writeCursor *cursor,
  const struct fkdl_string *identifier)
{
  fkdl_bool needQuotes = identifierNeedsQuotes(identifier);

  if (needQuotes && !writeChar(cursor, '"')) {
    return FKDL_FALSE;
  }
  /* A failure here used to be ignored; report it like every other write. */
  if (!writeEscapedString(cursor, identifier)) {
    return FKDL_FALSE;
  }
  if (needQuotes && !writeChar(cursor, '"')) {
    return FKDL_FALSE;
  }
  return FKDL_TRUE;
}

/* Serialises a single value to cursor: an optional "(annotation)" prefix
 * followed by the value text. Null is written as the literal null, bools and
 * numbers are written verbatim from as_string, and strings are written
 * quoted and escaped. Any other value type is rejected.
 *
 * Returns FKDL_FALSE on NULL arguments, unknown type, or a failed write.
 */
static fkdl_bool writeValue(
  struct fkdl_writeCursor *cursor,
  const struct fkdl_value *value)
{
  if (!cursor || !value) {
    return FKDL_FALSE;
  }

  if (value->annotation.len > 0) {
    fkdl_bool wroteAnnotation =
      writeChar(cursor, '(')
      && writeString(cursor, &value->annotation)
      && writeChar(cursor, ')');
    if (!wroteAnnotation) {
      return FKDL_FALSE;
    }
  }

  switch (value->type) {
    case FKDL_NULL:
      return writeCString(cursor, "null");
    case FKDL_BOOL:
    case FKDL_NUMBER:
      /* Stored in their textual form already */
      return writeString(cursor, &value->as_string);
    case FKDL_STRING:
      return writeChar(cursor, '"')
        && writeEscapedString(cursor, &value->as_string)
        && writeChar(cursor, '"');
    default:
      break;
  }
  return FKDL_FALSE;
}

/* Serialises a property to cursor as key=value, with no surrounding spaces.
 * Returns FKDL_FALSE on NULL arguments or if any part fails to write.
 */
static fkdl_bool writeProperty(
  struct fkdl_writeCursor *cursor,
  const struct fkdl_property *property)
{
  if (!cursor || !property) {
    return FKDL_FALSE;
  }
  /* Short-circuits on the first failing write */
  return writeIdentifier(cursor, &property->key)
    && writeChar(cursor, '=')
    && writeValue(cursor, &property->value);
}

/* Writes a node to cursor with the relevant indentation.
 *
 * Output order is: indent, optional "(annotation)", identifier, each
 * argument, each property, then, if the node has children, " {", the
 * children one per line indented a further 4 spaces, and a closing "}" at
 * this node's own indent. The node always ends with a newline.
 *
 * Internal to this file, hence static like the other write helpers.
 *
 * Returns FKDL_FALSE on NULL arguments or as soon as any write fails.
 */
static fkdl_bool writeNode(
  struct fkdl_writeCursor *cursor,
  size_t indent, /* how many spaces of indentation to add */
  const struct fkdl_node *node)
{
  size_t i;

  if (!node || !cursor) {
    return FKDL_FALSE;
  }

  if (!cursorMemset(cursor, ' ', indent)) {
    return FKDL_FALSE;
  }

  /* write type annotation */
  if (node->annotation.len > 0) {
    if (!writeChar(cursor, '(')) {
      return FKDL_FALSE;
    }
    if (!writeString(cursor, &node->annotation)) {
      return FKDL_FALSE;
    }
    if (!writeChar(cursor, ')')) {
      return FKDL_FALSE;
    }
  }

  /* write identifier */
  if (!writeIdentifier(cursor, &node->identifier)) {
    return FKDL_FALSE;
  }

  /* write args, each preceded by a single space */
  for (i = 0; i < node->argumentsLen; ++i) {
    if (!writeChar(cursor, ' ')) {
      return FKDL_FALSE;
    }
    if (!writeValue(cursor, &node->arguments[i])) {
      return FKDL_FALSE;
    }
  }

  /* write properties, each preceded by a single space */
  for (i = 0; i < node->propertiesLen; ++i) {
    if (!writeChar(cursor, ' ')) {
      return FKDL_FALSE;
    }
    if (!writeProperty(cursor, &node->properties[i])) {
      return FKDL_FALSE;
    }
  }

  /* recurse through { children } */
  if (node->childrenLen > 0) {
    if (!writeCString(cursor, " {\n")) {
      return FKDL_FALSE;
    }
    for (i = 0; i < node->childrenLen; ++i) {
      if (!writeNode(cursor, indent + 4, &node->children[i])) {
        return FKDL_FALSE;
      }
    }
    /* closing brace lines up with the start of this node */
    if (!cursorMemset(cursor, ' ', indent)) {
      return FKDL_FALSE;
    }
    if (!writeChar(cursor, '}')) {
      return FKDL_FALSE;
    }
  }

  if (!writeChar(cursor, '\n')) {
    return FKDL_FALSE;
  }
  return FKDL_TRUE;
}

/* Serialises every top-level node of document, in order, followed by a NUL
 * terminator. A NULL output is allowed: the header documents that case as a
 * dry run that only reports the size via bytesWritten.
 *
 * bytesWritten, when non-NULL, is set from the cursor position on success
 * only. Returns FKDL_FALSE for a NULL document or when a node fails to write.
 */
fkdl_bool fkdl_writeDocument(
  char *output,
  size_t outputLen,
  size_t *bytesWritten,
  const struct fkdl_document *document)
{
  struct fkdl_writeCursor cursor = {
    .output = output,
    .outputLen = outputLen,
    .index = 0,
  };
  size_t nodeIndex = 0;

  if (!document) {
    return FKDL_FALSE;
  }

  while (nodeIndex < document->nodesLen) {
    if (!writeNode(&cursor, 0, &document->nodes[nodeIndex])) {
      return FKDL_FALSE;
    }
    ++nodeIndex;
  }

  /* null terminate */
  /* NOTE(review): the result of this write is not checked - confirm that is */
  /* intentional when the buffer has no room left for the terminator. */
  writeChar(&cursor, '\0');

  if (bytesWritten) {
    *bytesWritten = cursor.index;
  }
  return FKDL_TRUE;
}

A  => freekdl.h +433 -0
@@ 1,433 @@
/*
Copyright 2021 Harry Jeffery

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef FREEKDL_H
#define FREEKDL_H

#include <stddef.h>
#include <stdint.h>

/** Boolean type used by the FreeKDL API; holds FKDL_TRUE or FKDL_FALSE. */
typedef int fkdl_bool;
#define FKDL_TRUE 1
#define FKDL_FALSE 0

/** The kind of data held by a fkdl_value. */
enum fkdl_value_type
{
  /** Not a usable value. The parser treats a value of this type as a failed conversion. */
  FKDL_INVALID_VALUE,
  /** The KDL null value; serialised as the literal null. */
  FKDL_NULL,
  /** A string; serialised in double quotes with escaping applied. */
  FKDL_STRING,
  /** A number; serialised exactly as stored in as_string. */
  FKDL_NUMBER,
  /** A boolean; serialised exactly as stored in as_string. */
  FKDL_BOOL,
};

/** A UTF-8 string. Intended as a means for passing strings, not a working area
 * for building and manipulating strings in place.
 */
struct fkdl_string
{
  /** Raw UTF-8 bytes of the string. May contain embedded NUL bytes, but is also always NUL-terminated. */
  char *data;
  /** Length of the string in bytes, excluding the NUL terminator added by FreeKDL. */
  size_t len;
};

/** A KDL value, as used for node arguments and property values. */
struct fkdl_value
{
  /** The type of value held */
  enum fkdl_value_type type;
  /** If non-empty, the value's type annotation excluding the wrapping parentheses. */
  struct fkdl_string annotation;
  /** The raw value represented as a string.
   *
   * For example, "null", "true", "false". Escaped strings are expanded during
   * parsing, so "a\nb" contains a newline, not a '\' or 'n'. Numbers have their
   * underscores removed during parsing, but do not change between bases, i.e. hex
   * values will remain hex, as will binary, decimal, and octal.
   */
  struct fkdl_string as_string;
};

/** A key/value pair attached to a node; serialised as key=value. */
struct fkdl_property
{
  /** The property's name. Quoted on output only when it cannot be written bare. */
  struct fkdl_string key;
  /** The property's value, including any type annotation. */
  struct fkdl_value value;
};

/** In-memory representation of a KDL node. Must have an identifier
 * but may have any number of arguments, properties, or children.
 *
 * May be manipulated directly so long as all dynamic arrays are
 * kept in good order. "Good order" meaning that:
 * 1) Empty arrays are either a NULL pointer, or a valid pointer to a buffer.
 * 2) Length variables are always up to date.
 * 3) Capacity variables are valid for the buffer, or 0 if the buffer is NULL.
 *
 * Several helper functions are provided to simplify manipulation of the node
 * struct.
 */
struct fkdl_node
{
  /** The name or "identifier" of the node */
  struct fkdl_string identifier;
  /** If non-empty, the type annotation of the node, excluding
   * the enclosing parentheses.
   */
  struct fkdl_string annotation;

  /** Dynamic array of this node's arguments. */
  struct fkdl_value *arguments;
  /** Length of the arguments array in items, not bytes. */
  size_t argumentsLen;
  /** Capacity of the arguments array in items, not bytes. */
  size_t argumentsCap;

  /** Dynamic array of this node's properties. Kept in
   * alphabetical key order.
   */
  struct fkdl_property *properties;
  /** Length of the properties array in items, not bytes. */
  size_t propertiesLen;
  /** Capacity of the properties array in items, not bytes. */
  size_t propertiesCap;

  /** Dynamic array of this node's children. */
  struct fkdl_node *children;
  /** Length of the children array in items, not bytes. */
  size_t childrenLen;
  /** Capacity of the children array in items, not bytes. */
  size_t childrenCap;
};

/** A KDL document, containing zero or more nodes. */
struct fkdl_document
{
  /** Dynamic array of the document's top-level nodes. NULL until the first node is added. */
  struct fkdl_node *nodes;
  /** Number of nodes in use, in items, not bytes. */
  size_t nodesLen;
  /** Capacity of the nodes array in items, not bytes. */
  size_t nodesCap;
};

/** The interface for specifying how memory is managed by FreeKDL.
 * All functions must be implemented and behave like the
 * C standard library equivalents.
 *
 * It is acceptable for malloc or realloc to fail; FreeKDL will
 * handle these failures gracefully, returning an error.
 *
 * The context pointer is to allow custom state to be passed into
 * the allocator. This could be used for setting and tracking
 * memory limits for FreeKDL, or for using a custom memory manager.
 */
struct fkdl_allocator
{
  /** Allocates len bytes, like malloc(). Receives context as its first argument. */
  void *(*malloc)(void *context, size_t len);
  /** Resizes ptr to len bytes, like realloc(). Receives context as its first argument. */
  void *(*realloc)(void *context, void *ptr, size_t len);
  /** Releases ptr, like free(). Receives context as its first argument. */
  void (*free)(void *context, void *ptr);
  /** Opaque pointer handed back to each of the functions above. */
  void *context;
};

/** Representation of an error returned by FreeKDL, e.g. the one populated by
 * fkdl_readDocument() when it fails.
 */
struct fkdl_error
{
  /** The position in the input where FreeKDL ran into a problem, in bytes. */
  size_t index;
  /** The line of input corresponding to index, where 1 is the first line. */
  size_t line;
  /** The position in the current line, corresponding to index. */
  size_t column;
  /** Further information about the error, as a human-readable string. */
  const char *message;
};

/** Reads a UTF-8 string and populates an fkdl_document.
 * 
 * If inputLen is zero, strlen() is used to determine the length of input.
 * 
 * @param input The UTF-8 string to read. May contain NULL bytes.
 * @param inputLen The length of input in bytes, excluding any NULL terminator.
 * @param output A pointer to the fkdl_document to be populated.
 * @param allocator A pointer to the allocator to use. If NULL, defaults to malloc/free.
 * @param error A pointer to the fkdl_error to be populated on failure. May be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_readDocument(
  const char *input,
  size_t inputLen,
  struct fkdl_document *output,
  struct fkdl_allocator *allocator,
  struct fkdl_error *error
);

/** Serialises a fkdl_document to a UTF-8 string in the buffer pointed to by output.
 * 
 * If output is passed as NULL, no output will be written but the number of bytes
 * that would have been written is still written to bytesWritten. This behaviour
 * can be used to determine how much space to allocate for the generated text.
 * 
 * @param output A pointer to the buffer to populate with UTF-8 text. May be NULL.
 * @param outputLen The length of output in bytes.
 * @param bytesWritten If non-NULL, the pointee is set to the number of bytes generated.
 * @param document The fkdl_document to serialise.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_writeDocument(
  char *output,
  size_t outputLen,
  size_t *bytesWritten,
  const struct fkdl_document *document
);

/** Adds a value to a node as an additional argument.
 * 
 * A deep copy is NOT performed, so any buffers owned by
 * value must be considered owned by the node upon the
 * successful completion of this function.
 * 
 * @param node The node to add the value to.
 * @param value The value to add to the node.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeAddArgument(
  struct fkdl_node *node,
  const struct fkdl_value *value,
  struct fkdl_allocator *allocator);

/** Removes a given argument from node using a 0-based index.
 *
 * @param node The node to remove the argument from.
 * @param argumentIndex The 0-based index of the argument to remove.
 * @param allocator The allocator interface to use, may be NULL.
 * @return Presumably FKDL_TRUE on success and FKDL_FALSE on error, matching
 *         the sibling functions - TODO confirm against the implementation.
 */
fkdl_bool fkdl_nodeRemoveArgument(
  struct fkdl_node *node,
  size_t argumentIndex,
  struct fkdl_allocator *allocator);

/** Adds a property to a node.
 * 
 * A deep copy is NOT performed, so any buffers owned by
 * the property must be considered owned by the node upon the
 * successful completion of this function.
 * 
 * If a property exists with the same key, it will be replaced
 * by the new one.
 * 
 * @param node The node to add the property to.
 * @param property The property to add to the node.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeAddProperty(
  struct fkdl_node *node,
  const struct fkdl_property *property,
  struct fkdl_allocator *allocator);

/** Removes a property from a node.
 *
 * @param node The node to remove the property from.
 * @param key A UTF-8 string matching the key of the property to remove.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeRemoveProperty(
  struct fkdl_node *node,
  const struct fkdl_string *key,
  struct fkdl_allocator *allocator);

/** Set a string property on a node.
 * 
 * A convenience function for adding a key-value property
 * pair to a node. Both key and value are assumed to be
 * NULL-terminated UTF-8 strings, which will be copied into
 * the new property.
 * 
 * If a property exists with the same key, it will be replaced
 * by the new one.
 * 
 * @param node The node to add the property to.
 * @param key A NULL-terminated UTF-8 C-string containing the key.
 * @param value A NULL-terminated UTF-8 C-string containing the value.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeSetStringProperty(
  struct fkdl_node *node,
  const char *key,
  const char *value,
  struct fkdl_allocator *allocator);

/** Set a boolean property on a node.
 * 
 * A convenience function for adding a key-value property
 * pair to a node. Key is assumed to be a NULL-terminated
 * UTF-8 string, which will be copied into the new property.
 * 
 * If a property exists with the same key, it will be replaced
 * by the new one.
 * 
 * @param node The node to add the property to.
 * @param key A NULL-terminated UTF-8 C-string containing the key.
 * @param value A FKDL_BOOL containing the value to set.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeSetBoolProperty(
  struct fkdl_node *node,
  const char *key,
  fkdl_bool value,
  struct fkdl_allocator *allocator);

/** Set a null property on a node.
 * 
 * A convenience function for adding a key-value property
 * pair to a node. Key is assumed to be a NULL-terminated
 * UTF-8 string, which will be copied into the new property.
 * 
 * If a property exists with the same key, it will be replaced
 * by the new one.
 * 
 * @param node The node to add the property to.
 * @param key A NULL-terminated UTF-8 C-string containing the key.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeSetNullProperty(
  struct fkdl_node *node,
  const char *key,
  struct fkdl_allocator *allocator);

/** Set a numeric property on a node.
 * 
 * A convenience function for adding a key-value property
 * pair to a node. Both key and value are assumed to be
 * NULL-terminated UTF-8 strings, which will be copied into
 * the new property.
 * 
 * If a property exists with the same key, it will be replaced
 * by the new one.
 * 
 * Any valid number in a KDL document may be passed.
 * 
 * @param node The node to add the property to.
 * @param key A NULL-terminated UTF-8 C-string containing the key.
 * @param value A NULL-terminated UTF-8 C-string containing the value.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeSetNumberProperty(
  struct fkdl_node *node,
  const char *key,
  const char *value,
  struct fkdl_allocator *allocator);

/** Add one node to another node as a child.
 * 
 * child is NOT deep copied, so all buffers owned by child must be
 * considered owned by node after this function completes successfully.
 * 
 * @param node The node to gain a new child.
 * @param child The node to gain a parent.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_nodeAddChild(
  struct fkdl_node *node,
  const struct fkdl_node *child,
  struct fkdl_allocator *allocator);

/** Cleanup a string.
 * 
 * Cleans up the buffers owned by a string. The string pointer itself
 * is not freed, which allows it to be a local stack variable.
 * Therefore, if you called malloc() to create the memory for string
 * itself, you're still responsible for calling free().
 * 
 * @param string The string to cleanup.
 * @param allocator The allocator interface to use, may be NULL.
 */
void fkdl_cleanupString(struct fkdl_string *string, struct fkdl_allocator *allocator);

/** Cleanup a value.
 * 
 * Cleans up the buffers owned by a value. The value pointer itself
 * is not freed, which allows it to be a local stack variable.
 * Therefore, if you called malloc() to create the memory for value
 * itself, you're still responsible for calling free().
 * 
 * @param value The value to cleanup.
 * @param allocator The allocator interface to use, may be NULL.
 */
void fkdl_cleanupValue(struct fkdl_value *value, struct fkdl_allocator *allocator);

/** Cleanup a property.
 * 
 * Cleans up the buffers owned by a property. The property pointer itself
 * is not freed, which allows it to be a local stack variable.
 * Therefore, if you called malloc() to create the memory for property
 * itself, you're still responsible for calling free().
 * 
 * @param property The property to cleanup.
 * @param allocator The allocator interface to use, may be NULL.
 */
void fkdl_cleanupProperty(struct fkdl_property *property, struct fkdl_allocator *allocator);

/** Cleanup a node.
 * 
 * Cleans up the buffers owned by a node. The node pointer itself
 * is not freed, which allows it to be a local stack variable.
 * Therefore, if you called malloc() to create the memory for node
 * itself, you're still responsible for calling free().
 * 
 * @param node The node to cleanup.
 * @param allocator The allocator interface to use, may be NULL.
 */
void fkdl_cleanupNode(struct fkdl_node *node, struct fkdl_allocator *allocator);

/** Cleanup a document.
 * 
 * Cleans up the buffers owned by a document. The document pointer itself
 * is not freed, which allows it to be a local stack variable.
 * Therefore, if you called malloc() to create the memory for document
 * itself, you're still responsible for calling free().
 * 
 * @param document The document to cleanup.
 * @param allocator The allocator interface to use, may be NULL.
 */
void fkdl_cleanupDocument(struct fkdl_document *document, struct fkdl_allocator *allocator);

/** Add a node to a document.
 * 
 * node is NOT deep copied, so all buffers owned by node must be
 * considered owned by the document after this function completes
 * successfully.
 * 
 * @param document The document to gain a new child.
 * @param node The node to be added.
 * @param allocator The allocator interface to use, may be NULL.
 * @return FKDL_TRUE on success, FKDL_FALSE on error.
 */
fkdl_bool fkdl_documentAddNode(
  struct fkdl_document *document,
  const struct fkdl_node *node,
  struct fkdl_allocator *allocator);

#endif

A  => test.c +1892 -0
@@ 1,1892 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Including the c file to give access to its static functions for testing. */
#include "freekdl.c"

/* Decode the character at the start of str with readUtf8Char() and compare
 * the decoded value and the reported size against the expected ones.
 * Prints a diagnostic and returns 0 on the first mismatch, 1 on success.
 */
int checkUtf8Read(const char *str, size_t strLen, uint32_t expectedValue, size_t expectedSize)
{
  uint32_t decoded;
  size_t decodedSize;
  int passed = 0;

  if (!readUtf8Char(str, strLen, &decoded, &decodedSize)) {
    printf("checkUtf8 for \"%s\" returned false.\n", str);
  } else if (decoded != expectedValue) {
    printf("checkUtf8 for \"%s\" failed. value=0x%x != expected=0x%x\n", str, decoded, expectedValue);
  } else if (decodedSize != expectedSize) {
    printf("checkUtf8 for \"%s\" failed. size=0x%zx != expected=0x%zx\n", str, decodedSize, expectedSize);
  } else {
    passed = 1;
  }

  return passed;
}

/* Encode input with writeUtf8Char() into a 4 byte scratch buffer and compare
 * the produced bytes and the reported size against the expected encoding.
 * Prints a diagnostic and returns 0 on the first mismatch, 1 on success.
 */
int checkUtf8Write(uint32_t input, const char *expected, size_t expectedSize)
{
  char encoded[4];
  size_t written;
  size_t compareLen;
  size_t i;
  int got[4];
  int want[4];

  if (!writeUtf8Char(encoded, sizeof encoded, input, &written)) {
    printf("checkUtf8 for 0x%x returned false.\n", input);
    return 0;
  }

  /* Only the bytes present on both sides are compared here; a difference in
   * length is reported separately below. */
  compareLen = min(expectedSize, written);
  if (memcmp(encoded, expected, compareLen)) {
    /* Bytes beyond either length are shown as zero in the diagnostic. */
    for (i = 0; i < 4; ++i) {
      got[i] = written > i ? encoded[i] : 0;
      want[i] = expectedSize > i ? expected[i] : 0;
    }
    printf("checkUtf8 for 0x%x failed. value=0x%hhx 0x%hhx 0x%hhx 0x%hhx != expected=0x%hhx 0x%hhx 0x%hhx 0x%hhx\n",
      input,
      got[0], got[1], got[2], got[3],
      want[0], want[1], want[2], want[3]);
    return 0;
  }

  if (written != expectedSize) {
    printf("checkUtf8 for 0x%x failed. size=0x%zx != expected=0x%zx\n", input, written, expectedSize);
    return 0;
  }

  return 1;
}

/* Run the UTF-8 decode and encode checks for 1, 2, 3 and 4 byte sequences.
 *
 * The read checks and the write checks form a single && chain, so the first
 * failing case stops the run and makes the function return 0. Returns 1 when
 * every case passes.
 */
int testUtf8(void)
{
  return
    checkUtf8Read("hello", 5, 'h', 1) &&
    checkUtf8Read("\0hello", 6, '\0', 1) &&
    checkUtf8Read("β test", 7, 0x03B2, 2) &&
    checkUtf8Read("Җ test", 7, 0x0496, 2) &&
    checkUtf8Read("\xef\xbb\xbf test", 8, 0xFEFF, 3) &&
    checkUtf8Read("🙈", 4, 0x1F648, 4) &&
    checkUtf8Write('h', "h", 1) &&
    checkUtf8Write('\0', "\0", 1) &&
    checkUtf8Write(0x03B2, "β", 2) &&
    checkUtf8Write(0x0496, "Җ", 2) &&
    /* U+FEFF encodes as EF BB BF, the same bytes used by the read check. */
    checkUtf8Write(0xFEFF, "\xef\xbb\xbf", 3) &&
    checkUtf8Write(0x1F648, "🙈", 4);
}

int testPeekCharSuccess(struct fkdl_cursor *cursor, uint32_t expected, size_t expectedSize)
{
  struct fkdl_cursor before = *cursor;
  uint32_t out = 0;
  size_t outSize = 0;
  struct fkdl_error error = {0};
  if (!peekChar(cursor, &out, &outSize, &error)) {
    printf("peekChar for \"%s\" returned false.\n", cursor->input);
    return 0;
  }
  struct fkdl_cursor after = *cursor;
  if (memcmp(&before, &after, sizeof before)) {
    printf("peekChar changed the state of the cursor\n");
    return 0;
  }
  if (out != expected) {
    printf("peekChar for \"%s\" output '%c' not '%c'\n", cursor->input, out, expected);
    return 0;
  }
  if (outSize != expectedSize) {
    printf("peekChar for \"%s\" output size %zd not %zd\n", cursor->input, outSize, expectedSize);
    return 0;
  }
  return 1;
}

/* peekChar() on a cursor at the very start of the input must yield 't'. */
int testPeekChar_Start(void)
{
  const char test[] = "this is a test cursor";
  struct fkdl_cursor cursor = {0};

  /* Members not assigned here stay zero, i.e. index 0 and column 0. */
  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_FALSE;

  return testPeekCharSuccess(&cursor, 't', 1);
}

/* peekChar() on a cursor positioned at byte 8 of the input must yield 'a'. */
int testPeekChar_Middle(void)
{
  const char test[] = "this is a test cursor";
  struct fkdl_cursor cursor = {0};

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.index = 8;
  cursor.line = 1;
  cursor.column = 8;
  cursor.prevWhitespace = FKDL_TRUE;

  return testPeekCharSuccess(&cursor, 'a', 1);
}

/* peekChar() on a cursor positioned at the end of its input must fail and
 * report the cursor position in the error.
 *
 * The cursor is placed at index == inputLen == sizeof test (22, which counts
 * the terminating NUL of the literal), on line 1, with column equal to the
 * index. Returns 1 when peekChar() fails and the error carries exactly that
 * position, 0 (after printing a diagnostic) otherwise.
 */
int testPeekChar_End(void)
{
  const char test[] = "this is a test cursor";
  const size_t endPos = sizeof test;
  struct fkdl_cursor cursor = {
    .input = test,
    .inputLen = sizeof test,
    .index = sizeof test,
    .line = 1,
    .column = sizeof test,
    .prevWhitespace = FKDL_TRUE,
  };

  uint32_t out = 0;
  size_t outSize = 0;
  struct fkdl_error error = {0};
  if (peekChar(&cursor, &out, &outSize, &error)) {
    printf("peekChar for end returned true.\n");
    return 0;
  }
  if (error.column != endPos) {
    printf("peekChar for end set error.column to %zu not %zu\n", error.column, endPos);
    return 0;
  }
  if (error.line != 1) {
    printf("peekChar for end set error.line to %zu not %zu\n", error.line, (size_t)1);
    return 0;
  }
  if (error.index != endPos) {
    printf("peekChar for end set error.index to %zu not %zu\n", error.index, endPos);
    return 0;
  }
  return 1;
}

/* Run the peekChar() cases in order, stopping at the first failure.
 * Returns 1 when all of them pass, 0 otherwise.
 */
int testPeekChar(void)
{
  if (!testPeekChar_Start()) {
    return 0;
  }
  if (!testPeekChar_Middle()) {
    return 0;
  }
  if (!testPeekChar_End()) {
    return 0;
  }
  return 1;
}

/* Check that peekWord(cursor, word) reports the expected truth value and
 * leaves the cursor unchanged.
 *
 * cursor:       cursor to peek at; must not be modified by peekWord().
 * word:         word to look for at the cursor position.
 * shouldBeTrue: non-zero when peekWord() is expected to succeed.
 *
 * Returns 1 on success, 0 (after printing a diagnostic) on failure.
 */
int testPeekWordCase(struct fkdl_cursor *cursor, const char *word, int shouldBeTrue)
{
  const struct fkdl_cursor before = *cursor;
  struct fkdl_cursor after;
  /* Normalise both sides to 0/1 so any non-zero result counts as "true". */
  const int expected = shouldBeTrue ? 1 : 0;
  const int actual = peekWord(cursor, word) ? 1 : 0;

  if (actual != expected) {
    /* Callers place the cursor at index == inputLen for the end-of-input
     * case; there is no string to print there, so show an empty one. */
    const char *remaining =
      cursor->index < cursor->inputLen ? cursor->input + cursor->index : "";
    printf("For \"%s\" and \"%s\" peekWord returned %d not %d.\n",
      remaining,
      word,
      actual, expected);
    return 0;
  }

  after = *cursor;
  if (memcmp(&before, &after, sizeof before)) {
    printf("peekWord changed the state of the cursor\n");
    return 0;
  }
  return 1;
}

/* Check that peekStr(cursor, str) reports the expected truth value and
 * leaves the cursor unchanged.
 *
 * cursor:       cursor to peek at; must not be modified by peekStr().
 * str:          text to look for at the cursor position.
 * shouldBeTrue: non-zero when peekStr() is expected to succeed.
 *
 * Returns 1 on success, 0 (after printing a diagnostic) on failure.
 */
int testPeekStrCase(struct fkdl_cursor *cursor, const char *str, int shouldBeTrue)
{
  const struct fkdl_cursor before = *cursor;
  struct fkdl_cursor after;
  /* Normalise both sides to 0/1 so any non-zero result counts as "true". */
  const int expected = shouldBeTrue ? 1 : 0;
  const int actual = peekStr(cursor, str) ? 1 : 0;

  if (actual != expected) {
    /* Callers place the cursor at index == inputLen for the end-of-input
     * case; there is no string to print there, so show an empty one. */
    const char *remaining =
      cursor->index < cursor->inputLen ? cursor->input + cursor->index : "";
    printf("For \"%s\" and \"%s\" peekStr returned %d not %d.\n",
      remaining,
      str,
      actual, expected);
    return 0;
  }

  after = *cursor;
  if (memcmp(&before, &after, sizeof before)) {
    printf("peekStr changed the state of the cursor\n");
    return 0;
  }
  return 1;
}

/* Table-driven check of peekStr() at several cursor positions, including a
 * position in the middle of a word and the position at the end of the input.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testPeekStr(void)
{
  const char test[] = "this is a test cursor";
  const struct {
    size_t index;
    const char *str;
    int shouldMatch;
  } cases[] = {
    { 0, "this", 1 },
    { 0, "fail", 0 },
    { 4, "is", 0 },
    { 10, "test", 1 },
    { 15, "cursor", 1 },
    { 17, "anything", 0 },
    { sizeof test, "anything", 0 },
  };
  struct fkdl_cursor cursor = {0};
  size_t i;

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_TRUE;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    cursor.index = cases[i].index;
    if (!testPeekStrCase(&cursor, cases[i].str, cases[i].shouldMatch)) {
      return 0;
    }
  }
  return 1;
}

/* Table-driven check of peekWord() at several cursor positions of an input
 * whose words are separated by a space, a tab and a newline.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testPeekWord(void)
{
  const char test[] = "this is\ta test cursor\n";
  const struct {
    size_t index;
    const char *word;
    int shouldMatch;
  } cases[] = {
    { 0, "this", 1 },
    { 4, "is", 0 },
    { 5, "is", 1 },
    { 10, "test", 1 },
    { 15, "cursor", 1 },
    { 17, "anything", 0 },
    { sizeof test, "anything", 0 },
  };
  struct fkdl_cursor cursor = {0};
  size_t i;

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_TRUE;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    cursor.index = cases[i].index;
    if (!testPeekWordCase(&cursor, cases[i].word, cases[i].shouldMatch)) {
      return 0;
    }
  }
  return 1;
}

/* Consume one character with consumeChar() and verify the result and the
 * resulting cursor state.
 *
 * cursor:         cursor to consume from; it is advanced by the call.
 * expected:       value the consumed character must have.
 * expectedSize:   size consumeChar() must report; index and column are both
 *                 expected to grow by this amount.
 * prevWhitespace: value cursor->prevWhitespace must have afterwards.
 *
 * Returns 1 on success, 0 (after printing a diagnostic) on failure.
 */
int testConsumeCharSuccess(
  struct fkdl_cursor *cursor,
  uint32_t expected,
  size_t expectedSize,
  int prevWhitespace)
{
  const struct fkdl_cursor before = *cursor;
  struct fkdl_cursor after;
  uint32_t out = 0;
  size_t outSize = 0;
  struct fkdl_error error = {0};

  if (!consumeChar(cursor, &out, &outSize, &error)) {
    printf("consumeChar for \"%s\" returned false.\n", cursor->input);
    return 0;
  }
  after = *cursor;

  if (out != expected) {
    printf("consumeChar for \"%s\" output '%c' not '%c'\n", cursor->input, out, expected);
    return 0;
  }
  if (outSize != expectedSize) {
    printf("consumeChar for \"%s\" output size %zu not %zu\n", cursor->input, outSize, expectedSize);
    return 0;
  }
  if (after.index != before.index + expectedSize) {
    printf("consumeChar changed index to %zu but expected %zu\n", after.index, before.index + expectedSize);
    return 0;
  }
  if (after.column != before.column + expectedSize) {
    /* Report the column values here, not the index values. */
    printf("consumeChar changed column to %zu but expected %zu\n", after.column, before.column + expectedSize);
    return 0;
  }
  if (after.prevWhitespace != prevWhitespace) {
    printf("consumeChar set prevWhitespace to %d but expected %d\n", after.prevWhitespace, prevWhitespace);
    return 0;
  }
  return 1;
}

/* Consume an input containing ASCII, a space, a two byte character, an
 * embedded NUL and a newline one character at a time, checking each step.
 * Returns 1 when every step passes, 0 at the first failure.
 */
int testConsumeChar(void)
{
  const char test[] = "this Җ\0ok\n";
  const struct {
    uint32_t value;
    size_t size;
    int prevWhitespace;
  } steps[] = {
    { 't', 1, 0 },
    { 'h', 1, 0 },
    { 'i', 1, 0 },
    { 's', 1, 0 },
    { ' ', 1, 1 },
    { 0x496, 2, 0 },
    { 0, 1, 0 },
    { 'o', 1, 0 },
    { 'k', 1, 0 },
    { '\n', 1, 0 },
  };
  struct fkdl_cursor cursor = {0};
  size_t i;

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_FALSE;

  for (i = 0; i < sizeof steps / sizeof steps[0]; ++i) {
    if (!testConsumeCharSuccess(&cursor, steps[i].value, steps[i].size, steps[i].prevWhitespace)) {
      return 0;
    }
  }
  return 1;
}

/* Walk over "this\n" and "is\r\n", checking that consumeNewline() succeeds
 * on both line endings and that line, column and prevWhitespace have the
 * expected values afterwards.
 * Returns 1 when every step passes, 0 at the first failure.
 */
int testConsumeNewline(void)
{
  const char test[] =
    "this\n"
    "is\r\n"
    "a test\r"
    "of newline\n";
  struct fkdl_cursor cursor = {0};

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_TRUE;

  /* First line: "this" followed by "\n". */
  if (!testConsumeCharSuccess(&cursor, 't', 1, 0) ||
      !testConsumeCharSuccess(&cursor, 'h', 1, 0) ||
      !testConsumeCharSuccess(&cursor, 'i', 1, 0) ||
      !testConsumeCharSuccess(&cursor, 's', 1, 0)) {
    return 0;
  }
  if (!consumeNewline(&cursor, NULL)) {
    return 0;
  }
  if (cursor.line != 2 || cursor.column != 0 || cursor.prevWhitespace != 0) {
    return 0;
  }

  /* Second line: "is" followed by "\r\n". */
  if (!testConsumeCharSuccess(&cursor, 'i', 1, 0) ||
      !testConsumeCharSuccess(&cursor, 's', 1, 0)) {
    return 0;
  }
  if (cursor.line != 2 || cursor.column != 2) {
    return 0;
  }
  if (!consumeNewline(&cursor, NULL)) {
    return 0;
  }
  if (cursor.line != 3 || cursor.column != 0 || cursor.prevWhitespace != 0) {
    return 0;
  }

  /* The first character of the third line must be readable next. */
  if (!testConsumeCharSuccess(&cursor, 'a', 1, 0)) {
    return 0;
  }
  return 1;
}

/* After consumeSingleComment() is called on a "//" comment, the next
 * character consumed must be the first one of the following line. This is
 * checked for a comment ending in "\n" and one ending in "\r\n".
 * Returns 1 when both checks pass, 0 otherwise.
 */
int testConsumeSingleComment(void)
{
  const char test[] =
    "// this is a single comment\n"
    "a//this is another\r\n"
    "b";
  struct fkdl_cursor cursor = {0};

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_TRUE;

  consumeSingleComment(&cursor);
  if (testConsumeCharSuccess(&cursor, 'a', 1, 0)) {
    consumeSingleComment(&cursor);
    if (testConsumeCharSuccess(&cursor, 'b', 1, 0)) {
      return 1;
    }
  }
  return 0;
}

/* Call consumeWhitespace() on cursor and report a failure.
 * Returns 1 when the call succeeds, 0 (after printing the remaining input)
 * when it does not.
 */
int testConsumeWhitspaceSuccess(struct fkdl_cursor *cursor)
{
  if (consumeWhitespace(cursor, NULL)) {
    return 1;
  }

  printf("consumeWhitespace for \"%s\" returned false\n", cursor->input + cursor->index);
  return 0;
}

/* Alternate between consuming a letter and consuming the whitespace that
 * follows it. The gaps are plain spaces, tabs, a block comment, a nested
 * block comment and trailing spaces before a newline.
 * Returns 1 when every step passes, 0 at the first failure.
 */
int testConsumeWhitespace(void)
{
  const char test[] =
    "a  b\t \tc /* comment */d"
    "/* comment /* nesting */ works */e"
    "    \nf";
  /* Letters expected directly after each of the first four gaps. */
  const char afterGap[] = { 'b', 'c', 'd', 'e' };
  struct fkdl_cursor cursor = {0};
  size_t i;

  cursor.input = test;
  cursor.inputLen = sizeof test;
  cursor.line = 1;
  cursor.prevWhitespace = FKDL_TRUE;

  if (!testConsumeCharSuccess(&cursor, 'a', 1, 0)) {
    return 0;
  }
  for (i = 0; i < sizeof afterGap; ++i) {
    if (!testConsumeWhitspaceSuccess(&cursor)) {
      return 0;
    }
    if (!testConsumeCharSuccess(&cursor, (uint32_t)afterGap[i], 1, 0)) {
      return 0;
    }
  }

  /* The last gap is expected to stop in front of the newline. */
  if (!testConsumeWhitspaceSuccess(&cursor)) {
    return 0;
  }
  if (!testConsumeCharSuccess(&cursor, '\n', 1, 0)) {
    return 0;
  }
  if (!testConsumeCharSuccess(&cursor, 'f', 1, 0)) {
    return 0;
  }
  return 1;
}

/* Check that unescapedStringLength() succeeds for str and reports the
 * expected length.
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testUnescapingLen(const char *str, size_t expected)
{
  size_t measured;
  int passed = 0;

  if (!unescapedStringLength(str, strlen(str), &measured)) {
    printf("failed to measure unescaped length of '%s'\n", str);
  } else if (measured != expected) {
    printf("unescapedStringLength for '%s' should be %zd but is %zd\n", str, expected, measured);
  } else {
    passed = 1;
  }

  return passed;
}

/* Unescape `before` with unescapeString() and check that the result is
 * exactly `after`.
 *
 * The scratch buffer is zero-filled first so that the result is always a
 * terminated string: this makes the whole-string comparison meaningful (extra
 * trailing output is detected) and keeps the failure diagnostic from reading
 * uninitialised memory.
 *
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testUnescapingMatch(const char *before, const char *after)
{
  char buf[256] = {0};

  /* NOTE(review): assumes the unescaped form is never longer than the
   * escaped input, which holds for every case in this file - confirm. */
  if (strlen(before) >= sizeof buf) {
    printf("Input '%s' is too long for the unescape test buffer\n", before);
    return 0;
  }
  if (!unescapeString(buf, before, strlen(before))) {
    printf("Failed to unescape '%s'\n", before);
    return 0;
  }
  if (strcmp(buf, after) != 0) {
    printf("Unescaping of '%s' should be '%s' but is '%s'\n", before, after, buf);
    return 0;
  }
  return 1;
}

/* Table-driven unescaping test: first the length of every escaped input is
 * checked, then the unescaped text of every input.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testUnescaping(void)
{
  const struct {
    const char *escaped;
    size_t length;
  } lengths[] = {
    { "simple", 6 },
    { "two\\nlines", 3 + 1 + 5 },
    { "one\\ttab", 3 + 1 + 3 },
    { "one\\\\slash", 3 + 1 + 5 },
    { "\\n", 1 },
    { "\\u{20}", 1 },
    { "\\u{3B2}", 2 },
    { "\\u{feff}", 3 },
    { "\\u{1f648}", 4 },
  };
  const struct {
    const char *escaped;
    const char *plain;
  } matches[] = {
    { "simple", "simple" },
    { "two\\nlines", "two\nlines" },
    { "one\\ttab", "one\ttab" },
    { "one\\\\slash", "one\\slash" },
    { "\\n", "\n" },
    { "\\u{20}", " " },
    { "\\u{3B2}", "β" },
    { "\\u{feff}", "\xef\xbb\xbf" },
    { "\\u{1f648}", "🙈" },
  };
  size_t i;

  for (i = 0; i < sizeof lengths / sizeof lengths[0]; ++i) {
    if (!testUnescapingLen(lengths[i].escaped, lengths[i].length)) {
      return 0;
    }
  }
  for (i = 0; i < sizeof matches / sizeof matches[0]; ++i) {
    if (!testUnescapingMatch(matches[i].escaped, matches[i].plain)) {
      return 0;
    }
  }
  return 1;
}

/* Map a token type to a printable name for diagnostics.
 * Values without a case below are reported as "UNKNOWN".
 */
const char *tokenType(enum fkdl_token_type type)
{
  const char *name = "UNKNOWN";

  switch (type) {
    case FKDL_TOK_NULL:
      name = "NULL";
      break;
    case FKDL_TOK_BOOL:
      name = "BOOL";
      break;
    case FKDL_TOK_IDENTIFIER:
      name = "IDENTIFIER";
      break;
    case FKDL_TOK_STRING:
      name = "STRING";
      break;
    case FKDL_TOK_RAW_STRING:
      name = "RAW_STRING";
      break;
    case FKDL_TOK_NUMBER:
      name = "NUMBER";
      break;
    case FKDL_TOK_TYPE_ANNOTATION:
      name = "TYPE_ANNOTATION";
      break;
    case FKDL_TOK_PROPERTY_EQUALS:
      name = "PROPERTY_EQUALS";
      break;
    case FKDL_TOK_NODE_TERMINATOR:
      name = "NODE_TERMINATOR";
      break;
    case FKDL_TOK_OPEN_BRACE:
      name = "OPEN_BRACE";
      break;
    case FKDL_TOK_CLOSE_BRACE:
      name = "CLOSE_BRACE";
      break;
    case FKDL_TOK_SLASHDASH:
      name = "SLASHDASH";
      break;
    default:
      break;
  }

  return name;
}

/* One expected token in a tokeniser test: the type the token must have and,
 * optionally, the text expected at the token's position in the input. */
struct token_expectation {
  /* Type the produced token is required to have. */
  enum fkdl_token_type type;
  /* Text expected at the token's index in the input. Entries that leave this
   * unset (NULL) have only their type checked. */
  const char *text;
};

/* Tokenise input and compare the produced tokens with a list of expectations.
 *
 * input / inputLen:            text handed to tokenise().
 * expectations / expectationsLen: expected tokens, in order. An entry whose
 *                              text is NULL only has its type checked; for
 *                              the others the input at the token's index must
 *                              start with the expected text.
 *
 * Returns 1 when tokenise() succeeds and every token matches, 0 (after
 * printing a diagnostic) otherwise.
 */
int testTokeniseSuccess(
  const char *input,
  size_t inputLen,
  const struct token_expectation *expectations,
  size_t expectationsLen)
{
  struct fkdl_array tokens = {
    .itemSize = sizeof(struct fkdl_token),
  };
  struct fkdl_error error;
  struct fkdl_token *produced;
  size_t tokenIdx;
  size_t charIdx;

  if (!tokenise(input, inputLen, &tokens, &error, &fkdl_defaultAllocator)) {
    printf("fkdl_tokenise failed for: \"%s\"\n", input);
    printf("error: %s index=%zd line=%zd column=%zd\n", error.message, error.index, error.line, error.column);
    return 0;
  }

  if (tokens.len != expectationsLen) {
    printf("fkdl_tokenise wrote %zd tokens. Expected %zd for \"%s\".\n",
      tokens.len,
      expectationsLen,
      input);
    return 0;
  }

  produced = tokens.data;

  for (tokenIdx = 0; tokenIdx < tokens.len; ++tokenIdx) {
    const char *wantText = expectations[tokenIdx].text;

    if (produced[tokenIdx].type != expectations[tokenIdx].type) {
      printf("token[%zd].type = %s but expected %s\n",
        tokenIdx,
        tokenType(produced[tokenIdx].type),
        tokenType(expectations[tokenIdx].type));
      return 0;
    }

    /* No text given: the type check above is all that is required. */
    if (!wantText) {
      continue;
    }
    if (!memcmp(input + produced[tokenIdx].index, wantText, strlen(wantText))) {
      continue;
    }

    /* The token is a slice of the input, so print it byte by byte. */
    printf("token text was \"");
    for (charIdx = 0; charIdx < produced[tokenIdx].length; ++charIdx) {
      putchar(input[produced[tokenIdx].index + charIdx]);
    }
    printf("\" expected \"%s\"\n", wantText);
    return 0;
  }

  return 1;
}

int testConsumeHexCodeCase(const char *str, uint32_t expectedValue, int shouldReturnTrue)
{
  struct fkdl_cursor cursor = {
    .input = str,
    .inputLen = strlen(str),
    .line = 1,
  };

  uint32_t out = 0;
  size_t outSize = 0;
  struct fkdl_error error = {0};
  if (shouldReturnTrue != consumeHexCode(&cursor, &out, &outSize, &error)) {
    printf("For \"%s\" consumeHexCode returned %d not %d.\n",
      str, !shouldReturnTrue, shouldReturnTrue);
    return 0;
  }
  if (expectedValue != out) {
    printf("For \"%s\" consumeHexCode read 0x%x not 0x%x.\n",
      str, out, expectedValue);
    return 0;
  }
  return 1;
}

/* Table-driven check of consumeHexCode() on valid hex runs, hex runs followed
 * by other characters, and an input that does not start with a hex digit.
 * Returns 1 when every case passes, 0 at the first failure.
 */
static int testConsumeHexCode(void)
{
  const struct {
    const char *text;
    uint32_t value;
    int shouldSucceed;
  } cases[] = {
    { "00cc00", 0x00cc00, 1 },
    { "10", 0x10, 1 },
    { "F", 0xf, 1 },
    { "310}", 0x310, 1 },
    { "garbage", 0x0, 0 },
    { "cafefe!", 0xcafefe, 1 },
  };
  size_t i;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    if (!testConsumeHexCodeCase(cases[i].text, cases[i].value, cases[i].shouldSucceed)) {
      return 0;
    }
  }
  return 1;
}

/* Check that isRawStringStart() gives the expected answer for a cursor placed
 * at the start of str.
 * Returns 1 on a match, 0 (after printing a diagnostic) otherwise.
 */
int testIsRawStringStartCase(const char *str, int shouldReturnTrue)
{
  struct fkdl_cursor cursor = {0};

  cursor.input = str;
  cursor.inputLen = strlen(str);
  cursor.line = 1;

  if (isRawStringStart(&cursor) != shouldReturnTrue) {
    printf("For \"%s\" isRawStringStart returned %d not %d.\n",
      str, !shouldReturnTrue, shouldReturnTrue);
    return 0;
  }

  return 1;
}

/* Table-driven check of isRawStringStart(): only inputs of the form
 * r, zero or more '#', then a double quote are expected to be accepted.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testIsRawStringStart(void)
{
  const struct {
    const char *text;
    int isStart;
  } cases[] = {
    { "no", 0 },
    { "'no'", 0 },
    { "\"no'", 0 },
    { "\\\"no'", 0 },
    { "r\"yes", 1 },
    { "r#\"yes", 1 },
    { "r######\"yes", 1 },
    { "r##########\"yes", 1 },
    { "r##########no", 0 },
    { "r##########'no", 0 },
  };
  size_t i;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    if (!testIsRawStringStartCase(cases[i].text, cases[i].isStart)) {
      return 0;
    }
  }
  return 1;
}


/* Check that countHashes() returns the expected count for str.
 * Returns 1 on a match, 0 (after printing a diagnostic) otherwise.
 */
int testCountHashesCase(const char *str, size_t expected)
{
  const size_t counted = countHashes(str, strlen(str));

  if (counted == expected) {
    return 1;
  }

  printf("For \"%s\" countHashes returned %zd not %zd.\n",
    str, counted, expected);
  return 0;
}


/* Table-driven check of countHashes(): the expected count is the number of
 * '#' characters at the very start of each input.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testCountHashes(void)
{
  const struct {
    const char *text;
    size_t hashes;
  } cases[] = {
    { "foobar", 0 },
    { "#one", 1 },
    { "##two", 2 },
    { "###", 3 },
    { "####four####", 4 },
    { "\"", 0 },
    { "\\#", 0 },
  };
  size_t i;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    if (!testCountHashesCase(cases[i].text, cases[i].hashes)) {
      return 0;
    }
  }
  return 1;
}

/* Run consumeString() at the start of str and check the token it produces.
 *
 * str:    input beginning with a quoted or raw string.
 * offset: index within str at which the string body must start.
 * body:   expected body text; the token length must equal strlen(body).
 *
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testConsumeStringCase(const char *str, size_t offset, const char *body)
{
  struct fkdl_cursor cursor = {0};
  struct fkdl_token token;
  struct fkdl_error error;
  const size_t bodyLen = strlen(body);

  cursor.input = str;
  cursor.inputLen = strlen(str);
  cursor.line = 1;

  if (!consumeString(&cursor, &token, &error)) {
    printf("consumeString failed for \"%s\" because: %s\n", str, error.message);
    return 0;
  }

  if (token.index != offset) {
    printf("consumeString length expected offset %zd but actually %zd\n", offset, token.index);
    return 0;
  }

  if (memcmp(body, str + token.index, bodyLen) != 0) {
    printf("consumeString expected \"%s\" but pointed to: \"%s\"\n", body, str + token.index);
    return 0;
  }

  if (token.length != bodyLen) {
    printf("consumeString length expected len %zd but actually %zd\n", bodyLen, token.length);
    return 0;
  }

  return 1;
}

/* Table-driven check of consumeString() on quoted strings, strings with an
 * escaped quote, and raw strings with varying numbers of hashes.
 * Returns 1 when every case passes, 0 at the first failure.
 */
int testConsumeString(void)
{
  const struct {
    const char *text;
    size_t offset;
    const char *body;
  } cases[] = {
    { "\"simple\" other stuff", 1, "simple" },
    { "\"simple_\\\"escaped\" other stuff", 1, "simple_\\\"escaped" },
    { "r\"simple raw\" other", 2, "simple raw" },
    { "r###\" raw with hashes\"### other", 5, " raw with hashes" },
    { "r###\" raw \" hashes\"### other", 5, " raw \" hashes" },
    { "r#####\" raw ###\" hashes\"##### other", 7, " raw ###\" hashes" },
    { "r#####\"eof string\"#####", 7, "eof string" },
  };
  size_t i;

  for (i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
    if (!testConsumeStringCase(cases[i].text, cases[i].offset, cases[i].body)) {
      return 0;
    }
  }
  return 1;
}

/* Tokenise three simple inputs (keywords and punctuation, numbers, strings)
 * and compare each result with its expected token list. Every list ends with
 * a node terminator entry that carries no text.
 * Returns 1 when all three inputs tokenise as expected, 0 otherwise.
 */
int testBasicTokenise(void)
{
  const char keywords[] = "true false null { } ;";
  const struct token_expectation keywordTokens[] = {
    { FKDL_TOK_BOOL, "true" },
    { FKDL_TOK_BOOL, "false" },
    { FKDL_TOK_NULL, "null" },
    { FKDL_TOK_OPEN_BRACE, "{" },
    { FKDL_TOK_NODE_TERMINATOR, "}" },
    { FKDL_TOK_CLOSE_BRACE, "}" },
    { FKDL_TOK_NODE_TERMINATOR, ";" },
    { FKDL_TOK_NODE_TERMINATOR, NULL }
  };
  const size_t keywordCount = sizeof keywordTokens / sizeof keywordTokens[0];

  const char numbers[] = "1.5 0x10 0o7 0b10101 -5 +2 -2.5 1.5e17 2.5E-3";
  const struct token_expectation numberTokens[] = {
    { FKDL_TOK_NUMBER, "1.5" },
    { FKDL_TOK_NUMBER, "0x10" },
    { FKDL_TOK_NUMBER, "0o7" },
    { FKDL_TOK_NUMBER, "0b10101" },
    { FKDL_TOK_NUMBER, "-5" },
    { FKDL_TOK_NUMBER, "+2" },
    { FKDL_TOK_NUMBER, "-2.5" },
    { FKDL_TOK_NUMBER, "1.5e17" },
    { FKDL_TOK_NUMBER, "2.5E-3" },
    { FKDL_TOK_NODE_TERMINATOR, NULL }
  };
  const size_t numberCount = sizeof numberTokens / sizeof numberTokens[0];

  const char strings[] = "\"abc\" \"abc \\\" escaped\" r\"def\" r#\"ghi\"# r###\"jkl \" ## \" \"### \"mno\"";
  const struct token_expectation stringTokens[] = {
    { FKDL_TOK_STRING, "abc" },
    { FKDL_TOK_STRING, "abc \\\" escaped" },
    { FKDL_TOK_RAW_STRING, "def" },
    { FKDL_TOK_RAW_STRING, "ghi" },
    { FKDL_TOK_RAW_STRING, "jkl \" ## \" " },
    { FKDL_TOK_STRING, "mno" },
    { FKDL_TOK_NODE_TERMINATOR, NULL }
  };
  const size_t stringCount = sizeof stringTokens / sizeof stringTokens[0];

  /* The inputs are passed without their terminating NUL. */
  if (!testTokeniseSuccess(keywords, sizeof keywords - 1, keywordTokens, keywordCount)) {
    return 0;
  }
  if (!testTokeniseSuccess(numbers, sizeof numbers - 1, numberTokens, numberCount)) {
    return 0;
  }
  if (!testTokeniseSuccess(strings, sizeof strings - 1, stringTokens, stringCount)) {
    return 0;
  }

  return 1;
}

/* Check that text containing 4 byte UTF-8 characters survives copyString()
 * unchanged and is written back verbatim by writeEscapedString().
 *
 * The copied string is released with fkdl_cleanupString() on every path once
 * it has been allocated, and the output buffer is zero-filled so that the
 * final strcmp() always reads a terminated string.
 *
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testEscapedUnicode(void)
{
  const char input[] = "🙈 emoji 😁";
  const char expected[] = "🙈 emoji 😁";
  char output[32] = {0};
  struct fkdl_writeCursor cursor = {
    .output = output,
    .outputLen = sizeof output,
    .index = 0,
  };
  struct fkdl_string str;
  int ok = 0;

  str = copyString(&fkdl_defaultAllocator, input, sizeof input);
  if (!str.data) {
    /* Nothing was allocated, so there is nothing to clean up. */
    printf("copyString failed for unicode\n");
    return 0;
  }

  if (strcmp(expected, str.data)) {
    printf("copystring didn't match input. expected '%s' but got '%s'\n", expected, str.data);
  } else if (!writeEscapedString(&cursor, &str)) {
    printf("writeEscapedString failed for unicode\n");
  } else if (strcmp(expected, output)) {
    printf("writeEscapedString didn't match expected. expected '%s' but got '%s'\n", expected, output);
  } else {
    ok = 1;
  }

  /* Release with the same allocator that copyString() was given. */
  fkdl_cleanupString(&str, &fkdl_defaultAllocator);
  return ok;
}

/* Tokenise one input that mixes identifiers, strings, raw strings, numbers,
 * braces, keywords, a type annotation and a property, and compare the result
 * with the expected token list.
 * Returns 1 when the tokens match, 0 otherwise.
 */
int testMixedTokenise(void)
{
  const char keywords[] = "test \"str\"\n 1.5E2 r#\"mix\"ed\"# {100_000 true} null (int)2.7 foo=\"bar\"";
  const struct token_expectation keywordTokens[] = {
    { FKDL_TOK_IDENTIFIER, "test" },
    { FKDL_TOK_STRING, "str" },
    { FKDL_TOK_NODE_TERMINATOR, "\n" },
    { FKDL_TOK_NUMBER, "1.5E2" },
    { FKDL_TOK_RAW_STRING, "mix\"ed" },
    { FKDL_TOK_OPEN_BRACE, "{" },
    { FKDL_TOK_NUMBER, "100_000" },
    { FKDL_TOK_BOOL, "true" },
    { FKDL_TOK_NODE_TERMINATOR, "}" },
    { FKDL_TOK_CLOSE_BRACE, "}" },
    { FKDL_TOK_NULL, "null" },
    { FKDL_TOK_TYPE_ANNOTATION, "int" },
    { FKDL_TOK_NUMBER, "2.7" },
    { FKDL_TOK_IDENTIFIER, "foo" },
    { FKDL_TOK_PROPERTY_EQUALS, "=" },
    { FKDL_TOK_STRING, "bar" },
    { FKDL_TOK_NODE_TERMINATOR, NULL }
  };
  const size_t tokenCount = sizeof keywordTokens / sizeof keywordTokens[0];

  /* The input is passed without its terminating NUL. */
  if (testTokeniseSuccess(keywords, sizeof keywords - 1, keywordTokens, tokenCount)) {
    return 1;
  }
  return 0;
}

/* Parse a small document with fkdl_readDocument() and check the resulting
 * tree: node count, the title node's argument and its two properties (which
 * this test expects in the order id, slug), the body node's three paragraph
 * children, and the footer's UTF-8 property value. Finally the document is
 * written back out with fkdl_writeDocument().
 *
 * Once parsing has succeeded the document is released with
 * fkdl_cleanupDocument() on every path, using the same (NULL) allocator that
 * was passed to fkdl_readDocument().
 *
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testReadBasicDocument(void)
{
  const char docStr[] =
"title \"Hello World\" slug=(url)\"hello_world\" id=1234\n"
"subtitle \"My first document\"\n"
"body {\n"
"    paragraph \"This is the first paragraph\"\n"
"    paragraph \"This is the second paragraph\"\n"
"    paragraph \"This is the third paragraph\"\n"
"}\n"
/* The footer text starts with U+1F601, spelled out as its UTF-8 bytes. */
"footer text=\"\xf0\x9f\x98\x81 All rights reserved\"\n";
  struct fkdl_document document;
  struct fkdl_error error;
  char output[8192];
  size_t i;
  int ok = 0;

  if (!fkdl_readDocument(docStr, strlen(docStr), &document, NULL, &error)) {
    printf("fkdl_readDocument failed because: %s\n", error.message);
    printf("index=%zu line=%zu column=%zu\n", error.index, error.line, error.column);
    return 0;
  }

  if (document.nodesLen != 4) {
    printf("expected 4 nodes, found: %zu\n", document.nodesLen);
    goto done;
  }

  if (!document.nodes) {
    printf("document had null nodes pointer\n");
    goto done;
  }

  if (document.nodes[0].identifier.len != 5 ||
      strcmp(document.nodes[0].identifier.data, "title")) {
    printf("expected first node to be title\n");
    goto done;
  }

  if (document.nodes[0].argumentsLen != 1) {
    printf("expected first node to have 1 argument\n");
    goto done;
  }

  if (document.nodes[0].arguments[0].type != FKDL_STRING ||
    strcmp(document.nodes[0].arguments[0].as_string.data, "Hello World")) {
    printf("expected first node's first argument to be 'Hello World'\n");
    goto done;
  }

  if (document.nodes[0].propertiesLen != 2) {
    printf("expected first node to have 2 properties, found %zu\n", document.nodes[0].propertiesLen);
    goto done;
  }

  if (strcmp(document.nodes[0].properties[0].key.data, "id")) {
    printf("expected first node's first property key to be id\n");
    goto done;
  }

  if (document.nodes[0].properties[0].value.type != FKDL_NUMBER) {
    printf("expected first node's first property value to be a number\n");
    goto done;
  }

  if (strcmp(document.nodes[0].properties[0].value.as_string.data, "1234")) {
    printf("expected first node's first property value to be 1234\n");
    goto done;
  }

  if (strcmp(document.nodes[0].properties[1].key.data, "slug")) {
    printf("expected first node's second property key to be slug\n");
    goto done;
  }

  if (document.nodes[0].properties[1].value.annotation.len == 0 ||
    strcmp(document.nodes[0].properties[1].value.annotation.data, "url")) {
    printf("expected first node's second property value to have type url\n");
    goto done;
  }

  if (document.nodes[0].properties[1].value.type != FKDL_STRING) {
    printf("expected first node's second property value to be a string\n");
    goto done;
  }

  if (strcmp(document.nodes[0].properties[1].value.as_string.data, "hello_world")) {
    printf("expected first node's second property value to be hello_world\n");
    goto done;
  }

  if (document.nodes[2].childrenLen != 3) {
    printf("expected third node to have 3 children\n");
    goto done;
  }

  for (i = 0; i < 3; ++i) {
    if (strcmp("paragraph", document.nodes[2].children[i].identifier.data)) {
      printf("expected third node's child %zu to be a paragraph\n", i);
      goto done;
    }
  }

  if (strcmp("footer", document.nodes[3].identifier.data)) {
    printf("expected fourth node to be a footer but was: %s\n",
      document.nodes[3].identifier.data);
    goto done;
  }

  /* Make sure the property exists before looking at its value. */
  if (document.nodes[3].propertiesLen != 1) {
    printf("expected fourth node to have 1 property, found %zu\n", document.nodes[3].propertiesLen);
    goto done;
  }

  if (strcmp("\xf0\x9f\x98\x81 All rights reserved", document.nodes[3].properties[0].value.as_string.data)) {
    printf("expected fourth node first property to have an emoji but was: %s\n",
      document.nodes[3].properties[0].value.as_string.data);
    goto done;
  }

  /* TODO check child nodes */

  if (!fkdl_writeDocument(output, sizeof output, NULL, &document)) {
    printf("error writing document\n");
    goto done;
  }
  /* printf("document:\n----\n%s----\n", output); */

  ok = 1;

done:
  fkdl_cleanupDocument(&document, NULL);
  return ok;
}

/* Round-trip check: parse `before`, write the document back out, and require
 * the written text to equal `after` exactly.
 *
 * Once parsing has succeeded the document is released with
 * fkdl_cleanupDocument() on every path, using the same (NULL) allocator that
 * was passed to fkdl_readDocument().
 *
 * Returns 1 on success, 0 (after printing a diagnostic) otherwise.
 */
int testBeforeAfter(const char *before, const char *after) {
  struct fkdl_document document;
  struct fkdl_error error;
  char output[8192];
  int ok = 0;

  if (!fkdl_readDocument(before, strlen(before), &document, NULL, &error)) {
    printf("fkdl_readDocument failed because: %s\n", error.message);
    printf("index=%zu line=%zu column=%zu\n", error.index, error.line, error.column);
    printf("text:\n%s\n", before);
    return 0;
  }

  if (!fkdl_writeDocument(output, sizeof output, NULL, &document)) {
    printf("error writing document after reading: %s\n", before);
  } else if (strcmp(output, after)) {
    printf("expected/actual did not match.\ninput:\n%s\n------\nexpected:\n%s\n------\nactual:\n%s\n------\n",
      before, after, output);
  } else {
    ok = 1;
  }

  fkdl_cleanupDocument(&document, NULL);
  return ok;
}

/*
 * Table-driven round-trip test.
 *
 * `cases` is a flat list of string pairs: each even index holds a KDL input
 * and the following odd index holds the text the writer is expected to
 * produce for it. Every pair is handed to testBeforeAfter in table order.
 *
 * Returns 1 when every pair passes. Returns 0 as soon as one pair fails, or
 * when the table contains an unpaired entry.
 */
int testExamples(void)
{
  /* static: the table is read-only, so it need not be rebuilt per call. */
  static const char *const cases[] = {
"node (int)1 (string)\"foo\" \"bar\"\n",
"node (int)1 (string)\"foo\" \"bar\"\n",

"\"foo bar\" 1 2 3\n",
"\"foo bar\" 1 2 3\n",

"\"foobar\" 1 2 3\n",
"foobar 1 2 3\n",

"foobar /-1 2 3\n",
"foobar 2 3\n",

"foo 1; /-bar 2; baz 3\n",
"foo 1\nbaz 3\n",

"foo { a 1; bar b=2 { c 3 };baz 7}final 0b101",
"foo {\n"
"    a 1\n"
"    bar b=2 {\n"
"        c 3\n"
"    }\n"
"    baz 7\n"
"}\n"
"final 0b101\n",

/* examples from the kdl README */
"title \"Hello, World\"\n",
"title \"Hello, World\"\n",

"bookmarks 12 15 188 1234\n",
"bookmarks 12 15 188 1234\n",

"author \"Alex Monad\" email=\"alex@example.com\" active=true\n",
"author \"Alex Monad\" active=true email=\"alex@example.com\"\n",

"contents {\n"
"    section \"First section\" {\n"
"      paragraph \"This is the first paragraph\"\n"
"      paragraph \"This is the second paragraph\"\n"
"    }\n"
"}\n",
"contents {\n"
"    section \"First section\" {\n"
"        paragraph \"This is the first paragraph\"\n"
"        paragraph \"This is the second paragraph\"\n"
"    }\n"
"}\n",

"node1; node2; node3;",
"node1\nnode2\nnode3\n",

"node \"this\\nhas\\tescapes\"\n",
"node \"this\\nhas\\tescapes\"\n",

"other r\"C:\\Users\\zkat\\\"\n",
"other \"C:\\\\Users\\\\zkat\\\\\"\n",

"string \"my\nmultiline\nvalue\"\n",
"string \"my\\nmultiline\\nvalue\"\n",

"other-raw r#\"hello\"world\"#\n",
"other-raw \"hello\\\"world\"\n",

"num 1.234e-42\n",
"num 1.234e-42\n",

"my-hex 0xdeadbeef\n"
"my-octal 0o755\n"
"my-binary 0b10101101\n",
"my-hex 0xdeadbeef\n"
"my-octal 0o755\n"
"my-binary 0b10101101\n",

"bignum 1_000_000\n",
"bignum 1000000\n",

"// C style\n"
"\n"
"/*\n"
"C style muiltiline\n"
"*/\n"
"\n"
"tag /*foo=true*/ bar=false\n"
"\n"
"/*/*\n"
"hello\n"
"*/*/\n",
"tag bar=false\n",

"// This entire node and its children are all commented out.\n"
"/-mynode \"foo\" key=1 {\n"
"  a\n"
"  b\n"
"  c\n"
"}\n"
"\n"
"mynode /-\"commented\" \"not commented\" /-key=\"value\" /-{\n"
"  a\n"
"  b\n"
"}\n",
"mynode \"not commented\"\n",

"numbers (u8)10 (i32)20 myfloat=(f32)1.5 {\n"
"  strings (uuid)\"123e4567-e89b-12d3-a456-426614174000\" (date)\"2021-02-03\" filter=(regex)r\"$\\d+\"\n"
"  (author)person name=\"Alex\"\n"
"}\n",
"numbers (u8)10 (i32)20 myfloat=(f32)1.5 {\n"
"    strings (uuid)\"123e4567-e89b-12d3-a456-426614174000\" (date)\"2021-02-03\" filter=(regex)\"$\\\\d+\"\n"
"    (author)person name=\"Alex\"\n"
"}\n",

"title \\  \n"
"  \"Some title\"\n",
"title \"Some title\"\n",

"smile \"😁\"\n",
"smile \"😁\"\n",

"\"!@#$@$%Q#$%~@!40\" \"1.2.3\" \"!!!!!\"=true\n",
"!@#$@$%Q#$%~@!40 \"1.2.3\" !!!!!=true\n",

"foo123~!@#$%^&*.:'|?+ \"weeee\"\n",
"foo123~!@#$%^&*.:'|?+ \"weeee\"\n",

"ノード お名前=\"☜(゚ヮ゚☜)\"\n",
"ノード お名前=\"☜(゚ヮ゚☜)\"\n",

"foo bar=true \"baz\" quux=false 1 2 3\n",
"foo \"baz\" 1 2 3 bar=true quux=false\n",

/* test cases from kdl repo */

/* all_escapes */
"node \"\\\\ \\\" \\/ \\b \\f \\n \\r \\t \"\n",
"node \"\\\\ \\\" \\/ \\b \\f \\n \\r \\t \"\n",

/* all_node_fields */
"node \"arg\" prop=\"val\" {\n"
"    inner_node\n"
"}\n",
"node \"arg\" prop=\"val\" {\n"
"    inner_node\n"
"}\n",

/* arg_and_prop_same_Name */
"node \"arg\" arg=\"val\"\n",
"node \"arg\" arg=\"val\"\n",

/* arg_false_type */
"node (type)false\n",
"node (type)false\n",

/* arg_float_type */
"node (type)2.5\n",
"node (type)2.5\n",

/* arg_hex_type */
"node (type)0x10\n",
"node (type)0x10\n",

/* arg_null_type */
"node (type)null\n",
"node (type)null\n",

/* arg_raw_string_type */
"node (type)r\"str\"\n",
"node (type)\"str\"\n",

/* arg_string_type */
"node (type)\"str\"\n",
"node (type)\"str\"\n",

/* arg_true_type */
"node (type)true\n",
"node (type)true\n",

/* arg_type */
"node (type)\"arg\"\n",
"node (type)\"arg\"\n",

/* arg_zero_type */
"node (type)0\n",
"node (type)0\n",

/* asterisk_in_block_comment */
"node /* * */\n",
"node\n",

/* bare_emoji */
"😁 \"happy!\"\n",
"😁 \"happy!\"\n",

/* binary */
"node 0b10\n",
"node 0b10\n",

/* binary_trailing_underscore */
"node 0b10_\n",
"node 0b10\n",

/* binary_underscore */
"node 0b1_0\n",
"node 0b10\n",

/* block_comment */
"node /* comment */ \"arg\"\n",
"node \"arg\"\n",

/* block_comment_after_node */
"node /* hey */ \"arg\"\n",
"node \"arg\"\n",

/* block_comment_before_node */
"/* hey */ node\n",
"node\n",

/* block_comment_before_node_no_space */
"/* hey*/node\n",
"node\n",

/* block_comment_newline */
"/* hey */\n",
"",

/* boolean_arg */
"node false true\n",
"node false true\n",

/* boolean_prop */
"node prop1=true prop2=false\n",
"node prop1=true prop2=false\n",

/* commented_arg */
"node /-\"arg1\" \"arg2\"\n",
"node \"arg2\"\n",

/* commented_child */
"node \"arg\" /-{\n"
"     inner_node\n"
"}\n",
"node \"arg\"\n",

/* commented_line */
"// node_1\n"
"node_2\n",
"node_2\n",

/* commented_node */
"/-node_1\n"
"node_2\n",
"node_2\n",

/* commented_prop */
"node /-prop=\"val\" \"arg\"\n",
"node \"arg\"\n",

/* crlf_between_nodes */
"node1\r\nnode2\n",
"node1\n"
"node2\n",

/* emoji */
"node \"😀\"\n",
"node \"😀\"\n",

/* empty */
"",
"",

/* empty_child */
"node {\n"
"}\n",
"node\n",

/* empty_child_same_line */
"node {}\n",
"node\n",

/* empty_child_whitespace */
"node {\n"
"\n"
"     }\n",
"node\n",

/* empty_quoted_node_id */
"\"\" \"arg\"\n",
"\"\" \"arg\"\n",

/* empty_quoted_prop_key */
"node \"\"=\"empty\"\n",
"node \"\"=\"empty\"\n",

/* empty_string_arg */
"node \"\"\n",
"node \"\"\n",

/* esc_newline_in_string */
"node \"hello\\nworld\"\n",
"node \"hello\\nworld\"\n",

/* esc_unicode_in_string */
"node \"hello\\u{0a}world\"\n",
"node \"hello\\nworld\"\n",

/* escline */
"node \\\n"
"    \"arg\"\n",
"node \"arg\"\n",

/* escline_comment_node */
"node1\n"
"  \\// hey\n"
"   node2\n",
"node1\n"
"node2\n",

/* escline_line_comment */
"node \\   // comment\n"
"    \"arg\" \\// comment\n"
"    \"arg2\n"
"\"\n",
"node \"arg\" \"arg2\\n\"\n",

/* escline_node */
"node1\n"
"node2\n",
"node1\n"
"node2\n",

/* false_prefix_in_bare_id */
"false_id\n",
"false_id\n",

/* false_prefix_in_prop_key */
"node false_id=1\n",
"node false_id=1\n",

/* hex */
"node 0xabcdef1234567890\n",
"node 0xabcdef1234567890\n",

/* hex_int - TODO force case? */
"node 0xABCDEF0123456789abcdef\n",
"node 0xABCDEF0123456789abcdef\n",

/* hex_int_underscores */
"node 0xABC_def_0123\n",
"node 0xABCdef0123\n",

/* hex_leading_zero - TODO remove leading zeroes */
"node 0x01\n",
"node 0x01\n",

/* int_multiple_underscores */
"node 1_2_3_4\n",
"node 1234\n",

/* just_block_comment */
"/* hey */\n",
"",

/* just_child */
"node {\n"
"    inner_node     \n"
"}\n",
"node {\n"
"    inner_node\n"
"}\n",

/* just_newline */
"\n",
"",

/* just_node_id */
"node",
"node\n",

/* just_space */
" ",
"",

/* leading_newline */
"\n"
"node",
"node\n",

/* leading_zero_binary - TODO remove leading zeroes */
"node 0b01\n",
"node 0b01\n",

/* leading_zero_int - TODO remove leading zeroes */
"node 011\n",
"node 011\n",

/* leading_zero_oct - TODO remove leading zeroes */
"node 0o01\n",
"node 0o01\n",

/* multiline_comment */
"node /*\n"
"some\n"
"comments\n"
"*/ \"arg\"",
"node \"arg\"\n",

/* multiline_nodes */
"node \\\n"
"    \"arg1\" \\// comment\n"
"    \"arg2\"\n",
"node \"arg1\" \"arg2\"\n",

/* multiline_string */
"node \" hey\n"
"everyone\n"
"how goes?\n"
"\"",
"node \" hey\\neveryone\\nhow goes?\\n\"\n",

/* negative_exponent - TODO capitalise e */
"node 1.0e-10\n",
"node 1.0e-10\n",

/* negative float */
"node -1.0 key=-10.0\n",
"node -1.0 key=-10.0\n",

/* negative_int */
"node -10 prop=-15\n",
"node -10 prop=-15\n",

/* nested_block_comment */
"node /* hi /* there */ everyone */ \"arg\"\n",
"node \"arg\"\n",

/* nested_children */
"node1 {\n"
"    node2 {\n"
"        node\n"
"    }\n"
"}\n",
"node1 {\n"
"    node2 {\n"
"        node\n"
"    }\n"
"}\n",

/* nested comments */
"node /*/* nested */*/ \"arg\"",
"node \"arg\"\n",

/* nested_multiline_block_comments */
"node /*\n"
"hey /*\n"
"how's\n"
"*/\n"
"    it going\n"
"    */ \"arg\"\n"
"    ",
"node \"arg\"\n",

/* newline_between_nodes */
"node1\nnode2",
"node1\n"
"node2\n",

/* newline_in_block_comment */
"node /* hey so\n"
"I was thinking\n"
"about newts */ \"arg\"",
"node \"arg\"\n",

/* no_decimal_exponent - TODO capitalise e and add +*/
"node 1e10",
"node 1e10\n",

/* node_false */
"node false\n",
"node false\n",

/* node_true */
"node true\n",
"node true\n",

/* node_type */
"(type)node\n",
"(type)node\n",

/* null_arg */
"node null\n",
"node null\n",

/* null_prefix_in_bare_id */
"null_id\n",
"null_id\n",

/* null_prefix_in_prop_key */
"node null_id=1\n",
"node null_id=1\n",

/* null_prop */
"node prop=null\n",
"node prop=null\n",

/* numeric_arg */
"node 15.7\n",
"node 15.7\n",

/* numeric_prop */
"node prop=10.0\n",
"node prop=10.0\n",

/* octal */
"node 0o76543210\n",
"node 0o76543210\n",

/* only_cr */
"\r",
"",

/* only_line_comment */
"// hi",
"",

/* only_line_comment_crlf */
"// hi\r\n",
"",

/* only_line_comment_newline */
"// hi\n",
"",

/* positive_exponent */
"node 1.0e+10\n",
"node 1.0e+10\n",

/* positive_int */
"node +10\n",
"node +10\n",

/* parse_all_arg_types */
"node 1 1.0 1.0e10 1.0e-10 0x01 0o07 0b10 \"arg\" r\"arg\\\\\" true false null",
"node 1 1.0 1.0e10 1.0e-10 0x01 0o07 0b10 \"arg\" \"arg\\\\\\\\\" true false null\n",

/* preserve_duplicate_nodes */
"node\n"
"node\n",
"node\n"
"node\n",

/* preserve_node_order */
"node2\n"
"node5\n"
"node1\n",
"node2\n"
"node5\n"
"node1\n",

/* prop_false_type */
"node key=(type)false\n",
"node key=(type)false\n",

/* prop_float_type */
"node key=(type)2.5E10\n",
"node key=(type)2.5E10\n",

/* prop_hex_type */
"node key=(type)0x10\n",
"node key=(type)0x10\n",

/* prop_null_type */
"node key=(type)null\n",
"node key=(type)null\n",

/* prop_raw_string_type */
"node key=(type)r\"str\"\n",
"node key=(type)\"str\"\n",

/* prop_string_type */
"node key=(type)\"str\"\n",
"node key=(type)\"str\"\n",

/* prop_true_type */
"node key=(type)true\n",
"node key=(type)true\n",

/* prop_type */
"node key=(type)true\n",
"node key=(type)true\n",

/* prop_zero_type */
"node key=(type)0\n",
"node key=(type)0\n",

/* quoted_node_name */
"\"0node\"",
"0node\n",

/* quoted_numeric */
"node prop=\"10.0\"",
"node prop=\"10.0\"\n",

/* quoted_prop_name */
"node \"0prop\"=\"val\"\n",
"node 0prop=\"val\"\n",

/* r_node */
"r \"arg\"\n",
"r \"arg\"\n",

/* raw_arg_type */
"node (type)true\n",
"node (type)true\n",

/* raw_node_name */
"r\"\\node\"\n",
"\"\\\\node\"\n",

/* raw_node_type */
"(type)node\n",
"(type)node\n",

/* raw_prop_type */
"node key=(type)true\n",
"node key=(type)true\n",

/* raw_string_arg */
"node_1 r\"arg\\n\"\n"
"node_2 r#\"\"arg\\n\"and stuff\"#\n"
"node_3 r##\"#\"arg\\n\"#and stuff\"##\n",
"node_1 \"arg\\\\n\"\n"
"node_2 \"\\\"arg\\\\n\\\"and stuff\"\n"
"node_3 \"#\\\"arg\\\\n\\\"#and stuff\"\n",

/* raw_string_backslash */
"node r\"\n\"",
"node \"\\n\"\n",

/* raw_string_hash_no_esc */
"node r\"#\"\n",
"node \"#\"\n",

/* raw_string_just_backslash */
"node r\"\\\"\n",
"node \"\\\\\"\n",

/* raw_string_just_quote */
"node r#\"\"\"#\n",
"node \"\\\"\"\n",

/* raw_string_multiple_hash */
"node r###\"\"#\"##\"###\n",
"node \"\\\"#\\\"##\"\n",

/* raw_string_newline */
"node r\"\n"
"hello\n"
"world\n"
"\"",
"node \"\\nhello\\nworld\\n\"\n",

/* raw_string_prop */
"node_1 prop=r\"arg\\n\"\n"
"node_2 prop=r#\"\"arg\"\\n\"#\n"
"node_3 prop=r##\"#\"arg\"#\\n\"##\n",
"node_1 prop=\"arg\\\\n\"\n"
"node_2 prop=\"\\\"arg\\\"\\\\n\"\n"
"node_3 prop=\"#\\\"arg\\\"#\\\\n\"\n",

/* raw_string_quote */
"node r#\"a\"b\"#\n",
"node \"a\\\"b\"\n",

/* repeated_arg */
"node \"arg\" \"arg\"\n",
"node \"arg\" \"arg\"\n",

/* repeated_prop */
"node prop=10 prop=11\n",
"node prop=11\n",

/* same_args */
"node \"whee\" \"whee\"\n",
"node \"whee\" \"whee\"\n",

/* same_name_nodes */
"node\nnode\n",
"node\nnode\n",

/* sci_notation_large */
"node prop=1.23E+1000\n",
"node prop=1.23E+1000\n",

/* sci_notation_small */
"node prop=1.23E-1000\n",
"node prop=1.23E-1000\n",

/* semicolon_after_child */
"node {\n"
"     childnode\n"
"};\n",
"node {\n"
"    childnode\n"
"}\n",

/* semicolon_in_child */
"node1 {\n"
"     node2;\n"
"}\n",
"node1 {\n"
"    node2\n"
"}\n",

/* semicolon_separated */
"node1;node2",
"node1\n"
"node2\n",

/* semicolon_separated_nodes */
"node1; node2",
"node1\n"
"node2\n",

/* semicolon_terminated */
"node1;\n",
"node1\n",

/* single_arg */
"node \"arg\"\n",
"node \"arg\"\n",

/* single_prop */
"node prop=\"val\"\n",
"node prop=\"val\"\n",

/* slashdash_arg_after_newline_esc */
"node \\\n"
"    /- \"arg\" \"arg2\"\n",
"node \"arg2\"\n",

/* slashdash_child */
"node /- {\n"
"    node2\n"
"}",
"node\n",

/* slashdash_empty_child */
"node /- {\n"
"}",
"node\n",

/* slashdash_full_node */
"/- node 1.0 \"a\" b=\"b\n"
"\"",
"",

/* slashdash_in_slashdash */
"/-node1 /-1.0\n"
"node2\n",
"node2\n",

/* slashdash_negative_number */
"node /--1.0 2.0\n",
"node 2.0\n",

/* slashdash_node_in_child */
"node1 {\n"
"    /- node2\n"
"}\n",
"node1\n",

/* slashdash_node_with_child */
"/-node {\n"
"   node2\n"
"}\n",
"",

/* slashdash_only_node */
"/-node\n",
"",

/* slashdash_only_node_space */
"/- node\n",
"",

/* slashdash_prop */
"node /-key=\"value\" \"arg\"\n",
"node \"arg\"\n",

/* slashdash_raw_prop_key */
"node /-key=\"value\"\n",
"node\n",

/* string_arg */
"node \"arg\"\n",
"node \"arg\"\n",

/* string_prop */
"node prop=\"val\"\n",
"node prop=\"val\"\n",

/* tab_space */
"node\t",
"node\n",

/* trailing_crlf */
"node\r\n",
"node\n",

/* trailing_underscore_hex */
"node 0x123abc_",
"node 0x123abc\n",

/* trailing_underscore_octal */
"node 0o123_",
"node 0o123\n",

/* true_prefix_in_bare_id */
"true_id",
"true_id\n",

/* true_prefix_in_prop_key */
"node true_id=1\n",
"node true_id=1\n",

/* two_nodes */
"node1\n"
"node2\n",
"node1\n"
"node2\n",

/* underscore_in_exponent */
"node 1.0e-10_0\n",
"node 1.0e-100\n",

/* underscore_in_float */
"node 1_1.0\n",
"node 11.0\n",

/* underscore_in_fraction */
"node 1.0_2\n",
"node 1.02\n",

/* underscore_in_int */
"node 1_0\n",
"node 10\n",

/* underscore_in_octal */
"node 0o012_3456_7\n",
"node 0o01234567\n",

/* unusual_bare_id_chars_in_quoted_id */
"\"foo123~!@#$%^&*.:'|?+\" \"weeee\"\n",
"foo123~!@#$%^&*.:'|?+ \"weeee\"\n",

/* unusual_chars_in_bare_id */
"foo123~!@#$%^&*.:'|?+ \"weeee\"\n",
"foo123~!@#$%^&*.:'|?+ \"weeee\"\n",

/* zero_arg */
"node 0\n",
"node 0\n",

/* zero_float */
"node 0.0\n",
"node 0.0\n",

/* zero_int */
"node 0\n",
"node 0\n",

};
  const size_t count = sizeof cases / sizeof *cases;
  size_t i;

  /* The loop below reads cases[i + 1]; an odd count means an input lost its
   * expected output (or vice versa) and would index past the table. */
  if (count % 2 != 0) {
    printf("testExamples: case table has an unpaired entry (%lu strings)\n",
      (unsigned long)count);
    return 0;
  }

  for (i = 0; i < count; i += 2) {
    if (!testBeforeAfter(cases[i], cases[i + 1])) {
      return 0;
    }
  }
  return 1;
}

/*
 * Test runner entry point.
 *
 * Runs each test function in a fixed order and stops at the first one that
 * returns 0. Prints "Tests passed" and exits with status 0 only when every
 * test returned non-zero; otherwise exits with status 1.
 */
int main(int argc, char **argv)
{
  int ok;

  /* Command-line arguments are not used by the runner. */
  (void)argc;
  (void)argv;

  /* `ok && f()` short-circuits, so later tests are skipped after a failure. */
  ok = testUtf8();
  ok = ok && testPeekChar();
  ok = ok && testPeekStr();
  ok = ok && testPeekWord();
  ok = ok && testConsumeChar();
  ok = ok && testConsumeNewline();
  ok = ok && testConsumeHexCode();
  ok = ok && testIsRawStringStart();
  ok = ok && testEscapedUnicode();
  ok = ok && testCountHashes();
  ok = ok && testConsumeSingleComment();
  ok = ok && testConsumeString();
  ok = ok && testConsumeWhitespace();
  ok = ok && testUnescaping();
  ok = ok && testBasicTokenise();
  ok = ok && testMixedTokenise();
  ok = ok && testReadBasicDocument();
  ok = ok && testExamples();

  if (!ok) {
    return 1;
  }

  printf("Tests passed\n");
  return 0;
}