~ahelwer/tree-sitter-tlaplus

7455f17038d551ecd2b082c39e4a63bcb403e158 — Andrew Helwer 3 months ago 3896a5b
Rewrite scanner in C (#99)

Rewrite scanner in C, add some corpus tests, add generated files from newer versions of tree-sitter CLI

Signed-off-by: Andrew Helwer <2n8rn1w1f@mozmail.com>
A .editorconfig => .editorconfig +39 -0
@@ 0,0 1,39 @@
root = true

[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2

[*.js]
indent_style = space
indent_size = 2

[*.rs]
indent_style = space
indent_size = 4

[*.{c,cc,h}]
indent_style = space
indent_size = 4

[*.{py,pyi}]
indent_style = space
indent_size = 4

[*.swift]
indent_style = space
indent_size = 4

[*.go]
indent_style = tab
indent_size = 8

[Makefile]
indent_style = tab
indent_size = 8

A .gitattributes => .gitattributes +11 -0
@@ 0,0 1,11 @@
* text eol=lf

src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated

bindings/** linguist-generated
binding.gyp linguist-generated
setup.py linguist-generated
Makefile linguist-generated
Package.swift linguist-generated

M .github/workflows/ci.yml => .github/workflows/ci.yml +2 -6
@@ 40,12 40,8 @@ jobs:
        if: matrix.os == 'windows-latest'
        shell: pwsh
        run: .\test\run-corpus.ps1
      - name: Corpus Tests (Unix)
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: ./test/run-corpus.sh --parallel
      - name: Corpus Tests (MacOS)
        if: matrix.os == 'macos-latest'
      - name: Corpus Tests (Linux & macOS)
        if: matrix.os != 'windows-latest'
        shell: bash
        run: ./test/run-corpus.sh
      - name: Query File Tests

M Cargo.toml => Cargo.toml +6 -11
@@ 6,25 6,20 @@ authors = ["Andrew Helwer", "Vasiliy Morkovkin"]
license = "MIT"
readme = "README.md"
keywords = ["tla+", "tlaplus", "pluscal", "tree-sitter", "parser"]
categories = ["parser-implementations"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tlaplus-community/tree-sitter-tlaplus"
edition = "2018"
edition = "2021"
autoexamples = false

build = "bindings/rust/build.rs"
include = [
  "bindings/rust/*",
  "grammar.js",
  "queries/*",
  "src/*",
  "./LICENSE.md"
]
include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]

[lib]
path = "bindings/rust/lib.rs"

[dependencies]
tree-sitter = "0.20.10"
tree-sitter = "0.22.1"

[build-dependencies]
cc = "1.0"
cc = "1.0.87"


A Makefile => Makefile +109 -0
@@ 0,0 1,109 @@
VERSION := 0.0.1

LANGUAGE_NAME := tree-sitter-tlaplus

# repository
SRC_DIR := src

# Upstream repository URL, derived from the git remote (empty when the
# sources are not a git checkout).
PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null)

# Strip only a *trailing* ".git"; $(subst .git,,...) would also mangle
# URLs that merely contain ".git" elsewhere in the path.
ifeq ($(PARSER_URL),)
	PARSER_URL := $(patsubst %.git,%,$(PARSER_REPO_URL))
# Convert an scp-like remote (git@host:user/repo) into an https:// URL.
ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),)
	PARSER_URL := $(subst :,/,$(PARSER_URL))
	PARSER_URL := $(subst git@,https://,$(PARSER_URL))
endif
endif

TS ?= tree-sitter

# ABI versioning
SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))

# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig

# object files
OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))

# flags
ARFLAGS := rcs
# -fPIC is required because the same objects feed the shared library;
# `override` keeps these flags even when CFLAGS is set on the command line.
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC

# OS-specific bits
ifeq ($(OS),Windows_NT)
	$(error "Windows is not supported")
else ifeq ($(shell uname),Darwin)
	SOEXT = dylib
	SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
	LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
	ifneq ($(ADDITIONAL_LIBS),)
	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS),
	endif
	LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
	SOEXT = so
	SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
	SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
	LINKSHARED := $(LINKSHARED)-shared -Wl,
	ifneq ($(ADDITIONAL_LIBS),)
	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS)
	endif
	LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR)
endif
# The BSDs keep pkg-config files under libdata.
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
	PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif

all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc

lib$(LANGUAGE_NAME).a: $(OBJS)
	$(AR) $(ARFLAGS) $@ $^

lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
	$(STRIP) $@
endif

# Instantiate the pkg-config file from its template.
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
	sed  -e 's|@URL@|$(PARSER_URL)|' \
		-e 's|@VERSION@|$(VERSION)|' \
		-e 's|@LIBDIR@|$(LIBDIR)|' \
		-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
		-e 's|@REQUIRES@|$(REQUIRES)|' \
		-e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \
		-e 's|=$(PREFIX)|=$${prefix}|' \
		-e 's|@PREFIX@|$(PREFIX)|' $< > $@

# Regenerate the parser when the grammar changes (needs the tree-sitter CLI).
$(SRC_DIR)/parser.c: grammar.js
	$(TS) generate --no-bindings

install: all
	install -Dm644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
	install -Dm644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
	install -Dm644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
	install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)

uninstall:
	$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
		'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc

clean:
	$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT)

test:
	$(TS) test

.PHONY: all install uninstall clean test

A Package.swift => Package.swift +48 -0
@@ 0,0 1,48 @@
// swift-tools-version:5.3
import PackageDescription

// SwiftPM manifest exposing the generated parser plus the external scanner
// as a single C-language target.
let package = Package(
    name: "TreeSitterTlaplus",
    platforms: [.macOS(.v10_13), .iOS(.v11)],
    products: [
        .library(name: "TreeSitterTlaplus", targets: ["TreeSitterTlaplus"]),
    ],
    dependencies: [],
    targets: [
        .target(name: "TreeSitterTlaplus",
                // The target is rooted at the repository itself, so every
                // non-Swift-relevant path must be excluded explicitly.
                path: ".",
                exclude: [
                    "Cargo.toml",
                    "Makefile",
                    "binding.gyp",
                    "bindings/c",
                    "bindings/go",
                    "bindings/node",
                    "bindings/python",
                    "bindings/rust",
                    "prebuilds",
                    "grammar.js",
                    "package.json",
                    "package-lock.json",
                    "pyproject.toml",
                    "setup.py",
                    "test",
                    "examples",
                    ".editorconfig",
                    ".github",
                    ".gitignore",
                    ".gitattributes",
                    ".gitmodules",
                ],
                // Only the two C translation units are compiled.
                sources: [
                    "src/parser.c",
                    "src/scanner.c",
                ],
                // Ship the highlight/locals query files inside the bundle.
                resources: [
                    .copy("queries")
                ],
                publicHeadersPath: "bindings/swift",
                cSettings: [.headerSearchPath("src")])
    ],
    cLanguageStandard: .c11
)

M binding.gyp => binding.gyp +8 -4
@@ 2,16 2,20 @@
  "targets": [
    {
      "target_name": "tree_sitter_tlaplus_binding",
      "dependencies": [
        "<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
      ],
      "include_dirs": [
        "<!(node -e \"require('nan')\")",
        "src"
        "src",
      ],
      "sources": [
        "bindings/node/binding.cc",
        "src/parser.c",
        "src/scanner.cc",
        "src/scanner.c",
      ],
      "cflags_c": [
        "-std=c11",
      ],
      "cflags_c": ["-std=c99"]
    }
  ]
}

A bindings/c/tree-sitter-tlaplus.h => bindings/c/tree-sitter-tlaplus.h +16 -0
@@ 0,0 1,16 @@
#ifndef TREE_SITTER_TLAPLUS_H_
#define TREE_SITTER_TLAPLUS_H_

/* Opaque forward declaration; the full definition lives in the
 * tree-sitter runtime headers. */
typedef struct TSLanguage TSLanguage;

#ifdef __cplusplus
extern "C" {
#endif

/* Returns the TSLanguage for the TLA+ grammar (defined in src/parser.c). */
const TSLanguage *tree_sitter_tlaplus(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_TLAPLUS_H_

A bindings/c/tree-sitter-tlaplus.pc.in => bindings/c/tree-sitter-tlaplus.pc.in +11 -0
@@ 0,0 1,11 @@
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@

Name: tree-sitter-tlaplus
Description: Tlaplus grammar for tree-sitter
URL: @URL@
Version: @VERSION@
Requires: @REQUIRES@
Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-tlaplus
Cflags: -I${includedir}

A bindings/go/binding.go => bindings/go/binding.go +13 -0
@@ 0,0 1,13 @@
package tree_sitter_tlaplus

// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// // NOTE: if your language has an external scanner, add it here.
import "C"

import "unsafe"

// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
	return unsafe.Pointer(C.tree_sitter_tlaplus())
}

A bindings/go/binding_test.go => bindings/go/binding_test.go +15 -0
@@ 0,0 1,15 @@
package tree_sitter_tlaplus_test

import (
	"testing"

	tree_sitter "github.com/smacker/go-tree-sitter"
	"github.com/tree-sitter/tree-sitter-tlaplus"
)

func TestCanLoadGrammar(t *testing.T) {
	language := tree_sitter.NewLanguage(tree_sitter_tlaplus.Language())
	if language == nil {
		t.Errorf("Error loading Tlaplus grammar")
	}
}

A bindings/go/go.mod => bindings/go/go.mod +5 -0
@@ 0,0 1,5 @@
module github.com/tree-sitter/tree-sitter-tlaplus

go 1.22

require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8

M bindings/node/binding.cc => bindings/node/binding.cc +14 -22
@@ 1,28 1,20 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
#include <napi.h>

using namespace v8;
typedef struct TSLanguage TSLanguage;

extern "C" TSLanguage * tree_sitter_tlaplus();
extern "C" TSLanguage *tree_sitter_tlaplus();

namespace {
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
  0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};

NAN_METHOD(New) {}

void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
  tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  tpl->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_tlaplus());

  Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("tlaplus").ToLocalChecked());
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
    exports["name"] = Napi::String::New(env, "tlaplus");
    auto language = Napi::External<TSLanguage>::New(env, tree_sitter_tlaplus());
    language.TypeTag(&LANGUAGE_TYPE_TAG);
    exports["language"] = language;
    return exports;
}

NODE_MODULE(tree_sitter_tlaplus_binding, Init)

}  // namespace
NODE_API_MODULE(tree_sitter_tlaplus_binding, Init)

A bindings/node/index.d.ts => bindings/node/index.d.ts +28 -0
@@ 0,0 1,28 @@
// Descriptor for a single syntax-node kind, as listed in src/node-types.json.
type BaseNode = {
  type: string;
  named: boolean;
};

// Describes a child slot of a node: how many children it admits, whether it
// is mandatory, and which node kinds may fill it.
type ChildNode = {
  multiple: boolean;
  required: boolean;
  types: BaseNode[];
};

// A node-types.json entry is either a supertype (with subtypes) or a
// concrete node (with named fields and anonymous children).
type NodeInfo =
  | (BaseNode & {
      subtypes: BaseNode[];
    })
  | (BaseNode & {
      fields: { [name: string]: ChildNode };
      children: ChildNode[];
    });

// Shape of the object exported by bindings/node/index.js.
type Language = {
  name: string;
  // Opaque handle consumed by the tree-sitter runtime.
  language: unknown;
  nodeTypeInfo: NodeInfo[];
};

declare const language: Language;
export = language;

M bindings/node/index.js => bindings/node/index.js +3 -15
@@ 1,18 1,6 @@
try {
  module.exports = require("../../build/Release/tree_sitter_tlaplus_binding");
} catch (error1) {
  if (error1.code !== 'MODULE_NOT_FOUND') {
    throw error1;
  }
  try {
    module.exports = require("../../build/Debug/tree_sitter_tlaplus_binding");
  } catch (error2) {
    if (error2.code !== 'MODULE_NOT_FOUND') {
      throw error2;
    }
    throw error1
  }
}
const root = require("path").join(__dirname, "..", "..");

module.exports = require("node-gyp-build")(root);

try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");

A bindings/python/tree_sitter_tlaplus/__init__.py => bindings/python/tree_sitter_tlaplus/__init__.py +5 -0
@@ 0,0 1,5 @@
"""Tlaplus grammar for tree-sitter."""

# Re-export the single public entry point from the compiled C extension.
from ._binding import language

__all__ = ["language"]

A bindings/python/tree_sitter_tlaplus/__init__.pyi => bindings/python/tree_sitter_tlaplus/__init__.pyi +1 -0
@@ 0,0 1,1 @@
# The int is the address of the grammar's TSLanguage struct, produced by
# PyLong_FromVoidPtr in bindings/python/tree_sitter_tlaplus/binding.c.
def language() -> int: ...

A bindings/python/tree_sitter_tlaplus/binding.c => bindings/python/tree_sitter_tlaplus/binding.c +27 -0
@@ 0,0 1,27 @@
#include <Python.h>

/* Opaque forward declaration; only the pointer value is used here. */
typedef struct TSLanguage TSLanguage;

/* Implemented in src/parser.c (generated by tree-sitter). */
TSLanguage *tree_sitter_tlaplus(void);

/* language() -> int
 * Returns the address of the grammar's TSLanguage as a Python int
 * via PyLong_FromVoidPtr. */
static PyObject* _binding_language(PyObject *self, PyObject *args) {
    return PyLong_FromVoidPtr(tree_sitter_tlaplus());
}

static PyMethodDef methods[] = {
    {"language", _binding_language, METH_NOARGS,
     "Get the tree-sitter language for this grammar."},
    {NULL, NULL, 0, NULL}  /* sentinel */
};

static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_binding",
    .m_doc = NULL,
    .m_size = -1,
    .m_methods = methods
};

/* Module entry point: creates the _binding extension module. */
PyMODINIT_FUNC PyInit__binding(void) {
    return PyModule_Create(&module);
}

A bindings/python/tree_sitter_tlaplus/py.typed => bindings/python/tree_sitter_tlaplus/py.typed +0 -0
M bindings/rust/build.rs => bindings/rust/build.rs +8 -19
@@ 2,29 2,18 @@ fn main() {
    let src_dir = std::path::Path::new("src");

    let mut c_config = cc::Build::new();
    c_config.include(&src_dir);
    c_config
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs");
    c_config.std("c11").include(src_dir);

    let parser_path = src_dir.join("parser.c");
    c_config.file(&parser_path);
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

    let scanner_path = src_dir.join("scanner.cc");
    // NOTE: if your language uses an external scanner, uncomment this block:
    /*
    let scanner_path = src_dir.join("scanner.c");
    c_config.file(&scanner_path);
    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    */

    c_config.compile("parser");
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

    let mut cpp_config = cc::Build::new();
    cpp_config.cpp(true);
    cpp_config.include(&src_dir);
    cpp_config
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable");
    let scanner_path = src_dir.join("scanner.cc");
    cpp_config.file(&scanner_path);
    cpp_config.compile("scanner");
    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    c_config.compile("tree-sitter-tlaplus");
}

M bindings/rust/lib.rs => bindings/rust/lib.rs +12 -10
@@ 1,13 1,15 @@
//! This crate provides tlaplus language support for the [tree-sitter][] parsing library.
//! This crate provides Tlaplus language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_tlaplus::language()).expect("Error loading tlaplus grammar");
//! parser.set_language(&tree_sitter_tlaplus::language()).expect("Error loading Tlaplus grammar");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html


@@ 31,14 33,14 @@ pub fn language() -> Language {
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");

// Uncomment these to include any queries that this grammar contains

// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");

#[cfg(test)]
mod tests {


@@ 46,7 48,7 @@ mod tests {
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(super::language())
            .expect("Error loading tlaplus language");
            .set_language(&super::language())
            .expect("Error loading Tlaplus grammar");
    }
}

A bindings/swift/TreeSitterTlaplus/tlaplus.h => bindings/swift/TreeSitterTlaplus/tlaplus.h +16 -0
@@ 0,0 1,16 @@
#ifndef TREE_SITTER_TLAPLUS_H_
#define TREE_SITTER_TLAPLUS_H_

typedef struct TSLanguage TSLanguage;

#ifdef __cplusplus
extern "C" {
#endif

const TSLanguage *tree_sitter_tlaplus(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_TLAPLUS_H_

M package.json => package.json +24 -3
@@ 3,8 3,11 @@
  "version": "1.1.0",
  "description": "A tree-sitter grammar for TLA⁺ and PlusCal",
  "main": "bindings/node",
  "types": "bindings/node",
  "scripts": {
    "test": "npx tree-sitter test"
    "test": "npx tree-sitter test",
    "install": "node-gyp-build",
    "prebuildify": "prebuildify --napi --strip"
  },
  "repository": {
    "type": "git",


@@ 28,11 31,29 @@
  },
  "homepage": "https://github.com/tlaplus-community/tree-sitter-tlaplus#readme",
  "dependencies": {
    "nan": "^2.14.2"
    "node-gyp-build": "^4.8.0",
    "node-addon-api": "^7.1.0"
  },
  "devDependencies": {
  "peerDependencies": {
    "tree-sitter-cli": "0.21.0"
  },
  "peerDependenciesMeta": {
    "tree_sitter": {
      "optional": true
    }
  },
  "devDependencies": {
    "prebuildify": "6.0.0",
    "tree-sitter-cli": "0.22.1"
  },
  "files": [
    "grammar.js",
    "binding.gyp",
    "prebuilds/**",
    "bindings/node/*",
    "queries/*",
    "src/**"
  ],
  "tree-sitter": [
    {
      "scope": "source.tlaplus",

A pyproject.toml => pyproject.toml +29 -0
@@ 0,0 1,29 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "tree-sitter-tlaplus"
description = "Tlaplus grammar for tree-sitter"
version = "1.1.0"
keywords = ["incremental", "parsing", "tree-sitter", "tlaplus"]
classifiers = [
  "Intended Audience :: Developers",
  "License :: OSI Approved :: MIT License",
  "Topic :: Software Development :: Compilers",
  "Topic :: Text Processing :: Linguistic",
  "Typing :: Typed"
]
requires-python = ">=3.8"
license.text = "MIT"
readme = "README.md"

[project.urls]
Homepage = "https://github.com/tlaplus-community/tree-sitter-tlaplus"

[project.optional-dependencies]
core = ["tree-sitter~=0.21"]

[tool.cibuildwheel]
build = "cp38-*"
build-frontend = "build"

A setup.py => setup.py +57 -0
@@ 0,0 1,57 @@
from os.path import isdir, join
from platform import system

from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from wheel.bdist_wheel import bdist_wheel


class Build(build):
    """Build step that bundles the query files with the package."""

    def run(self):
        # Copy the queries/ tree next to the compiled binding so the
        # .scm files ship inside the wheel.
        if isdir("queries"):
            dest = join(self.build_lib, "tree_sitter_tlaplus", "queries")
            self.copy_tree("queries", dest)
        super().run()


class BdistWheel(bdist_wheel):
    """Retags CPython wheels as cp38/abi3 (one wheel for Python >= 3.8)."""

    def get_tag(self):
        python, abi, platform = super().get_tag()
        if python.startswith("cp"):
            # Valid because the extension is built against the stable ABI
            # (Py_LIMITED_API = 0x03080000 below).
            python, abi = "cp38", "abi3"
        return python, abi, platform


setup(
    packages=find_packages("bindings/python"),
    package_dir={"": "bindings/python"},
    package_data={
        "tree_sitter_tlaplus": ["*.pyi", "py.typed"],
        "tree_sitter_tlaplus.queries": ["*.scm"],
    },
    ext_package="tree_sitter_tlaplus",
    ext_modules=[
        Extension(
            name="_binding",
            # Both the generated parser and the external scanner are
            # compiled into the extension.
            sources=[
                "bindings/python/tree_sitter_tlaplus/binding.c",
                "src/parser.c",
                "src/scanner.c",
            ],
            # -std=c11 is only passed to non-Windows compilers.
            extra_compile_args=(
                ["-std=c11"] if system() != 'Windows' else []
            ),
            define_macros=[
                ("Py_LIMITED_API", "0x03080000"),
                ("PY_SSIZE_T_CLEAN", None)
            ],
            include_dirs=["src"],
            py_limited_api=True,
        )
    ],
    cmdclass={
        "build": Build,
        "bdist_wheel": BdistWheel
    },
    zip_safe=False
)

M src/parser.c => src/parser.c +4 -3
@@ 1,7 1,6 @@
#include "tree_sitter/parser.h"

#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif



@@ 1005790,10 1005789,12 @@ unsigned tree_sitter_tlaplus_external_scanner_serialize(void *, char *);
void tree_sitter_tlaplus_external_scanner_deserialize(void *, const char *, unsigned);

#ifdef _WIN32
#define extern __declspec(dllexport)
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif

extern const TSLanguage *tree_sitter_tlaplus(void) {
TS_PUBLIC const TSLanguage *tree_sitter_tlaplus() {
  static const TSLanguage language = {
    .version = LANGUAGE_VERSION,
    .symbol_count = SYMBOL_COUNT,

R src/scanner.cc => src/scanner.c +460 -330
@@ 1,13 1,14 @@
#include <tree_sitter/parser.h>
#include <cassert>
#include <climits>
#include <cstring>
#include <cwctype>
#include <vector>
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"
#include "assert.h"
#include "limits.h"
#include "string.h"
#include "wctype.h"

/**
 * Macro; goes to the lexer state without consuming any codepoints.
 * 
 *
 * @param state_value The new lexer state.
 */
#define GO_TO_STATE(state_value)  \


@@ 18,7 19,7 @@

/**
 * Macro; marks the given lexeme as accepted.
 * 
 *
 * @param lexeme The lexeme to mark as accepted.
 */
#define ACCEPT_LEXEME(lexeme)       \


@@ 29,7 30,7 @@
/**
 * Macro; marks the given token as accepted without also marking the
 * current position as its end.
 * 
 *
 * @param token The token to mark as accepted.
 */
#define ACCEPT_LOOKAHEAD_TOKEN(token)     \


@@ 46,8 47,6 @@
    return result_lexeme; \
  }

namespace {

  // Tokens emitted by this external scanner.
  enum TokenType {
    LEADING_EXTRAMODULAR_TEXT,  // Freeform text at the start of the file.


@@ 70,31 69,34 @@ namespace {
  };

  // Datatype used to record length of nested proofs & jlists.
  using nest_address = int16_t;
  typedef int16_t nest_address;

  // Datatype used to record column index of jlists.
  using column_index = int16_t;
  typedef int16_t column_index;

  // Datatype used to record proof levels.
  using proof_level = int32_t;
  
  typedef int32_t proof_level;

  // A dynamic array of chars.
  typedef Array(char) CharArray;

  /**
   * Advances the scanner while marking the codepoint as non-whitespace.
   * 
   *
   * @param lexer The tree-sitter lexing control structure.
   */
  void advance(TSLexer* const lexer) {
  static void advance(TSLexer* const lexer) {
    lexer->advance(lexer, false);
  }

  /**
   * Checks whether the next codepoint is the one given.
   * 
   *
   * @param lexer The tree-sitter lexing control structure.
   * @param codepoint The codepoint to check.
   * @return Whether the next codepoint is the one given.
   */
  bool is_next_codepoint(
  static bool is_next_codepoint(
    const TSLexer* const lexer,
    int32_t const codepoint
  ) {


@@ 103,11 105,11 @@ namespace {

  /**
   * Checks whether there are any codepoints left in the string.
   * 
   *
   * @param lexer The tree-sitter lexing control structure.
   * @return Whether there are any codepoints left in the string.
   */
  bool has_next(const TSLexer* const lexer) {
  static bool has_next(const TSLexer* const lexer) {
    return !lexer->eof(lexer);
  }



@@ 115,22 117,22 @@ namespace {
   * Checks whether the given codepoint could be used in an identifier,
   * which consist of capital ASCII letters, lowercase ASCII letters,
   * and underscores.
   * 
   *
   * @param codepoint The codepoint to check.
   * @return Whether the given codepoint could be used in an identifier.
   */
  bool is_identifier_char(int32_t const codepoint) {
  static bool is_identifier_char(int32_t const codepoint) {
    return iswalnum(codepoint) || ('_' == codepoint);
  }

  /**
   * Consumes codepoints as long as they are the one given.
   * 
   *
   * @param lexer The tree-sitter lexing control structure.
   * @param codepoint The codepoint to consume.
   * @return The number of codepoints consumed.
   */
  void consume_codepoint(TSLexer* const lexer, const int32_t codepoint) {
  static void consume_codepoint(TSLexer* const lexer, const int32_t codepoint) {
    while (has_next(lexer) && is_next_codepoint(lexer, codepoint)) {
      advance(lexer);
    }


@@ 139,12 141,12 @@ namespace {
  /**
   * Checks whether the next codepoint sequence is the one given.
   * This function can change the state of the lexer.
   * 
   *
   * @param lexer The tree-sitter lexing control structure.
   * @param token The codepoint sequence to check for.
   * @return Whether the next codepoint sequence is the one given.
   */
  bool is_next_codepoint_sequence(
  static bool is_next_codepoint_sequence(
    TSLexer* const lexer,
    const char codepoint_sequence[]
  ) {


@@ 171,7 173,7 @@ namespace {
    EMTLexState_END_OF_FILE,
    EMTLexState_BLANK_BEFORE_END_OF_FILE
  };
  

  /**
   * Scans for extramodular text, the freeform text that can be present
   * outside of TLA⁺ modules. This function skips any leading whitespace


@@ 188,9 190,9 @@ namespace {
   * @param valid_symbols Tokens possibly expected in this spot.
   * @return Whether any extramodular text was detected.
   */
  bool scan_extramodular_text(TSLexer* const lexer, const bool* const valid_symbols) {
  static bool scan_extramodular_text(TSLexer* const lexer, const bool* const valid_symbols) {
    bool has_consumed_any = false;
    EMTLexState state = EMTLexState_CONSUME;
    enum EMTLexState state = EMTLexState_CONSUME;
    START_LEXER();
    eof = !has_next(lexer);
    switch (state) {


@@ 236,45 238,53 @@ namespace {
  enum ProofStepIdType {
    ProofStepIdType_STAR,     // <*>
    ProofStepIdType_PLUS,     // <+>
    ProofStepIdType_NUMBERED  // <1234>
    ProofStepIdType_NUMBERED, // <1234>
    ProofStepIdType_NONE      // Invalid or nonexistent
  };
  

  // Data about a proof step ID.
  struct ProofStepId {
    // The proof step ID type.
    ProofStepIdType type;
    
    enum ProofStepIdType type;

    // The proof step ID level (-1 if not NUMBERED).
    proof_level level;
    
  };

    /**
     * Initializes a new instance of the ProofStepId class.
     * 
     *
     * @param raw_level The unparsed contents of the <...> lexeme.
     */
    ProofStepId(const std::vector<char>& raw_level) {
      level = -1;
      if ('*' == raw_level.at(0)) {
        type = ProofStepIdType_STAR;
      } else if ('+' == raw_level.at(0)) {
        type = ProofStepIdType_PLUS;
    static struct ProofStepId parse_proof_step_id(const CharArray* raw_level) {
      struct ProofStepId id;
      id.level = -1;
      if (0 == raw_level->size) {
        id.type = ProofStepIdType_NONE;
      } else if ('*' == *array_get(raw_level, 0)) {
        id.type = ProofStepIdType_STAR;
      } else if ('+' == *array_get(raw_level, 0)) {
        id.type = ProofStepIdType_PLUS;
      } else {
        type = ProofStepIdType_NUMBERED;
        // We can't use std::stoi because it isn't included in the emcc
        // build so will cause errors; thus we roll our own. raw_level
        // should also be a std::string but that isn't included either.
        // level = std::stoi(raw_level);
        level = 0;
        id.type = ProofStepIdType_NUMBERED;
        id.level = 0;
        int32_t multiplier = 1;
        for (size_t i = 0; i < raw_level.size(); i++) {
          const size_t index = raw_level.size() - i - 1;
          const int8_t digit_value = raw_level.at(index) - 48;
          level += digit_value * multiplier;
          multiplier *= 10;
        for (size_t i = 0; i < raw_level->size; i++) {
          const size_t index = raw_level->size - i - 1;
          const int8_t digit_value = *array_get(raw_level, index) - 48;
          if (0 <= digit_value && digit_value <= 9) {
            id.level += digit_value * multiplier;
            multiplier *= 10;
          } else {
            id.type = ProofStepIdType_NONE;
            id.level = -1;
            break;
          }
        }
      }
      array_delete(raw_level);
      return id;
    }
  };

  // Lexemes recognized by this lexer.
  enum Lexeme {


@@ 376,30 386,30 @@ namespace {
    LexState_OTHER,
    LexState_END_OF_FILE
  };
  

  /**
   * Looks ahead to identify the next lexeme. Consumes all leading
   * whitespace. Out parameters include column of first non-whitespace
   * codepoint and the level of the proof step ID lexeme if encountered.
   *
   * @param lexer The tree-sitter lexing control structure.
   * @param lexeme_start_col The starting column of the first lexeme. 
   * @param lexeme_start_col The starting column of the first lexeme.
   * @param proof_step_id_level The level of the proof step ID.
   * @return The lexeme encountered.
   */
  Lexeme lex_lookahead(
  static enum Lexeme lex_lookahead(
    TSLexer* const lexer,
    column_index& lexeme_start_col,
    std::vector<char>& proof_step_id_level
    column_index* lexeme_start_col,
    CharArray* proof_step_id_level
  ) {
    LexState state = LexState_CONSUME_LEADING_SPACE;
    Lexeme result_lexeme = Lexeme_OTHER;
    enum LexState state = LexState_CONSUME_LEADING_SPACE;
    enum Lexeme result_lexeme = Lexeme_OTHER;
    START_LEXER();
    eof = !has_next(lexer);
    switch (state) {
      case LexState_CONSUME_LEADING_SPACE:
        if (iswspace(lookahead)) SKIP(LexState_CONSUME_LEADING_SPACE);
        lexeme_start_col = lexer->get_column(lexer);
        *lexeme_start_col = lexer->get_column(lexer);
        lexer->mark_end(lexer);
        if (eof) ADVANCE(LexState_END_OF_FILE);
        if ('/' == lookahead) ADVANCE(LexState_FORWARD_SLASH);


@@ 429,14 439,14 @@ namespace {
        if ('T' == lookahead) ADVANCE(LexState_T);
        if ('V' == lookahead) ADVANCE(LexState_V);
        if ('W' == lookahead) ADVANCE(LexState_W);
        if (L'∧' == lookahead) ADVANCE(LexState_LAND);
        if (L'∨' == lookahead) ADVANCE(LexState_LOR);
        if (L'〉' == lookahead) ADVANCE(LexState_R_ANGLE_BRACKET);
        if (L'⟩' == lookahead) ADVANCE(LexState_R_ANGLE_BRACKET);
        if (L'⟶' == lookahead) ADVANCE(LexState_RIGHT_ARROW);
        if (L'→' == lookahead) ADVANCE(LexState_RIGHT_ARROW);
        if (L'⟼' == lookahead) ADVANCE(LexState_RIGHT_MAP_ARROW);
        if (L'↦' == lookahead) ADVANCE(LexState_RIGHT_MAP_ARROW);
        if (L'\u2227' == lookahead) ADVANCE(LexState_LAND); // '∧'
        if (L'\u2228' == lookahead) ADVANCE(LexState_LOR); // '∨'
        if (L'\u3009' == lookahead) ADVANCE(LexState_R_ANGLE_BRACKET); // '〉'
        if (L'\u27E9' == lookahead) ADVANCE(LexState_R_ANGLE_BRACKET); // '⟩'
        if (L'\u27F6' == lookahead) ADVANCE(LexState_RIGHT_ARROW); // '⟶'
        if (L'\u2192' == lookahead) ADVANCE(LexState_RIGHT_ARROW); // '→'
        if (L'\u27FC' == lookahead) ADVANCE(LexState_RIGHT_MAP_ARROW); // '⟼'
        if (L'\u21A6' == lookahead) ADVANCE(LexState_RIGHT_MAP_ARROW); // '↦'
        ADVANCE(LexState_OTHER);
        END_LEX_STATE();
      case LexState_FORWARD_SLASH:


@@ 449,7 459,7 @@ namespace {
        if ('*' == lookahead) ADVANCE(LexState_COMMENT_START);
        END_LEX_STATE();
      case LexState_LT:
        proof_step_id_level.push_back(static_cast<char>(lookahead & CHAR_MAX));
        array_push(proof_step_id_level, (char)(lookahead & CHAR_MAX));
        if (iswdigit(lookahead)) ADVANCE(LexState_PROOF_LEVEL_NUMBER);
        if ('*' == lookahead) ADVANCE(LexState_PROOF_LEVEL_STAR);
        if ('+' == lookahead) ADVANCE(LexState_PROOF_LEVEL_PLUS);


@@ 727,7 737,7 @@ namespace {
        END_LEX_STATE();
      case LexState_PROOF_LEVEL_NUMBER:
        if (iswdigit(lookahead)) {
          proof_step_id_level.push_back(static_cast<char>(lookahead & CHAR_MAX));
          array_push(proof_step_id_level, (char)(lookahead & CHAR_MAX));
          ADVANCE(LexState_PROOF_LEVEL_NUMBER);
        }
        if ('>' == lookahead) ADVANCE(LexState_PROOF_NAME);


@@ 764,7 774,7 @@ namespace {
        END_LEX_STATE();
    }
  }
  

  // Tokens recognized by this scanner.
  enum Token {
    Token_LAND,


@@ 789,7 799,7 @@ namespace {
   * @param lexeme The lexeme to map to a token.
   * @return The token corresponding to the given lexeme.
   */
  Token tokenize_lexeme(Lexeme lexeme) {
  static enum Token tokenize_lexeme(enum Lexeme lexeme) {
    switch (lexeme) {
      case Lexeme_FORWARD_SLASH: return Token_OTHER;
      case Lexeme_BACKWARD_SLASH: return Token_OTHER;


@@ 841,7 851,7 @@ namespace {
      default: return Token_OTHER;
    }
  }
    

  // Possible types of junction list.
  enum JunctType {
    JunctType_CONJUNCTION,


@@ 852,148 862,152 @@ namespace {
  struct JunctList {

    // The type of jlist.
    JunctType type;
    enum JunctType type;

    // The starting alignment columnt of the jlist.
    column_index alignment_column;
    
    JunctList() { }
  };

    JunctList(JunctType const type, column_index const alignment_column) {
      this->type = type;
      this->alignment_column = alignment_column;
    static struct JunctList create_junctlist(enum JunctType const type, column_index const alignment_column) {
      struct JunctList jlist;
      jlist.type = type;
      jlist.alignment_column = alignment_column;
      return jlist;
    }

    unsigned serialize(char* const buffer, bool const is_dry_run) {
    static unsigned jlist_serialize(struct JunctList* this, char* const buffer, bool const is_dry_run) {
      unsigned offset = 0;
      unsigned copied = 0;

      // Serialize junction type
      copied = sizeof(uint8_t);
      if (!is_dry_run) { buffer[offset] = static_cast<uint8_t>(type); }
      if (!is_dry_run) { buffer[offset] = (uint8_t)(this->type); }
      offset += copied;

      // Serialize alignment column
      copied = sizeof(column_index);
      if (!is_dry_run) { memcpy(&buffer[offset], (char*)&alignment_column, copied); }
      if (!is_dry_run) { memcpy(&buffer[offset], &this->alignment_column, copied); }
      offset += copied;

      return offset;
    }

    unsigned deserialize(const char* const buffer) {
    static unsigned jlist_deserialize(struct JunctList* this, const char* const buffer) {
      unsigned offset = 0;
      unsigned copied = 0;

      // Deserialize junction type
      copied = sizeof(uint8_t);
      type = JunctType(buffer[offset]);
      this->type = (enum JunctType)buffer[offset];
      offset += copied;

      // Deserialize alignment column
      copied = sizeof(column_index);
      memcpy((char*)&alignment_column, &buffer[offset], copied);
      memcpy(&this->alignment_column, &buffer[offset], copied);
      offset += copied;
      

      return offset;
    }
  };
  

  /**
   * A stateful scanner used to parse junction lists and proofs.
   */
  struct Scanner {

    // The nested junction lists at the current lexer position.
    std::vector<JunctList> jlists;
    Array(struct JunctList) jlists;

    // The nested proofs at the current lexer position.
    std::vector<proof_level> proofs;
    Array(proof_level) proofs;

    // The level of the last proof.
    proof_level last_proof_level;

    // Whether we have seen a PROOF token.
    bool have_seen_proof_keyword;

    /**
     * Initializes a new instance of the Scanner object.
     */
    Scanner() {
      deserialize(NULL, 0);
    }
    
    /**
     * Calculates the serialized size of the scanner.
     */
    unsigned serialized_size() {
      return serialize(NULL, true);
    }
    
    /**
     * Serializes the Scanner state into the given buffer.
     *
     * @param buffer The buffer into which to serialize the scanner state.
     * @return Number of bytes written into the buffer.
     */
    unsigned serialize(char* const buffer) {
      return serialize(buffer, false);
    }
};

    /**
     * Serializes the Scanner state into the given buffer.
     *
     * @param this The Scanner state to serialize.
     * @param buffer The buffer into which to serialize the scanner state.
     * @param is_dry_run Whether to actually copy the bytes to the buffer.
     * @return Number of bytes written into the buffer.
     */
    unsigned serialize(char* const buffer, const bool is_dry_run) {
    static unsigned scanner_try_serialize(
      const struct Scanner* const this,
      char* const buffer,
      const bool is_dry_run
    ) {
      unsigned offset = 0;
      unsigned copied = 0;

      const nest_address jlist_depth = static_cast<nest_address>(jlists.size());
      const nest_address jlist_depth = (nest_address)(this->jlists.size);
      copied = sizeof(nest_address);
      if (!is_dry_run) { memcpy(&buffer[offset], &jlist_depth, copied); }
      offset += copied;
      for (nest_address i = 0; i < jlist_depth; i++) {
        char* const buffer_addr = is_dry_run ? NULL : &buffer[offset];
        copied = jlists[i].serialize(buffer_addr, is_dry_run);
        copied = jlist_serialize(array_get(&this->jlists, i), buffer_addr, is_dry_run);
        offset += copied;
      }
      
      const nest_address proof_depth = static_cast<nest_address>(proofs.size());

      const nest_address proof_depth = (nest_address)(this->proofs.size);
      copied = sizeof(nest_address);
      if (!is_dry_run) { memcpy(&buffer[offset], &proof_depth, copied); }
      offset += copied;
      copied = proof_depth * sizeof(proof_level);
      if (!is_dry_run && copied > 0) { memcpy(&buffer[offset], proofs.data(), copied); }
      if (!is_dry_run && copied > 0) { memcpy(&buffer[offset], this->proofs.contents, copied); }
      offset += copied;
      

      copied = sizeof(proof_level);
      if (!is_dry_run) { memcpy(&buffer[offset], &last_proof_level, copied); }
      if (!is_dry_run) { memcpy(&buffer[offset], &this->last_proof_level, copied); }
      offset += copied;
      

      copied = sizeof(uint8_t);
      if (!is_dry_run) { buffer[offset] = static_cast<uint8_t>(have_seen_proof_keyword); }
      if (!is_dry_run) { buffer[offset] = (uint8_t)(this->have_seen_proof_keyword); }
      offset += copied;

      return offset;
    }

    /**
     * Serializes the Scanner state into the given buffer.
     *
     * @param this The Scanner state to serialize.
     * @param buffer The buffer into which to serialize the scanner state.
     * @return Number of bytes written into the buffer.
     */
    static unsigned scanner_serialize(const struct Scanner* this, char* buffer) {
      return scanner_try_serialize(this, buffer, false);
    }

    /**
     * Calculates the serialized size of the scanner.
     *
     * @param this The Scanner state to find the deserialized size of.
     * @return The size of the Scanner when serialized.
     */
    static unsigned scanner_serialized_size(const struct Scanner* const this) {
      return scanner_try_serialize(this, NULL, true);
    }

    /**
     * Deserializes the Scanner state from the given buffer.
     * 
     *
     * @param this The Scanner state to deserialize.
     * @param buffer The buffer from which to deserialize the state.
     * @param length The bytes available to read from the buffer.
     */
    void deserialize(const char* const buffer, unsigned const length) {
    static void scanner_deserialize(struct Scanner* const this, const char* const buffer, unsigned const length) {
      // Very important to clear values of all fields here!
      // Scanner object is reused; if a variable isn't cleared, it can
      // lead to extremely strange & impossible-to-debug behavior.
      jlists.clear();
      proofs.clear();
      last_proof_level = -1;
      have_seen_proof_keyword = false;
      array_delete(&this->jlists);
      array_delete(&this->proofs);
      this->last_proof_level = -1;
      this->have_seen_proof_keyword = false;

      if (length > 0) {
        unsigned offset = 0;


@@ 1002,82 1016,114 @@ namespace {
        nest_address jlist_depth = 0;
        copied = sizeof(nest_address);
        memcpy(&jlist_depth, &buffer[offset], copied);
        jlists.resize(jlist_depth);
        if (jlist_depth > 0) array_grow_by(&(this->jlists), jlist_depth);
        offset += copied;

        for (nest_address i = 0; i < jlist_depth; i++) {
          assert(offset < length);
          copied = jlists[i].deserialize(&buffer[offset]);
          copied = jlist_deserialize(array_get(&(this->jlists), i), &buffer[offset]);
          offset += copied;
        }
      

        nest_address proof_depth = 0;
        copied = sizeof(nest_address);
        memcpy(&proof_depth, &buffer[offset], copied);
        proofs.resize(proof_depth);
        if (proof_depth > 0) array_grow_by(&(this->proofs), proof_depth);
        offset += copied;

        copied = proof_depth * sizeof(proof_level);
        if (copied > 0) { memcpy(proofs.data(), &buffer[offset], copied); }
        if (copied > 0) { memcpy(this->proofs.contents, &buffer[offset], copied); }
        offset += copied;
        

        copied = sizeof(proof_level);
        memcpy(&last_proof_level, &buffer[offset], copied);
        memcpy(&(this->last_proof_level), &buffer[offset], copied);
        offset += copied;
        

        copied = sizeof(uint8_t);
        have_seen_proof_keyword = static_cast<bool>(buffer[offset] & 1);
        this->have_seen_proof_keyword = (bool)(buffer[offset] & 1);
        offset += copied;
 

        assert(offset == length);
      }
    }

    /**
     * Initializes a new instance of the Scanner object.
     *
     * @return A newly-created Scanner.
     */
    static struct Scanner scanner_create() {
      struct Scanner s;
      array_init(&s.jlists);
      array_init(&s.proofs);
      s.last_proof_level = -1;
      s.have_seen_proof_keyword = false;
      return s;
    }

    /**
     * Frees all memory associated with this Scanner.
     *
     * @param this The Scanner to free.
     */
    static void scanner_free(struct Scanner* const this) {
      array_delete(&this->jlists);
      array_delete(&this->proofs);
    }

    /**
     * Whether the Scanner state indicates we are currently in a jlist.
     * 
     *
     * @param this The Scanner state.
     * @return Whether we are in a jlist.
     */
    bool is_in_jlist() const {
      return !jlists.empty();
    static bool is_in_jlist(const struct Scanner* const this) {
      return this->jlists.size > 0;
    }

    /**
     * The column index of the current jlist. Returns negative number if
     * we are not currently in a jlist.
     * 
     *
     * @param this The Scanner state.
     * @return The column index of the current jlist.
     */
    column_index get_current_jlist_column_index() const {
      return is_in_jlist() ? this->jlists.back().alignment_column : -1;
    static column_index get_current_jlist_column_index(const struct Scanner* const this) {
      return is_in_jlist(this) ? array_back(&this->jlists)->alignment_column : -1;
    }

    /**
     * Whether the given jlist type matches the current jlist.
     * 
     *
     * @param this The Scanner state.
     * @param type The jlist type to check.
     * @return Whether the given jlist type matches the current jlist.
     */
    bool current_jlist_type_is(JunctType const type) const {
      return is_in_jlist() && type == this->jlists.back().type;
    static bool current_jlist_type_is(
      struct Scanner* const this,
      enum JunctType const type
    ) {
      return is_in_jlist(this) && type == array_back(&this->jlists)->type;
    }

    /**
     * Emits an INDENT token, recording the new jlist in the Scanner state.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param type The type of the new jlist.
     * @param col The column position of the new jlist.
     * @return Whether an INDENT token was emitted.
     */
    bool emit_indent(
    static bool emit_indent(
      struct Scanner* const this,
      TSLexer* const lexer,
      JunctType const type,
      enum JunctType const type,
      column_index const col
    ) {
      lexer->result_symbol = INDENT;
      JunctList new_list(type, col);
      this->jlists.push_back(new_list);
      struct JunctList new_list = create_junctlist(type, col);
      array_push(&this->jlists, new_list);
      return true;
    }



@@ 1089,21 1135,22 @@ namespace {
     * @param type The type of junction list.
     * @return Whether a BULLET token was emitted.
     */
    bool emit_bullet(TSLexer* const lexer, const JunctType type) {
    static bool emit_bullet(TSLexer* const lexer, enum JunctType type) {
      lexer->result_symbol = BULLET;
      return true;
    }

    /**
     * Emits a DEDENT token, removing a jlist from the Scanner state.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @return Whether a DEDENT token was emitted.
     */
    bool emit_dedent(TSLexer* const lexer) {
      if (is_in_jlist()) {
    static bool emit_dedent(struct Scanner* const this, TSLexer* const lexer) {
      if (is_in_jlist(this)) {
        lexer->result_symbol = DEDENT;
        this->jlists.pop_back();
        array_pop(&this->jlists);
        return true;
      } else {
        return false;


@@ 1128,64 1175,66 @@ namespace {
     *    -> this is an infix operator that also ends the current list
     * 5. The junct is prior to the cpos of the current jlist
     *    -> this ends the current jlist, emit DEDENT token
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @param type The type of junction encountered.
     * @param next The column position of the junct token encountered.
     * @return Whether a jlist-relevant token should be emitted.
     */
    bool handle_junct_token(
    static bool handle_junct_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols,
      JunctType const next_type,
      enum JunctType const next_type,
      column_index const next_col
    ) {
      const column_index current_col = get_current_jlist_column_index();
      const column_index current_col = get_current_jlist_column_index(this);
      if (current_col < next_col) {
        if (valid_symbols[INDENT]) {
          /**
           * The start of a new junction list!
           */
          return emit_indent(lexer, next_type, next_col);
          return emit_indent(this, lexer, next_type, next_col);
        } else {
          /**
           * This is an infix junction symbol. Tree-sitter will only look for
           * a new jlist at the start of an expression rule; infix operators
           * occur when joining two expression rules together, so tree-sitter
           * is only looking for either BULLET or DEDENT rules. Examples:
           * 
           *
           *   /\ a /\ b
           *       ^ tree-sitter will NEVER look for an INDENT here
           * 
           *
           *   /\ a
           *   /\ b
           *  ^ tree-sitter WILL look for a BULLET here
           * 
           *
           *   /\ /\ a
           *     ^ tree-sitter WILL look for an INDENT here
           */
          return false;
        }
      } else if (current_col == next_col) {
        if (current_jlist_type_is(next_type)) {
        if (current_jlist_type_is(this, next_type)) {
          /**
           * This is another entry in the jlist.
           */
          return emit_bullet(lexer, next_type);
        } else {
          /** 
          /**
           * Disjunct in alignment with conjunct list or vice-versa; treat
           * this as an infix operator by terminating the current list.
           */
          return emit_dedent(lexer);
          return emit_dedent(this, lexer);
        }
      } else {
        /**
         * Junct found prior to the alignment column of the current jlist.
         * This marks the end of the jlist.
         */
        return emit_dedent(lexer);
        return emit_dedent(this, lexer);
      }
    }



@@ 1196,7 1245,7 @@ namespace {
     * (), [], <<>>, and {}; it also includes IF/THEN, THEN/ELSE, CASE/->,
     * and basically every other language construct where an expression is
     * squeezed between a known start & end token.
     * 
     *
     * Previously I implemented complicated logic using a stack to keep
     * track of all the delimiters that have been seen (and their
     * pairs) but found that tree-sitter would never trigger the


@@ 1205,13 1254,13 @@ namespace {
     * we can assume that when we *do* see a right delimiter, it
     * matches a left delimiter that occurred prior to the start of the
     * jlist, so we can emit a DEDENT token to end the jlist. Example:
     * 
     *
     *    /\ ( a + b )
     *              ^ tree-sitter will never look for an INDENT,
     *                BULLET, or DEDENT token here; it is only
     *                looking for another infix operator or the
     *                right-delimiter.
     * 
     *
     *    ( /\ a + b )
     *              ^ tree-sitter WILL look for an INDENT, BULLET, or
     *                DEDENT token here in addition to looking for an


@@ 1219,32 1268,32 @@ namespace {
     *                token before seeing the right delimiter, although
     *                error recovery is simple enough that it would
     *                barely notice its absence.
     * 
     *
     * There are a few notable exceptions to this rule; for example, the
     * empty set or empty sequence:
     * 
     *
     *    /\  { }
     *         ^
     *    /\ << >>
     *         ^ there is the option for an expression here, so tree-sitter
     *           looks for INDENT tokens and we will see a right delimiter
     *           in this external scanner.
     * 
     *
     * Another example when the code is in a non-parseable state which we
     * nonetheless wish to handle gracefully:
     * 
     *
     *    /\ [x \in S |-> ]
     *                   ^ user is about to write an expression here, but
     *                     there is a time when the code is non-parseable;
     *                     tree-sitter will again look for an INDENT token
     *                     and we will see a right delimiter in this
     *                     external scanner.
     * 
     *
     * The easy solution to these cases is to simply check whether
     * tree-sitter is looking for a DEDENT token. If so, emit one; if not,
     * emit nothing. Tree-sitter will not look for a DEDENT token inside
     * enclosing delimiters within the scope of a jlist.
     * 
     *
     * One side-effect of all this is that tree-sitter parses certain
     * arrangements of jlists and delimiters that are actually illegal
     * according to TLA⁺ syntax rules; that is okay since tree-sitter's


@@ 1252,26 1301,27 @@ namespace {
     * errs on the side of being overly-permissive. For a concrete
     * example here, tree-sitter will parse this illegal expression
     * without complaint:
     * 
     *
     *    /\ A
     *    /\ (B + C
     *  )
     *    /\ D
     * 
     *
     * This should simply be detected as an error at the semantic level.
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @return Whether a jlist-relevant token should be emitted.
     */
    bool handle_right_delimiter_token(
    static bool handle_right_delimiter_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols
    ) {
      return is_in_jlist()
      return is_in_jlist(this)
        && valid_symbols[DEDENT]
        && !this->jlists.empty()
        && emit_dedent(lexer);
        && emit_dedent(this, lexer);
    }

    /**


@@ 1283,13 1333,17 @@ namespace {
     * 3. End-of-file (this shouldn't happen but we will end the jlist to
     *    improve error reporting since the end-of-module token is missing)
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @return Whether a jlist-relevant token should be emitted.
     */
    bool handle_terminator_token(TSLexer* const lexer) {
      return is_in_jlist() && emit_dedent(lexer);
    static bool handle_terminator_token(
      struct Scanner* const this,
      TSLexer* const lexer
    ) {
      return is_in_jlist(this) && emit_dedent(this, lexer);
    }
    

    /**
     * Non-junct tokens could possibly indicate the end of a jlist. Rules:
     * - If the token cpos is leq to the current jlist cpos, the jlist


@@ 1304,66 1358,80 @@ namespace {
     *              ELSE Q
     *      /\ R
     *   so emit no token.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param next The column position of the encountered token.
     * @return Whether a jlist-relevant token should be emitted.
     */
    bool handle_other_token(
    static bool handle_other_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      column_index const next
    ) {
      return is_in_jlist()
        && next <= get_current_jlist_column_index()
        && emit_dedent(lexer);
      return is_in_jlist(this)
        && next <= get_current_jlist_column_index(this)
        && emit_dedent(this, lexer);
    }
    

    /**
     * Gets whether we are currently in a proof.
     *
     * @param this The Scanner state.
     * @return Whether we are currently in a proof.
     */
    bool is_in_proof() const {
      return !proofs.empty();
    static bool is_in_proof(const struct Scanner* const this) {
      return this->proofs.size > 0;
    }
    

    /**
     * Gets the current proof level; -1 if none.
     * 
     *
     * @param this The Scanner state.
     * @return The current proof level.
     */
    proof_level get_current_proof_level() const {
      return is_in_proof() ? proofs.back() : -1;
    static proof_level get_current_proof_level(const struct Scanner* const this) {
      return is_in_proof(this) ? *array_back(&this->proofs) : -1;
    }

    /**
     * Emits a token indicating the start of a new proof.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param level The level of the new proof.
     * @return Whether a token should be emitted.
     */
    bool emit_begin_proof(TSLexer* const lexer, proof_level level) {
    static bool emit_begin_proof(
      struct Scanner* const this,
      TSLexer* const lexer,
      proof_level level
    ) {
      lexer->result_symbol = BEGIN_PROOF;
      proofs.push_back(level);
      last_proof_level = level;
      have_seen_proof_keyword = false;
      array_push(&this->proofs, level);
      this->last_proof_level = level;
      this->have_seen_proof_keyword = false;
      return true;
    }
    

    /**
     * Emits a token indicating the start of a new proof step.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param level The level of the new proof step.
     * @return Whether a token should be emitted.
     */
    bool emit_begin_proof_step(TSLexer* const lexer, proof_level level) {
      last_proof_level = level;
    static bool emit_begin_proof_step(
      struct Scanner* const this,
      TSLexer* const lexer,
      proof_level level
    ) {
      this->last_proof_level = level;
      lexer->result_symbol = BEGIN_PROOF_STEP;
      return true;
    }
    

    /**
     * Handle encountering a new proof step ID. This probably marks the
     * beginning of a new proof step, but could also be a reference to a


@@ 1388,7 1456,7 @@ namespace {
     *    -> This is another proof step; emit BEGIN_PROOF_STEP token.
     * 3. The new proof token level is less than the current level
     *    -> This is an error, which we will try to recover from.
     * 
     *
     * There are also rules to handle proof step IDs where the level is
     * inferred, like <+> and <*>. They are as follows:
     * 1. The proof step ID is <+>


@@ 1407,22 1475,24 @@ namespace {
     *
     * Proofs are ended upon encountering a QED step, which is handled
     * elsewhere.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @param next The column position of the encountered token.
     * @return Whether a token should be emitted.
     */
    bool handle_proof_step_id_token(
    static bool handle_proof_step_id_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols,
      column_index const next,
      const std::vector<char>& proof_step_id_level
      struct ProofStepId proof_step_id_token
    ) {
      ProofStepId proof_step_id_token(proof_step_id_level);
      assert(ProofStepIdType_NONE != proof_step_id_token.type);
      if (valid_symbols[BEGIN_PROOF] || valid_symbols[BEGIN_PROOF_STEP]) {
        proof_level next_proof_level = -1;
        const proof_level current_proof_level = get_current_proof_level();
        const proof_level current_proof_level = get_current_proof_level(this);
        switch (proof_step_id_token.type) {
          case ProofStepIdType_STAR:
            /**


@@ 1430,8 1500,8 @@ namespace {
             * or directly following a PROOF keyword.
             */
            next_proof_level =
              !is_in_proof() || have_seen_proof_keyword
              ? last_proof_level + 1
              !is_in_proof(this) || this->have_seen_proof_keyword
              ? this->last_proof_level + 1
              : current_proof_level;
            break;
          case ProofStepIdType_PLUS:


@@ 1443,7 1513,7 @@ namespace {
             * BEGIN_PROOF_STEP token.
             */
            next_proof_level = valid_symbols[BEGIN_PROOF]
              ? last_proof_level + 1
              ? this->last_proof_level + 1
              : current_proof_level;
            break;
          case ProofStepIdType_NUMBERED:


@@ 1454,16 1524,16 @@ namespace {
        }

        if (next_proof_level > current_proof_level) {
          return emit_begin_proof(lexer, next_proof_level);
          return emit_begin_proof(this, lexer, next_proof_level);
        } else if (next_proof_level == current_proof_level) {
          if (have_seen_proof_keyword) {
          if (this->have_seen_proof_keyword) {
            // This has been declared a new proof using the PROOF keyword
            // but does not have a level greater than the old; thus we've
            // detected a syntax error.
            // TODO: handle this.
            return false;
          } else {
            return emit_begin_proof_step(lexer, next_proof_level);
            return emit_begin_proof_step(this, lexer, next_proof_level);
          }
        } else {
          // The next proof level is lower than the current. This is


@@ 1474,10 1544,10 @@ namespace {
      } else {
        if (valid_symbols[DEDENT]) {
          // End all jlists before start of proof.
          return handle_terminator_token(lexer);
          return handle_terminator_token(this, lexer);
        } else {
          // This is a reference to a proof step in an expression.
          return handle_other_token(lexer, next);
          return handle_other_token(this, lexer, next);
        }
      }
    }


@@ 1486,51 1556,55 @@ namespace {
     * Handles the PROOF keyword token. We record that we've seen the
     * PROOF keyword, which modifies the interpretation of the subsequent
     * proof step ID. The PROOF token also terminates any current jlist.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @return Whether a token should be emitted.
     */
    bool handle_proof_keyword_token(
    static bool handle_proof_keyword_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols
    ) {
      if (valid_symbols[PROOF_KEYWORD]) {
        have_seen_proof_keyword = true;
        this->have_seen_proof_keyword = true;
        lexer->result_symbol = PROOF_KEYWORD;
        lexer->mark_end(lexer);
        return true;
      } else {
        return handle_terminator_token(lexer);
        return handle_terminator_token(this, lexer);
      }
    }
    

    /**
     * Handles the BY, OBVIOUS, and OMITTED keyword tokens. We record
     * that we've seen the keyword, which negates any PROOF keyword
     * previously encountered. These tokens also terminate any current
     * jlist.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @param keyword_type The specific keyword being handled.
     * @return Whether a token should be emitted.
     */
    bool handle_terminal_proof_keyword_token(
    static bool handle_terminal_proof_keyword_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols,
      TokenType keyword_type
      enum TokenType keyword_type
    ) {
      if (valid_symbols[keyword_type]) {
        have_seen_proof_keyword = false;
        this->have_seen_proof_keyword = false;
        lexer->result_symbol = keyword_type;
        lexer->mark_end(lexer);
        return true;
      } else {
        return handle_terminator_token(lexer);
        return handle_terminator_token(this, lexer);
      }
    }
    

    /**
     * Handles the QED keyword token. The QED token indicates this is the
     * final step of a proof, so we modify the state accordingly. First


@@ 1545,41 1619,45 @@ namespace {
     * returned to help the error recovery process. Not performing this
     * check previously led to a segfault; see:
     * https://github.com/tlaplus-community/tree-sitter-tlaplus/issues/60
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @return Whether a token should be emitted.
     */
    bool handle_qed_keyword_token(
    static bool handle_qed_keyword_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols
    ) {
      if (is_in_proof()) {
        last_proof_level = get_current_proof_level();
        proofs.pop_back();
      if (is_in_proof(this)) {
        this->last_proof_level = get_current_proof_level(this);
        array_pop(&this->proofs);
      }

      lexer->result_symbol = QED_KEYWORD;
      lexer->mark_end(lexer);
      return true;
    }
    

    /**
     * Handles the fairness tokens WF_ and SF_.
     * Need to handle this in an external scanner due to:
     * https://github.com/tree-sitter/tree-sitter/issues/1615
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param next The column position of the encountered token.
     * @param keyword_type The specific keyword being handled.
     * @return Whether a token should be emitted.
     */
    bool handle_fairness_keyword_token(
    static bool handle_fairness_keyword_token(
      struct Scanner* const this,
      TSLexer* const lexer,
      column_index const next,
      TokenType keyword_type
      enum TokenType keyword_type
    ) {
      if (handle_other_token(lexer, next)) {
      if (handle_other_token(this, lexer, next)) {
        return true;
      } else {
        lexer->result_symbol = keyword_type;


@@ 1587,15 1665,20 @@ namespace {
        return true;
      }
    }
    

    /**
     * Scans for various possible external tokens.
     * 
     *
     * @param this The Scanner state.
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @return Whether a token was encountered.
     */
    bool scan(TSLexer* const lexer, const bool* const valid_symbols) {
    static bool scan(
      struct Scanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols
    ) {
      // All symbols are marked as valid during error recovery.
      // We can check for this by looking at the validity of the final
      // (unused) external symbol, ERROR_SENTINEL.


@@ 1611,43 1694,45 @@ namespace {
        return scan_extramodular_text(lexer, valid_symbols);
      } else {
        column_index col = -1;
        std::vector<char> proof_step_id_level;
        switch (tokenize_lexeme(lex_lookahead(lexer, col, proof_step_id_level))) {
        CharArray proof_step_id_level = array_new();
        enum Token token = tokenize_lexeme(lex_lookahead(lexer, &col, &proof_step_id_level));
        struct ProofStepId proof_step_id_token = parse_proof_step_id(&proof_step_id_level);
        array_delete(&proof_step_id_level);
        switch (token) {
          case Token_LAND:
            return handle_junct_token(lexer, valid_symbols, JunctType_CONJUNCTION, col);
            return handle_junct_token(this, lexer, valid_symbols, JunctType_CONJUNCTION, col);
          case Token_LOR:
            return handle_junct_token(lexer, valid_symbols, JunctType_DISJUNCTION, col);
            return handle_junct_token(this, lexer, valid_symbols, JunctType_DISJUNCTION, col);
          case Token_RIGHT_DELIMITER:
            return handle_right_delimiter_token(lexer, valid_symbols);
            return handle_right_delimiter_token(this, lexer, valid_symbols);
          case Token_COMMENT_START:
            return false;
          case Token_TERMINATOR:
            return handle_terminator_token(lexer);
            return handle_terminator_token(this, lexer);
          case Token_PROOF_STEP_ID:
            return handle_proof_step_id_token(lexer, valid_symbols, col, proof_step_id_level);
            return handle_proof_step_id_token(this, lexer, valid_symbols, col, proof_step_id_token);
          case Token_PROOF_KEYWORD:
            return handle_proof_keyword_token(lexer, valid_symbols);
            return handle_proof_keyword_token(this, lexer, valid_symbols);
          case Token_BY_KEYWORD:
            return handle_terminal_proof_keyword_token(lexer, valid_symbols, BY_KEYWORD);
            return handle_terminal_proof_keyword_token(this, lexer, valid_symbols, BY_KEYWORD);
          case Token_OBVIOUS_KEYWORD:
            return handle_terminal_proof_keyword_token(lexer, valid_symbols, OBVIOUS_KEYWORD);
            return handle_terminal_proof_keyword_token(this, lexer, valid_symbols, OBVIOUS_KEYWORD);
          case Token_OMITTED_KEYWORD:
            return handle_terminal_proof_keyword_token(lexer, valid_symbols, OMITTED_KEYWORD);
            return handle_terminal_proof_keyword_token(this, lexer, valid_symbols, OMITTED_KEYWORD);
          case Token_QED_KEYWORD:
            return handle_qed_keyword_token(lexer, valid_symbols);
            return handle_qed_keyword_token(this, lexer, valid_symbols);
          case Token_WEAK_FAIRNESS:
            return handle_fairness_keyword_token(lexer, col, WEAK_FAIRNESS);
            return handle_fairness_keyword_token(this, lexer, col, WEAK_FAIRNESS);
          case Token_STRONG_FAIRNESS:
            return handle_fairness_keyword_token(lexer, col, STRONG_FAIRNESS);
            return handle_fairness_keyword_token(this, lexer, col, STRONG_FAIRNESS);
          case Token_OTHER:
            return handle_other_token(lexer, col);
            return handle_other_token(this, lexer, col);
          default:
            return false;
        }
      }
    }
  };
  

  /**
   * A hierarchy of nested stateful scanners.
   * Each time a PlusCal block is entered, a nested context is created.


@@ 1657,62 1742,81 @@ namespace {
  struct NestedScanner {

    // The enclosing context(s) of the PlusCal block.
    std::vector< std::vector<char> > enclosing_contexts;
    
    Array(CharArray) enclosing_contexts;

    // The currently-active context.
    Scanner current_context;
    struct Scanner current_context;

  };

    /**
     * Initializes a new instance of the NestedScanner object.
     * Serialize the nested scanner into a buffer.
     *
     * @param this The NestedScanner state.
     * @param buffer The buffer to serialize the state into.
     * @return The number of bytes written into the buffer.
     */
    NestedScanner() {
      this->deserialize(NULL, 0);
    }
    
    unsigned serialize(char* const buffer) {
    static unsigned nested_scanner_serialize(
      const struct NestedScanner* const this,
      char* const buffer
    ) {
      unsigned offset = 0;
      unsigned copied = 0;

      // First write number of enclosing contexts (guaranteed to be >= 1)
      nest_address const context_depth = this->enclosing_contexts.size() + 1;
      nest_address const context_depth = this->enclosing_contexts.size + 1;
      copied = sizeof(nest_address);
      if (copied > 0) memcpy(&buffer[offset], &context_depth, copied);
      offset += copied;
      

      // Then write size of N-1 enclosing contexts
      for (int i = 0; i < context_depth - 1; i++) {
        unsigned const context_size = this->enclosing_contexts[i].size();
        unsigned const context_size = array_get(&this->enclosing_contexts, i)->size;
        copied = sizeof(unsigned);
        if (copied > 0) memcpy(&buffer[offset], &context_size, copied);
        offset += copied;
      }
      

      // Reserve space for current context size
      unsigned const current_context_size_offset = offset;
      copied = sizeof(unsigned);
      offset += copied;
      

      // Serialize N-1 enclosing contexts
      for (int i = 0; i < this->enclosing_contexts.size(); i++) {
        std::vector<char>& context = this->enclosing_contexts[i];
        copied = context.size();
        if (copied > 0) memcpy(&buffer[offset], context.data(), copied);
      for (unsigned i = 0; i < this->enclosing_contexts.size; i++) {
        CharArray* context = array_get(&this->enclosing_contexts, i);
        copied = context->size;
        if (copied > 0) memcpy(&buffer[offset], context->contents, copied);
        offset += copied;
      }

      // Serialize current context
      copied = this->current_context.serialize(&buffer[offset]);
      copied = scanner_serialize(&this->current_context, &buffer[offset]);
      offset += copied;
      

      // Write current context size to reserved position
      memcpy(&buffer[current_context_size_offset], &copied, sizeof(unsigned));
      

      return offset;
    }

    void deserialize(const char* const buffer, unsigned const length) {
      this->enclosing_contexts.clear();
      this->current_context.deserialize(NULL, 0);
    /**
     * Deserialize a nested scanner.
     *
     * @param this The nested scanner instance to deserialize into.
     * @param buffer The buffer to deserialize from.
     * @param length The number of bytes in the buffer.
     */
    static void nested_scanner_deserialize(
      struct NestedScanner* const this,
      const char* const buffer,
      unsigned const length
    ) {
      for (unsigned i = 0; i < this->enclosing_contexts.size; i++) {
        array_delete(array_get(&this->enclosing_contexts, i));
      }
      array_delete(&this->enclosing_contexts);
      scanner_deserialize(&this->current_context, NULL, 0);

      if (length > 0) {
        unsigned offset = 0;


@@ 1723,78 1827,104 @@ namespace {
        copied = sizeof(nest_address);
        memcpy(&context_depth, &buffer[offset], copied);
        assert(1 <= context_depth);
        this->enclosing_contexts.resize(context_depth - 1);
        if (context_depth - 1 > 0) array_grow_by(&this->enclosing_contexts, context_depth - 1);
        offset += copied;
        

        // Next N items: size of all contexts
        std::vector<unsigned> context_sizes;
        context_sizes.resize(context_depth);
        Array(unsigned) context_sizes = array_new();
        if (context_depth > 0) array_grow_by(&context_sizes, context_depth);
        copied = context_depth * sizeof(unsigned);
        if (copied > 0) memcpy(context_sizes.data(), &buffer[offset], copied);
        if (copied > 0) memcpy(context_sizes.contents, &buffer[offset], copied);
        offset += copied;
        

        // Deserialize N-1 contexts as enclosing contexts
        for (int i = 0; i < context_depth - 1; i++) {
          copied = context_sizes[i];
          this->enclosing_contexts[i].resize(copied);
          if (copied > 0) memcpy(this->enclosing_contexts[i].data(), &buffer[offset], copied);
          copied = *array_get(&context_sizes, i);
          array_grow_by(array_get(&this->enclosing_contexts, i), copied);
          if (copied > 0) memcpy(array_get(&this->enclosing_contexts, i)->contents, &buffer[offset], copied);
          offset += copied;
        }
        

        // Final context is deserialized as current context
        copied = context_sizes.back();
        this->current_context.deserialize(&buffer[offset], copied);
        copied = *array_back(&context_sizes);
        scanner_deserialize(&this->current_context, &buffer[offset], copied);
        offset += copied;

        array_delete(&context_sizes);
        assert(offset == length);
      }
    }

    bool scan(TSLexer* const lexer, const bool* const valid_symbols) {
    /**
     * Initializes a new instance of the NestedScanner object.
     *
     * @param this The NestedScanner to initialize.
     */
    static void nested_scanner_init(struct NestedScanner* const this) {
      array_init(&this->enclosing_contexts);
      this->current_context = scanner_create();
    }

    /**
     * Frees all memory allocated by the nested scanner.
     *
     * @param this The NestedScanner to free.
     */
    static void nested_scanner_free(struct NestedScanner* const this) {
      for (unsigned i = 0; i < this->enclosing_contexts.size; i++) {
        array_delete(array_get(&this->enclosing_contexts, i));
      }
      array_delete(&this->enclosing_contexts);
      scanner_free(&this->current_context);
    }

    static bool nested_scan(
      struct NestedScanner* const this,
      TSLexer* const lexer,
      const bool* const valid_symbols) {
      // All symbols are marked as valid during error recovery.
      // We can check for this by looking at the validity of the final
      // (unused) external symbol, ERROR_SENTINEL.
      // TODO: actually function during error recovery
      // https://github.com/tlaplus-community/tree-sitter-tlaplus/issues/19
      if (valid_symbols[ERROR_SENTINEL]) {
        return false;
      } else if (valid_symbols[PCAL_START]) {
        // Entering PlusCal block; push current context then clear
        unsigned const expected_size = this->current_context.serialized_size();
        this->enclosing_contexts.resize(this->enclosing_contexts.size() + 1);
        this->enclosing_contexts.back().resize(expected_size);
        unsigned const actual_size = this->current_context.serialize(this->enclosing_contexts.back().data());
        unsigned const expected_size = scanner_serialized_size(&this->current_context);
        CharArray serialized_current_context = array_new();
        array_grow_by(&serialized_current_context, expected_size);
        unsigned const actual_size = scanner_serialize(&this->current_context, serialized_current_context.contents);
        assert(expected_size == actual_size);
        this->current_context.deserialize(NULL, 0);
        array_push(&this->enclosing_contexts, serialized_current_context);
        scanner_free(&this->current_context);
        this->current_context = scanner_create();
        lexer->result_symbol = PCAL_START;
        return true;
      } else if (valid_symbols[PCAL_END] && !this->enclosing_contexts.empty()) {
      } else if (valid_symbols[PCAL_END] && this->enclosing_contexts.size > 0) {
        // Exiting PlusCal block; rehydrate context then pop
        std::vector<char>& next = this->enclosing_contexts.back();
        this->current_context.deserialize(next.data(), next.size());
        this->enclosing_contexts.pop_back();
        CharArray* next = array_back(&this->enclosing_contexts);
        scanner_deserialize(&this->current_context, next->contents, next->size);
        array_delete(&array_pop(&this->enclosing_contexts));
        lexer->result_symbol = PCAL_END;
        return true;
      } else {
        return current_context.scan(lexer, valid_symbols);
        return scan(&this->current_context, lexer, valid_symbols);
      }
    }
  };
}

extern "C" {

  // Called once when language is set on a parser.
  // Allocates memory for storing scanner state.
  void* tree_sitter_tlaplus_external_scanner_create() {
    return new NestedScanner();
    struct NestedScanner* scanner = ts_malloc(sizeof(struct NestedScanner));
    nested_scanner_init(scanner);
    return scanner;
  }

  // Called once parser is deleted or different language set.
  // Frees memory storing scanner state.
  void tree_sitter_tlaplus_external_scanner_destroy(void* const payload) {
    NestedScanner* const scanner = static_cast<NestedScanner*>(payload);
    delete scanner;
    struct NestedScanner* const scanner = (struct NestedScanner*)(payload);
    nested_scanner_free(scanner);
    ts_free(scanner);
  }

  // Called whenever this scanner recognizes a token.


@@ 1803,8 1933,8 @@ extern "C" {
    void* const payload,
    char* const buffer
  ) {
    NestedScanner* scanner = static_cast<NestedScanner*>(payload);
    return scanner->serialize(buffer);
    const struct NestedScanner* const scanner = (struct NestedScanner*)(payload);
    return nested_scanner_serialize(scanner, buffer);
  }

  // Called when handling edits and ambiguities.


@@ 1814,8 1944,8 @@ extern "C" {
    const char* const buffer,
    unsigned const length
  ) {
    NestedScanner* const scanner = static_cast<NestedScanner*>(payload);
    scanner->deserialize(buffer, length);
    struct NestedScanner* const scanner = (struct NestedScanner*)(payload);
    nested_scanner_deserialize(scanner, buffer, length);
  }

  // Scans for tokens.


@@ 1824,7 1954,7 @@ extern "C" {
    TSLexer* const lexer,
    const bool* const valid_symbols
  ) {
    NestedScanner* const scanner = static_cast<NestedScanner*>(payload);
    return scanner->scan(lexer, valid_symbols);
    struct NestedScanner* const scanner = (struct NestedScanner*)(payload);
    return nested_scan(scanner, lexer, valid_symbols);
  }
}


A src/tree_sitter/alloc.h => src/tree_sitter/alloc.h +54 -0
@@ 0,0 1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR

extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);

#ifndef ts_malloc
#define ts_malloc  ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc  ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free    ts_current_free
#endif

#else

#ifndef ts_malloc
#define ts_malloc  malloc
#endif
#ifndef ts_calloc
#define ts_calloc  calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free    free
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_

A src/tree_sitter/array.h => src/tree_sitter/array.h +287 -0
@@ 0,0 1,287 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array.
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
#define array_push(self, element)                            \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
  (_array__grow((Array *)(self), count, array_elem_size(self)), \
   memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \
   (self)->size += (count))

/// Append all elements from one array to the end of another.
#define array_push_all(self, other)                                       \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents)                    \
  _array__splice(                                               \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count,  contents                                        \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents)  \
  _array__splice(                                                       \
    (Array *)(self), array_elem_size(self), _index,                \
    old_count, new_count, new_contents                                 \
  )

/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
  if (self->contents) {
    ts_free(self->contents);
    self->contents = NULL;
    self->size = 0;
    self->capacity = 0;
  }
}

/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
                                uint32_t index) {
  assert(index < self->size);
  char *contents = (char *)self->contents;
  memmove(contents + index * element_size, contents + (index + 1) * element_size,
          (self->size - index - 1) * element_size);
  self->size--;
}

/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity > self->capacity) {
    if (self->contents) {
      self->contents = ts_realloc(self->contents, new_capacity * element_size);
    } else {
      self->contents = ts_malloc(new_capacity * element_size);
    }
    self->capacity = new_capacity;
  }
}

/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  memcpy(self->contents, other->contents, self->size * element_size);
}

/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
  Array swap = *other;
  *other = *self;
  *self = swap;
}

/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t new_size = self->size + count;
  if (new_size > self->capacity) {
    uint32_t new_capacity = self->capacity * 2;
    if (new_capacity < 8) new_capacity = 8;
    if (new_capacity < new_size) new_capacity = new_size;
    _array__reserve(self, element_size, new_capacity);
  }
}

/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
                                 uint32_t index, uint32_t old_count,
                                 uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  assert(old_end <= self->size);

  _array__reserve(self, element_size, new_size);

  char *contents = (char *)self->contents;
  if (self->size > old_end) {
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}

/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_ARRAY_H_

A test/corpus/unicode/operators-unicode.txt => test/corpus/unicode/operators-unicode.txt +799 -0
@@ 0,0 1,799 @@
=============|||
Unicode Operator Definitions
=============|||

---- MODULE Test ----
x ≈ y ≜ 0
x ≔ y ≜ 1
x ≍ y ≜ 2
x ◯ y ≜ 3
x ⩴ y ≜ 4
x ● y ≜ 5
x ∩ y ≜ 6
x ⋅ y ≜ 7
x ∘ y ≜ 8
x ≅ y ≜ 9
x ∪ y ≜ 10
x ÷ y ≜ 11
x ≐ y ≜ 12
x ‥ y ≜ 13
x … y ≜ 14
x ≡ y ≜ 15
x ‼ y ≜ 16
x ≥ y ≜ 17
x ≫ y ≜ 18
x ⇔ y ≜ 19
x ⇒ y ≜ 20
x ∈ y ≜ 21
x ∧ y ≜ 22
x ⫤ y ≜ 23
x ↝ y ≜ 24
x ≤ y ≜ 25
x ≪ y ≜ 26
x ∨ y ≜ 27
x ⊣ y ≜ 28
x ≠ y ≜ 29
x ∉ y ≜ 30
x ⊙ y ≜ 31
x ⊖ y ≜ 32
x ⊕ y ≜ 33
x ⊘ y ≜ 34
x ⊗ y ≜ 35
x ⇸ y ≜ 36
x ≺ y ≜ 37
x ⪯ y ≜ 38
x ∝ y ≜ 39
x ⁇ y ≜ 40
x ⊨ y ≜ 41
x ⊢ y ≜ 42
x ∼ y ≜ 43
x ≃ y ≜ 44
x ⊓ y ≜ 45
x ⊔ y ≜ 46
x ⊏ y ≜ 47
x ⊑ y ≜ 48
x ⊐ y ≜ 49
x ⊒ y ≜ 50
x ⋆ y ≜ 51
x ⊂ y ≜ 52
x ⊆ y ≜ 53
x ≻ y ≜ 54
x ⪰ y ≜ 55
x ⊃ y ≜ 56
x ⊇ y ≜ 57
x × y ≜ 58
x ⊎ y ≜ 59
x ‖ y ≜ 60
x ≀ y ≜ 61
□ x ≜ 62
◇ x ≜ 63
¬ x ≜ 64
x ⁺ ≜ 65
====

-------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition parameter: (identifier) name: (infix_op_symbol (approx)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (assign)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (asymp)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (bigcirc)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (bnf_rule)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (bullet)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (cap)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (cdot)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (circ)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (cong)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (cup)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (div)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (doteq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (dots_2)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (dots_3)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (equiv)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (excl)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (geq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (gg)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (iff)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (implies)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (in)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (land)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (ld_ttile)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (leads_to)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (leq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (ll)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (lor)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (ls_ttile)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (neq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (notin)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (odot)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (ominus)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (oplus)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (oslash)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (otimes)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (plus_arrow)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (prec)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (preceq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (propto)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (qq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (rd_ttile)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (rs_ttile)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sim)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (simeq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqcap)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqcup)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqsubset)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqsubseteq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqsupset)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (sqsupseteq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (star)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (subset)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (subseteq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (succ)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (succeq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (supset)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (supseteq)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (times)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (uplus)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (vertvert)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (infix_op_symbol (wr)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition name: (prefix_op_symbol (always)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition name: (prefix_op_symbol (eventually)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition name: (prefix_op_symbol (lnot)) parameter: (identifier) (def_eq) definition: (nat_number))
  (operator_definition parameter: (identifier) name: (postfix_op_symbol (sup_plus)) (def_eq) definition: (nat_number))
(double_line)))

=============|||
Unicode Operator Declarations as Parameters
=============|||

---- MODULE Test ----
op(
  □ _,
  ◇ _,
  ¬ _,
  _ ≈ _,
  _ ≔ _,
  _ ≍ _,
  _ ◯ _,
  _ ⩴ _,
  _ ● _,
  _ ∩ _,
  _ ⋅ _,
  _ ∘ _,
  _ ≅ _,
  _ ∪ _,
  _ ÷ _,
  _ ≐ _,
  _ ‥ _,
  _ … _,
  _ ≡ _,
  _ ‼ _,
  _ ≥ _,
  _ ≫ _,
  _ ⇔ _,
  _ ⇒ _,
  _ ∈ _,
  _ ∧ _,
  _ ⫤ _,
  _ ↝ _,
  _ ≤ _,
  _ ≪ _,
  _ ∨ _,
  _ ⊣ _,
  _ ≠ _,
  _ ∉ _,
  _ ⊙ _,
  _ ⊖ _,
  _ ⊕ _,
  _ ⊘ _,
  _ ⊗ _,
  _ ⇸ _,
  _ ≺ _,
  _ ⪯ _,
  _ ∝ _,
  _ ⁇ _,
  _ ⊨ _,
  _ ⊢ _,
  _ ∼ _,
  _ ≃ _,
  _ ⊓ _,
  _ ⊔ _,
  _ ⊏ _,
  _ ⊑ _,
  _ ⊐ _,
  _ ⊒ _,
  _ ⋆ _,
  _ ⊂ _,
  _ ⊆ _,
  _ ≻ _,
  _ ⪰ _,
  _ ⊃ _,
  _ ⊇ _,
  _ × _,
  _ ⊎ _,
  _ ‖ _,
  _ ≀ _,
  _ ⁺
) ≜ 1
====

-------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier)
    parameter: (operator_declaration name: (prefix_op_symbol (always)) (placeholder))
    parameter: (operator_declaration name: (prefix_op_symbol (eventually)) (placeholder))
    parameter: (operator_declaration name: (prefix_op_symbol (lnot)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (approx)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (assign)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (asymp)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (bigcirc)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (bnf_rule)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (bullet)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (cap)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (cdot)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (circ)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (cong)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (cup)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (div)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (doteq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (dots_2)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (dots_3)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (equiv)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (excl)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (geq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (gg)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (iff)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (implies)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (in)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (land)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (ld_ttile)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (leads_to)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (leq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (ll)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (lor)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (ls_ttile)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (neq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (notin)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (odot)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (ominus)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (oplus)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (oslash)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (otimes)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (plus_arrow)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (prec)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (preceq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (propto)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (qq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (rd_ttile)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (rs_ttile)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sim)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (simeq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqcap)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqcup)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqsubset)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqsubseteq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqsupset)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (sqsupseteq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (star)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (subset)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (subseteq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (succ)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (succeq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (supset)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (supseteq)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (times)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (uplus)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (vertvert)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (infix_op_symbol (wr)) (placeholder))
    parameter: (operator_declaration (placeholder) name: (postfix_op_symbol (sup_plus)))
    (def_eq)
    definition: (nat_number)
  )
(double_line)))

=============|||
Unicode Operator Application
=============|||

---- MODULE Test ----
op ≜ {
  □ x,
  ◇ x,
  ¬ x,
  x ≈ y,
  x ≔ y,
  x ≍ y,
  x ◯ y,
  x ⩴ y,
  x ● y,
  x ∩ y,
  x ⋅ y,
  x ∘ y,
  x ≅ y,
  x ∪ y,
  x ÷ y,
  x ≐ y,
  x ‥ y,
  x … y,
  x ≡ y,
  x ‼ y,
  x ≥ y,
  x ≫ y,
  x ⇔ y,
  x ⇒ y,
  x ∈ y,
  x ∧ y,
  x ⫤ y,
  x ↝ y,
  x ≤ y,
  x ≪ y,
  x ∨ y,
  x ⊣ y,
  x ≠ y,
  x ∉ y,
  x ⊙ y,
  x ⊖ y,
  x ⊕ y,
  x ⊘ y,
  x ⊗ y,
  x ⇸ y,
  x ≺ y,
  x ⪯ y,
  x ∝ y,
  x ⁇ y,
  x ⊨ y,
  x ⊢ y,
  x ∼ y,
  x ≃ y,
  x ⊓ y,
  x ⊔ y,
  x ⊏ y,
  x ⊑ y,
  x ⊐ y,
  x ⊒ y,
  x ⋆ y,
  x ⊂ y,
  x ⊆ y,
  x ≻ y,
  x ⪰ y,
  x ⊃ y,
  x ⊇ y,
  x × y,
  x ⊎ y,
  x ‖ y,
  x ≀ y,
  x ⁺
}
====

-------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq) definition: (finite_set_literal
    (bound_prefix_op symbol: (always) rhs: (identifier_ref))
    (bound_prefix_op symbol: (eventually) rhs: (identifier_ref))
    (bound_prefix_op symbol: (lnot) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (approx) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (assign) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (asymp) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (bigcirc) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (bnf_rule) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (bullet) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (cap) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (cdot) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (circ) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (cong) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (cup) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (div) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (doteq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (dots_2) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (dots_3) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (equiv) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (excl) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (geq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (gg) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (iff) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (implies) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (in) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (land) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (ld_ttile) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (leads_to) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (leq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (ll) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (lor) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (ls_ttile) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (neq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (notin) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (odot) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (ominus) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (oplus) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (oslash) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (otimes) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (plus_arrow) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (prec) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (preceq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (propto) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (qq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (rd_ttile) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (rs_ttile) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sim) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (simeq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqcap) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqcup) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqsubset) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqsubseteq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqsupset) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (sqsupseteq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (star) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (subset) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (subseteq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (succ) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (succeq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (supset) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (supseteq) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (times) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (uplus) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (vertvert) rhs: (identifier_ref))
    (bound_infix_op lhs: (identifier_ref) symbol: (wr) rhs: (identifier_ref))
    (bound_postfix_op lhs: (identifier_ref) symbol: (sup_plus))
  ))
(double_line)))

=============|||
Unicode Operator References
=============|||

---- MODULE Test ----
op ≜ f(
  □,
  ◇,
  ¬,
  ≈,
  ≔,
  ≍,
  ◯,
  ⩴,
  ●,
  ∩,
  ⋅,
  ∘,
  ≅,
  ∪,
  ÷,
  ≐,
  ‥,
  …,
  ≡,
  ‼,
  ≥,
  ≫,
  ⇔,
  ⇒,
  ∈,
  ⫤,
  ↝,
  ≤,
  ≪,
  ⊣,
  ≠,
  ∉,
  ⊙,
  ⊖,
  ⊕,
  ⊘,
  ⊗,
  ⇸,
  ≺,
  ⪯,
  ∝,
  ⁇,
  ⊨,
  ⊢,
  ∼,
  ≃,
  ⊓,
  ⊔,
  ⊏,
  ⊑,
  ⊐,
  ⊒,
  ⋆,
  ⊂,
  ⊆,
  ≻,
  ⪰,
  ⊃,
  ⊇,
  ×,
  ⊎,
  ‖,
  ≀,
  ⁺
)
====

-------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq) definition: (bound_op name: (identifier_ref)
    parameter: (prefix_op_symbol (always))
    parameter: (prefix_op_symbol (eventually))
    parameter: (prefix_op_symbol (lnot))
    parameter: (infix_op_symbol (approx))
    parameter: (infix_op_symbol (assign))
    parameter: (infix_op_symbol (asymp))
    parameter: (infix_op_symbol (bigcirc))
    parameter: (infix_op_symbol (bnf_rule))
    parameter: (infix_op_symbol (bullet))
    parameter: (infix_op_symbol (cap))
    parameter: (infix_op_symbol (cdot))
    parameter: (infix_op_symbol (circ))
    parameter: (infix_op_symbol (cong))
    parameter: (infix_op_symbol (cup))
    parameter: (infix_op_symbol (div))
    parameter: (infix_op_symbol (doteq))
    parameter: (infix_op_symbol (dots_2))
    parameter: (infix_op_symbol (dots_3))
    parameter: (infix_op_symbol (equiv))
    parameter: (infix_op_symbol (excl))
    parameter: (infix_op_symbol (geq))
    parameter: (infix_op_symbol (gg))
    parameter: (infix_op_symbol (iff))
    parameter: (infix_op_symbol (implies))
    parameter: (infix_op_symbol (in))
    parameter: (infix_op_symbol (ld_ttile))
    parameter: (infix_op_symbol (leads_to))
    parameter: (infix_op_symbol (leq))
    parameter: (infix_op_symbol (ll))
    parameter: (infix_op_symbol (ls_ttile))
    parameter: (infix_op_symbol (neq))
    parameter: (infix_op_symbol (notin))
    parameter: (infix_op_symbol (odot))
    parameter: (infix_op_symbol (ominus))
    parameter: (infix_op_symbol (oplus))
    parameter: (infix_op_symbol (oslash))
    parameter: (infix_op_symbol (otimes))
    parameter: (infix_op_symbol (plus_arrow))
    parameter: (infix_op_symbol (prec))
    parameter: (infix_op_symbol (preceq))
    parameter: (infix_op_symbol (propto))
    parameter: (infix_op_symbol (qq))
    parameter: (infix_op_symbol (rd_ttile))
    parameter: (infix_op_symbol (rs_ttile))
    parameter: (infix_op_symbol (sim))
    parameter: (infix_op_symbol (simeq))
    parameter: (infix_op_symbol (sqcap))
    parameter: (infix_op_symbol (sqcup))
    parameter: (infix_op_symbol (sqsubset))
    parameter: (infix_op_symbol (sqsubseteq))
    parameter: (infix_op_symbol (sqsupset))
    parameter: (infix_op_symbol (sqsupseteq))
    parameter: (infix_op_symbol (star))
    parameter: (infix_op_symbol (subset))
    parameter: (infix_op_symbol (subseteq))
    parameter: (infix_op_symbol (succ))
    parameter: (infix_op_symbol (succeq))
    parameter: (infix_op_symbol (supset))
    parameter: (infix_op_symbol (supseteq))
    parameter: (infix_op_symbol (times))
    parameter: (infix_op_symbol (uplus))
    parameter: (infix_op_symbol (vertvert))
    parameter: (infix_op_symbol (wr))
    parameter: (postfix_op_symbol (sup_plus))
  ))
(double_line)))

=============|||
Nonfix Unicode Operators
=============|||

---- MODULE Test ----
op ≜ {
  A!B!□(x),
  A!B!◇(x),
  A!B!¬(x),
  A!B!≈(x, y),
  A!B!≔(x, y),
  A!B!≍(x, y),
  A!B!◯(x, y),
  A!B!⩴(x, y),
  A!B!●(x, y),
  A!B!∩(x, y),
  A!B!⋅(x, y),
  A!B!∘(x, y),
  A!B!≅(x, y),
  A!B!∪(x, y),
  A!B!÷(x, y),
  A!B!≐(x, y),
  A!B!‥(x, y),
  A!B!…(x, y),
  A!B!≡(x, y),
  A!B!‼(x, y),
  A!B!≥(x, y),
  A!B!≫(x, y),
  A!B!⇔(x, y),
  A!B!⇒(x, y),
  A!B!∈(x, y),
  A!B!∧(x, y),
  A!B!⫤(x, y),
  A!B!↝(x, y),
  A!B!≤(x, y),
  A!B!≪(x, y),
  A!B!∨(x, y),
  A!B!⊣(x, y),
  A!B!≠(x, y),
  A!B!∉(x, y),
  A!B!⊙(x, y),
  A!B!⊖(x, y),
  A!B!⊕(x, y),
  A!B!⊘(x, y),
  A!B!⊗(x, y),
  A!B!⇸(x, y),
  A!B!≺(x, y),
  A!B!⪯(x, y),
  A!B!∝(x, y),
  A!B!⁇(x, y),
  A!B!⊨(x, y),
  A!B!⊢(x, y),
  A!B!∼(x, y),
  A!B!≃(x, y),
  A!B!⊓(x, y),
  A!B!⊔(x, y),
  A!B!⊏(x, y),
  A!B!⊑(x, y),
  A!B!⊐(x, y),
  A!B!⊒(x, y),
  A!B!⋆(x, y),
  A!B!⊂(x, y),
  A!B!⊆(x, y),
  A!B!≻(x, y),
  A!B!⪰(x, y),
  A!B!⊃(x, y),
  A!B!⊇(x, y),
  A!B!×(x, y),
  A!B!⊎(x, y),
  A!B!‖(x, y),
  A!B!≀(x, y),
  A!B!⁺(x)
}
====

-------------|||
(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq) definition: (finite_set_literal
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (prefix_op_symbol (always)) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (prefix_op_symbol (eventually)) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (prefix_op_symbol (lnot)) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (approx)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (assign)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (asymp)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (bigcirc)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (bnf_rule)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (bullet)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (cap)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (cdot)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (circ)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (cong)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (cup)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (div)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (doteq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (dots_2)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (dots_3)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (equiv)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (excl)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (geq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (gg)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (iff)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (implies)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (in)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (land)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (ld_ttile)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (leads_to)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (leq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (ll)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (lor)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (ls_ttile)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (neq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (notin)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (odot)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (ominus)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (oplus)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (oslash)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (otimes)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (plus_arrow)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (prec)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (preceq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (propto)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (qq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (rd_ttile)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (rs_ttile)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sim)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (simeq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqcap)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqcup)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqsubset)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqsubseteq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqsupset)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (sqsupseteq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (star)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (subset)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (subseteq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (succ)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (succeq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (supset)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (supseteq)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (times)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (uplus)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (vertvert)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (infix_op_symbol (wr)) (identifier_ref) (identifier_ref)))
    (prefixed_op prefix: (subexpr_prefix (subexpr_component (identifier_ref)) (subexpr_component (identifier_ref))) op:
      (bound_nonfix_op symbol: (postfix_op_symbol (sup_plus)) (identifier_ref)))
  ))
(double_line)))


A test/corpus/unicode/quantification-unicode.txt => test/corpus/unicode/quantification-unicode.txt +157 -0
@@ 0,0 1,157 @@
==============================|||
Bounded Quantification
==============================|||

---- MODULE Test ----
op ≜ ∀ x, y ∈ Nat, z ∈ Int, ⟨a, b, c⟩ ∈ Real : FALSE
op ≜ ∃ ⟨x, y⟩ ∈ Nat, a, b ∈ Int : TRUE
====

------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq)
    definition: (bounded_quantification
      quantifier: (forall)
      bound: (quantifier_bound
        intro: (identifier) intro: (identifier)
        (set_in)
        set: (nat_number_set)
      )
      bound: (quantifier_bound
        intro: (identifier)
        (set_in)
        set: (int_number_set)
      )
      bound: (quantifier_bound
        intro: (tuple_of_identifiers (langle_bracket) (identifier) (identifier) (identifier) (rangle_bracket))
        (set_in)
        set: (real_number_set)
      )
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (bounded_quantification
      quantifier: (exists)
      bound: (quantifier_bound
        intro: (tuple_of_identifiers (langle_bracket) (identifier) (identifier) (rangle_bracket))
        (set_in)
        set: (nat_number_set)
      )
      bound: (quantifier_bound
        intro: (identifier) intro: (identifier)
        (set_in)
        set: (int_number_set)
      )
      expression: (boolean)
    )
  )
(double_line)))

==============================|||
Unbounded Quantification
==============================|||

---- MODULE Test ----
op ≜ ∀ x : TRUE
op ≜ ∃ x, y : FALSE
op ≜ \AA x, y, z : TRUE
op ≜ \EE x, y, z, w : FALSE
====

------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq)
    definition: (unbounded_quantification
      quantifier: (forall)
      intro: (identifier)
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (unbounded_quantification
      quantifier: (exists)
      intro: (identifier)
      intro: (identifier)
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (unbounded_quantification
      quantifier: (temporal_forall)
      intro: (identifier)
      intro: (identifier)
      intro: (identifier)
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (unbounded_quantification
      quantifier: (temporal_exists)
      intro: (identifier)
      intro: (identifier)
      intro: (identifier)
      intro: (identifier)
      expression: (boolean)
    )
  )
(double_line)))

==============================|||
Bounded CHOOSE
==============================|||

---- MODULE Test ----
op ≜ CHOOSE x ∈ Nat : TRUE
op ≜ CHOOSE ⟨x, y, z⟩ ∈ S : FALSE
====

------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq)
    definition: (choose
      intro: (identifier)
      (set_in)
      set: (nat_number_set)
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (choose
      intro: (tuple_of_identifiers (langle_bracket) (identifier) (identifier) (identifier) (rangle_bracket))
      (set_in)
      set: (identifier_ref)
      expression: (boolean)
    )
  )
(double_line)))

==============================|||
Unbounded CHOOSE
==============================|||

---- MODULE Test ----
op ≜ CHOOSE x : TRUE
op ≜ CHOOSE ⟨x, y, z⟩ : FALSE
====

------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
  (operator_definition name: (identifier) (def_eq)
    definition: (choose
      intro: (identifier)
      expression: (boolean)
    )
  )
  (operator_definition name: (identifier) (def_eq)
    definition: (choose
      intro: (tuple_of_identifiers (langle_bracket) (identifier) (identifier) (identifier) (rangle_bracket))
      expression: (boolean)
    )
  )
(double_line)))


M test/sanitize/build-sanitized.sh => test/sanitize/build-sanitized.sh +20 -21
@@ 16,36 16,35 @@ export LINK
export CFLAGS
export CXXFLAGS

echo "Building tree-sitter..."
dependencies_dir=test/dependencies
ts_dir=$dependencies_dir/tree-sitter
pushd $ts_dir
make clean
make
popd

sanitize_dir=test/sanitize
lang_name="tlaplus"
ts_lang="tree_sitter_${lang_name}"

objects=()

dependencies_dir=test/dependencies
ts_dir=$dependencies_dir/tree-sitter
src_dir="src"
out_dir="${sanitize_dir}/out"
mkdir -p $out_dir

echo "Building scanner..."
scanner="scanner"
scanner_in="${src_dir}/${scanner}.cc"
scanner_out="${out_dir}/${scanner}.o"
$CXX $CXXFLAGS -g -O1 -I $src_dir -c $scanner_in -o $scanner_out

# Compiling with -O0 speeds up the build dramatically
echo "Building parser..."
parser="parser"
parser_in="${src_dir}/${parser}.c"
parser_out="${out_dir}/${parser}.o"
$CC $CFLAGS -g -O0 -I $src_dir -c $parser_in -o $parser_out
scanner="scanner"
scanner_in="${src_dir}/${scanner}.c"
scanner_out="${out_dir}/${scanner}.o"

if [ -z "$1" ]; then
  echo "Building tree-sitter..."
  pushd $ts_dir
  make clean
  make
  popd

  # Compiling with -O0 speeds up the build dramatically
  echo "Building parser..."
  $CC $CFLAGS -g -O0 -I $src_dir -c $parser_in -o $parser_out
fi

echo "Building scanner..."
$CC $CFLAGS -g -O0 -I $src_dir -c $scanner_in -o $scanner_out

echo "Building runner..."
$CXX $CXXFLAGS \

A test/sanitize/run-corpus.sh => test/sanitize/run-corpus.sh +14 -0
@@ 0,0 1,14 @@
#! /bin/sh
# Parse every .tla example with the sanitizer-instrumented parser; on any
# failure, re-run the file under valgrind for diagnostics and exit nonzero.
EXITCODE=0
# Feed the loop through a here-document instead of a pipeline: a piped
# `while` runs in a subshell, so EXITCODE=1 set inside it would be lost
# and the script would always exit 0 even when files fail to parse.
while IFS= read -r file; do
  # Skip the blank line the command substitution yields when find matches nothing.
  [ -n "$file" ] || continue
  echo "$file"
  ./test/sanitize/out/parse_tlaplus "$file" -q
  RESULT=$?
  if [ "$RESULT" -ne 0 ]; then
    echo "FAILURE: $RESULT"
    valgrind ./test/sanitize/out/parse_tlaplus "$file" -q
    EXITCODE=1
  fi
done <<EOF
$(find "test/examples" -type f -name "*.tla")
EOF
exit $EXITCODE