~ahelwer/tree-sitter-tlaplus

c54aebd31e2ac394a0aa70b510724c99144119f1 — Andrew Helwer 1 year, 5 months ago 6fd16d8
Fixes crash when encountering QED keyword during error recovery (#78)

Result of calling vector.pop_back() when vector was empty
M .github/workflows/ci.yml => .github/workflows/ci.yml +12 -7
@@ 24,18 24,23 @@ jobs:
        run: npm install
      - name: Unit Tests
        run: ./node_modules/.bin/tree-sitter test
      - name: Crash Regression Tests
        shell: bash
        run: |
          ./node_modules/.bin/tree-sitter init-config
          ./node_modules/.bin/tree-sitter parse -q test/crash_regressions/QEDErrorRecovery.tla || (($? == 1))
      - name: Corpus Tests (Windows)
        if: matrix.os == 'windows-latest'
        shell: pwsh
        run: |
          .\node_modules\.bin\tree-sitter init-config
          .\test\run-corpus.ps1
        run: .\test\run-corpus.ps1
      - name: Corpus Tests (Unix)
        if: matrix.os != 'windows-latest'
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          ./node_modules/.bin/tree-sitter init-config
          ./test/run-corpus.sh
        run: ./test/run-corpus.sh --parallel
      - name: Corpus Tests (MacOS)
        if: matrix.os == 'macos-latest'
        shell: bash
        run: ./test/run-corpus.sh
      - name: Query File Tests
        shell: pwsh
        run: |

M Cargo.toml => Cargo.toml +1 -1
@@ 1,7 1,7 @@
[package]
name = "tree-sitter-tlaplus"
description = "A tree-sitter grammar for TLA⁺ and PlusCal"
version = "1.0.2"
version = "1.0.3"
authors = ["Andrew Helwer", "Vasiliy Morkovkin"]
license = "MIT"
readme = "README.md"

M README.md => README.md +4 -0
@@ 14,6 14,7 @@ The most important files in this repo are `grammar.js` and `src/scanner.cc`.
The former is the source of truth for parser code generation and the latter contains logic for parsing the context-sensitive parts of TLA⁺ like nested proofs and conjunction/disjunction lists.

A blog post detailing the development process of this parser can be found [here](https://ahelwer.ca/post/2023-01-11-tree-sitter-tlaplus/).
This repo is [mirrored on sourcehut](mirrored at https://git.sr.ht/~ahelwer/tree-sitter-tlaplus).

## Aims & Capabilities
The aim of this project is to facilitate creation of modern user-assistive language tooling for TLA⁺.


@@ 78,7 79,10 @@ You can also click the "query" checkbox to open a third pane for testing [tree q
```

## Contributions
One easy way to contribute is to add your TLA⁺ specifications to the [tlaplus/examples](https://github.com/tlaplus/examples) repo, which this grammar uses as a valuable test corpus!

Pull requests are welcome. If you modify `grammar.js`, make sure you run `npx tree-sitter generate` before committing & pushing.
Generated files are (unfortunately) currently present in the repo but will hopefully be removed in [the future](https://github.com/tree-sitter/tree-sitter/discussions/1243).
Their correspondence is enforced during CI.



M package.json => package.json +1 -1
@@ 1,6 1,6 @@
{
  "name": "@tlaplus/tree-sitter-tlaplus",
  "version": "1.0.2",
  "version": "1.0.3",
  "description": "A tree-sitter grammar for TLA⁺ and PlusCal",
  "main": "bindings/node",
  "scripts": {

M src/scanner.cc => src/scanner.cc +26 -7
@@ 1084,9 1084,13 @@ namespace {
     * @return Whether a DEDENT token was emitted.
     */
    bool emit_dedent(TSLexer* const lexer) {
      lexer->result_symbol = DEDENT;
      this->jlists.pop_back();
      return true;
      if (is_in_jlist()) {
        lexer->result_symbol = DEDENT;
        this->jlists.pop_back();
        return true;
      } else {
        return false;
      }
    }

    /**


@@ 1515,13 1519,28 @@ namespace {
     * we record the current proof level in case there is a child proof
     * of this step that uses <+> or PROOF <*> for its first step. Then
     * we pop the top proof level off the stack.
     *
     * It's possible to encounter a QED keyword while not inside of a proof
     * as part of an earlier syntax error, and proof step IDs being treated
     * as sequences of < and > operators. In this case the current proof
     * state should not be modified, but a QED keyword token is still
     * returned to help the error recovery process. Not performing this
     * check previously led to a segfault; see:
     * https://github.com/tlaplus-community/tree-sitter-tlaplus/issues/60
     * 
     * @param lexer The tree-sitter lexing control structure.
     * @param valid_symbols Tokens possibly expected in this spot.
     * @return Whether a token should be emitted.
     */
    bool handle_qed_keyword_token(TSLexer* const lexer) {
      last_proof_level = get_current_proof_level();
      proofs.pop_back();
    bool handle_qed_keyword_token(
      TSLexer* const lexer,
      const bool* const valid_symbols
    ) {
      if (valid_symbols[QED_KEYWORD]) {
        last_proof_level = get_current_proof_level();
        proofs.pop_back();
      }

      lexer->result_symbol = QED_KEYWORD;
      lexer->mark_end(lexer);
      return true;


@@ 1597,7 1616,7 @@ namespace {
          case Token_OMITTED_KEYWORD:
            return handle_terminal_proof_keyword_token(lexer, valid_symbols, OMITTED_KEYWORD);
          case Token_QED_KEYWORD:
            return handle_qed_keyword_token(lexer);
            return handle_qed_keyword_token(lexer, valid_symbols);
          case Token_WEAK_FAIRNESS:
            return handle_fairness_keyword_token(lexer, col, WEAK_FAIRNESS);
          case Token_STRONG_FAIRNESS:

A test/crash_regressions/QEDErrorRecovery.tla => test/crash_regressions/QEDErrorRecovery.tla +7 -0
@@ 0,0 1,7 @@
---- MODULE Test ----
op == [ ]
THEOREM TRUE
<*> A
<*> QED
====


M test/run-corpus.sh => test/run-corpus.sh +2 -1
@@ 1,7 1,8 @@
#! /bin/sh

specs=$(find "test/examples" -name "*.tla")
ncpu=$(command -v nproc > /dev/null && nproc || echo 1)
ncpu=$(($# > 0 ? $(command -v nproc > /dev/null && nproc || echo 1) : 1))
echo "Concurrent parser count: $ncpu"
failures=$(echo "$specs" | xargs -P $ncpu -I {} ./node_modules/.bin/tree-sitter parse -q {})
if test -z "$failures"; then
  exit 0