~mna/fastpeg

e67c7991223e8f7677b4173159aac8485568f46e — Martin Angers 5 years ago 9e0527f
finalize ebnf grammar
1 files changed, 47 insertions(+), 19 deletions(-)

M doc/grammar.ebnf
M doc/grammar.ebnf => doc/grammar.ebnf +47 -19
@@ 4,24 4,42 @@
#
# [0]: https://golang.org/ref/spec#Notation

# Structural productions: a grammar is one or more definitions, each binding
# an identifier to an expression. An expression is one or more sequences
# separated by SEPARATOR, a sequence is one or more (optionally labelled)
# prefixes, and a prefix is either a code reference introduced by AND, DOLLAR
# or AT, or a suffix optionally preceded by AND or NOT.
grammar    = definition { definition } .
definition = identifier ARROW expression [ EOS ] .
expression = sequence { SEPARATOR sequence } .
sequence   = label { label } .
label      = [ identifier COLON ] prefix .
prefix     = AND coderef
           | DOLLAR coderef
           | AT coderef
           | [ AND | NOT ] suffix
           .
suffix     = primary [ QUESTION | STAR | PLUS ] .
primary    = identifier
           | LPAREN expression RPAREN
           | literal
           | class
           | DOT
           .
# A code reference is an identifier wrapped in LBRACE ... RBRACE.
coderef    = LBRACE identifier RBRACE .
# A grammar is one or more definitions.
grammar        = definition { definition } .
# A definition binds an identifier to an expression; the trailing EOS is optional.
definition     = identifier ARROW expression [ EOS ] .
# An expression is one or more sequences separated by SEPARATOR.
expression     = sequence { SEPARATOR sequence } .
# A sequence is one or more labels, in order.
sequence       = label { label } .
# A label is a prefix optionally preceded by `identifier COLON`.
label          = [ identifier COLON ] prefix .
# A prefix is either a code reference introduced by AND, DOLLAR or AT, or a
# suffix optionally preceded by AND or NOT.
prefix         = AND coderef # TODO: move coderefs before labels, no sense in having id:${code}
               | DOLLAR coderef
               | AT coderef
               | [ AND | NOT ] suffix
               .
# A suffix is a primary optionally followed by one of QUESTION, STAR or PLUS.
suffix         = primary [ QUESTION | STAR | PLUS ] .
# A primary is a reference to another definition, a parenthesized expression,
# a literal, a character class, or DOT.
primary        = identifier
               | LPAREN expression RPAREN
               | literal
               | class
               | DOT
               .
# A code reference is an identifier wrapped in LBRACE ... RBRACE.
coderef        = LBRACE identifier RBRACE .
# An identifier starts with a LETTER and continues with any number of LETTER
# or NUMBER.
identifier     = LETTER { LETTER | NUMBER } .
# A literal is delimited by double or single quotes. Inside a literal, the
# other kind of quote and RBRACK may appear unescaped; the delimiting quote
# is only reachable through the matching escape production.
literal        = dquote_literal | squote_literal .
dquote_literal = DQUOTE { SAFE_RUNE | SQUOTE | RBRACK | dquote_escape } DQUOTE .
squote_literal = SQUOTE { SAFE_RUNE | DQUOTE | RBRACK | squote_escape } SQUOTE .
# A class is one or more ranges between LBRACK and RBRACK.
# NOTE(review): class_escape defines `BACKSLASH CHEVRON`, but no production
# here gives an unescaped CHEVRON a meaning (e.g. an optional CHEVRON after
# LBRACK) -- confirm whether that alternative is missing.
class          = LBRACK range { range } RBRACK .
# A range is either `char MINUS char` or a single char.
# NOTE(review): SAFE_RUNE does not exclude '-', so `char` can also match the
# MINUS separator -- confirm how the scanner disambiguates.
range          = char MINUS char | char .
# Both quote characters may appear unescaped inside a class.
char           = SAFE_RUNE | SQUOTE | DQUOTE | class_escape .
# Context-specific escapes: each context adds escapes for its own delimiters
# on top of the common ones.
dquote_escape  = common_escape | BACKSLASH DQUOTE .
squote_escape  = common_escape | BACKSLASH SQUOTE .
class_escape   = common_escape | BACKSLASH LBRACK | BACKSLASH RBRACK | BACKSLASH MINUS | BACKSLASH CHEVRON .
# Escapes accepted in every context: hex and unicode escapes, the
# single-letter escapes a, b, f, n, r, t, v, and an escaped backslash.
common_escape  = hex_escape
               | unicode_escape
               | BACKSLASH ( 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | BACKSLASH )
               .
# `\x` followed by exactly two hex digits.
hex_escape     = BACKSLASH 'x' HEX_DIGIT HEX_DIGIT .
# `\u` followed by exactly four hex digits, or `\U` followed by exactly eight.
unicode_escape = BACKSLASH 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
               | BACKSLASH 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
               .

# For reference, the terminal productions are loosely defined here.
# They are handled by the scanner.


@@ 42,4 60,14 @@ RPAREN    = ')' .
DOT       = '.' .
LBRACE    = '{' .
RBRACE    = '}' .

# Character-level terminals used by the identifier, literal, class and escape
# productions.
# NOTE(review): NUMBER also admits '-', so with
# `identifier = LETTER { LETTER | NUMBER }` an identifier may contain a hyphen
# anywhere after its first character -- confirm this is intended.
LETTER    = any unicode letter or '_' .
NUMBER    = any unicode number or '-' .
LBRACK    = '[' .
RBRACK    = ']' .
DQUOTE    = '"' .
SQUOTE    = '\'' .
# SAFE_RUNE excludes newline, backslash, both quotes and ']'; the productions
# that use it add the quotes and RBRACK back as explicit alternatives where
# they are allowed unescaped.
# NOTE(review): '[', '-' and '^' are not excluded here although class_escape
# defines escapes for them -- confirm how they are treated unescaped in a class.
SAFE_RUNE = any unicode code point except '\n', '\\', '"', '\'' and ']' .
BACKSLASH = '\\' .
MINUS     = '-' .
CHEVRON   = '^' .
# Hexadecimal digits, accepted in either case.
HEX_DIGIT = '0' ... '9' | 'a' ... 'f' | 'A' ... 'F' .