~ehmry/dhall-nim

b839efac8f7701e096107fd676c3282440badb46 — Emery Hemingway 1 year, 3 months ago
Initial commit
6 files changed, 1351 insertions(+), 0 deletions(-)

A .gitmodules
A dhall-lang
A dhall.nimble
A src/dhall.abnf
A src/dhall/term.nim
A tests/config.nims
A  => .gitmodules +3 -0
@@ 1,3 @@
[submodule "tests/dhall-lang"]
	path = dhall-lang
	url = https://github.com/dhall-lang/dhall-lang.git

A  => dhall-lang +1 -0
@@ 1,1 @@
Subproject commit bf757ba1ee526714278568012c6bc98d7ea9757e

A  => dhall.nimble +13 -0
@@ 1,13 @@
# Package

version       = "0.1.0"
author        = "Emery Hemingway"
description   = "Dhall interpreter"
license       = "GPL-3.0"
srcDir        = "src"



# Dependencies

requires "nim >= 1.2.0", "bigints"

A  => src/dhall.abnf +932 -0
@@ 1,932 @@
; ABNF syntax based on RFC 5234
;
; The character encoding for Dhall is UTF-8
;
; Some notes on implementing this grammar:
;
; First, do not use a lexer to tokenize the file before parsing.  Instead, treat
; the individual characters of the file as the tokens to feed into the parser.
; You should not use a lexer because Dhall's grammar supports two features which
; cannot be correctly supported by a lexer:
;
; * String interpolation (i.e. "foo ${Natural/toInteger bar} baz")
; * Nested block comments (i.e. "{- foo {- bar -} baz -}")
;
; Second, this grammar assumes that your parser can backtrack and/or try
; multiple parses simultaneously.  For example, consider this expression:
;
;     List ./MyType
;
; A parser might first try to parse the period as the beginning of a field
; selector, only to realize immediately afterwards that `/MyType` is not a valid
; name for a field.  A conforming parser must backtrack so that the expression
; `./MyType` can instead be correctly interpreted as a relative path
;
; Third, if there are multiple valid parses then prefer the first parse
; according to the ordering of alternatives. That is, the order of evaluation
; of the alternatives is left-to-right.
;
; For example, the grammar for single quoted string literals is:
;
;     single-quote-continue =
;           "'''"               single-quote-continue
;         / "${" complete-expression "}" single-quote-continue
;         / "''${"              single-quote-continue
;         / "''"
;         / %x20-10FFFF         single-quote-continue
;         / tab                 single-quote-continue
;         / end-of-line         single-quote-continue
;
;         single-quote-literal = "''" single-quote-continue
;
; ... which permits valid parses for the following code:
;
;     "''''''''''''''''"
;
; If you tried to parse all alternatives then there are at least two valid
; interpretations for the above code:
;
; * A single quoted literal with four escape sequences of the form "'''"
;     * i.e. "''" followed by "'''"  four times in a row followed by "''"
; * Four empty single quoted literals
;     * i.e. "''''" four times in a row
;
; The correct interpretation is the first one because parsing the escape
; sequence "'''" takes precedence over parsing the termination sequence "''",
; according to the order of the alternatives in the `single-quote-continue`
; rule.
;
; Some parsing libraries do not backtrack by default but allow the user to
; selectively backtrack in certain parts of the grammar.  Usually parsing
; libraries do this to improve efficiency and error messages.  Dhall's grammar
; takes that into account by minimizing the number of rules that require the
; parser to backtrack and comments below will highlight where you need to
; explicitly backtrack
;
; Specifically, if you see an uninterrupted literal in a grammar rule such as:
;
;     "->"
;
; ... or:
;
;     %x66.6f.72.61.6c.6c
;
; ... then that string literal is parsed as a single unit, meaning that you
; should backtrack if you parse only part of the literal
;
; In all other cases you can assume that you do not need to backtrack unless
; there is a comment explicitly asking you to backtrack
;
; When parsing a repeated construct, prefer alternatives that parse as many
; repetitions as possible.  On in other words:
;
;     [a] = a / ""
;
;     a* = a* a / ""
;
; Note that the latter rule also specifies that repetition produces
; left-associated expressions.  For example, function application is
; left-associative and all operators are left-associative when they are not
; parenthesized.
;
; Additionally, try alternatives in an order that minimizes backtracking
; according to the following rule:
;
;     (a / b) (c / d) = a c / a d / b c / b d

; NOTE: There are many line endings in the wild
;
; See: https://en.wikipedia.org/wiki/Newline
;
; For simplicity this supports Unix and Windows line-endings, which are the most
; common
end-of-line =
      %x0A     ; "\n"
    / %x0D.0A  ; "\r\n"

; This rule matches all characters that are not:
;
; * not ASCII
; * not part of a surrogate pair
; * not a "non-character"
valid-non-ascii =
      %x80-D7FF
    ; %xD800-DFFF = surrogate pairs
    / %xE000-FFFD
    ; %xFFFE-FFFF = non-characters
    / %x10000-1FFFD
    ; %x1FFFE-1FFFF = non-characters
    / %x20000-2FFFD
    ; %x2FFFE-2FFFF = non-characters
    / %x30000-3FFFD
    ; %x3FFFE-3FFFF = non-characters
    / %x40000-4FFFD
    ; %x4FFFE-4FFFF = non-characters
    / %x50000-5FFFD
    ; %x5FFFE-5FFFF = non-characters
    / %x60000-6FFFD
    ; %x6FFFE-6FFFF = non-characters
    / %x70000-7FFFD
    ; %x7FFFE-7FFFF = non-characters
    / %x80000-8FFFD
    ; %x8FFFE-8FFFF = non-characters
    / %x90000-9FFFD
    ; %x9FFFE-9FFFF = non-characters
    / %xA0000-AFFFD
    ; %xAFFFE-AFFFF = non-characters
    / %xB0000-BFFFD
    ; %xBFFFE-BFFFF = non-characters
    / %xC0000-CFFFD
    ; %xCFFFE-CFFFF = non-characters
    / %xD0000-DFFFD
    ; %xDFFFE-DFFFF = non-characters
    / %xE0000-EFFFD
    ; %xEFFFE-EFFFF = non-characters
    / %xF0000-FFFFD
    ; %xFFFFE-FFFFF = non-characters
    / %x100000-10FFFD
    ; %x10FFFE-10FFFF = non-characters

tab = %x09  ; "\t"

block-comment = "{-" block-comment-continue

block-comment-char =
      %x20-7F
    / valid-non-ascii
    / tab
    / end-of-line

block-comment-continue =
    "-}"
    / block-comment block-comment-continue
    / block-comment-char block-comment-continue

not-end-of-line = %x20-7F / valid-non-ascii / tab

; NOTE: Slightly different from Haskell-style single-line comments because this
; does not require a space after the dashes
line-comment = "--" *not-end-of-line end-of-line

whitespace-chunk =
      " "
    / tab
    / end-of-line
    / line-comment
    / block-comment

whsp = *whitespace-chunk

; nonempty whitespace
whsp1 = 1*whitespace-chunk

; Uppercase or lowercase ASCII letter
ALPHA = %x41-5A / %x61-7A

; ASCII digit
DIGIT = %x30-39  ; 0-9

ALPHANUM = ALPHA / DIGIT

HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

; A simple label cannot be one of the reserved keywords
; listed in the `keyword` rule.
; A PEG parser could use negative lookahead to
; enforce this, e.g. as follows:
; simple-label =
;       keyword 1*simple-label-next-char
;     / !keyword (simple-label-first-char *simple-label-next-char)
simple-label-first-char = ALPHA / "_"
simple-label-next-char = ALPHANUM / "-" / "/" / "_"
simple-label = simple-label-first-char *simple-label-next-char

quoted-label-char =
      %x20-5F
        ; %x60 = '`'
    / %x61-7E

quoted-label = *quoted-label-char

; NOTE: Dhall does not support Unicode labels, mainly to minimize the potential
; for code obfuscation
label = ("`" quoted-label "`" / simple-label)

; A nonreserved-label cannot not be any of the reserved identifiers for builtins
; (unless quoted).
; Their list can be found in the `builtin` rule.
; The only place where this restriction applies is bound variables.
; A PEG parser could use negative lookahead to avoid parsing those identifiers,
; e.g. as follows:
; nonreserved-label =
;      builtin 1*simple-label-next-char
;    / !builtin label
nonreserved-label = label

; An any-label is allowed to be one of the reserved identifiers (but not a keyword).
any-label = label

; Allow specifically `Some` in record and union labels.
any-label-or-some = any-label / Some

; Dhall's double-quoted strings are similar to JSON strings (RFC7159) except:
;
; * Dhall strings support string interpolation
;
; * Dhall strings also support escaping string interpolation by adding a new
;   `\$` escape sequence
;
; * Dhall strings also allow Unicode escape sequences of the form `\u{XXX}`
double-quote-chunk =
      interpolation
      ; '\'    Beginning of escape sequence
    / %x5C double-quote-escaped
    / double-quote-char

double-quote-escaped =
      %x22                 ; '"'    quotation mark  U+0022
    / %x24                 ; '$'    dollar sign     U+0024
    / %x5C                 ; '\'    reverse solidus U+005C
    / %x2F                 ; '/'    solidus         U+002F
    / %x62                 ; 'b'    backspace       U+0008
    / %x66                 ; 'f'    form feed       U+000C
    / %x6E                 ; 'n'    line feed       U+000A
    / %x72                 ; 'r'    carriage return U+000D
    / %x74                 ; 't'    tab             U+0009
    / %x75 unicode-escape  ; 'uXXXX' / 'u{XXXX}'    U+XXXX

; Valid Unicode escape sequences are as follows:
;
; * Exactly 4 hexadecimal digits without braces:
;       `\uXXXX`
; * 1-6 hexadecimal digits within braces (with optional zero padding):
;       `\u{XXXX}`, `\u{000X}`, `\u{XXXXX}`, `\u{00000XXXXX}`, etc.
;   Any number of leading zeros are allowed within the braces preceding the 1-6
;   digits specifying the codepoint.
;
; From these sequences, the parser must also reject any codepoints that are in
; the following ranges:
;
; * Surrogate pairs: `%xD800-DFFF`
; * Non-characters: `%xNFFFE-NFFFF` / `%x10FFFE-10FFFF` for `N` in `{ 0 .. F }`
;
; See the `valid-non-ascii` rule for the exact ranges that are not allowed
unicode-escape = unbraced-escape / "{" braced-escape "}"

; All valid last 4 digits for unicode codepoints (outside Plane 0): `0000-FFFD`
unicode-suffix = (DIGIT / "A" / "B" / "C" / "D" / "E") 3HEXDIG
               / "F" 2HEXDIG (DIGIT / "A" / "B" / "C" / "D")

; All 4-hex digit unicode escape sequences that are not:
;
; * Surrogate pairs (i.e. `%xD800-DFFF`)
; * Non-characters (i.e. `%xFFFE-FFFF`)
;
unbraced-escape =
      (DIGIT / "A" / "B" / "C") 3HEXDIG
    / "D" ("0" / "1" / "2" / "3" / "4" / "5" / "6" / "7") HEXDIG HEXDIG
    ; %xD800-DFFF Surrogate pairs
    / "E" 3HEXDIG
    / "F" 2HEXDIG (DIGIT / "A" / "B" / "C" / "D")
    ; %xFFFE-FFFF Non-characters

; All 1-6 digit unicode codepoints that are not:
;
; * Surrogate pairs: `%xD800-DFFF`
; * Non-characters: `%xNFFFE-NFFFF` / `%x10FFFE-10FFFF` for `N` in `{ 0 .. F }`
;
; See the `valid-non-ascii` rule for the exact ranges that are not allowed
braced-codepoint =
      ("1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / "A" / "B" / "C" / "D" / "E" / "F" / "10") unicode-suffix; (Planes 1-16)
    / unbraced-escape ; (Plane 0)
    / 1*3HEXDIG ; %x000-FFF

; Allow zero padding for braced codepoints
braced-escape = *"0" braced-codepoint

; Printable characters except double quote and backslash
double-quote-char =
      %x20-21
        ; %x22 = '"'
    / %x23-5B
        ; %x5C = "\"
    / %x5D-7F
    / valid-non-ascii

double-quote-literal = %x22 *double-quote-chunk %x22

; NOTE: The only way to end a single-quote string literal with a single quote is
; to either interpolate the single quote, like this:
;
;     ''ABC${"'"}''
;
; ... or concatenate another string, like this:
;
;     ''ABC'' ++ "'"
;
; If you try to end the string literal with a single quote then you get "'''",
; which is interpreted as an escaped pair of single quotes
single-quote-continue =
      interpolation single-quote-continue
    / escaped-quote-pair single-quote-continue
    / escaped-interpolation single-quote-continue
    / "''" ; End of text literal
    / single-quote-char single-quote-continue

; Escape two single quotes (i.e. replace this sequence with "''")
escaped-quote-pair = "'''"

; Escape interpolation (i.e. replace this sequence with "${")
escaped-interpolation = "''${"

single-quote-char =
      %x20-7F
    / valid-non-ascii
    / tab
    / end-of-line

single-quote-literal = "''" end-of-line single-quote-continue

interpolation = "${" complete-expression "}"

text-literal = (double-quote-literal / single-quote-literal)

; RFC 5234 interprets string literals as case-insensitive and recommends using
; hex instead for case-sensitive strings
;
; If you don't feel like reading hex, these are all the same as the rule name.
; Keywords that should never be parsed as identifiers
if                    = %x69.66
then                  = %x74.68.65.6e
else                  = %x65.6c.73.65
let                   = %x6c.65.74
in                    = %x69.6e
as                    = %x61.73
using                 = %x75.73.69.6e.67
merge                 = %x6d.65.72.67.65
missing               = %x6d.69.73.73.69.6e.67
Infinity              = %x49.6e.66.69.6e.69.74.79
NaN                   = %x4e.61.4e
Some                  = %x53.6f.6d.65
toMap                 = %x74.6f.4d.61.70
assert                = %x61.73.73.65.72.74
forall-keyword        = %x66.6f.72.61.6c.6c ; "forall"
forall-symbol         = %x2200 ; Unicode FOR ALL
forall                = forall-symbol / forall-keyword
with                  = %x77.69.74.68

; Unused rule that could be used as negative lookahead in the
; `simple-label` rule for parsers that support this.
keyword =
      if / then / else
    / let / in
    / using / missing 
    / assert / as
    / Infinity / NaN
    / merge / Some / toMap
    / forall-keyword
    / with

; Note that there is a corresponding parser test in
; `tests/parser/success/builtinsA.dhall`. Please update it when
; you modify this `builtin` rule.
builtin =
      Natural-fold
    / Natural-build
    / Natural-isZero
    / Natural-even
    / Natural-odd
    / Natural-toInteger
    / Natural-show
    / Integer-toDouble
    / Integer-show
    / Integer-negate
    / Integer-clamp
    / Natural-subtract
    / Double-show
    / List-build
    / List-fold
    / List-length
    / List-head
    / List-last
    / List-indexed
    / List-reverse
    / Text-show
    / Bool
    / True
    / False
    / Optional
    / None
    / Natural
    / Integer
    / Double
    / Text
    / List
    / Type
    / Kind
    / Sort

; Reserved identifiers, needed for some special cases of parsing
Optional              = %x4f.70.74.69.6f.6e.61.6c
Text                  = %x54.65.78.74
List                  = %x4c.69.73.74
Location              = %x4c.6f.63.61.74.69.6f.6e

; Reminder of the reserved identifiers, needed for the `builtin` rule
Bool              = %x42.6f.6f.6c
True              = %x54.72.75.65
False             = %x46.61.6c.73.65
None              = %x4e.6f.6e.65
Natural           = %x4e.61.74.75.72.61.6c
Integer           = %x49.6e.74.65.67.65.72
Double            = %x44.6f.75.62.6c.65
Type              = %x54.79.70.65
Kind              = %x4b.69.6e.64
Sort              = %x53.6f.72.74
Natural-fold      = %x4e.61.74.75.72.61.6c.2f.66.6f.6c.64
Natural-build     = %x4e.61.74.75.72.61.6c.2f.62.75.69.6c.64
Natural-isZero    = %x4e.61.74.75.72.61.6c.2f.69.73.5a.65.72.6f
Natural-even      = %x4e.61.74.75.72.61.6c.2f.65.76.65.6e
Natural-odd       = %x4e.61.74.75.72.61.6c.2f.6f.64.64
Natural-toInteger = %x4e.61.74.75.72.61.6c.2f.74.6f.49.6e.74.65.67.65.72
Natural-show      = %x4e.61.74.75.72.61.6c.2f.73.68.6f.77
Natural-subtract  = %x4e.61.74.75.72.61.6c.2f.73.75.62.74.72.61.63.74
Integer-toDouble  = %x49.6e.74.65.67.65.72.2f.74.6f.44.6f.75.62.6c.65
Integer-show      = %x49.6e.74.65.67.65.72.2f.73.68.6f.77
Integer-negate    = %x49.6e.74.65.67.65.72.2f.6e.65.67.61.74.65
Integer-clamp     = %x49.6e.74.65.67.65.72.2f.63.6c.61.6d.70
Double-show       = %x44.6f.75.62.6c.65.2f.73.68.6f.77
List-build        = %x4c.69.73.74.2f.62.75.69.6c.64
List-fold         = %x4c.69.73.74.2f.66.6f.6c.64
List-length       = %x4c.69.73.74.2f.6c.65.6e.67.74.68
List-head         = %x4c.69.73.74.2f.68.65.61.64
List-last         = %x4c.69.73.74.2f.6c.61.73.74
List-indexed      = %x4c.69.73.74.2f.69.6e.64.65.78.65.64
List-reverse      = %x4c.69.73.74.2f.72.65.76.65.72.73.65
Text-show         = %x54.65.78.74.2f.73.68.6f.77

; Operators
combine       = %x2227 / "/\"
combine-types = %x2A53 / "//\\"
equivalent    = %x2261 / "==="
prefer        = %x2AFD / "//"
lambda        = %x3BB  / "\"
arrow         = %x2192 / "->"
complete      = "::"

exponent = "e" [ "+" / "-" ] 1*DIGIT

numeric-double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)

minus-infinity-literal = "-" Infinity
plus-infinity-literal = Infinity

double-literal =
    ; "2.0"
      numeric-double-literal
    ; "-Infinity"
    / minus-infinity-literal
    ; "Infinity"
    / plus-infinity-literal
    ; "NaN"
    / NaN

natural-literal =
    ; Hexadecimal with "0x" prefix
      "0" %x78 1*HEXDIG
    ; Decimal; leading 0 digits are not allowed
    / ("1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9") *DIGIT
    ; ... except for 0 itself
    / "0"

integer-literal = ( "+" / "-" ) natural-literal

; If the identifier matches one of the names in the `builtin` rule, then it is a
; builtin, and should be treated as the corresponding item in the list of
; "Reserved identifiers for builtins" specified in the `standard/README.md` document.
; It is a syntax error to specify a de Bruijn index in this case.
; Otherwise, this is a variable with name and index matching the label and index.
identifier = variable / builtin

variable = nonreserved-label [ whsp "@" whsp natural-literal ]

; Printable characters other than " ()[]{}<>/\,"
;
; Excluding those characters ensures that paths don't have to end with trailing
; whitespace most of the time
path-character =
        ; %x20 = " "
      %x21
        ; %x22 = "\""
        ; %x23 = "#"
    / %x24-27
        ; %x28 = "("
        ; %x29 = ")"
    / %x2A-2B
        ; %x2C = ","
    / %x2D-2E
        ; %x2F = "/"
    / %x30-3B
        ; %x3C = "<"
    / %x3D
        ; %x3E = ">"
        ; %x3F = "?"
    / %x40-5A
        ; %x5B = "["
        ; %x5C = "\"
        ; %x5D = "]"
    / %x5E-7A
        ; %x7B = "{"
    / %x7C
        ; %x7D = "}"
    / %x7E

quoted-path-character =
      %x20-21
        ; %x22 = "\""
    / %x23-2E
        ; %x2F = "/"
    / %x30-7F
    / valid-non-ascii

unquoted-path-component = 1*path-character
quoted-path-component = 1*quoted-path-character

path-component = "/" ( unquoted-path-component / %x22 quoted-path-component %x22 )

; The last path-component matched by this rule is referred to as "file" in the semantics,
; and the other path-components as "directory".
path = 1*path-component

local =
    parent-path
    / here-path
    / home-path
    ; NOTE: Backtrack if parsing this alternative fails
    ;
    ; This is because the first character of this alternative will be "/", but
    ; if the second character is "/" or "\" then this should have been parsed
    ; as an operator instead of a path
    / absolute-path

parent-path = ".." path  ; Relative path
here-path = "."  path  ; Relative path
home-path = "~"  path  ; Home-anchored path
absolute-path = path  ; Absolute path

; `http[s]` URI grammar based on RFC7230 and RFC 3986 with some differences
; noted below

scheme = %x68.74.74.70 [ %x73 ]  ; "http" [ "s" ]

; NOTE: This does not match the official grammar for a URI.  Specifically:
;
; * this does not support fragment identifiers, which have no meaning within
;   Dhall expressions and do not affect import resolution
; * the characters "(" ")" and "," are not included in the `sub-delims` rule:
;   in particular, these characters can't be used in authority, path or query
;   strings.  This is because those characters have other meaning in Dhall
;   and it would be confusing for the comma in
;       [http://example.com/foo, bar]
;   to be part of the URL instead of part of the list.  If you need a URL
;   which contains parens or a comma, you must percent-encode them.
;
; Reserved characters in quoted path components should be percent-encoded
; according to https://tools.ietf.org/html/rfc3986#section-2
http-raw = scheme "://" authority path-abempty [ "?" query ]

path-abempty = *( "/" segment )

; NOTE: Backtrack if parsing the optional user info prefix fails
authority = [ userinfo "@" ] host [ ":" port ]

userinfo = *( unreserved / pct-encoded / sub-delims / ":" )

host = IP-literal / IPv4address / domain

port = *DIGIT

IP-literal = "[" ( IPv6address / IPvFuture  ) "]"

IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )

; NOTE: Backtrack when parsing each alternative
IPv6address =                            6( h16 ":" ) ls32
            /                       "::" 5( h16 ":" ) ls32
            / [ h16               ] "::" 4( h16 ":" ) ls32
            / [ h16 *1( ":" h16 ) ] "::" 3( h16 ":" ) ls32
            / [ h16 *2( ":" h16 ) ] "::" 2( h16 ":" ) ls32
            / [ h16 *3( ":" h16 ) ] "::"    h16 ":"   ls32
            / [ h16 *4( ":" h16 ) ] "::"              ls32
            / [ h16 *5( ":" h16 ) ] "::"              h16
            / [ h16 *6( ":" h16 ) ] "::"

h16 = 1*4HEXDIG

ls32 = h16 ":" h16 / IPv4address

IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet

; NOTE: Backtrack when parsing these alternatives
dec-octet = "25" %x30-35       ; 250-255
          / "2" %x30-34 DIGIT  ; 200-249
          / "1" 2DIGIT         ; 100-199
          / %x31-39 DIGIT      ; 10-99
          / DIGIT              ; 0-9

; Look in RFC3986 3.2.2 for
; "A registered name intended for lookup in the DNS"
domain = domainlabel *("." domainlabel) [ "." ]

domainlabel = 1*ALPHANUM *(1*"-" 1*ALPHANUM)

segment = *pchar

pchar = unreserved / pct-encoded / sub-delims / ":" / "@"

query = *( pchar / "/" / "?" )

pct-encoded = "%" HEXDIG HEXDIG

unreserved  = ALPHANUM / "-" / "." / "_" / "~"

; this is the RFC3986 sub-delims rule, without "(", ")" or ","
; see comments above the `http-raw` rule above
sub-delims = "!" / "$" / "&" / "'" / "*" / "+" / ";" / "="

http = http-raw [ whsp using whsp1 import-expression ]

; Dhall supports unquoted environment variables that are Bash-compliant or
; quoted environment variables that are POSIX-compliant
env = "env:"
    ( bash-environment-variable
    / %x22 posix-environment-variable %x22
    )

; Bash supports a restricted subset of POSIX environment variables.  From the
; Bash `man` page, an environment variable name is:
;
; > A word consisting only of  alphanumeric  characters  and  under-scores,  and
; > beginning with an alphabetic character or an under-score
bash-environment-variable = (ALPHA / "_") *(ALPHANUM / "_")

; The POSIX standard is significantly more flexible about legal environment
; variable names, which can contain alerts (i.e. '\a'), whitespace, or
; punctuation, for example.  The POSIX standard says about environment variable
; names:
;
; > The value of an environment variable is a string of characters. For a
; > C-language program, an array of strings called the environment shall be made
; > available when a process begins. The array is pointed to by the external
; > variable environ, which is defined as:
; >
; >     extern char **environ;
; >
; > These strings have the form name=value; names shall not contain the
; > character '='. For values to be portable across systems conforming to IEEE
; > Std 1003.1-2001, the value shall be composed of characters from the portable
; > character set (except NUL and as indicated below).
;
; Note that the standard does not explicitly state that the name must have at
; least one character, but `env` does not appear to support this and `env`
; claims to be POSIX-compliant.  To be safe, Dhall requires at least one
; character like `env`
posix-environment-variable = 1*posix-environment-variable-character

; These are all the characters from the POSIX Portable Character Set except for
; '\0' (NUL) and '='.  Note that the POSIX standard does not explicitly state
; that environment variable names cannot have NUL.  However, this is implicit
; in the fact that environment variables are passed to the program as
; NUL-terminated `name=value` strings, which implies that the `name` portion of
; the string cannot have NUL characters
posix-environment-variable-character =
      %x5C                 ; '\'    Beginning of escape sequence
      ( %x22               ; '"'    quotation mark  U+0022
      / %x5C               ; '\'    reverse solidus U+005C
      / %x61               ; 'a'    alert           U+0007
      / %x62               ; 'b'    backspace       U+0008
      / %x66               ; 'f'    form feed       U+000C
      / %x6E               ; 'n'    line feed       U+000A
      / %x72               ; 'r'    carriage return U+000D
      / %x74               ; 't'    tab             U+0009
      / %x76               ; 'v'    vertical tab    U+000B
      )
    ; Printable characters except double quote, backslash and equals
    / %x20-21
        ; %x22 = '"'
    / %x23-3C
        ; %x3D = '='
    / %x3E-5B
        ; %x5C = "\"
    / %x5D-7E

import-type = missing / local / http / env

hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"

import-hashed = import-type [ whsp1 hash ]

; "http://example.com"
; "./foo/bar"
; "env:FOO"
import = import-hashed [ whsp as whsp1 (Text / Location) ]

expression =
    ; "\(x : a) -> b"
      lambda whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
    
    ; "if a then b else c"
    / if whsp1 expression whsp then whsp1 expression whsp else whsp1 expression
    
    ; "let x : t = e1 in e2"
    ; "let x     = e1 in e2"
    ; We allow dropping the `in` between adjacent let-expressions; the following are equivalent:
    ; "let x = e1 let y = e2 in e3"
    ; "let x = e1 in let y = e2 in e3"
    / 1*let-binding in whsp1 expression
    
    ; "forall (x : a) -> b"
    / forall whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
    
    ; "a -> b"
    ;
    ; NOTE: Backtrack if parsing this alternative fails
    / operator-expression whsp arrow whsp expression
    
    ; "a with x = b"
    ;
    ; NOTE: Backtrack if parsing this alternative fails
    / with-expression
    
    ; "merge e1 e2 : t"
    ;
    ; NOTE: Backtrack if parsing this alternative fails since we can't tell
    ; from the keyword whether there will be a type annotation or not
    / merge whsp1 import-expression whsp1 import-expression whsp ":" whsp1 application-expression
    
    ; "[] : t"
    ;
    ; NOTE: Backtrack if parsing this alternative fails since we can't tell
    ; from the opening bracket whether or not this will be an empty list or
    ; a non-empty list
    / empty-list-literal
    
    ; "toMap e : t"
    ;
    ; NOTE: Backtrack if parsing this alternative fails since we can't tell
    ; from the keyword whether there will be a type annotation or not
    / toMap whsp1 import-expression whsp ":" whsp1 application-expression
    
    ; "assert : Natural/even 1 === False"
    / assert whsp ":" whsp1 expression
    
    ; "x : t"
    / annotated-expression

; Nonempty-whitespace to disambiguate `env:VARIABLE` from type annotations
annotated-expression = operator-expression [ whsp ":" whsp1 expression ]

; "let x = e1"
let-binding = let whsp1 nonreserved-label whsp [ ":" whsp1 expression whsp ] "=" whsp expression whsp

; "[] : t"
empty-list-literal =
    "[" whsp [ "," whsp ] "]" whsp ":" whsp1 application-expression

with-expression =
    import-expression 1*(whsp1 with whsp1 with-clause)

with-clause =
    any-label-or-some *(whsp "." whsp any-label-or-some) whsp "=" whsp operator-expression

operator-expression = equivalent-expression

; Nonempty-whitespace to disambiguate `http://a/a?a`
equivalent-expression    = import-alt-expression    *(whsp equivalent whsp import-alt-expression)
import-alt-expression    = or-expression            *(whsp "?" whsp1 or-expression)
or-expression            = plus-expression          *(whsp "||" whsp plus-expression)
; Nonempty-whitespace to disambiguate `f +2`
plus-expression          = text-append-expression   *(whsp "+" whsp1 text-append-expression)
text-append-expression   = list-append-expression   *(whsp "++" whsp list-append-expression)
list-append-expression   = and-expression           *(whsp "#" whsp and-expression)
and-expression           = combine-expression       *(whsp "&&" whsp combine-expression)
combine-expression       = prefer-expression        *(whsp combine whsp prefer-expression)
prefer-expression        = combine-types-expression *(whsp prefer whsp combine-types-expression)
combine-types-expression = times-expression         *(whsp combine-types whsp times-expression)
times-expression         = equal-expression         *(whsp "*" whsp equal-expression)
equal-expression         = not-equal-expression     *(whsp "==" whsp not-equal-expression)
not-equal-expression     = application-expression   *(whsp "!=" whsp application-expression)


; Import expressions need to be separated by some whitespace, otherwise there
; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`",
; or "apply the import `./a` to label `b`"
application-expression =
    first-application-expression *(whsp1 import-expression)

first-application-expression =
    ; "merge e1 e2"
      merge whsp1 import-expression whsp1 import-expression
    
    ; "Some e"
    / Some whsp1 import-expression
    
    ; "toMap e"
    / toMap whsp1 import-expression
    
    / import-expression

import-expression = import / completion-expression

completion-expression =
    selector-expression [ whsp complete whsp selector-expression ]

; `record.field` extracts one field of a record
;
; `record.{ field0, field1, field2 }` projects out several fields of a record
;
; NOTE: Backtrack when parsing the `*("." ...)`.  The reason why is that you
; can't tell from parsing just the period whether "foo." will become "foo.bar"
; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying
; the function `foo` to the relative path `./bar`)
selector-expression = primitive-expression *(whsp "." whsp selector)

selector = any-label / labels / type-selector

labels = "{" whsp [ any-label-or-some whsp *("," whsp any-label-or-some whsp) ] "}"

type-selector = "(" whsp expression whsp ")"
; NOTE: Backtrack when parsing the first three alternatives (i.e. the numeric
; literals).  This is because they share leading characters in common
primitive-expression =
    ; "2.0"
      double-literal
    
    ; "2"
    / natural-literal
    
    ; "+2"
    / integer-literal
    
    ; '"ABC"'
    / text-literal
    
    ; "{ foo = 1      , bar = True }"
    ; "{ foo : Integer, bar : Bool }"
    / "{" whsp [ "," whsp ] record-type-or-literal whsp "}"
    
    ; "< Foo : Integer | Bar : Bool >"
    ; "< Foo | Bar : Bool >"
    / "<" whsp [ "|" whsp ] union-type whsp ">"
    
    ; "[1, 2, 3]"
    / non-empty-list-literal
    
    ; "x"
    ; "x@2"
    / identifier
    
    ; "( e )"
    / "(" complete-expression ")"


record-type-or-literal =
      empty-record-literal
    / [non-empty-record-type-or-literal]

empty-record-literal = "="

non-empty-record-type-or-literal =
    (non-empty-record-type / non-empty-record-literal)

non-empty-record-type =
    record-type-entry *(whsp "," whsp record-type-entry)

record-type-entry = any-label-or-some whsp ":" whsp1 expression

non-empty-record-literal =
    record-literal-entry *(whsp "," whsp record-literal-entry)

; If the `record-literal-normal-entry` is absent, that represents a punned
; record entry, such as in `{ x }`, which is a short-hand for `{ x = x }`
record-literal-entry =
    any-label-or-some [record-literal-normal-entry]

record-literal-normal-entry =
    *(whsp "." whsp any-label-or-some) whsp "=" whsp expression

; If the `union-type-entry` is absent, that represents an empty union
; alternative, such as in `< Heads | Tails >`
union-type =
    [union-type-entry *(whsp "|" whsp union-type-entry)]

; x : Natural
; x
union-type-entry = any-label-or-some [ whsp ":" whsp1 expression ]


non-empty-list-literal =
    "[" whsp [ "," whsp ] expression whsp *("," whsp expression whsp) "]"

; This just adds surrounding whitespace for the top-level of the program
complete-expression = whsp expression whsp

A  => src/dhall/term.nim +401 -0
@@ 1,401 @@
import bigints

import std/options, std/strutils, std/tables

type
  UniverseKind* = enum
    uType = "Type", uKind = "Kind", uSort = "Sort"

  BuiltinKind* = enum
    bNaturalBuild = "Natural/build",
    bNaturalFold = "Natural/fold",
    bNaturalIsZero = "Natural/isZero",
    bNaturalEven = "Natural/even",
    bNaturalOdd = "Natural/odd",
    bNaturalToInteger = "Natural/toInteger",
    bNaturalShow = "Natural/show",
    bNaturalSubtract = "Natural/subtract",
    bIntegerDouble = "Integer/toDouble",
    bIntegerShow = "Integer/show",
    bIntegerNegate = "Integer/negate",
    bIntegerClamp = "Integer/clamp",
    bDoubleShow = "Double/show",
    bListBuild = "List/build",
    bListFold = "List/fold",
    bListLength = "List/length",
    bListHead = "List/head",
    bListLast = "List/last",
    bListIndexed = "List/indexed",
    bListReverse = "List/reverse",
    bOptionalBuild = "Optional/build",
    bOptionalFold = "Optional/fold",
    bTextShow = "Text/show",
    bBool = "Bool",
    bOptional = "Optional",
    bNatural = "Natural",
    bInteger = "Integer",
    bDouble = "Double",
    bText = "Text",
    bList = "List",
    bTrue = "True",
    bFalse = "False",
    bNone = "None",
    bType = "Type",
    bKind = "Kind",
    bSort = "Sort"

  OpKind* = enum
    opBoolOr = (0, "||"),
    opBoolAnd = (1, "&&"),
    opBoolEquality = (2, "=="),
    opBoolInequality = (3, "!="),
    opNaturalAdd = (4, "+"),
    opNaturalMultiplication = (5, "*"),
    opTextAppend = (6, "++"),
    opListAppend = (7, "#"),
    opRecordRecursiveMerge = (8, "∧"),
    opRecordBiasedMerge = (9, "⫽"),
    opRecordTypeMerge = (10, "⩓"),
    opAlternateImport = (11, "?"),
    opEquivalience = (12, "≡"),
    opComplete = (13, "::")

  ImportKind* = enum
    iCode = 0,
    iText = 1,
    iLocation = 2

  TermKind* = enum
    tApp = 0,
    tLambda = 1,
    tPi = 2,
    tOp = 3,
    tList = 4,
    tSome = 5,
    tMerge = 6,
    tRecordType = 7,
    tRecordLiteral = 8,
    tField = 9,
    tProject = 10,
    tUnionType = 11,
    tIf = 14,
    tNaturalLiteral = 15,
    tIntegerLiteral = 16,
    tTextLiteral = 18,
    tAssert = 19,
    tImport = 24,
    tLet = 25,
    tAnnotation = 26,
    tToMap = 27
    tEmptyList = 28
    tVar,
    tBuiltin,
    tUniverse,
    tProjectType,
    tBoolLiteral,
    tDoubleLiteral,

    tEntry,
    tTextChunk,
    tBinding

  Term* = ref object
    case kind*: TermKind
    of tVar:
      varName*: string
      varIndex*: int
    of tBuiltin:
      builtin*: BuiltinKind
    of tUniverse:
      universe*: UniverseKind
    of tApp:
      appFun*: Term
      appArgs*: seq[Term]
    of tLambda:
      lambdaLabel*: string
      lambdaType*, lambdaBody*: Term
    of tPi:
      piLabel*: string
      piType*, piBody*: Term
      # TODO: share these fields with tLambda?
    of tOp:
      op*: OpKind
      opL*, opR*: Term
    of tList:
      listType*: Term
      list*: seq[Term]
    of tSome:
      someType*, someVal*: Term
    of tMerge:
      mergeHandler*, mergeUnion*, mergeAnn*: Term
    of tRecordType:
      recordType*: Table[string, Term]
    of tRecordLiteral:
      recordLiteral*: Table[string, Term]
    of tField:
      fieldRecord*: Term
      fieldName*: string
    of tProject:
      projectRecord*: Term
      projectNames*: seq[string]
    of tProjectType:
      projectTypeRecord*: Term
      projectTypeSelector*: Term
    of tUnionType:
      union*: Table[string, Term]
    of tBoolLiteral:
      bool*: bool
    of tIf:
      ifCond*, ifTrue*, ifFalse*: Term
    of tNaturalLiteral:
      natural*: BigInt
    of tIntegerLiteral:
      integer*: BigInt
    of tDoubleLiteral:
      double*: BiggestFloat
    of tTextLiteral:
      textChunks*: seq[Term]
      textSuffix*: string
    of tAssert:
      assertAnn*: Term
    of tImport:
      importCheck*: seq[byte]
      importKind*: ImportKind
      importScheme*: uint8
      importHeaders*: Term
      importElements*: seq[string]
      importQuery*: Option[string]
    of tLet:
      letBinds*: seq[Term] # binding terms
      letBody*: Term
    of tAnnotation:
      annExpr*, annAnn*: Term
    of tToMap:
      toMapBody*, toMapAnn*: Term
    of tEmptyList:
      emptyListType*: Term

    of tEntry:
      # TODO: probably redundant with tBinding
      entryKey*: string
      entryVal*: Term
    of tTextChunk:
      textPrefix*: string
      textExpr*: Term
    of tBinding:
      key*: string
      val*, ann* : Term

func `$`*(t: Term): string =
  if t.isNil: return "null"
  case t.kind:
  of tUniverse:
    result = $t.universe
  of tBuiltin:
    result = $t.builtin
  of tBoolLiteral:
    result = if t.bool: "True" else: "False"
  of tVar:
    if t.varIndex > 0:
      result = t.varName & "@" & $t.varIndex
    else:
      result = t.varName
  of tLambda:
    result = "(λ(" & t.lambdaLabel & " : " & $t.lambdaType & ") → " &
        $t.lambdaBody & ")"
  of tPi:
    if t.piLabel == "_":
      result = $t.piType & " → " & $t.piBody
    else:
      result = "∀(" & t.piLabel & " : " & $t.piType & ") → " & $t.piBody
  of tApp:
    result = "(" & $t.appFun
    for arg in t.appArgs:
      result.add(" ")
      result.add($arg)
    result.add(" )")
  of tOp:
    result = "(" & $t.opL & " " & $t.op & " " & $t.opR & ")"
  of tLet:
    result = ""
    for b in t.letBinds:
      result.add("let ")
      result.add(b.key)
      if not b.ann.isNil:
        result.add(": ")
        result.add($b.ann)
      result.add(" = ")
      result.add($b.val)
    result.add(" in ")
    result.add($t.letBody)
  of tAnnotation:
    result = $t.annExpr & " : " & $t.annAnn
  of tNaturalLiteral:
    result = $t.natural
  of tList:
    if t.list.len == 0:
      result = "[] : " & $t.listType
    else:
      result = "[ " & join(t.list, ", ") & " ]"
  of tTextLiteral:
    var hasNewline = t.textSuffix.contains({'\n'})
    for tc in t.textChunks:
      if hasNewline: break
      hasNewLine = tc.textPrefix.contains({'\n'})
    if hasNewline:
      result = "''\n"
      for tc in t.textChunks:
        result.add $tc
      result.add t.textSuffix
      result.add "''"
    else:
      result = "\""
      for tc in t.textChunks:
        result.add $tc
      result.add t.textSuffix
      result.add "\""
  of tAssert:
    result = "assert : " & $t.assertAnn
  of tImport:
    result = "import "
    result.add case t.importKind
      of iCode: ""
      of iText: "as Text "
      of iLocation: "as Location "
    result.add case t.importScheme
      of 0: "http://"
      of 1: "https://"
      of 2: "/"
      of 3: "./"
      of 4: "../"
      of 5: "~/"
      of 6: "env:"
      of 7: "missing"
      else: raiseAssert("unhandled import scheme " & $t.importScheme)
    result.add(join(t.importElements, "/"))
    if t.importQuery.isSome:
      result.add("?")
      result.add(t.importQuery.get)
    if not t.importCheck.len == 0:
      result.add " sha256:"
      for i in 2..31:
        result.add t.importCheck[i].toHex
  of tIf:
    result = "if " & $t.ifCond & " then " & $t.ifTrue & " else " & $t.ifFalse
  of tDoubleLiteral:
    result = $t.double
  of tIntegerLiteral:
    result = if t.integer > 0:
        "+" & $t.integer
      else: $t.integer
  of tSome:
    result = "Some " & $t.someVal
  of tRecordType:
    if t.recordType.len == 0:
      result = "{}"
    else:
      result = "{ "
      for key, val in t.recordType.pairs:
        if key == "" or key.startsWith(' ') or key.endsWith(' '):
          # TODO: builtin labels
          result.add "`"
          result.add(key)
          result.add "`"
        else:
          result.add(key)
        result.add(" : ")
        result.add($val)
        result.add(", ")
      result[^2] = ' '
      result[^1] = '}'
  of tRecordLiteral:
    if t.recordLiteral.len == 0:
      result = "{=}"
    else:
      result = "{ "
      for key, val in t.recordLiteral.pairs:
        result.add(key)
        result.add(" = ")
        result.add($val)
        result.add(", ")
      result[^2] = ' '
      result[^1] = '}'
  of tToMap:
    if t.toMapAnn.isNil:
      result = "toMap " & $t.toMapBody
    else:
      result = "toMap " & $t.toMapBody & " : " & $t.toMapAnn
  of tEmptyList:
    result = "[] : " & $t.emptyListType
  of tField:
    result = "(" & $t.fieldRecord & ")." & t.fieldName
  of tProject:
    result = $t.projectRecord & ".{" & join(t.projectNames, ", ") & "}"
  of tProjectType:
    result = $t.projectTypeRecord & ".(" & $t.projectTypeSelector & ")"
  of tUnionType:
    if t.union.len == 0:
      result = "<>"
    else:
      result = "< "
      for key, val in t.union.pairs:
        result.add(key)
        if not val.isNil:
          result.add(" : ")
          result.add($val)
        result.add(" | ")
      result[^2] = ' '
      result[^1] = '>'
  of tMerge:
    if t.mergeAnn.isNil:
      result = "merge (" & $t.mergeHandler & ") (" & $t.mergeUnion & ")"
    else:
      result = "merge (" & $t.mergeHandler & ") (" & $t.mergeUnion &
          ") : " & $t.mergeAnn
  of tEntry:
    result = "« " & $t.entryKey & " = " & $t.entryVal & " »"
  of tTextChunk:
    result = t.textPrefix & "${" & $t.textExpr & "}"
  of tBinding:
    result = "« " & $t.key & " = " & $t.val & " »"

func parseBuiltin*(s: string): BuiltinKind =
  case s
  of "Natural/build": bNaturalBuild
  of "Natural/fold": bNaturalFold
  of "Natural/isZero": bNaturalIsZero
  of "Natural/even": bNaturalEven
  of "Natural/odd": bNaturalOdd
  of "Natural/toInteger": bNaturalToInteger
  of "Natural/show": bNaturalShow
  of "Natural/subtract": bNaturalSubtract
  of "Integer/toDouble": bIntegerDouble
  of "Integer/show": bIntegerShow
  of "Integer/negate": bIntegerNegate
  of "Integer/clamp": bIntegerClamp
  of "Double/show": bDoubleShow
  of "List/build": bListBuild
  of "List/fold": bListFold
  of "List/length": bListLength
  of "List/head": bListHead
  of "List/last": bListLast
  of "List/indexed": bListIndexed
  of "List/reverse": bListReverse
  of "Optional/build": bOptionalBuild
  of "Optional/fold": bOptionalFold
  of "Text/show": bTextShow
  of "Bool": bBool
  of "Optional": bOptional
  of "Natural": bNatural
  of "Integer": bInteger
  of "Double": bDouble
  of "Text": bText
  of "List": bList
  of "True": bTrue
  of "False": bFalse
  of "None": bNone
  of "Type": bType
  of "Kind": bKind
  of "Sort": bSort
  else:
    raise newException(ValueError, "invalid builtin " & s)

A  => tests/config.nims +1 -0
@@ 1,1 @@
switch("path", "$projectDir/../src")
\ No newline at end of file