~andyc/oil

a52755a56d2d3121965194f0d8cfba526368358f — Andy Chu 20 days ago f5087eb
[oil-language] Block literals are ^(echo hi | wc -l)

Instead of &(echo hi)

This was in preparation for & meaning "file descriptor" and parse_amp.
That might be deferred but I think this is cleaner anyway.

Related to #980.
M frontend/id_kind_def.py => frontend/id_kind_def.py +3 -1
@@ 368,7 368,9 @@ def AddKinds(spec):
      'ProcSubOut',         # >( )

      'AtParen',            # @( for split command sub
      'AmpParen',           # &( for block literal in expression mode
      'CaretParen',         # ^( for Block literal in expression mode
      'CaretBracket',       # ^[ for Expr literal
      'CaretBrace',         # ^{ for Arglist

      'PercentParen',       # %( for word arrays
  ])

M frontend/lexer_def.py => frontend/lexer_def.py +7 -20
@@ 72,14 72,11 @@ _SIGNIFICANT_SPACE = R(r'[ \t\r]+', Id.WS_Space)
_TILDE_LIKE = R(r'~[a-zA-Z0-9_.-]*', Id.Lit_TildeLike)

_BACKSLASH = [
  # TODO: Should Oil make this stricter?  \a-\z, \A-\Z, \0-\9, should be
  # written without \.
  # But that's a lot of work since this is used in
  # lex_mode_e.{ShCommand,VSub_ArgUnquoted,DBracket,ExtGlob,BashRegex}
  #
  # To be conservative, we could deny a set of chars similar to
  # _LITERAL_WHITELIST_REGEX, rather than allowing all the operator characters
  # like \( and \;.
  #
  # strict_backslash makes this stricter.

  R(r'\\[^\n\0]', Id.Lit_EscapedChar),
  C('\\\n', Id.Ignored_LineCont),


@@ 227,7 224,7 @@ _KEYWORDS = [
  # Tea-only

  # TODO: parse_tea should enable these so we can have 'setvar x = func'
  C('func',      Id.KW_Func),  # 'def' for migration path?
  C('func',      Id.KW_Func),
  C('data',      Id.KW_Data),
  C('enum',      Id.KW_Enum),
  C('class',     Id.KW_Class),


@@ 249,7 246,6 @@ _CONTROL_FLOW = [

# Used by oil_lang/grammar_gen.py too
EXPR_WORDS = [
  # TODO: Should be True/False/None to be Python compatible
  C('null', Id.Expr_Null),
  C('true', Id.Expr_True),
  C('false', Id.Expr_False),


@@ 267,8 263,6 @@ EXPR_WORDS = [

  # for function literals
  C('func', Id.Expr_Func),
  # TODO: also allow 'def' for compatibility?  At the cost of vars named
  # 'def'?

  # Note: can 'virtual' just be 'override'?  What do other languages do?
  C('virtual',   Id.Expr_Virtual),


@@ 530,12 524,8 @@ _C_STRING_COMMON = [
  # This is an incompatible extension to make Oil strings "sane" and QSN
  # compatible.  I don't want to have yet another string syntax!  A lint tool
  # could get rid of the legacy stuff like \U.

  _U_BRACED_CHAR,

  # TODO: Also add \u{123456} here
  # And make sure there are syntax errors

  R(r'\\[0abeEfrtnv\\]', Id.Char_OneChar),

  # Backslash that ends a line.  Note '.' doesn't match a newline character.


@@ 777,7 767,7 @@ BRACE_RANGE_DEF = [
]

#
# Oil lexing.  TODO: Move to a different file?
# Oil lexing
#




@@ 807,7 797,9 @@ OIL_LEFT_UNQUOTED = [

  C('@(', Id.Left_AtParen),         # Split Command Sub

  C('&(', Id.Left_AmpParen),        # Block literals in expression mode
  C('^(', Id.Left_CaretParen),      # Block literals in expression mode
  C('^[', Id.Left_CaretBracket),    # Expr literals
  C('^{', Id.Left_CaretBrace),      # ArgList literals

  C('%(', Id.Left_PercentParen),    # shell-like word arrays.



@@ 872,8 864,6 @@ float = digitpart fraction? exponent? | fraction exponent?
"""
}

# TODO: Should all of these be Kind.Op instead of Kind.Arith?  And Kind.Expr?

# NOTE: Borrowing tokens from Arith (i.e. $(( )) ), but not using LexerPairs().
LEXER_DEF[lex_mode_e.Expr] = \
    _VARS + OIL_LEFT_SUBS + OIL_LEFT_UNQUOTED + EXPR_OPS + EXPR_WORDS + \


@@ 908,9 898,6 @@ LEXER_DEF[lex_mode_e.Expr] = \
  # space around them?
  R(VAR_NAME_RE, Id.Expr_Name),

  # TODO:
  # - Call this Expr_PercentSymbol
  # - Expr_ColonSymbol for interned strings
  R('%' + VAR_NAME_RE, Id.Expr_Symbol),

  #

M oil_lang/expr_eval.py => oil_lang/expr_eval.py +1 -1
@@ 253,7 253,7 @@ class OilEvaluator(object):
    if node.tag == expr_e.CommandSub:
      id_ = node.left_token.id
      # ^(echo block literal)
      if id_ == Id.Left_AmpParen:
      if id_ == Id.Left_CaretParen:
        return 'TODO: value.Block'
      else:
        stdout = self.shell_ex.RunCommandSub(node)

M oil_lang/expr_parse.py => oil_lang/expr_parse.py +1 -1
@@ 203,7 203,7 @@ def _PushOilTokens(parse_ctx, gr, p, lex):
        continue

      # $(  @(  ^(
      if tok.id in (Id.Left_DollarParen, Id.Left_AtParen, Id.Left_AmpParen):
      if tok.id in (Id.Left_DollarParen, Id.Left_AtParen, Id.Left_CaretParen):

        left_token = tok


M oil_lang/grammar.pgen2 => oil_lang/grammar.pgen2 +1 -1
@@ 240,7 240,7 @@ array_item: (
 | '(' test ')'
)
sh_array_literal: '%(' Expr_CastedDummy Right_ShArrayLiteral
sh_command_sub: ( '$(' | '@(' | '&(' ) Expr_CastedDummy Eof_RParen
sh_command_sub: ( '$(' | '@(' | '^(' ) Expr_CastedDummy Eof_RParen

# Note: could add c"" too
dq_string: (Left_DoubleQuote | Left_TDoubleQuote) Expr_CastedDummy Right_DoubleQuote

M spec/oil-blocks.test.sh => spec/oil-blocks.test.sh +5 -4
@@ 103,12 103,13 @@ x=42
x=42
## END

#### block literal in expression mode: &(echo $PWD)
#### block literal in expression mode: ^(echo $PWD)
shopt -s oil:all

myblock = &(echo $PWD)

b2 = &(echo one; echo two)
const myblock = ^(echo $PWD | wc -l)
const b2 = ^(echo one; echo two)
= myblock
= b2

# TODO:
# Implement something like this?