shell/math: fix parsing of ?: and explain why it's parsed that way

This fixes arith-precedence1.tests.

This breaks arith-ternary2.tests again (we now evaluate variables
on not-taken branches). We need better logic here anyway:
it is not only bare variables that must not be evaluated when not taken:
	1 ? eval_me : do_not_eval
but any (arbitrarily complex) expression must not be
evaluated either!
	1 ? var_is_set=1 : ((var_is_not_set=2,var2*=4))

function                                             old     new   delta
evaluate_string                                     1097    1148     +51

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
master
Denys Vlasenko 2023-06-15 10:07:12 +02:00
parent 3829d8b675
commit 5f56a03882
11 changed files with 61 additions and 26 deletions

View File

@ -0,0 +1 @@
3:3

View File

@ -0,0 +1,2 @@
exec 2>&1
echo 3:$((1?(2?(3):4):5))

View File

@ -92,7 +92,7 @@ ghi
./arith.tests: line 190: arithmetic syntax error
16 16
./arith.tests: line 195: arithmetic syntax error
./arith.tests: line 196: malformed ?: operator
./arith.tests: line 196: arithmetic syntax error
./arith.tests: line 197: arithmetic syntax error
9 9
./arith.tests: line 204: arithmetic syntax error

View File

@ -0,0 +1,4 @@
4:4
4:4
4:4
4:4

View File

@ -0,0 +1,15 @@
exec 2>&1
# bash documentation says that precedence order is:
# ...
# expr ? expr1 : expr2
# = *= /= %= += -= <<= >>= &= ^= |=
# exprA , exprB
# but in practice, the rules for expr1 and expr2 are different:
# assignments and commas in expr1 have higher precedence than ?:,
# but in expr2 they do not:
# "v ? 1,2 : 3,4" is parsed as "(v ? (1,2) : 3),4"
# "v ? a=2 : b=4" is parsed as "(v ? (a=2) : b)=4" (thus, this is a syntax error)
echo 4:$((0 ? 1,2 : 3,4))
echo 4:$((1 ? 1,2 : 3,4))
echo 4:"$((0 ? 1,2 : 3,4))"
echo 4:"$((1 ? 1,2 : 3,4))"

View File

@ -1,3 +0,0 @@
6:6
a=b=+err+
b=6

View File

@ -1,7 +0,0 @@
exec 2>&1
a='b=+err+'
b=5
# The not-taken branch should not parse variables
echo 6:$((0 ? a : ++b))
echo "a=$a"
echo "b=$b"

View File

@ -0,0 +1 @@
3:3

View File

@ -0,0 +1,2 @@
exec 2>&1
echo 3:$((1?(2?(3):4):5))

View File

@ -94,7 +94,7 @@ ghi
hush: arithmetic syntax error
16 16
hush: arithmetic syntax error
hush: malformed ?: operator
hush: arithmetic syntax error
hush: arithmetic syntax error
9 9
hush: arithmetic syntax error

View File

@ -157,17 +157,17 @@ typedef unsigned char operator;
#define fix_assignment_prec(prec) do { if (prec == 3) prec = 2; } while (0)
/* Ternary conditional operator is right associative too */
// FIXME:
// bash documentation says that precedence order is:
// ...
// expr ? expr1 : expr2
// = *= /= %= += -= <<= >>= &= ^= |=
// exprA , exprB
// but in practice, the rules for expr1 and expr2 are different:
// assignments and commas in expr1 have higher precedence than ?:,
// but in expr2 they haven't:
// "v ? 1,2 : 3,4" is parsed as "(v ? (1,2) : 3),4"
// "v ? a=2 : b=4" is parsed as "(v ? (a=1) : b)=4" (thus, this is a syntax error)
/*
* bash documentation says that precedence order is:
* ...
* expr ? expr1 : expr2
* = *= /= %= += -= <<= >>= &= ^= |=
* exprA , exprB
* What it omits is that expr1 is parsed as if parenthesized
* (this matches the rules of ?: in C language):
* "v ? 1,2 : 3,4" is parsed as "(v ? (1,2) : 3),4"
* "v ? a=2 : b=4" is parsed as "(v ? (a=2) : b)=4" (thus, this is a syntax error)
*/
#define TOK_CONDITIONAL tok_decl(4,0)
#define TOK_CONDITIONAL_SEP tok_decl(4,1)
@ -629,6 +629,7 @@ evaluate_string(arith_state_t *math_state, const char *expr)
/* Stack of operator tokens */
operator *const opstack = alloca(expr_len * sizeof(opstack[0]));
operator *opstackptr = opstack;
operator insert_op = 0xff;
/* Start with a left paren */
dbg("(%d) op:TOK_LPAREN", (int)(opstackptr - opstack));
@ -751,11 +752,24 @@ evaluate_string(arith_state_t *math_state, const char *expr)
goto err;
}
}
/* NB: expr now points past the operator */
tok_found:
op = p[1]; /* fetch TOK_foo value */
tok_found1:
/* NB: expr now points past the operator */
/* Special rule for "? EXPR :"
* "EXPR in the middle of ? : is parsed as if parenthesized"
* (this quirk originates in C grammar, I think).
*/
if (op == TOK_CONDITIONAL) {
insert_op = TOK_LPAREN;
dbg("insert_op=%02x", insert_op);
}
if (op == TOK_CONDITIONAL_SEP) {
insert_op = op;
op = TOK_RPAREN;
dbg("insert_op=%02x op=%02x", insert_op, op);
}
tok_found1:
/* post grammar: a++ reduce to num */
if (lasttok == TOK_POST_INC || lasttok == TOK_POST_DEC)
lasttok = TOK_NUM;
@ -865,9 +879,15 @@ dbg(" numstack:%d val:%lld '%s'", (int)(numstackptr - numstack), numstackptr[
/* else: LPAREN or UNARY: push it on opstack */
push_op:
/* Push this operator to opstack */
dbg("(%d) op:%02x", (int)(opstackptr - opstack), op);
dbg("(%d) op:%02x insert_op:%02x", (int)(opstackptr - opstack), op, insert_op);
*opstackptr++ = lasttok = op;
next: ;
if (insert_op != 0xff) {
op = insert_op;
insert_op = 0xff;
dbg("inserting %02x", op);
goto tok_found1;
}
} /* while (1) */
err: