diff --git a/editors/awk.c b/editors/awk.c index 8bc214b69..697a44c8c 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 = ; static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, + 0, /* ( */ + 0, /* ) */ #define TI_REGEXP OC_REGEXP - TI_REGEXP, + TI_REGEXP, /* / */ + /* >> > | */ xS|'a', xS|'w', xS|'|', + /* ++ -- */ OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') + /* ++ -- $ */ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', - OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', + /* == = += -= */ + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + /* *= /= %= ^= (^ is exponentiation, NOT xor) */ + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + /* + - **= ** */ + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + /* / % ^ * */ OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + /* != >= <= > */ OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) + /* < !~ ~ && */ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), #define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') #define TI_COLON (OC_COLON|xx|P(67)|':') + /* || ? : */ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, #define TI_IN (OC_IN|SV|P(49)) TI_IN, #define TI_COMMA (OC_COMMA|SS|P(80)) TI_COMMA, #define TI_PGETLINE (OC_PGETLINE|SV|P(37)) - TI_PGETLINE, + TI_PGETLINE, /* | */ + /* + - ! 
*/ OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 0, /* ] */ - 0, - 0, - 0, + 0, /* { */ + 0, /* } */ + 0, /* ; */ 0, /* \n */ ST_IF, ST_DO, ST_FOR, OC_BREAK, OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, @@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = { #undef OC_F }; +/* gawk 5.1.1 manpage says the precedence of comparisons and assignments is as follows: + * ...... + * < > <= >= == != + * ~ !~ + * in + * && + * || + * ?: + * = += -= *= /= %= ^= + * But there are some abnormalities: + * awk 'BEGIN { print v=3==3,v }' - ok: + * 1 1 + * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment: + * 1 3 + * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}' + * More than one comparison op fails to parse: + * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work) + * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work) + * + * The ternary a?b:c works as follows in gawk: "a" can't be an assignment + * ("= has lower precedence than ?") but inside "b" or "c", assignment + * is higher precedence: + * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }' + * 5 + * 5 1 5 + * This differs from C and shell's "test" rules for ?: which have implicit () + * around "b" in ?:, but not around "c" - they would barf on "w=5" above. + * gawk allows nesting of ?: - this works: + * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9) + * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)" + */ + /* internal variable names and their initial values */ /* asterisk marks SPECIAL vars; $ is just no-named Field0 */ enum { @@ -1409,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc) vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } - if (t_info == TI_TERNARY) + if (t_info == TI_TERNARY) /* "?" operator */ //TODO: why? 
t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 063084a1c..be25f6696 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -5,6 +5,7 @@ . ./testing.sh +sq="'" # testing "description" "command" "result" "infile" "stdin" testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" "" @@ -479,12 +480,6 @@ testing 'awk backslash+newline eaten with no trace' \ "Hello world\n" \ '' '' -testing 'awk assign while test' \ - "awk '\$1==\$1=\"foo\" {print \$1}'" \ - "foo\n" \ - "" \ - "foo" - # User-supplied bug (SEGV) example, was causing use-after-realloc testing 'awk assign while assign' \ "awk '\$5=\$\$5=\$0'; echo \$?" \ @@ -543,16 +538,30 @@ testing 'awk assign while assign' \ # If field separator FS=' ' (default), fields are split only on # space or tab or linefeed, NOT other whitespace. testing 'awk does not split on CR (char 13)' \ - "awk '{ \$1=\$0; print }'" \ + 'awk '$sq'{ $1=$0; print }'$sq \ 'word1 word2 word3\r word2 word3\r\n' \ '' 'word1 word2 word3\r' -testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ - "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ - '0\n1\n2\n1\n3\n' \ +# No, it seems a bug in gawk parser. +#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ +# "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ +# '0\n1\n2\n1\n3\n' \ +# '' '' +# +#testing 'awk assign while test' \ +# 'awk '$sq'$1==$1="foo" {print $1}'$sq \ +# "foo\n" \ +# "" \ +# "foo" + +testing "awk = and ?: precedence" \ + 'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \ + 'ok\n' \ '' '' -sq="'" +# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}' +# and even this: u=0?v=4?5:6:w=7?8:9 + testing 'awk gensub backslashes \' \ 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\