/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * yacc grammar for awk.  The rules build a tree of Node objects; on a
 * clean parse the `program' rule stores the finished tree in `winner'.
 */

%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

Node	*beginloc = 0;	/* accumulated BEGIN actions (see pa_stat) */
Node	*endloc = 0;	/* accumulated END actions (see pa_stat) */
bool	infunc	= false;	/* = true if in arglist or body of func */
int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/* Precedence, lowest first. */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/* Start symbol: on success, winner = PROGRAM(beginloc, pattern-actions, endloc). */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/* Tokens that may be followed by any number of newlines. */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/*
 * for loops.  The mid-rule {inloop++;} occupies a value slot of its own,
 * which is why the loop body is $12 / $10 / $8 in the final actions.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

/* Name in a function definition; setfname() rejects arrays and redefinitions. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

lbrace:
	  '{' | lbrace NL
	;

nl:
	  NL | nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

/* The list of pattern-action statements, with optional separators around it. */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

/*
 * One pattern-action statement.  A pattern without an action gets a
 * PRINT statement built over rectonode().  BEGIN/END bodies are linked
 * onto beginloc/endloc and function definitions are handed to defn();
 * those three forms contribute 0 to the statement list.
 */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/*
 * ppattern: the expression form accepted in print argument lists (via
 * pplist/prarg).  Compared with `pattern' it has no relational-operator
 * and no "| getline" alternatives.
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3)) {
			/* constant right-hand side: compile it as a regex now */
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			free($3);
		  } else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* pattern: the general expression form. */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
	| pattern MATCHOP pattern
		{ if (constnode($3)) {
			/* constant right-hand side: compile it as a regex now */
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			free($3);
		  } else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* Comma-separated list of two or more patterns. */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; an empty list becomes rectonode(). */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

/* Run of statement separators (newlines and semicolons). */
pst:
	  NL | ';' | pst NL | pst ';'
	;

rbrace:
	  '}' | rbrace NL
	;

/* A bare /regex/ used as an expression: a MATCH against rectonode(). */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/*
 * startreg() is called as soon as the opening '/' has been seen, before
 * the REGEXPR token is read.  The value is the REGEXPR text itself; the
 * productions that consume a reg_expr free it after calling makedfa().
 */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

rparen:
	  ')' | rparen NL
	;

/* Statements that are also allowed in the init/increment slots of for(). */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator. */
st:
	  nl
	| ';' opt_nl
	;

stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
			 	  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

/* Adjacent string constants are joined with catstr(). */
string:
	  STRING
	| string STRING		{ $$ = catstr($1, $2); }
	;

term:
 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	/*
	 * gensub(): the regex text is freed once makedfa() has been
	 * called on it, as every other reg_expr production in this
	 * grammar does.
	 */
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); free($3); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); free($3); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5)) {
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
			free($5);
		  } else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string	 		{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
	;

/*
 * Parameter list of a function definition.  Every alternative also
 * records the list built so far in the global arglist.
 */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG 			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;


while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * Record p as the function being defined.  Reports a syntax error if p
 * is already an array or already a function; curfname is set to the
 * name in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/* Return nonzero if p is a value node whose cell has csub == CCON. */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the string value (sval) of the cell held in p->narg[0]. */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Return n unchanged when its top-level operator is a comparison or a
 * boolean operator (LE, LT, EQ, NE, GT, GE, BOR, AND, NOT); otherwise
 * wrap it as (n NE nullnode).  n must not be NULL.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;	/* report only the first clash */
		}
	}
}