# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#       cpp5.c
#       cpp6.c
#
echo x - cpp5.c
sed 's/^X//' >cpp5.c << 'END-of-cpp5.c'
X/*
X *                          C P P 5 . C
X *              E x p r e s s i o n   E v a l u a t i o n
X *
X * Edit History
X * 31-Aug-84    MM      USENET net.sources release
X * 04-Oct-84    MM      __LINE__ and __FILE__ must call ungetstring()
X *                      so they work correctly with token concatenation.
X *                      Added string formal recognition.
X * 25-Oct-84    MM      "Short-circuit" evaluate #if's so that we
X *                      don't print unnecessary error messages for
X *                      #if !defined(FOO) && FOO != 0 && 10 / FOO ...
X * 31-Oct-84    ado/MM  Added token concatenation
X *  6-Nov-84    MM      Split from #define stuff, added sizeof stuff
X * 19-Nov-84    ado     #if error returns TRUE for (sigh) compatibility
X */
X
X#include        <stdio.h>
X#include        <ctype.h>
X#include        "cppdef.h"
X#include        "cpp.h"
X
X/*
X * Evaluate an #if expression.
X */
X
X/*
X * Printable operator names, indexed by operator code.
X * Used for debug and error messages only.
X */
Xstatic char     *opname[] = {
X    "end of expression", "val", "id",
X    "+",        "-",  "*",  "/",  "%",
X    "<<",       ">>", "&",  "|",  "^",
X    "==",       "!=", "<",  "<=", ">=", ">",
X    "&&",       "||", "?",  ":",  ",",
X    "unary +", "unary -", "~", "!",  "(",  ")", "(none)",
X};
X
X/*
X * opdope[] has the operator precedence:
X *     Bits
X *        7     Unused (so the value is always positive)
X *      6-2     Precedence (000x .. 017x)
X *      1-0     Binary op. flags:
X *          01  The binop flag should be set/cleared when this op is seen.
X *          10  The new value of the binop flag.
X * Note:  Expected, New binop
X * constant     0       1       Binop, end, or ) should follow constants
X * End of line  1       0       End may not be preceded by an operator
X * binary       1       0       Binary op follows a value, value follows.
X * unary        0       0       Unary op doesn't follow a value, value follows
X *   (          0       0       Doesn't follow value, value or unop follows
X *   )          1       1       Follows value.  Op follows.
X */
X
Xstatic char     opdope[OP_MAX] = {
X    0001,                               /* End of expression            */
X    0002,                               /* Digit                        */
X    0000,                               /* Letter (identifier)          */
X    0141, 0141, 0151, 0151, 0151,       /* ADD, SUB, MUL, DIV, MOD      */
X    0131, 0131, 0101, 0071, 0071,       /* ASL, ASR, AND,  OR, XOR      */
X    0111, 0111, 0121, 0121, 0121, 0121, /*  EQ,  NE,  LT,  LE,  GE,  GT */
X    0061, 0051, 0041, 0041, 0031,       /* ANA, ORO, QUE, COL, CMA      */
X/*
X * Unary op's follow
X */
X    0160, 0160, 0160, 0160,             /* NEG, PLU, COM, NOT           */
X    0170, 0013, 0023,                   /* LPA, RPA, END                */
X};
X/*
X * OP_QUE and OP_RPA have alternate precedences:
X */
X#define OP_RPA_PREC     0013
X#define OP_QUE_PREC     0034
X
X/*
X * S_ANDOR and S_QUEST signal "short-circuit" boolean evaluation, so that
X *      #if FOO != 0 && 10 / FOO ...
X * doesn't generate an error message.  They are stored in optab.skip.
X */
X#define S_ANDOR         2
X#define S_QUEST         1
X
Xtypedef struct optab {
X    char        op;                     /* Operator                     */
X    char        prec;                   /* Its precedence               */
X    char        skip;                   /* Short-circuit: TRUE to skip  */
X} OPTAB;
Xstatic int      evalue;                 /* Current value from evallex() */
X
X#ifdef  nomacargs
XFILE_LOCAL int
Xisbinary(op)
Xregister int    op;
X{
X        return (op >= FIRST_BINOP && op <= LAST_BINOP);
X}
X
XFILE_LOCAL int
Xisunary(op)
Xregister int    op;
X{
X        return (op >= FIRST_UNOP && op <= LAST_UNOP);
X}
X#else
X/*
X * The argument is parenthesized so that an expression argument
X * cannot rebind against the comparison operators.
X */
X#define isbinary(op)    ((op) >= FIRST_BINOP && (op) <= LAST_BINOP)
X#define isunary(op)     ((op) >= FIRST_UNOP  && (op) <= LAST_UNOP)
X#endif
X
X/*
X * The following definitions are used to specify basic variable sizes.
X * Each may be overridden before this point (they are only defined
X * here if not already defined).
X */
X
X#ifndef S_CHAR
X#define S_CHAR          (sizeof (char))
X#endif
X#ifndef S_SINT
X#define S_SINT          (sizeof (short int))
X#endif
X#ifndef S_INT
X#define S_INT           (sizeof (int))
X#endif
X#ifndef S_LINT
X#define S_LINT          (sizeof (long int))
X#endif
X#ifndef S_FLOAT
X#define S_FLOAT         (sizeof (float))
X#endif
X#ifndef S_DOUBLE
X#define S_DOUBLE        (sizeof (double))
X#endif
X#ifndef S_PCHAR
X#define S_PCHAR         (sizeof (char *))
X#endif
X#ifndef S_PSINT
X#define S_PSINT         (sizeof (short int *))
X#endif
X#ifndef S_PINT
X#define S_PINT          (sizeof (int *))
X#endif
X#ifndef S_PLINT
X#define S_PLINT         (sizeof (long int *))
X#endif
X#ifndef S_PFLOAT
X#define S_PFLOAT        (sizeof (float *))
X#endif
X#ifndef S_PDOUBLE
X#define S_PDOUBLE       (sizeof (double *))
X#endif
X#ifndef S_PFPTR
X#define S_PFPTR         (sizeof (int (*)()))
X#endif
X
Xtypedef struct types {
X    short       type;                   /* This is the bit if           */
X    char        *name;                  /* this is the token word       */
X} TYPES;
X
Xstatic TYPES basic_types[] = {
X        { T_CHAR,       "char",         },
X        { T_INT,        "int",          },
X        { T_FLOAT,      "float",        },
X        { T_DOUBLE,     "double",       },
X        { T_SHORT,      "short",        },
X        { T_LONG,       "long",         },
X        { T_SIGNED,     "signed",       },
X        { T_UNSIGNED,   "unsigned",     },
X        { 0,            NULL,           },      /* Signal end           */
X};
X
X/*
X * Test_table[] is used to test for illegal combinations.
X * dosizeof() requires that at most one bit of each entry be set
X * in the type code being tested.
X */
Xstatic short test_table[] = {
X        T_FLOAT | T_DOUBLE | T_LONG | T_SHORT,
X        T_FLOAT | T_DOUBLE | T_CHAR | T_INT,
X        T_FLOAT | T_DOUBLE | T_SIGNED | T_UNSIGNED,
X        T_LONG  | T_SHORT  | T_CHAR,
X        0                                       /* end marker           */
X};
X
X/*
X * The order of this table is important -- it is also referenced by
X * the command line processor to allow run-time overriding of the
X * built-in size values.  The order must not be changed:
X *      char, short, int, long, float, double (func pointer)
X */
XSIZES size_table[] = {
X    { T_CHAR,   S_CHAR,         S_PCHAR         },      /* char         */
X    { T_SHORT,  S_SINT,         S_PSINT         },      /* short int    */
X    { T_INT,    S_INT,          S_PINT          },      /* int          */
X    { T_LONG,   S_LINT,         S_PLINT         },      /* long         */
X    { T_FLOAT,  S_FLOAT,        S_PFLOAT        },      /* float        */
X    { T_DOUBLE, S_DOUBLE,       S_PDOUBLE       },      /* double       */
X    { T_FPTR,   0,              S_PFPTR         },      /* int (*())    */
X    { 0,        0,              0               },      /* End of table */
X};
X
Xint
Xeval()
X/*
X * Evaluate an expression.  Straight-forward operator precedence.
X * This is called from control() on encountering an #if statement.
X * It calls the following routines:
X * evallex      Lexical analyser -- returns the type and value of
X *              the next input token.
X * evaleval     Evaluate the current operator, given the values on
X *              the value stack.  Returns a pointer to the (new)
X *              value stack.
X * Returns the value of the expression.  For compatibility with older
X * cpp's, this routine returns 1 (TRUE) if a syntax error is detected.
X */
X{
X        register int    op;             /* Current operator             */
X        register int    *valp;          /* -> value vector              */
X        register OPTAB  *opp;           /* Operator stack               */
X        int             prec;           /* Op precedence                */
X        int             binop;          /* Set if binary op. needed     */
X        int             op1;            /* Operand from stack           */
X        int             skip;           /* For short-circuit testing    */
X        int             value[NEXP];    /* Value stack                  */
X        OPTAB           opstack[NEXP];  /* Operand stack                */
X        extern int      *evaleval();    /* Does actual evaluation       */
X
X        valp = value;
X        opp = opstack;
X        opp->op = OP_END;               /* Mark bottom of stack         */
X        opp->prec = opdope[OP_END];     /* And its precedence           */
X        opp->skip = 0;                  /* Not skipping now             */
X        binop = 0;
Xagain:  ;
X#ifdef  DEBUG_EVAL
X        printf("In #if at again: skip = %d, binop = %d, line is: %s",
X            opp->skip, binop, infile->bptr);
X#endif
X        if ((op = evallex(opp->skip)) == OP_SUB && binop == 0)
X            op = OP_NEG;                        /* Unary minus          */
X        else if (op == OP_ADD && binop == 0)
X            op = OP_PLU;                        /* Unary plus           */
X        else if (op == OP_FAIL)
X            return (1);                         /* Error in evallex     */
X#ifdef  DEBUG_EVAL
X        printf("op = %s, opdope = %03o, binop = %d, skip = %d\n",
X            opname[op], opdope[op], binop, opp->skip);
X#endif
X        if (op == DIG) {                        /* Value?               */
X            if (binop != 0) {
X                cerror("misplaced constant in #if", NULLST);
X                return (1);
X            }
X            else if (valp >= &value[NEXP-1]) {
X                cerror("#if value stack overflow", NULLST);
X                return (1);
X            }
X            else {
X#ifdef  DEBUG_EVAL
X                printf("pushing %d onto value stack[%d]\n",
X                    evalue, (int) (valp - value));
X#endif
X                *valp++ = evalue;
X                binop = 1;
X            }
X            goto again;
X        }
X        else if (op > OP_END) {
X            cerror("Illegal #if line", NULLST);
X            return (1);
X        }
X        prec = opdope[op];
X        if (binop != (prec & 1)) {
X            cerror("Operator %s in incorrect context", opname[op]);
X            return (1);
X        }
X        binop = (prec & 2) >> 1;
X        for (;;) {
X#ifdef  DEBUG_EVAL
X            printf("op %s, prec %d., stacked op %s, prec %d, skip %d\n",
X                opname[op], prec, opname[opp->op], opp->prec, opp->skip);
X#endif
X            if (prec > opp->prec) {
X                if (op == OP_LPA)
X                    prec = OP_RPA_PREC;
X                else if (op == OP_QUE)
X                    prec = OP_QUE_PREC;
X                op1 = opp->skip;                /* Save skip for test   */
X                /*
X                 * Push operator onto op. stack.
X                 */
X                opp++;
X                if (opp >= &opstack[NEXP]) {
X                    cerror("expression stack overflow at op \"%s\"",
X                        opname[op]);
X                    return (1);
X                }
X                opp->op = op;
X                opp->prec = prec;
X                /*
X                 * Short-circuit tester.  A unary operator or '(' may
X                 * be stacked before any value has been pushed, so do
X                 * not look below the bottom of the value stack.  The
X                 * result is only used for &&, || and ?, all of which
X                 * follow a value.
X                 */
X                skip = (valp > value && valp[-1] != 0);
X                /*
X                 * Do the short-circuit stuff here.  Short-circuiting
X                 * stops automagically when operators are evaluated.
X                 */
X                if ((op == OP_ANA && !skip)
X                 || (op == OP_ORO && skip))
X                    opp->skip = S_ANDOR;        /* And/or skip starts   */
X                else if (op == OP_QUE)          /* Start of ?: operator */
X                    opp->skip = (op1 & S_ANDOR) | ((!skip) ? S_QUEST : 0);
X                else if (op == OP_COL) {        /* : inverts S_QUEST    */
X                    opp->skip = (op1 & S_ANDOR)
X                              | (((op1 & S_QUEST) != 0) ? 0 : S_QUEST);
X                }
X                else {                          /* Other ops leave      */
X                    opp->skip = op1;            /*  skipping unchanged. */
X                }
X#ifdef  DEBUG_EVAL
X                printf("stacking %s, valp[-1] == %d at %s",
X                    opname[op], (valp > value) ? valp[-1] : 0,
X                    infile->bptr);
X                dumpstack(opstack, opp, value, valp);
X#endif
X                goto again;
X            }
X            /*
X             * Pop operator from op. stack and evaluate it.
X             * End of stack and '(' are specials.
X             */
X            skip = opp->skip;                   /* Remember skip value  */
X            switch ((op1 = opp->op)) {          /* Look at stacked op   */
X            case OP_END:                        /* Stack end marker     */
X                if (op == OP_EOE)
X                    return (valp[-1]);          /* Finished ok.         */
X                goto again;                     /* Read another op.     */
X
X            case OP_LPA:                        /* ( on stack           */
X                if (op != OP_RPA) {             /* Matches ) on input   */
X                    cerror("unbalanced paren's, op is \"%s\"", opname[op]);
X                    return (1);
X                }
X                opp--;                          /* Unstack it           */
X                /* goto again;                  -- Fall through         */
X
X            case OP_QUE:
X                goto again;                     /* Evaluate true expr.  */
X
X            case OP_COL:                        /* : on stack.          */
X                opp--;                          /* Unstack :            */
X                if (opp->op != OP_QUE) {        /* Matches ? on stack?  */
X                    cerror("Misplaced '?' or ':', previous operator is %s",
X                        opname[opp->op]);
X                    return (1);
X                }
X                /*
X                 * Evaluate op1.  Falls through: the ? is unstacked
X                 * below and the : is then evaluated.
X                 */
X            default:                            /* Others:              */
X                opp--;                          /* Unstack the operator */
X#ifdef  DEBUG_EVAL
X                printf("Stack before evaluation of %s\n", opname[op1]);
X                dumpstack(opstack, opp, value, valp);
X#endif
X                valp = evaleval(valp, op1, skip);
X#ifdef  DEBUG_EVAL
X                printf("Stack after evaluation\n");
X                dumpstack(opstack, opp, value, valp);
X#endif
X            }                                   /* op1 switch end       */
X        }                                       /* Stack unwind loop    */
X}
X
XFILE_LOCAL int
Xevallex(skip)
Xint             skip;           /* TRUE if short-circuit evaluation     */
X/*
X * Return next eval operator or value.  Called from eval().  It
X * calls special-purpose routines for 'char' strings and
X * numeric values:
X * evalchar     called to evaluate 'x'
X * evalnum      called to evaluate numbers.
X *
X * Returns DIG (with the value stored in evalue) for a value, an
X * operator code for an operator, OP_EOE at end of expression, or
X * OP_FAIL on a lexical error.
X */
X{
X        register int    c, c1, t;
X
Xagain:  do {                                    /* Collect the token    */
X            c = skipws();
X            if ((c = macroid(c)) == EOF_CHAR || c == '\n') {
X                unget();
X                return (OP_EOE);                /* End of expression    */
X            }
X        } while ((t = type[c]) == LET && catenate());
X        if (t == INV) {                         /* Total nonsense       */
X            if (!skip) {
X                if (isascii(c) && isprint(c))
X                    cierror("illegal character '%c' in #if", c);
X                else
X                    cierror("illegal character (%d decimal) in #if", c);
X            }
X            return (OP_FAIL);
X        }
X        else if (t == QUO) {                    /* ' or "               */
X            if (c == '\'') {                    /* Character constant   */
X                evalue = evalchar(skip);        /* Somewhat messy       */
X#ifdef  DEBUG_EVAL
X                printf("evalchar returns %d.\n", evalue);
X#endif
X                return (DIG);                   /* Return a value       */
X            }
X            cerror("Can't use a string in an #if", NULLST);
X            return (OP_FAIL);
X        }
X        else if (t == LET) {                    /* ID must be a macro   */
X            if (streq(token, "defined")) {      /* Or defined name      */
X                c1 = c = skipws();
X                if (c == '(')                   /* Allow defined(name)  */
X                    c = skipws();
X                if (type[c] == LET) {
X                    evalue = (lookid(c) != NULL);
X                    if (c1 != '('               /* Need to balance      */
X                     || skipws() == ')')        /* Did we balance?      */
X                        return (DIG);           /* Parsed ok            */
X                }
X                cerror("Bad #if ... defined() syntax", NULLST);
X                return (OP_FAIL);
X            }
X            else if (streq(token, "sizeof"))    /* New sizeof hackery   */
X                return (dosizeof());            /* Gets own routine     */
X            /*
X             * The Draft ANSI C Standard says that an undefined symbol
X             * in an #if has the value zero.  We are a bit pickier,
X             * warning except where the programmer was careful to write
X             *          #if defined(foo) ? foo : 0
X             */
X#ifdef VERBOSE
X            if (!skip)
X                cwarn("undefined symbol \"%s\" in #if, 0 used", token);
X#endif
X            evalue = 0;
X            return (DIG);
X        }
X        else if (t == DIG) {                    /* Numbers are harder   */
X            evalue = evalnum(c);
X#ifdef  DEBUG_EVAL
X            printf("evalnum returns %d.\n", evalue);
X#endif
X        }
X        else if (strchr("!=<>&|\\", c) != NULL) {
X            /*
X             * Process a possible multi-byte lexeme.
X             */
X            c1 = cget();                        /* Peek at next char    */
X            switch (c) {
X            case '!':
X                if (c1 == '=')
X                    return (OP_NE);
X                break;
X
X            case '=':
X                if (c1 != '=') {                /* Can't say a=b in #if */
X                    unget();
X                    cerror("= not allowed in #if", NULLST);
X                    return (OP_FAIL);
X                }
X                return (OP_EQ);
X
X            case '>':
X            case '<':
X                if (c1 == c)
X                    return ((c == '<') ? OP_ASL : OP_ASR);
X                else if (c1 == '=')
X                    return ((c == '<') ? OP_LE  : OP_GE);
X                break;
X
X            case '|':
X            case '&':
X                if (c1 == c)
X                    return ((c == '|') ? OP_ORO : OP_ANA);
X                break;
X
X            case '\\':
X                if (c1 == '\n')                 /* Multi-line if        */
X                    goto again;
X                cerror("Unexpected \\ in #if", NULLST);
X                return (OP_FAIL);
X            }
X            unget();                            /* Single-char operator */
X        }
X        return (t);
X}
X
XFILE_LOCAL int
Xdosizeof()
X/*
X * Process the sizeof (basic type) operation in an #if string.
X * Sets evalue to the size and returns
X *      DIG             success
X *      OP_FAIL         bad parse or something.
X */
X{
X        register int    c;
X        register TYPES  *tp;
X        register SIZES  *sizp;
X        register short  *testp;
X        short           typecode;
X
X        if ((c = skipws()) != '(')
X            goto nogood;
X        /*
X         * Scan off the tokens.
X         */
X        typecode = 0;
X        while ((c = skipws())) {
X            if ((c = macroid(c)) == EOF_CHAR || c == '\n')
X                goto nogood;                    /* End of line is a bug */
X            else if (c == '(') {                /* thing (*)() func ptr */
X                if (skipws() == '*'
X                 && skipws() == ')') {          /* We found (*)         */
X                    if (skipws() != '(')        /* Let () be optional   */
X                        unget();
X                    else if (skipws() != ')')
X                        goto nogood;
X                    typecode |= T_FPTR;         /* Function pointer     */
X                }
X                else {                          /* Junk is a bug        */
X                    goto nogood;
X                }
X            }
X            else if (type[c] != LET)            /* Exit if not a type   */
X                break;
X            else if (!catenate()) {             /* Maybe combine tokens */
X                /*
X                 * Look for this unexpandable token in basic_types.
X                 * The code accepts "int long" as well as "long int"
X                 * which is a minor bug as bugs go (and one shared with
X                 * a lot of C compilers).
X                 */
X                for (tp = basic_types; tp->name != NULLST; tp++) {
X                    if (streq(token, tp->name))
X                        break;
X                }
X                if (tp->name == NULLST) {
X                    cerror("#if sizeof, unknown type \"%s\"", token);
X                    return (OP_FAIL);
X                }
X                typecode |= tp->type;           /* Or in the type bit   */
X            }
X        }
X        /*
X         * We are at the end of the type scan.  Chew off '*' if necessary.
X         */
X        if (c == '*') {
X            typecode |= T_PTR;
X            c = skipws();
X        }
X        if (c == ')') {                         /* Last syntax check    */
X            for (testp = test_table; *testp != 0; testp++) {
X                if (!bittest(typecode & *testp)) {
X                    cerror("#if ... sizeof: illegal type combination", NULLST);
X                    return (OP_FAIL);
X                }
X            }
X            /*
X             * We assume that all function pointers are the same size:
X             *          sizeof (int (*)()) == sizeof (float (*)())
X             * We assume that signed and unsigned don't change the size:
X             *          sizeof (signed int) == (sizeof unsigned int)
X             */
X            if ((typecode & T_FPTR) != 0)       /* Function pointer     */
X                typecode = T_FPTR | T_PTR;
X            else {                              /* Var or var * datum   */
X                typecode &= ~(T_SIGNED | T_UNSIGNED);
X                if ((typecode & (T_SHORT | T_LONG)) != 0)
X                    typecode &= ~T_INT;
X            }
X            if ((typecode & ~T_PTR) == 0) {
X                cerror("#if sizeof() error, no type specified", NULLST);
X                return (OP_FAIL);
X            }
X            /*
X             * Exactly one bit (and possibly T_PTR) may be set.
X             */
X            for (sizp = size_table; sizp->bits != 0; sizp++) {
X                if ((typecode & ~T_PTR) == sizp->bits) {
X                    evalue = ((typecode & T_PTR) != 0)
X                        ? sizp->psize : sizp->size;
X                    return (DIG);
X                }
X            }                                   /* We shouldn't fail    */
X            cierror("#if ... sizeof: bug, unknown type code 0x%x", typecode);
X            return (OP_FAIL);
X        }
X
Xnogood: unget();
X        cerror("#if ... sizeof() syntax error", NULLST);
X        return (OP_FAIL);
X}
X
XFILE_LOCAL int
Xbittest(value)
Xint             value;
X/*
X * TRUE if value is zero or exactly one bit is set in value.
X */
X{
X#if (4096 & ~(-4096)) == 0
X        /*
X         * Two's complement: ~(-value) is (value - 1), which shares
X         * no bits with value only if value has at most one bit set.
X         */
X        return ((value & ~(-value)) == 0);
X#else
X        /*
X         * Do it the hard way (for non 2's complement machines).
X         * The XOR must be parenthesized: == binds tighter than ^.
X         */
X        return (value == 0 || (value ^ (value - 1)) == (value * 2 - 1));
X#endif
X}
X
XFILE_LOCAL int
Xevalnum(c)
Xregister int    c;
X/*
X * Expand number for #if lexical analysis.  c is the first character
X * of the number (a digit, already read by the caller).  A leading "0"
X * selects octal and a leading "0x" or "0X" selects hex.  Note: evalnum
X * recognizes the unsigned and long suffixes, but only returns a signed
X * int value.  The first character after the number is pushed back.
X */
X{
X        register int    value;
X        register int    base;
X        register int    c1;
X
X        if (c != '0')
X            base = 10;
X        else if ((c = cget()) == 'x' || c == 'X') {
X            base = 16;
X            c = cget();
X        }
X        else base = 8;
X        value = 0;
X        for (;;) {
X            /*
X             * Classify the digit explicitly.  Subtracting '0' from
X             * anything below 'a' would turn the Ascii characters
X             * between '9' and 'A' (such as ':' and '?') into the
X             * hex digits 10 to 15.
X             */
X            if (c >= '0' && c <= '9')
X                c1 = c - '0';
X            else if (c >= 'a' && c <= 'f')
X                c1 = c - 'a' + 10;
X            else if (c >= 'A' && c <= 'F')
X                c1 = c - 'A' + 10;
X            else
X                break;                          /* Not a digit at all   */
X            if (c1 >= base)
X                break;                          /* Not in this radix    */
X            value *= base;
X            value += c1;
X            c = cget();
X        }
X        /*
X         * Swallow the integer suffixes 'U' (unsigned) and 'L' (long),
X         * in any order, as scannumber() does for ordinary text.
X         */
X        while (c == 'u' || c == 'U' || c == 'l' || c == 'L')
X            c = cget();
X        unget();
X        return (value);
X}
X
XFILE_LOCAL int
Xevalchar(skip)
Xint             skip;           /* TRUE if short-circuit evaluation     */
X/*
X * Get a character constant.  The opening quote has already been read.
X * Returns the value of the constant; the "multi-byte constant" warning
X * is suppressed when skip is TRUE.
X */
X{
X        register int    c;
X        register int    value;
X        register int    count;
X
X        instring = TRUE;
X        if ((c = cget()) == '\\') {
X            switch ((c = cget())) {
X            case 'a':                           /* New in Standard      */
X#if ('a' == '\a' || '\a' == ALERT)
X                value = ALERT;                  /* Use predefined value */
X#else
X                value = '\a';                   /* Use compiler's value */
X#endif
X                break;
X
X            case 'b':
X                value = '\b';
X                break;
X
X            case 'f':
X                value = '\f';
X                break;
X
X            case 'n':
X                value = '\n';
X                break;
X
X            case 'r':
X                value = '\r';
X                break;
X
X            case 't':
X                value = '\t';
X                break;
X
X            case 'v':                           /* New in Standard      */
X#if ('v' == '\v' || '\v' == VT)
X                value = VT;                     /* Use predefined value */
X#else
X                value = '\v';                   /* Use compiler's value */
X#endif
X                break;
X
X            case 'x':                           /* '\xFF'               */
X                count = 3;                      /* At most three digits */
X                value = 0;
X                while ((((c = get()) >= '0' && c <= '9')
X                     || (c >= 'a' && c <= 'f')
X                     || (c >= 'A' && c <= 'F'))
X                    && (--count >= 0)) {
X                        value *= 16;
X                        value += (c <= '9') ? (c - '0') : ((c & 0xF) + 9);
X                }
X                unget();
X                break;
X
X            default:
X                if (c >= '0' && c <= '7') {     /* '\ooo' octal escape  */
X                    count = 3;                  /* At most three digits */
X                    value = 0;
X                    while (c >= '0' && c <= '7' && --count >= 0) {
X                        value *= 8;
X                        value += (c - '0');
X                        c = get();
X                    }
X                    unget();
X                }
X                else value = c;                 /* '\c' is just c       */
X                break;
X            }
X        }
X        else if (c == '\'')                     /* Empty constant ''    */
X            value = 0;
X        else value = c;
X        /*
X         * We warn on multi-byte constants and try to hack
X         * (big|little)endian machines.
X         */
X#if BIG_ENDIAN
X        count = 0;
X#endif
X        while ((c = get()) != '\'' && c != EOF_CHAR && c != '\n') {
X            if (!skip)
X                ciwarn("multi-byte constant '%c' isn't portable", c);
X#if BIG_ENDIAN
X            count += BITS_CHAR;
X            value += (c << count);
X#else
X            value <<= BITS_CHAR;
X            value += c;
X#endif
X        }
X        instring = FALSE;
X        return (value);
X}
X
XFILE_LOCAL int *
Xevaleval(valp, op, skip)
Xregister int    *valp;
Xint             op;
Xint             skip;           /* TRUE if short-circuit evaluation     */
X/*
X * Apply the argument operator to the data on the value stack.
X * One or two values are popped from the value stack and the result
X * is pushed onto the value stack.
X *
X * OP_COL is a special case: it pops a third value (the test).
X *
X * When skip is TRUE the divide/mod by zero warning is suppressed.
X *
X * evaleval() returns the new pointer to the top of the value stack.
X */
X{
X        register int    v1, v2;
X
X        if (isbinary(op))
X            v2 = *--valp;
X        v1 = *--valp;
X#ifdef  DEBUG_EVAL
X        printf("%s op %s", (isbinary(op)) ? "binary" : "unary",
X            opname[op]);
X        if (isbinary(op))
X            printf(", v2 = %d.", v2);
X        printf(", v1 = %d.\n", v1);
X#endif
X        switch (op) {
X        case OP_EOE:
X             break;
X
X        case OP_ADD:
X            v1 += v2;
X            break;
X
X        case OP_SUB:
X            v1 -= v2;
X            break;
X
X        case OP_MUL:
X            v1 *= v2;
X            break;
X
X        case OP_DIV:
X        case OP_MOD:
X            if (v2 == 0) {
X                if (!skip) {
X                    cwarn("%s by zero in #if, zero result assumed",
X                        (op == OP_DIV) ? "divide" : "mod");
X                }
X                v1 = 0;
X            }
X            else if (op == OP_DIV)
X                v1 /= v2;
X            else
X                v1 %= v2;
X            break;
X
X        case OP_ASL:
X            v1 <<= v2;
X            break;
X
X        case OP_ASR:
X            v1 >>= v2;
X            break;
X
X        case OP_AND:
X            v1 &= v2;
X            break;
X
X        case OP_OR:
X            v1 |= v2;
X            break;
X
X        case OP_XOR:
X            v1 ^= v2;
X            break;
X
X        case OP_EQ:
X            v1 = (v1 == v2);
X            break;
X
X        case OP_NE:
X            v1 = (v1 != v2);
X            break;
X
X        case OP_LT:
X            v1 = (v1 < v2);
X            break;
X
X        case OP_LE:
X            v1 = (v1 <= v2);
X            break;
X
X        case OP_GE:
X            v1 = (v1 >= v2);
X            break;
X
X        case OP_GT:
X            v1 = (v1 > v2);
X            break;
X
X        case OP_ANA:
X            v1 = (v1 && v2);
X            break;
X
X        case OP_ORO:
X            v1 = (v1 || v2);
X            break;
X
X        case OP_COL:
X            /*
X             * v1 has the "true" value, v2 the "false" value.
X             * The top of the value stack has the test.
X             */
X            v1 = (*--valp) ? v1 : v2;
X            break;
X
X        case OP_NEG:
X            v1 = (-v1);
X            break;
X
X        case OP_PLU:
X            break;
X
X        case OP_COM:
X            v1 = ~v1;
X            break;
X
X        case OP_NOT:
X            v1 = !v1;
X            break;
X
X        default:
X            cierror("#if bug, operand = %d.", op);
X            v1 = 0;
X        }
X        *valp++ = v1;
X        return (valp);
X}
X
X#ifdef  DEBUG_EVAL
Xdumpstack(opstack, opp, value, valp)
XOPTAB           opstack[NEXP];  /* Operand stack                */
Xregister OPTAB  *opp;           /* Operator stack               */
Xint             value[NEXP];    /* Value stack                  */
Xregister int    *valp;          /* -> value vector              */
X/*
X * Debug aid: print the operator stack from opp down to (but not
X * including) the bottom entry, then the value stack from the top down.
X * Pointer differences are cast to int to match the %d format.
X */
X{
X        printf("index op prec skip name -- op stack at %s", infile->bptr);
X        while (opp > opstack) {
X            printf(" [%2d] %2d  %03o    %d %s\n", (int) (opp - opstack),
X                opp->op, opp->prec, opp->skip, opname[opp->op]);
X            opp--;
X        }
X        while (--valp >= value) {
X            printf("value[%d] = %d\n", (int) (valp - value), *valp);
X        }
X}
X#endif
X
END-of-cpp5.c
echo x - cpp6.c
sed 's/^X//' >cpp6.c << 'END-of-cpp6.c'
924X/* 925X * C P P 6 . C 926X * S u p p o r t R o u t i n e s 927X * 928X * Edit History 929X * 25-May-84 MM Added 8-bit support to type table. 930X * 30-May-84 ARF sharp() should output filename in quotes 931X * 02-Aug-84 MM Newline and #line hacking. sharp() now in cpp1.c 932X * 31-Aug-84 MM USENET net.sources release 933X * 11-Sep-84 ado/MM Keepcomments, also line number pathological 934X * 12-Sep-84 ado/MM bug if comment changes to space and we unget later. 935X * 03-Oct-84 gkr/MM Fixed scannumber bug for '.e' (as in struct.element). 936X * 04-Oct-84 MM Added ungetstring() for token concatenation 937X * 08-Oct-84 MM Yet another attack on number scanning 938X * 31-Oct-84 ado Parameterized $ in identifiers 939X * 2-Nov-84 MM Token concatenation is messier than I thought 940X * 6-Dec-84 MM \<nl> is everywhere invisible. 941X */ 942X 943X#include <stdio.h> 944X#include <ctype.h> 945X#include "cppdef.h" 946X#include "cpp.h" 947X 948X/* 949X * skipnl() skips over input text to the end of the line. 950X * skipws() skips over "whitespace" (spaces or tabs), but 951X * not skip over the end of the line. It skips over 952X * TOK_SEP, however (though that shouldn't happen). 953X * scanid() reads the next token (C identifier) into token[]. 954X * The caller has already read the first character of 955X * the identifier. Unlike macroid(), the token is 956X * never expanded. 957X * macroid() reads the next token (C identifier) into token[]. 958X * If it is a #defined macro, it is expanded, and 959X * macroid() returns TRUE, otherwise, FALSE. 960X * catenate() Does the dirty work of token concatenation, TRUE if it did. 961X * scanstring() Reads a string from the input stream, calling 962X * a user-supplied function for each character. 963X * This function may be output() to write the 964X * string to the output file, or save() to save 965X * the string in the work buffer. 
966X * scannumber() Reads a C numeric constant from the input stream, 967X * calling the user-supplied function for each 968X * character. (output() or save() as noted above.) 969X * save() Save one character in the work[] buffer. 970X * savestring() Saves a string in malloc() memory. 971X * getfile() Initialize a new FILEINFO structure, called when 972X * #include opens a new file, or a macro is to be 973X * expanded. 974X * getmem() Get a specified number of bytes from malloc memory. 975X * output() Write one character to stdout (calling putchar) -- 976X * implemented as a function so its address may be 977X * passed to scanstring() and scannumber(). 978X * lookid() Scans the next token (identifier) from the input 979X * stream. Looks for it in the #defined symbol table. 980X * Returns a pointer to the definition, if found, or NULL 981X * if not present. The identifier is stored in token[]. 982X * defnedel() Define enter/delete subroutine. Updates the 983X * symbol table. 984X * get() Read the next byte from the current input stream, 985X * handling end of (macro/file) input and embedded 986X * comments appropriately. Note that the global 987X * instring is -- essentially -- a parameter to get(). 988X * cget() Like get(), but skip over TOK_SEP. 989X * unget() Push last gotten character back on the input stream. 990X * cerror(), cwarn(), cfatal(), cierror(), ciwarn() 991X * These routines format an print messages to the user. 992X * cerror & cwarn take a format and a single string argument. 993X * cierror & ciwarn take a format and a single int (char) argument. 994X * cfatal takes a format and a single string argument. 995X */ 996X 997X/* 998X * This table must be rewritten for a non-Ascii machine. 999X * 1000X * Note that several "non-visible" characters have special meaning: 1001X * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion. 
1002X * Hex 1E TOK_SEP -- a delimiter for token concatenation 1003X * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation 1004X */ 1005X#if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D 1006X << error type table isn't correct >> 1007X#endif 1008X 1009X#if OK_DOLLAR 1010X#define DOL LET 1011X#else 1012X#define DOL 000 1013X#endif 1014X 1015Xchar type[256] = { /* Character type codes Hex */ 1016X END, 000, 000, 000, 000, 000, 000, 000, /* 00 */ 1017X 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */ 1018X 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 1019X 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ 1020X SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */ 1021XOP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */ 1022X DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */ 1023X DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */ 1024X 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */ 1025X LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */ 1026X LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */ 1027X LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */ 1028X 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */ 1029X LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */ 1030X LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */ 1031X LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */ 1032X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1033X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1034X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1035X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1036X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1037X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1038X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 1039X 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. 
FF */ 1040X}; 1041X 1042Xskipnl() 1043X/* 1044X * Skip to the end of the current input line. 1045X */ 1046X{ 1047X register int c; 1048X 1049X do { /* Skip to newline */ 1050X c = get(); 1051X } while (c != '\n' && c != EOF_CHAR); 1052X} 1053X 1054Xint 1055Xskipws() 1056X/* 1057X * Skip over whitespace 1058X */ 1059X{ 1060X register int c; 1061X 1062X do { /* Skip whitespace */ 1063X c = get(); 1064X#if COMMENT_INVISIBLE 1065X } while (type[c] == SPA || c == COM_SEP); 1066X#else 1067X } while (type[c] == SPA); 1068X#endif 1069X return (c); 1070X} 1071X 1072Xscanid(c) 1073Xregister int c; /* First char of id */ 1074X/* 1075X * Get the next token (an id) into the token buffer. 1076X * Note: this code is duplicated in lookid(). 1077X * Change one, change both. 1078X */ 1079X{ 1080X register char *bp; 1081X 1082X if (c == DEF_MAGIC) /* Eat the magic token */ 1083X c = get(); /* undefiner. */ 1084X bp = token; 1085X do { 1086X if (bp < &token[IDMAX]) /* token dim is IDMAX+1 */ 1087X *bp++ = c; 1088X c = get(); 1089X } while (type[c] == LET || type[c] == DIG); 1090X unget(); 1091X *bp = EOS; 1092X} 1093X 1094Xint 1095Xmacroid(c) 1096Xregister int c; 1097X/* 1098X * If c is a letter, scan the id. if it's #defined, expand it and scan 1099X * the next character and try again. 1100X * 1101X * Else, return the character. If type[c] is a LET, the token is in token. 1102X */ 1103X{ 1104X register DEFBUF *dp; 1105X 1106X if (infile != NULL && infile->fp != NULL) 1107X recursion = 0; 1108X while (type[c] == LET && (dp = lookid(c)) != NULL) { 1109X expand(dp); 1110X c = get(); 1111X } 1112X return (c); 1113X} 1114X 1115Xint 1116Xcatenate() 1117X/* 1118X * A token was just read (via macroid). 1119X * If the next character is TOK_SEP, concatenate the next token 1120X * return TRUE -- which should recall macroid after refreshing 1121X * macroid's argument. If it is not TOK_SEP, unget() the character 1122X * and return FALSE. 
1123X */ 1124X{ 1125X register int c; 1126X register char *token1; 1127X 1128X#if OK_CONCAT 1129X if (get() != TOK_SEP) { /* Token concatenation */ 1130X unget(); 1131X return (FALSE); 1132X } 1133X else { 1134X token1 = savestring(token); /* Save first token */ 1135X c = macroid(get()); /* Scan next token */ 1136X switch(type[c]) { /* What was it? */ 1137X case LET: /* An identifier, ... */ 1138X if (strlen(token1) + strlen(token) >= NWORK) 1139X cfatal("work buffer overflow doing %s #", token1); 1140X sprintf(work, "%s%s", token1, token); 1141X break; 1142X 1143X case DIG: /* A digit string */ 1144X strcpy(work, token1); 1145X workp = work + strlen(work); 1146X do { 1147X save(c); 1148X } while ((c = get()) != TOK_SEP); 1149X /* 1150X * The trailing TOK_SEP is no longer needed. 1151X */ 1152X save(EOS); 1153X break; 1154X 1155X default: /* An error, ... */ 1156X if (isprint(c)) 1157X cierror("Strange character '%c' after #", c); 1158X else 1159X cierror("Strange character (%d.) after #", c); 1160X strcpy(work, token1); 1161X unget(); 1162X break; 1163X } 1164X /* 1165X * work has the concatenated token and token1 has 1166X * the first token (no longer needed). Unget the 1167X * new (concatenated) token after freeing token1. 1168X * Finally, setup to read the new token. 1169X */ 1170X free(token1); /* Free up memory */ 1171X ungetstring(work); /* Unget the new thing, */ 1172X return (TRUE); 1173X } 1174X#else 1175X return (FALSE); /* Not supported */ 1176X#endif 1177X} 1178X 1179Xint 1180Xscanstring(delim, outfun) 1181Xregister int delim; /* ' or " */ 1182Xint (*outfun)(); /* Output function */ 1183X/* 1184X * Scan off a string. Warning if terminated by newline or EOF. 1185X * outfun() outputs the character -- to a buffer if in a macro. 1186X * TRUE if ok, FALSE if error. 
1187X */ 1188X{ 1189X register int c; 1190X 1191X instring = TRUE; /* Don't strip comments */ 1192X (*outfun)(delim); 1193X while ((c = get()) != delim 1194X && c != '\n' 1195X && c != EOF_CHAR) { 1196X (*outfun)(c); 1197X if (c == '\\') 1198X (*outfun)(get()); 1199X } 1200X instring = FALSE; 1201X if (c == delim) { 1202X (*outfun)(c); 1203X return (TRUE); 1204X } 1205X else { 1206X cerror("Unterminated string", NULLST); 1207X unget(); 1208X return (FALSE); 1209X } 1210X} 1211X 1212Xscannumber(c, outfun) 1213Xregister int c; /* First char of number */ 1214Xregister int (*outfun)(); /* Output/store func */ 1215X/* 1216X * Process a number. We know that c is from 0 to 9 or dot. 1217X * Algorithm from Dave Conroy's Decus C. 1218X */ 1219X{ 1220X register int radix; /* 8, 10, or 16 */ 1221X int expseen; /* 'e' seen in floater */ 1222X int signseen; /* '+' or '-' seen */ 1223X int octal89; /* For bad octal test */ 1224X int dotflag; /* TRUE if '.' was seen */ 1225X 1226X expseen = FALSE; /* No exponent seen yet */ 1227X signseen = TRUE; /* No +/- allowed yet */ 1228X octal89 = FALSE; /* No bad octal yet */ 1229X radix = 10; /* Assume decimal */ 1230X if ((dotflag = (c == '.')) != FALSE) { /* . something? */ 1231X (*outfun)('.'); /* Always out the dot */ 1232X if (type[(c = get())] != DIG) { /* If not a float numb, */ 1233X unget(); /* Rescan strange char */ 1234X return; /* All done for now */ 1235X } 1236X } /* End of float test */ 1237X else if (c == '0') { /* Octal or hex? */ 1238X (*outfun)(c); /* Stuff initial zero */ 1239X radix = 8; /* Assume it's octal */ 1240X c = get(); /* Look for an 'x' */ 1241X if (c == 'x' || c == 'X') { /* Did we get one? */ 1242X radix = 16; /* Remember new radix */ 1243X (*outfun)(c); /* Stuff the 'x' */ 1244X c = get(); /* Get next character */ 1245X } 1246X } 1247X for (;;) { /* Process curr. char. */ 1248X /* 1249X * Note that this algorithm accepts "012e4" and "03.4" 1250X * as legitimate floating-point numbers. 
1251X */ 1252X if (radix != 16 && (c == 'e' || c == 'E')) { 1253X if (expseen) /* Already saw 'E'? */ 1254X break; /* Exit loop, bad nbr. */ 1255X expseen = TRUE; /* Set exponent seen */ 1256X signseen = FALSE; /* We can read '+' now */ 1257X radix = 10; /* Decimal exponent */ 1258X } 1259X else if (radix != 16 && c == '.') { 1260X if (dotflag) /* Saw dot already? */ 1261X break; /* Exit loop, two dots */ 1262X dotflag = TRUE; /* Remember the dot */ 1263X radix = 10; /* Decimal fraction */ 1264X } 1265X else if (c == '+' || c == '-') { /* 1.0e+10 */ 1266X if (signseen) /* Sign in wrong place? */ 1267X break; /* Exit loop, not nbr. */ 1268X /* signseen = TRUE; */ /* Remember we saw it */ 1269X } 1270X else { /* Check the digit */ 1271X switch (c) { 1272X case '8': case '9': /* Sometimes wrong */ 1273X octal89 = TRUE; /* Do check later */ 1274X case '0': case '1': case '2': case '3': 1275X case '4': case '5': case '6': case '7': 1276X break; /* Always ok */ 1277X 1278X case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1279X case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1280X if (radix == 16) /* Alpha's are ok only */ 1281X break; /* if reading hex. */ 1282X default: /* At number end */ 1283X goto done; /* Break from for loop */ 1284X } /* End of switch */ 1285X } /* End general case */ 1286X (*outfun)(c); /* Accept the character */ 1287X signseen = TRUE; /* Don't read sign now */ 1288X c = get(); /* Read another char */ 1289X } /* End of scan loop */ 1290X /* 1291X * When we break out of the scan loop, c contains the first 1292X * character (maybe) not in the number. If the number is an 1293X * integer, allow a trailing 'L' for long and/or a trailing 'U' 1294X * for unsigned. If not those, push the trailing character back 1295X * on the input stream. Floating point numbers accept a trailing 1296X * 'L' for "long double". 1297X */ 1298Xdone: if (dotflag || expseen) { /* Floating point? 
*/ 1299X if (c == 'l' || c == 'L') { 1300X (*outfun)(c); 1301X c = get(); /* Ungotten later */ 1302X } 1303X } 1304X else { /* Else it's an integer */ 1305X /* 1306X * We know that dotflag and expseen are both zero, now: 1307X * dotflag signals "saw 'L'", and 1308X * expseen signals "saw 'U'". 1309X */ 1310X for (;;) { 1311X switch (c) { 1312X case 'l': 1313X case 'L': 1314X if (dotflag) 1315X goto nomore; 1316X dotflag = TRUE; 1317X break; 1318X 1319X case 'u': 1320X case 'U': 1321X if (expseen) 1322X goto nomore; 1323X expseen = TRUE; 1324X break; 1325X 1326X default: 1327X goto nomore; 1328X } 1329X (*outfun)(c); /* Got 'L' or 'U'. */ 1330X c = get(); /* Look at next, too. */ 1331X } 1332X } 1333Xnomore: unget(); /* Not part of a number */ 1334X if (octal89 && radix == 8) 1335X cwarn("Illegal digit in octal number", NULLST); 1336X} 1337X 1338Xsave(c) 1339Xregister int c; 1340X{ 1341X if (workp >= &work[NWORK]) 1342X cfatal("Work buffer overflow", NULLST); 1343X else *workp++ = c; 1344X} 1345X 1346Xchar * 1347Xsavestring(text) 1348Xchar *text; 1349X/* 1350X * Store a string into free memory. 1351X */ 1352X{ 1353X register char *result; 1354X 1355X result = getmem(strlen(text) + 1); 1356X strcpy(result, text); 1357X return (result); 1358X} 1359X 1360XFILEINFO * 1361Xgetfile(bufsize, name) 1362Xint bufsize; /* Line or define buffer size */ 1363Xchar *name; /* File or macro name string */ 1364X/* 1365X * Common FILEINFO buffer initialization for a new file or macro. 
1366X */ 1367X{ 1368X register FILEINFO *file; 1369X register int size; 1370X 1371X size = strlen(name); /* File/macro name */ 1372X file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size); 1373X file->parent = infile; /* Chain files together */ 1374X file->fp = NULL; /* No file yet */ 1375X file->filename = savestring(name); /* Save file/macro name */ 1376X file->progname = NULL; /* No #line seen yet */ 1377X file->unrecur = 0; /* No macro fixup */ 1378X file->bptr = file->buffer; /* Initialize line ptr */ 1379X file->buffer[0] = EOS; /* Force first read */ 1380X file->line = 0; /* (Not used just yet) */ 1381X if (infile != NULL) /* If #include file */ 1382X infile->line = line; /* Save current line */ 1383X infile = file; /* New current file */ 1384X line = 1; /* Note first line */ 1385X return (file); /* All done. */ 1386X} 1387X 1388Xchar * 1389Xgetmem(size) 1390Xint size; 1391X/* 1392X * Get a block of free memory. 1393X */ 1394X{ 1395X register char *result; 1396X extern char *malloc(); 1397X 1398X if ((result = malloc((unsigned) size)) == NULL) 1399X cfatal("Out of memory", NULLST); 1400X return (result); 1401X} 1402X 1403X/* 1404X * C P P S y m b o l T a b l e s 1405X */ 1406X 1407X/* 1408X * SBSIZE defines the number of hash-table slots for the symbol table. 1409X * It must be a power of 2. 1410X */ 1411X#ifndef SBSIZE 1412X#define SBSIZE 64 1413X#endif 1414X#define SBMASK (SBSIZE - 1) 1415X#if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1) 1416X << error, SBSIZE must be a power of 2 >> 1417X#endif 1418X 1419Xstatic DEFBUF *symtab[SBSIZE]; /* Symbol table queue headers */ 1420X 1421XDEFBUF * 1422Xlookid(c) 1423Xint c; /* First character of token */ 1424X/* 1425X * Look for the next token in the symbol table. Returns token in "token". 1426X * If found, returns the table pointer; Else returns NULL. 
1427X */ 1428X{ 1429X register int nhash; 1430X register DEFBUF *dp; 1431X register char *np; 1432X int temp; 1433X int isrecurse; /* For #define foo foo */ 1434X 1435X np = token; 1436X nhash = 0; 1437X if ((isrecurse = (c == DEF_MAGIC))) /* If recursive macro */ 1438X c = get(); /* hack, skip DEF_MAGIC */ 1439X do { 1440X if (np < &token[IDMAX]) { /* token dim is IDMAX+1 */ 1441X *np++ = c; /* Store token byte */ 1442X nhash += c; /* Update hash value */ 1443X } 1444X c = get(); /* And get another byte */ 1445X } while (type[c] == LET || type[c] == DIG); 1446X unget(); /* Rescan terminator */ 1447X *np = EOS; /* Terminate token */ 1448X if (isrecurse) /* Recursive definition */ 1449X return (NULL); /* undefined just now */ 1450X nhash += (np - token); /* Fix hash value */ 1451X dp = symtab[nhash & SBMASK]; /* Starting bucket */ 1452X while (dp != (DEFBUF *) NULL) { /* Search symbol table */ 1453X if (dp->hash == nhash /* Fast precheck */ 1454X && (temp = strcmp(dp->name, token)) >= 0) 1455X break; 1456X dp = dp->link; /* Nope, try next one */ 1457X } 1458X return ((temp == 0) ? dp : NULL); 1459X} 1460X 1461XDEFBUF * 1462Xdefendel(name, delete) 1463Xchar *name; 1464Xint delete; /* TRUE to delete a symbol */ 1465X/* 1466X * Enter this name in the lookup table (delete = FALSE) 1467X * or delete this name (delete = TRUE). 1468X * Returns a pointer to the define block (delete = FALSE) 1469X * Returns NULL if the symbol wasn't defined (delete = TRUE). 
1470X */ 1471X{ 1472X register DEFBUF *dp; 1473X register DEFBUF **prevp; 1474X register char *np; 1475X int nhash; 1476X int temp; 1477X int size; 1478X 1479X for (nhash = 0, np = name; *np != EOS;) 1480X nhash += *np++; 1481X size = (np - name); 1482X nhash += size; 1483X prevp = &symtab[nhash & SBMASK]; 1484X while ((dp = *prevp) != (DEFBUF *) NULL) { 1485X if (dp->hash == nhash 1486X && (temp = strcmp(dp->name, name)) >= 0) { 1487X if (temp > 0) 1488X dp = NULL; /* Not found */ 1489X else { 1490X *prevp = dp->link; /* Found, unlink and */ 1491X if (dp->repl != NULL) /* Free the replacement */ 1492X free(dp->repl); /* if any, and then */ 1493X free((char *) dp); /* Free the symbol */ 1494X } 1495X break; 1496X } 1497X prevp = &dp->link; 1498X } 1499X if (!delete) { 1500X dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size); 1501X dp->link = *prevp; 1502X *prevp = dp; 1503X dp->hash = nhash; 1504X dp->repl = NULL; 1505X dp->nargs = 0; 1506X strcpy(dp->name, name); 1507X } 1508X return (dp); 1509X} 1510X 1511X#if DEBUG 1512X 1513Xdumpdef(why) 1514Xchar *why; 1515X{ 1516X register DEFBUF *dp; 1517X register DEFBUF **syp; 1518X 1519X printf("CPP symbol table dump %s\n", why); 1520X for (syp = symtab; syp < &symtab[SBSIZE]; syp++) { 1521X if ((dp = *syp) != (DEFBUF *) NULL) { 1522X printf("symtab[%d]\n", (syp - symtab)); 1523X do { 1524X dumpadef((char *) NULL, dp); 1525X } while ((dp = dp->link) != (DEFBUF *) NULL); 1526X } 1527X } 1528X} 1529X 1530Xdumpadef(why, dp) 1531Xchar *why; /* Notation */ 1532Xregister DEFBUF *dp; 1533X{ 1534X register char *cp; 1535X register int c; 1536X 1537X printf(" \"%s\" [%d]", dp->name, dp->nargs); 1538X if (why != NULL) 1539X printf(" (%s)", why); 1540X if (dp->repl != NULL) { 1541X printf(" => "); 1542X for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) { 1543X if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) 1544X printf("<%d>", c - MAC_PARM); 1545X else if (isprint(c) || c == '\n' || c == '\t') 1546X putchar(c); 1547X else if (c < ' ') 
1548X printf("<^%c>", c + '@'); 1549X else 1550X printf("<\\0%o>", c); 1551X } 1552X } 1553X else { 1554X printf(", no replacement."); 1555X } 1556X putchar('\n'); 1557X} 1558X#endif 1559X 1560X/* 1561X * G E T 1562X */ 1563X 1564Xint 1565Xget() 1566X/* 1567X * Return the next character from a macro or the current file. 1568X * Handle end of file from #include files. 1569X */ 1570X{ 1571X register int c; 1572X register FILEINFO *file; 1573X register int popped; /* Recursion fixup */ 1574X 1575X popped = 0; 1576Xget_from_file: 1577X if ((file = infile) == NULL) 1578X return (EOF_CHAR); 1579Xnewline: 1580X#if 0 1581X printf("get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n", 1582X file->filename, recursion, line, 1583X file->bptr - file->buffer, file->buffer); 1584X#endif 1585X /* 1586X * Read a character from the current input line or macro. 1587X * At EOS, either finish the current macro (freeing temp. 1588X * storage) or read another line from the current input file. 1589X * At EOF, exit the current file (#include) or, at EOF from 1590X * the cpp input file, return EOF_CHAR to finish processing. 1591X */ 1592X if ((c = *file->bptr++ & 0xFF) == EOS) { 1593X /* 1594X * Nothing in current line or macro. Get next line (if 1595X * input from a file), or do end of file/macro processing. 1596X * In the latter case, jump back to restart from the top. 
1597X */ 1598X if (file->fp == NULL) { /* NULL if macro */ 1599X popped++; 1600X recursion -= file->unrecur; 1601X if (recursion < 0) 1602X recursion = 0; 1603X infile = file->parent; /* Unwind file chain */ 1604X } 1605X else { /* Else get from a file */ 1606X if ((file->bptr = fgets(file->buffer, NBUFF, file->fp)) 1607X != NULL) { 1608X#if DEBUG 1609X if (debug > 1) { /* Dump it to stdout */ 1610X printf("\n#line %d (%s), %s", 1611X line, file->filename, file->buffer); 1612X } 1613X#endif 1614X goto newline; /* process the line */ 1615X } 1616X else { 1617X fclose(file->fp); /* Close finished file */ 1618X if ((infile = file->parent) != NULL) { 1619X /* 1620X * There is an "ungotten" newline in the current 1621X * infile buffer (set there by doinclude() in 1622X * cpp1.c). Thus, we know that the mainline code 1623X * is skipping over blank lines and will do a 1624X * #line at its convenience. 1625X */ 1626X wrongline = TRUE; /* Need a #line now */ 1627X } 1628X } 1629X } 1630X /* 1631X * Free up space used by the (finished) file or macro and 1632X * restart input from the parent file/macro, if any. 1633X */ 1634X free(file->filename); /* Free name and */ 1635X if (file->progname != NULL) /* if a #line was seen, */ 1636X free(file->progname); /* free it, too. */ 1637X free((char *) file); /* Free file space */ 1638X if (infile == NULL) /* If at end of file */ 1639X return (EOF_CHAR); /* Return end of file */ 1640X line = infile->line; /* Reset line number */ 1641X goto get_from_file; /* Get from the top. */ 1642X } 1643X /* 1644X * Common processing for the new character. 
1645X */ 1646X if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */ 1647X goto newline; /* from a file */ 1648X if (file->parent != NULL) { /* Macro or #include */ 1649X if (popped != 0) 1650X file->parent->unrecur += popped; 1651X else { 1652X recursion -= file->parent->unrecur; 1653X if (recursion < 0) 1654X recursion = 0; 1655X file->parent->unrecur = 0; 1656X } 1657X } 1658X if (c == '\n') /* Maintain current */ 1659X ++line; /* line counter */ 1660X if (instring) /* Strings just return */ 1661X return (c); /* the character. */ 1662X else if (c == '/') { /* Comment? */ 1663X instring = TRUE; /* So get() won't loop */ 1664X if ((c = get()) != '*') { /* Next byte '*'? */ 1665X instring = FALSE; /* Nope, no comment */ 1666X unget(); /* Push the char. back */ 1667X return ('/'); /* Return the slash */ 1668X } 1669X if (keepcomments) { /* If writing comments */ 1670X putchar('/'); /* Write out the */ 1671X putchar('*'); /* initializer */ 1672X } 1673X for (;;) { /* Eat a comment */ 1674X c = get(); 1675Xtest: if (keepcomments && c != EOF_CHAR) 1676X cput(c); 1677X switch (c) { 1678X case EOF_CHAR: 1679X cerror("EOF in comment", NULLST); 1680X return (EOF_CHAR); 1681X 1682X case '/': 1683X if ((c = get()) != '*') /* Don't let comments */ 1684X goto test; /* Nest. */ 1685X#ifdef VERBOSE 1686X cwarn("Nested comments", NULLST); 1687X#endif 1688X /* Fall into * stuff */ 1689X case '*': 1690X if ((c = get()) != '/') /* If comment doesn't */ 1691X goto test; /* end, look at next */ 1692X instring = FALSE; /* End of comment, */ 1693X if (keepcomments) { /* Put out the comment */ 1694X cput(c); /* terminator, too */ 1695X } 1696X /* 1697X * A comment is syntactically "whitespace" -- 1698X * however, there are certain strange sequences 1699X * such as 1700X * #define foo(x) (something) 1701X * foo|* comment *|(123) 1702X * these are '/' ^ ^ 1703X * where just returning space (or COM_SEP) will cause 1704X * problems. 
This can be "fixed" by overwriting the 1705X * '/' in the input line buffer with ' ' (or COM_SEP) 1706X * but that may mess up an error message. 1707X * So, we peek ahead -- if the next character is 1708X * "whitespace" we just get another character, if not, 1709X * we modify the buffer. All in the name of purity. 1710X */ 1711X if (*file->bptr == '\n' 1712X || type[*file->bptr & 0xFF] == SPA) 1713X goto newline; 1714X#if COMMENT_INVISIBLE 1715X /* 1716X * Return magic (old-fashioned) syntactic space. 1717X */ 1718X return ((file->bptr[-1] = COM_SEP)); 1719X#else 1720X return ((file->bptr[-1] = ' ')); 1721X#endif 1722X 1723X case '\n': /* we'll need a #line */ 1724X if (!keepcomments) 1725X wrongline = TRUE; /* later... */ 1726X default: /* Anything else is */ 1727X break; /* Just a character */ 1728X } /* End switch */ 1729X } /* End comment loop */ 1730X } /* End if in comment */ 1731X else if (!inmacro && c == '\\') { /* If backslash, peek */ 1732X if ((c = get()) == '\n') { /* for a <nl>. If so, */ 1733X wrongline = TRUE; 1734X goto newline; 1735X } 1736X else { /* Backslash anything */ 1737X unget(); /* Get it later */ 1738X return ('\\'); /* Return the backslash */ 1739X } 1740X } 1741X else if (c == '\f' || c == VT) /* Form Feed, Vertical */ 1742X c = ' '; /* Tab are whitespace */ 1743X return (c); /* Just return the char */ 1744X} 1745X 1746Xunget() 1747X/* 1748X * Backup the pointer to reread the last character. Fatal error 1749X * (code bug) if we backup too far. unget() may be called, 1750X * without problems, at end of file. Only one character may 1751X * be ungotten. If you need to unget more, call ungetstring(). 1752X */ 1753X{ 1754X register FILEINFO *file; 1755X 1756X if ((file = infile) == NULL) 1757X return; /* Unget after EOF */ 1758X if (--file->bptr < file->buffer) 1759X cfatal("Too much pushback", NULLST); 1760X if (*file->bptr == '\n') /* Ungetting a newline? 
*/ 1761X --line; /* Unget the line number, too */ 1762X} 1763X 1764Xungetstring(text) 1765Xchar *text; 1766X/* 1767X * Push a string back on the input stream. This is done by treating 1768X * the text as if it were a macro. 1769X */ 1770X{ 1771X register FILEINFO *file; 1772X extern FILEINFO *getfile(); 1773X 1774X file = getfile(strlen(text) + 1, ""); 1775X strcpy(file->buffer, text); 1776X} 1777X 1778Xint 1779Xcget() 1780X/* 1781X * Get one character, absorb "funny space" after comments or 1782X * token concatenation 1783X */ 1784X{ 1785X register int c; 1786X 1787X do { 1788X c = get(); 1789X#if COMMENT_INVISIBLE 1790X } while (c == TOK_SEP || c == COM_SEP); 1791X#else 1792X } while (c == TOK_SEP); 1793X#endif 1794X return (c); 1795X} 1796X 1797X/* 1798X * Error messages and other hacks. The first byte of severity 1799X * is 'S' for string arguments and 'I' for int arguments. This 1800X * is needed for portability with machines that have int's that 1801X * are shorter than char *'s. 1802X */ 1803X 1804Xstatic 1805Xdomsg(severity, format, arg) 1806Xchar *severity; /* "Error", "Warning", "Fatal" */ 1807Xchar *format; /* Format for the error message */ 1808Xchar *arg; /* Something for the message */ 1809X/* 1810X * Print filenames, macro names, and line numbers for error messages. 
1811X */ 1812X{ 1813X register char *tp; 1814X register FILEINFO *file; 1815X 1816X fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]); 1817X if (*severity == 'S') 1818X fprintf(stderr, format, arg); 1819X else 1820X fprintf(stderr, format, (int) arg); 1821X putc('\n', stderr); 1822X if ((file = infile) == NULL) 1823X return; /* At end of file */ 1824X if (file->fp != NULL) { 1825X tp = file->buffer; /* Print current file */ 1826X fprintf(stderr, "%s", tp); /* name, making sure */ 1827X if (tp[strlen(tp) - 1] != '\n') /* there's a newline */ 1828X putc('\n', stderr); 1829X } 1830X while ((file = file->parent) != NULL) { /* Print #includes, too */ 1831X if (file->fp == NULL) 1832X fprintf(stderr, "from macro %s\n", file->filename); 1833X else { 1834X tp = file->buffer; 1835X fprintf(stderr, "from file %s, line %d:\n%s", 1836X (file->progname != NULL) 1837X ? file->progname : file->filename, 1838X file->line, tp); 1839X if (tp[strlen(tp) - 1] != '\n') 1840X putc('\n', stderr); 1841X } 1842X } 1843X} 1844X 1845Xcerror(format, sarg) 1846Xchar *format; 1847Xchar *sarg; /* Single string argument */ 1848X/* 1849X * Print a normal error message, string argument. 1850X */ 1851X{ 1852X domsg("SError", format, sarg); 1853X errors++; 1854X} 1855X 1856Xcierror(format, narg) 1857Xchar *format; 1858Xint narg; /* Single numeric argument */ 1859X/* 1860X * Print a normal error message, numeric argument. 1861X */ 1862X{ 1863X domsg("IError", format, (char *) narg); 1864X errors++; 1865X} 1866X 1867Xcfatal(format, sarg) 1868Xchar *format; 1869Xchar *sarg; /* Single string argument */ 1870X/* 1871X * A real disaster 1872X */ 1873X{ 1874X domsg("SFatal error", format, sarg); 1875X exit(IO_ERROR); 1876X} 1877X 1878Xcwarn(format, sarg) 1879Xchar *format; 1880Xchar *sarg; /* Single string argument */ 1881X/* 1882X * A non-fatal error, string argument. 
1883X */ 1884X{ 1885X domsg("SWarning", format, sarg); 1886X} 1887X 1888Xciwarn(format, narg) 1889Xchar *format; 1890Xint narg; /* Single numeric argument */ 1891X/* 1892X * A non-fatal error, numeric argument. 1893X */ 1894X{ 1895X domsg("IWarning", format, (char *) narg); 1896X} 1897X 1898X 1899X 1900END-of-cpp6.c 1901exit 1902