checknr.c revision 282951
1/* 2 * Copyright (c) 1980, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#ifndef lint 31static const char copyright[] = 32"@(#) Copyright (c) 1980, 1993\n\ 33 The Regents of the University of California. All rights reserved.\n"; 34#endif /* not lint */ 35 36#if 0 37#ifndef lint 38static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 39#endif /* not lint */ 40#endif 41 42#include <sys/cdefs.h> 43__FBSDID("$FreeBSD: stable/10/usr.bin/checknr/checknr.c 282951 2015-05-15 09:00:20Z bapt $"); 44 45/* 46 * checknr: check an nroff/troff input file for matching macro calls. 47 * we also attempt to match size and font changes, but only the embedded 48 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 49 * later but for now think of these restrictions as contributions to 50 * structured typesetting. 51 */ 52#include <err.h> 53#define _WITH_GETLINE 54#include <stdio.h> 55#include <stdlib.h> 56#include <string.h> 57#include <ctype.h> 58 59#define MAXSTK 100 /* Stack size */ 60#define MAXBR 100 /* Max number of bracket pairs known */ 61#define MAXCMDS 600 /* Max number of commands known */ 62 63static void addcmd(char *); 64static void addmac(const char *); 65static int binsrch(const char *); 66static void checkknown(const char *); 67static void chkcmd(const char *, const char *); 68static void complain(int); 69static int eq(const char *, const char *); 70static void nomatch(const char *); 71static void pe(int); 72static void process(FILE *); 73static void prop(int); 74static void usage(void); 75 76/* 77 * The stack on which we remember what we've seen so far. 78 */ 79static struct stkstr { 80 int opno; /* number of opening bracket */ 81 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 82 int parm; /* parm to size, font, etc */ 83 int lno; /* line number */ 84} stk[MAXSTK]; 85static int stktop; 86 87/* 88 * The kinds of opening and closing brackets. 89 */ 90static struct brstr { 91 const char *opbr; 92 const char *clbr; 93} br[MAXBR] = { 94 /* A few bare bones troff commands */ 95#define SZ 0 96 {"sz", "sz"}, /* also \s */ 97#define FT 1 98 {"ft", "ft"}, /* also \f */ 99 /* the -mm package */ 100 {"AL", "LE"}, 101 {"AS", "AE"}, 102 {"BL", "LE"}, 103 {"BS", "BE"}, 104 {"DF", "DE"}, 105 {"DL", "LE"}, 106 {"DS", "DE"}, 107 {"FS", "FE"}, 108 {"ML", "LE"}, 109 {"NS", "NE"}, 110 {"RL", "LE"}, 111 {"VL", "LE"}, 112 /* the -ms package */ 113 {"AB", "AE"}, 114 {"BD", "DE"}, 115 {"CD", "DE"}, 116 {"DS", "DE"}, 117 {"FS", "FE"}, 118 {"ID", "DE"}, 119 {"KF", "KE"}, 120 {"KS", "KE"}, 121 {"LD", "DE"}, 122 {"LG", "NL"}, 123 {"QS", "QE"}, 124 {"RS", "RE"}, 125 {"SM", "NL"}, 126 {"XA", "XE"}, 127 {"XS", "XE"}, 128 /* The -me package */ 129 {"(b", ")b"}, 130 {"(c", ")c"}, 131 {"(d", ")d"}, 132 {"(f", ")f"}, 133 {"(l", ")l"}, 134 {"(q", ")q"}, 135 {"(x", ")x"}, 136 {"(z", ")z"}, 137 /* The -mdoc package */ 138 {"Ao", "Ac"}, 139 {"Bd", "Ed"}, 140 {"Bk", "Ek"}, 141 {"Bo", "Bc"}, 142 {"Do", "Dc"}, 143 {"Fo", "Fc"}, 144 {"Oo", "Oc"}, 145 {"Po", "Pc"}, 146 {"Qo", "Qc"}, 147 {"Rs", "Re"}, 148 {"So", "Sc"}, 149 {"Xo", "Xc"}, 150 /* Things needed by preprocessors */ 151 {"EQ", "EN"}, 152 {"TS", "TE"}, 153 /* Refer */ 154 {"[", "]"}, 155 {0, 0} 156}; 157 158/* 159 * All commands known to nroff, plus macro packages. 160 * Used so we can complain about unrecognized commands. 161 */ 162static const char *knowncmds[MAXCMDS] = { 163"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", 164"%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x", 165"(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C", 166"1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", 167"@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", 168"AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 169"B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", 170"Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd", 171"Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd", 172"Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX", 173"Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG", 174"FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", 175"Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" , 176"ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", 177"KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", 178"MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm", 179"No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox", 180"P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po", 181"Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC", 182"RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3", 183"SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", 184"St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", 185"TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 186"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[", 187"[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}", 188"]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as", 189"b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", 190"ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do", 191"ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc", 192"ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr", 193"fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy", 194"hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern", 195"kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li", 196"ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 197"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", 198"of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp", 199"ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return", 200"rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk", 201"so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", 202"tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr", 2030 204}; 205 206static int lineno; /* current line number in input file */ 207static const char *cfilename; /* name of current file */ 208static int nfiles; /* number of files to process */ 209static int fflag; /* -f: ignore \f */ 210static int sflag; /* -s: ignore \s */ 211static int ncmds; /* size of knowncmds */ 212static int slot; /* slot in knowncmds found by binsrch */ 213 214int 215main(int argc, char **argv) 216{ 217 FILE *f; 218 int i; 219 char *cp; 220 char b1[4]; 221 222 /* Figure out how many known commands there are */ 223 while (knowncmds[ncmds]) 224 ncmds++; 225 while (argc > 1 && argv[1][0] == '-') { 226 switch(argv[1][1]) { 227 228 /* -a: add pairs of macros */ 229 case 'a': 230 i = strlen(argv[1]) - 2; 231 if (i % 6 != 0) 232 usage(); 233 /* look for empty macro slots */ 234 for (i=0; br[i].opbr; i++) 235 ; 236 for (cp=argv[1]+3; cp[-1]; cp += 6) { 237 char *tmp; 238 239 if (i >= MAXBR) 240 errx(1, "too many pairs"); 241 if ((tmp = malloc(3)) == NULL) 242 err(1, "malloc"); 243 strlcpy(tmp, cp, 3); 244 br[i].opbr = tmp; 245 if ((tmp = malloc(3)) == NULL) 246 err(1, "malloc"); 247 strlcpy(tmp, cp+3, 3); 248 br[i].clbr = tmp; 249 addmac(br[i].opbr); /* knows pairs are also known cmds */ 250 addmac(br[i].clbr); 251 i++; 252 } 253 break; 254 255 /* -c: add known commands */ 256 case 'c': 257 i = strlen(argv[1]) - 2; 258 if (i % 3 != 0) 259 usage(); 260 for (cp=argv[1]+3; cp[-1]; cp += 3) { 261 if (cp[2] && cp[2] != '.') 262 usage(); 263 strncpy(b1, cp, 2); 264 b1[2] = '\0'; 265 addmac(b1); 266 } 267 break; 268 269 /* -f: ignore font changes */ 270 case 'f': 271 fflag = 1; 272 break; 273 274 /* -s: ignore size changes */ 275 case 's': 276 sflag = 1; 277 break; 278 default: 279 usage(); 280 } 281 argc--; argv++; 282 } 283 284 nfiles = argc - 1; 285 286 if (nfiles > 0) { 287 for (i = 1; i < argc; i++) { 288 cfilename = argv[i]; 289 f = fopen(cfilename, "r"); 290 if (f == NULL) 291 warn("%s", cfilename); 292 else { 293 process(f); 294 fclose(f); 295 } 296 } 297 } else { 298 cfilename = "stdin"; 299 process(stdin); 300 } 301 exit(0); 302} 303 304static void 305usage(void) 306{ 307 fprintf(stderr, 308 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n"); 309 exit(1); 310} 311 312static void 313process(FILE *f) 314{ 315 int i, n; 316 char mac[64]; /* The current macro or nroff command */ 317 char *line; 318 size_t linecap; 319 int pl; 320 321 line = NULL; 322 linecap = 0; 323 stktop = -1; 324 for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) { 325 if (line[0] == '.') { 326 /* 327 * find and isolate the macro/command name. 328 */ 329 strncpy(mac, line+1, 4); 330 if (isspace(mac[0])) { 331 pe(lineno); 332 printf("Empty command\n"); 333 } else if (isspace(mac[1])) { 334 mac[1] = 0; 335 } else if (isspace(mac[2])) { 336 mac[2] = 0; 337 } else if (mac[0] != '\\' || mac[1] != '\"') { 338 pe(lineno); 339 printf("Command too long\n"); 340 } 341 342 /* 343 * Is it a known command? 344 */ 345 checkknown(mac); 346 347 /* 348 * Should we add it? 349 */ 350 if (eq(mac, "de")) 351 addcmd(line); 352 353 chkcmd(line, mac); 354 } 355 356 /* 357 * At this point we process the line looking 358 * for \s and \f. 359 */ 360 for (i = 0; line[i]; i++) 361 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) { 362 if (!sflag && line[++i] == 's') { 363 pl = line[++i]; 364 if (isdigit(pl)) { 365 n = pl - '0'; 366 pl = ' '; 367 } else 368 n = 0; 369 while (isdigit(line[++i])) 370 n = 10 * n + line[i] - '0'; 371 i--; 372 if (n == 0) { 373 if (stktop >= 0 && 374 stk[stktop].opno == SZ) { 375 stktop--; 376 } else { 377 pe(lineno); 378 printf("unmatched \\s0\n"); 379 } 380 } else { 381 stk[++stktop].opno = SZ; 382 stk[stktop].pl = pl; 383 stk[stktop].parm = n; 384 stk[stktop].lno = lineno; 385 } 386 } else if (!fflag && line[i] == 'f') { 387 n = line[++i]; 388 if (n == 'P') { 389 if (stktop >= 0 && 390 stk[stktop].opno == FT) { 391 stktop--; 392 } else { 393 pe(lineno); 394 printf("unmatched \\fP\n"); 395 } 396 } else { 397 stk[++stktop].opno = FT; 398 stk[stktop].pl = 1; 399 stk[stktop].parm = n; 400 stk[stktop].lno = lineno; 401 } 402 } 403 } 404 } 405 free(line); 406 /* 407 * We've hit the end and look at all this stuff that hasn't been 408 * matched yet! Complain, complain. 409 */ 410 for (i = stktop; i >= 0; i--) { 411 complain(i); 412 } 413} 414 415static void 416complain(int i) 417{ 418 pe(stk[i].lno); 419 printf("Unmatched "); 420 prop(i); 421 printf("\n"); 422} 423 424static void 425prop(int i) 426{ 427 if (stk[i].pl == 0) 428 printf(".%s", br[stk[i].opno].opbr); 429 else switch(stk[i].opno) { 430 case SZ: 431 printf("\\s%c%d", stk[i].pl, stk[i].parm); 432 break; 433 case FT: 434 printf("\\f%c", stk[i].parm); 435 break; 436 default: 437 printf("Bug: stk[%d].opno = %d = .%s, .%s", 438 i, stk[i].opno, br[stk[i].opno].opbr, 439 br[stk[i].opno].clbr); 440 } 441} 442 443static void 444chkcmd(const char *line __unused, const char *mac) 445{ 446 int i; 447 448 /* 449 * Check to see if it matches top of stack. 450 */ 451 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 452 stktop--; /* OK. Pop & forget */ 453 else { 454 /* No. Maybe it's an opener */ 455 for (i=0; br[i].opbr; i++) { 456 if (eq(mac, br[i].opbr)) { 457 /* Found. Push it. */ 458 stktop++; 459 stk[stktop].opno = i; 460 stk[stktop].pl = 0; 461 stk[stktop].parm = 0; 462 stk[stktop].lno = lineno; 463 break; 464 } 465 /* 466 * Maybe it's an unmatched closer. 467 * NOTE: this depends on the fact 468 * that none of the closers can be 469 * openers too. 470 */ 471 if (eq(mac, br[i].clbr)) { 472 nomatch(mac); 473 break; 474 } 475 } 476 } 477} 478 479static void 480nomatch(const char *mac) 481{ 482 int i, j; 483 484 /* 485 * Look for a match further down on stack 486 * If we find one, it suggests that the stuff in 487 * between is supposed to match itself. 488 */ 489 for (j=stktop; j>=0; j--) 490 if (eq(mac,br[stk[j].opno].clbr)) { 491 /* Found. Make a good diagnostic. */ 492 if (j == stktop-2) { 493 /* 494 * Check for special case \fx..\fR and don't 495 * complain. 496 */ 497 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 498 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 499 stktop = j -1; 500 return; 501 } 502 /* 503 * We have two unmatched frobs. Chances are 504 * they were intended to match, so we mention 505 * them together. 506 */ 507 pe(stk[j+1].lno); 508 prop(j+1); 509 printf(" does not match %d: ", stk[j+2].lno); 510 prop(j+2); 511 printf("\n"); 512 } else for (i=j+1; i <= stktop; i++) { 513 complain(i); 514 } 515 stktop = j-1; 516 return; 517 } 518 /* Didn't find one. Throw this away. */ 519 pe(lineno); 520 printf("Unmatched .%s\n", mac); 521} 522 523/* eq: are two strings equal? */ 524static int 525eq(const char *s1, const char *s2) 526{ 527 return (strcmp(s1, s2) == 0); 528} 529 530/* print the first part of an error message, given the line number */ 531static void 532pe(int linen) 533{ 534 if (nfiles > 1) 535 printf("%s: ", cfilename); 536 printf("%d: ", linen); 537} 538 539static void 540checkknown(const char *mac) 541{ 542 543 if (eq(mac, ".")) 544 return; 545 if (binsrch(mac) >= 0) 546 return; 547 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 548 return; 549 550 pe(lineno); 551 printf("Unknown command: .%s\n", mac); 552} 553 554/* 555 * We have a .de xx line in "line". Add xx to the list of known commands. 556 */ 557static void 558addcmd(char *line) 559{ 560 char *mac; 561 562 /* grab the macro being defined */ 563 mac = line+4; 564 while (isspace(*mac)) 565 mac++; 566 if (*mac == 0) { 567 pe(lineno); 568 printf("illegal define: %s\n", line); 569 return; 570 } 571 mac[2] = 0; 572 if (isspace(mac[1]) || mac[1] == '\\') 573 mac[1] = 0; 574 if (ncmds >= MAXCMDS) { 575 printf("Only %d known commands allowed\n", MAXCMDS); 576 exit(1); 577 } 578 addmac(mac); 579} 580 581/* 582 * Add mac to the list. We should really have some kind of tree 583 * structure here but this is a quick-and-dirty job and I just don't 584 * have time to mess with it. (I wonder if this will come back to haunt 585 * me someday?) Anyway, I claim that .de is fairly rare in user 586 * nroff programs, and the register loop below is pretty fast. 587 */ 588static void 589addmac(const char *mac) 590{ 591 const char **src, **dest, **loc; 592 593 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 594#ifdef DEBUG 595 printf("binsrch(%s) -> already in table\n", mac); 596#endif 597 return; 598 } 599 /* binsrch sets slot as a side effect */ 600#ifdef DEBUG 601 printf("binsrch(%s) -> %d\n", mac, slot); 602#endif 603 loc = &knowncmds[slot]; 604 src = &knowncmds[ncmds-1]; 605 dest = src+1; 606 while (dest > loc) 607 *dest-- = *src--; 608 if ((*loc = strdup(mac)) == NULL) 609 err(1, "strdup"); 610 ncmds++; 611#ifdef DEBUG 612 printf("after: %s %s %s %s %s, %d cmds\n", 613 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], 614 knowncmds[slot+1], knowncmds[slot+2], ncmds); 615#endif 616} 617 618/* 619 * Do a binary search in knowncmds for mac. 620 * If found, return the index. If not, return -1. 621 */ 622static int 623binsrch(const char *mac) 624{ 625 const char *p; /* pointer to current cmd in list */ 626 int d; /* difference if any */ 627 int mid; /* mid point in binary search */ 628 int top, bot; /* boundaries of bin search, inclusive */ 629 630 top = ncmds-1; 631 bot = 0; 632 while (top >= bot) { 633 mid = (top+bot)/2; 634 p = knowncmds[mid]; 635 d = p[0] - mac[0]; 636 if (d == 0) 637 d = p[1] - mac[1]; 638 if (d == 0) 639 return (mid); 640 if (d < 0) 641 bot = mid + 1; 642 else 643 top = mid - 1; 644 } 645 slot = bot; /* place it would have gone */ 646 return (-1); 647} 648