1228072Sbapt/* filter - postprocessing of flex output through filters */ 2228072Sbapt 3228072Sbapt/* This file is part of flex. */ 4228072Sbapt 5228072Sbapt/* Redistribution and use in source and binary forms, with or without */ 6228072Sbapt/* modification, are permitted provided that the following conditions */ 7228072Sbapt/* are met: */ 8228072Sbapt 9228072Sbapt/* 1. Redistributions of source code must retain the above copyright */ 10228072Sbapt/* notice, this list of conditions and the following disclaimer. */ 11228072Sbapt/* 2. Redistributions in binary form must reproduce the above copyright */ 12228072Sbapt/* notice, this list of conditions and the following disclaimer in the */ 13228072Sbapt/* documentation and/or other materials provided with the distribution. */ 14228072Sbapt 15228072Sbapt/* Neither the name of the University nor the names of its contributors */ 16228072Sbapt/* may be used to endorse or promote products derived from this software */ 17228072Sbapt/* without specific prior written permission. */ 18228072Sbapt 19228072Sbapt/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 20228072Sbapt/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 21228072Sbapt/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 22228072Sbapt/* PURPOSE. */ 23228072Sbapt 24228072Sbapt#include "flexdef.h" 25228072Sbaptstatic const char * check_4_gnu_m4 = 26228072Sbapt "m4_dnl ifdef(`__gnu__', ," 27228072Sbapt "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 28228072Sbapt " m4exit(2)')\n"; 29228072Sbapt 30228072Sbapt 31228072Sbapt/** global chain. */ 32228072Sbaptstruct filter *output_chain = NULL; 33228072Sbapt 34228072Sbapt/* Allocate and initialize an external filter. 35228072Sbapt * @param chain the current chain or NULL for new chain 36228072Sbapt * @param cmd the command to execute. 37228072Sbapt * @param ... a NULL terminated list of (const char*) arguments to command, 38228072Sbapt * not including argv[0]. 39228072Sbapt * @return newest filter in chain 40228072Sbapt */ 41228072Sbaptstruct filter *filter_create_ext (struct filter *chain, const char *cmd, 42228072Sbapt ...) 43228072Sbapt{ 44228072Sbapt struct filter *f; 45228072Sbapt int max_args; 46228072Sbapt const char *s; 47228072Sbapt va_list ap; 48228072Sbapt 49228072Sbapt /* allocate and initialize new filter */ 50228072Sbapt f = (struct filter *) flex_alloc (sizeof (struct filter)); 51250125Sjkim if (!f) 52250125Sjkim flexerror (_("flex_alloc failed (f) in filter_create_ext")); 53228072Sbapt memset (f, 0, sizeof (*f)); 54228072Sbapt f->filter_func = NULL; 55228072Sbapt f->extra = NULL; 56228072Sbapt f->next = NULL; 57228072Sbapt f->argc = 0; 58228072Sbapt 59228072Sbapt if (chain != NULL) { 60228072Sbapt /* append f to end of chain */ 61228072Sbapt while (chain->next) 62228072Sbapt chain = chain->next; 63228072Sbapt chain->next = f; 64228072Sbapt } 65228072Sbapt 66228072Sbapt 67228072Sbapt /* allocate argv, and populate it with the argument list. */ 68228072Sbapt max_args = 8; 69228072Sbapt f->argv = 70228072Sbapt (const char **) flex_alloc (sizeof (char *) * 71228072Sbapt (max_args + 1)); 72250125Sjkim if (!f->argv) 73250125Sjkim flexerror (_("flex_alloc failed (f->argv) in filter_create_ext")); 74228072Sbapt f->argv[f->argc++] = cmd; 75228072Sbapt 76228072Sbapt va_start (ap, cmd); 77228072Sbapt while ((s = va_arg (ap, const char *)) != NULL) { 78228072Sbapt if (f->argc >= max_args) { 79228072Sbapt max_args += 8; 80228072Sbapt f->argv = 81228072Sbapt (const char **) flex_realloc (f->argv, 82228072Sbapt sizeof (char 83228072Sbapt *) * 84228072Sbapt (max_args + 85228072Sbapt 1)); 86228072Sbapt } 87228072Sbapt f->argv[f->argc++] = s; 88228072Sbapt } 89228072Sbapt f->argv[f->argc] = NULL; 90228072Sbapt 91228072Sbapt va_end (ap); 92228072Sbapt return f; 93228072Sbapt} 94228072Sbapt 95228072Sbapt/* Allocate and initialize an internal filter. 96228072Sbapt * @param chain the current chain or NULL for new chain 97228072Sbapt * @param filter_func The function that will perform the filtering. 98228072Sbapt * filter_func should return 0 if successful, and -1 99228072Sbapt * if an error occurs -- or it can simply exit(). 100228072Sbapt * @param extra optional user-defined data to pass to the filter. 101228072Sbapt * @return newest filter in chain 102228072Sbapt */ 103228072Sbaptstruct filter *filter_create_int (struct filter *chain, 104228072Sbapt int (*filter_func) (struct filter *), 105228072Sbapt void *extra) 106228072Sbapt{ 107228072Sbapt struct filter *f; 108228072Sbapt 109228072Sbapt /* allocate and initialize new filter */ 110228072Sbapt f = (struct filter *) flex_alloc (sizeof (struct filter)); 111250125Sjkim if (!f) 112250125Sjkim flexerror (_("flex_alloc failed in filter_create_int")); 113228072Sbapt memset (f, 0, sizeof (*f)); 114228072Sbapt f->next = NULL; 115228072Sbapt f->argc = 0; 116228072Sbapt f->argv = NULL; 117228072Sbapt 118228072Sbapt f->filter_func = filter_func; 119228072Sbapt f->extra = extra; 120228072Sbapt 121228072Sbapt if (chain != NULL) { 122228072Sbapt /* append f to end of chain */ 123228072Sbapt while (chain->next) 124228072Sbapt chain = chain->next; 125228072Sbapt chain->next = f; 126228072Sbapt } 127228072Sbapt 128228072Sbapt return f; 129228072Sbapt} 130228072Sbapt 131228072Sbapt/** Fork and exec entire filter chain. 132228072Sbapt * @param chain The head of the chain. 133228072Sbapt * @return true on success. 134228072Sbapt */ 135228072Sbaptbool filter_apply_chain (struct filter * chain) 136228072Sbapt{ 137228072Sbapt int pid, pipes[2]; 138228072Sbapt 139250125Sjkim 140228072Sbapt /* Tricky recursion, since we want to begin the chain 141228072Sbapt * at the END. Why? Because we need all the forked processes 142228072Sbapt * to be children of the main flex process. 143228072Sbapt */ 144228072Sbapt if (chain) 145228072Sbapt filter_apply_chain (chain->next); 146228072Sbapt else 147228072Sbapt return true; 148228072Sbapt 149228072Sbapt /* Now we are the right-most unprocessed link in the chain. 150228072Sbapt */ 151228072Sbapt 152228072Sbapt fflush (stdout); 153228072Sbapt fflush (stderr); 154228072Sbapt 155250125Sjkim 156228072Sbapt if (pipe (pipes) == -1) 157228072Sbapt flexerror (_("pipe failed")); 158228072Sbapt 159228072Sbapt if ((pid = fork ()) == -1) 160228072Sbapt flexerror (_("fork failed")); 161228072Sbapt 162228072Sbapt if (pid == 0) { 163228072Sbapt /* child */ 164228072Sbapt 165228072Sbapt /* We need stdin (the FILE* stdin) to connect to this new pipe. 166228072Sbapt * There is no portable way to set stdin to a new file descriptor, 167228072Sbapt * as stdin is not an lvalue on some systems (BSD). 168228072Sbapt * So we dup the new pipe onto the stdin descriptor and use a no-op fseek 169228072Sbapt * to sync the stream. This is a Hail Mary situation. It seems to work. 170228072Sbapt */ 171228072Sbapt close (pipes[1]); 172250125Sjkimclearerr(stdin); 173228072Sbapt if (dup2 (pipes[0], fileno (stdin)) == -1) 174228072Sbapt flexfatal (_("dup2(pipes[0],0)")); 175228072Sbapt close (pipes[0]); 176228072Sbapt fseek (stdin, 0, SEEK_CUR); 177228072Sbapt 178228072Sbapt /* run as a filter, either internally or by exec */ 179228072Sbapt if (chain->filter_func) { 180228072Sbapt int r; 181228072Sbapt 182228072Sbapt if ((r = chain->filter_func (chain)) == -1) 183228072Sbapt flexfatal (_("filter_func failed")); 184228072Sbapt exit (0); 185228072Sbapt } 186228072Sbapt else { 187228072Sbapt execvp (chain->argv[0], 188228072Sbapt (char **const) (chain->argv)); 189250125Sjkim lerrsf_fatal ( _("exec of %s failed"), 190250125Sjkim chain->argv[0]); 191228072Sbapt } 192228072Sbapt 193228072Sbapt exit (1); 194228072Sbapt } 195228072Sbapt 196228072Sbapt /* Parent */ 197228072Sbapt close (pipes[0]); 198228072Sbapt if (dup2 (pipes[1], fileno (stdout)) == -1) 199228072Sbapt flexfatal (_("dup2(pipes[1],1)")); 200228072Sbapt close (pipes[1]); 201228072Sbapt fseek (stdout, 0, SEEK_CUR); 202228072Sbapt 203228072Sbapt return true; 204228072Sbapt} 205228072Sbapt 206228072Sbapt/** Truncate the chain to max_len number of filters. 207228072Sbapt * @param chain the current chain. 208228072Sbapt * @param max_len the maximum length of the chain. 209228072Sbapt * @return the resulting length of the chain. 210228072Sbapt */ 211228072Sbaptint filter_truncate (struct filter *chain, int max_len) 212228072Sbapt{ 213228072Sbapt int len = 1; 214228072Sbapt 215228072Sbapt if (!chain) 216228072Sbapt return 0; 217228072Sbapt 218228072Sbapt while (chain->next && len < max_len) { 219228072Sbapt chain = chain->next; 220228072Sbapt ++len; 221228072Sbapt } 222228072Sbapt 223228072Sbapt chain->next = NULL; 224228072Sbapt return len; 225228072Sbapt} 226228072Sbapt 227228072Sbapt/** Splits the chain in order to write to a header file. 228228072Sbapt * Similar in spirit to the 'tee' program. 229228072Sbapt * The header file name is in extra. 230228072Sbapt * @return 0 (zero) on success, and -1 on failure. 231228072Sbapt */ 232228072Sbaptint filter_tee_header (struct filter *chain) 233228072Sbapt{ 234228072Sbapt /* This function reads from stdin and writes to both the C file and the 235228072Sbapt * header file at the same time. 236228072Sbapt */ 237228072Sbapt 238228072Sbapt const int readsz = 512; 239228072Sbapt char *buf; 240228072Sbapt int to_cfd = -1; 241228072Sbapt FILE *to_c = NULL, *to_h = NULL; 242228072Sbapt bool write_header; 243228072Sbapt 244228072Sbapt write_header = (chain->extra != NULL); 245228072Sbapt 246228072Sbapt /* Store a copy of the stdout pipe, which is already piped to C file 247228072Sbapt * through the running chain. Then create a new pipe to the H file as 248228072Sbapt * stdout, and fork the rest of the chain again. 249228072Sbapt */ 250228072Sbapt 251228072Sbapt if ((to_cfd = dup (1)) == -1) 252228072Sbapt flexfatal (_("dup(1) failed")); 253228072Sbapt to_c = fdopen (to_cfd, "w"); 254228072Sbapt 255228072Sbapt if (write_header) { 256228072Sbapt if (freopen ((char *) chain->extra, "w", stdout) == NULL) 257228072Sbapt flexfatal (_("freopen(headerfilename) failed")); 258228072Sbapt 259228072Sbapt filter_apply_chain (chain->next); 260228072Sbapt to_h = stdout; 261228072Sbapt } 262228072Sbapt 263228072Sbapt /* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch. 264228072Sbapt */ 265228072Sbapt 266228072Sbapt if (write_header) { 267228072Sbapt fputs (check_4_gnu_m4, to_h); 268228072Sbapt fputs ("m4_changecom`'m4_dnl\n", to_h); 269228072Sbapt fputs ("m4_changequote`'m4_dnl\n", to_h); 270228072Sbapt fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 271228072Sbapt fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 272228072Sbapt fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 273228072Sbapt to_h); 274228072Sbapt fprintf (to_h, "#ifndef %sHEADER_H\n", prefix); 275228072Sbapt fprintf (to_h, "#define %sHEADER_H 1\n", prefix); 276228072Sbapt fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix); 277228072Sbapt fprintf (to_h, 278228072Sbapt "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 279228072Sbapt headerfilename ? headerfilename : "<stdout>"); 280228072Sbapt 281228072Sbapt } 282228072Sbapt 283228072Sbapt fputs (check_4_gnu_m4, to_c); 284228072Sbapt fputs ("m4_changecom`'m4_dnl\n", to_c); 285228072Sbapt fputs ("m4_changequote`'m4_dnl\n", to_c); 286228072Sbapt fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 287228072Sbapt fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 288228072Sbapt fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 289228072Sbapt outfilename ? outfilename : "<stdout>"); 290228072Sbapt 291228072Sbapt buf = (char *) flex_alloc (readsz); 292250125Sjkim if (!buf) 293250125Sjkim flexerror (_("flex_alloc failed in filter_tee_header")); 294228072Sbapt while (fgets (buf, readsz, stdin)) { 295228072Sbapt fputs (buf, to_c); 296228072Sbapt if (write_header) 297228072Sbapt fputs (buf, to_h); 298228072Sbapt } 299228072Sbapt 300228072Sbapt if (write_header) { 301228072Sbapt fprintf (to_h, "\n"); 302228072Sbapt 303228072Sbapt /* write a fake line number. It will get fixed by the linedir filter. */ 304228072Sbapt fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 305228072Sbapt 306228072Sbapt fprintf (to_h, "#undef %sIN_HEADER\n", prefix); 307228072Sbapt fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix); 308228072Sbapt fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 309228072Sbapt 310228072Sbapt fflush (to_h); 311250125Sjkim if (ferror (to_h)) 312250125Sjkim lerrsf (_("error writing output file %s"), 313250125Sjkim (char *) chain->extra); 314228072Sbapt 315250125Sjkim else if (fclose (to_h)) 316250125Sjkim lerrsf (_("error closing output file %s"), 317250125Sjkim (char *) chain->extra); 318228072Sbapt } 319228072Sbapt 320228072Sbapt fflush (to_c); 321228072Sbapt if (ferror (to_c)) 322228072Sbapt lerrsf (_("error writing output file %s"), 323228072Sbapt outfilename ? outfilename : "<stdout>"); 324228072Sbapt 325228072Sbapt else if (fclose (to_c)) 326228072Sbapt lerrsf (_("error closing output file %s"), 327228072Sbapt outfilename ? outfilename : "<stdout>"); 328228072Sbapt 329228072Sbapt while (wait (0) > 0) ; 330228072Sbapt 331228072Sbapt exit (0); 332228072Sbapt return 0; 333228072Sbapt} 334228072Sbapt 335228072Sbapt/** Adjust the line numbers in the #line directives of the generated scanner. 336228072Sbapt * After the m4 expansion, the line numbers are incorrect since the m4 macros 337228072Sbapt * can add or remove lines. This only adjusts line numbers for generated code, 338228072Sbapt * not user code. This also happens to be a good place to squeeze multiple 339228072Sbapt * blank lines into a single blank line. 340228072Sbapt */ 341228072Sbaptint filter_fix_linedirs (struct filter *chain) 342228072Sbapt{ 343228072Sbapt char *buf; 344228072Sbapt const int readsz = 512; 345228072Sbapt int lineno = 1; 346228072Sbapt bool in_gen = true; /* in generated code */ 347228072Sbapt bool last_was_blank = false; 348228072Sbapt 349228072Sbapt if (!chain) 350228072Sbapt return 0; 351228072Sbapt 352228072Sbapt buf = (char *) flex_alloc (readsz); 353250125Sjkim if (!buf) 354250125Sjkim flexerror (_("flex_alloc failed in filter_fix_linedirs")); 355228072Sbapt 356228072Sbapt while (fgets (buf, readsz, stdin)) { 357228072Sbapt 358228072Sbapt regmatch_t m[10]; 359228072Sbapt 360228072Sbapt /* Check for #line directive. */ 361228072Sbapt if (buf[0] == '#' 362250125Sjkim && regexec (®ex_linedir, buf, 3, m, 0) == 0) { 363228072Sbapt 364228072Sbapt int num; 365228072Sbapt char *fname; 366228072Sbapt 367228072Sbapt /* extract the line number and filename */ 368228072Sbapt num = regmatch_strtol (&m[1], buf, NULL, 0); 369228072Sbapt fname = regmatch_dup (&m[2], buf); 370228072Sbapt 371228072Sbapt if (strcmp (fname, 372228072Sbapt outfilename ? outfilename : "<stdout>") 373228072Sbapt == 0 374228072Sbapt || strcmp (fname, 375228072Sbapt headerfilename ? headerfilename : "<stdout>") 376228072Sbapt == 0) { 377228072Sbapt 378228072Sbapt char *s1, *s2; 379228072Sbapt char filename[MAXLINE]; 380228072Sbapt 381228072Sbapt s1 = fname; 382228072Sbapt s2 = filename; 383228072Sbapt 384228072Sbapt while ((s2 - filename) < (MAXLINE - 1) && *s1) { 385228072Sbapt /* Escape the backslash */ 386228072Sbapt if (*s1 == '\\') 387228072Sbapt *s2++ = '\\'; 388228072Sbapt /* Escape the double quote */ 389228072Sbapt if (*s1 == '\"') 390228072Sbapt *s2++ = '\\'; 391228072Sbapt /* Copy the character as usual */ 392228072Sbapt *s2++ = *s1++; 393228072Sbapt } 394228072Sbapt 395228072Sbapt *s2 = '\0'; 396228072Sbapt 397228072Sbapt /* Adjust the line directives. */ 398228072Sbapt in_gen = true; 399228072Sbapt snprintf (buf, readsz, "#line %d \"%s\"\n", 400228072Sbapt lineno + 1, filename); 401228072Sbapt } 402228072Sbapt else { 403228072Sbapt /* it's a #line directive for code we didn't write */ 404228072Sbapt in_gen = false; 405228072Sbapt } 406228072Sbapt 407228072Sbapt free (fname); 408228072Sbapt last_was_blank = false; 409228072Sbapt } 410228072Sbapt 411228072Sbapt /* squeeze blank lines from generated code */ 412228072Sbapt else if (in_gen 413228072Sbapt && regexec (®ex_blank_line, buf, 0, NULL, 414228072Sbapt 0) == 0) { 415228072Sbapt if (last_was_blank) 416228072Sbapt continue; 417228072Sbapt else 418228072Sbapt last_was_blank = true; 419228072Sbapt } 420228072Sbapt 421228072Sbapt else { 422228072Sbapt /* it's a line of normal, non-empty code. */ 423228072Sbapt last_was_blank = false; 424228072Sbapt } 425228072Sbapt 426228072Sbapt fputs (buf, stdout); 427228072Sbapt lineno++; 428228072Sbapt } 429228072Sbapt fflush (stdout); 430228072Sbapt if (ferror (stdout)) 431228072Sbapt lerrsf (_("error writing output file %s"), 432228072Sbapt outfilename ? outfilename : "<stdout>"); 433228072Sbapt 434228072Sbapt else if (fclose (stdout)) 435228072Sbapt lerrsf (_("error closing output file %s"), 436228072Sbapt outfilename ? outfilename : "<stdout>"); 437228072Sbapt 438228072Sbapt return 0; 439228072Sbapt} 440228072Sbapt 441228072Sbapt/* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */ 442