1/* diff - compare files line by line
2
3   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002,
4   2004 Free Software Foundation, Inc.
5
6   This file is part of GNU DIFF.
7
8   GNU DIFF is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   GNU DIFF is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16   See the GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with GNU DIFF; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#define GDIFF_MAIN
24#include "diff.h"
25#include "paths.h"
26#include <c-stack.h>
27#include <dirname.h>
28#include <error.h>
29#include <exclude.h>
30#include <exit.h>
31#include <exitfail.h>
32#include <file-type.h>
33#include <fnmatch.h>
34#include <getopt.h>
35#include <hard-locale.h>
36#include <posixver.h>
37#include <prepargs.h>
38#include <quotesys.h>
39#include <setmode.h>
40#include <version-etc.h>
41#include <xalloc.h>
42
/* Smallest gutter width that main allows between the two half lines
   when it computes the side-by-side layout.  The #ifndef lets a
   definition made earlier (for instance on the compiler command line)
   take precedence over this default.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
46
/* A list of regexps collected from the command line.  The patterns
   are kept as text joined by `\|' (see add_regexp) so that a single
   combined pattern can be compiled later (see summarize_regexp_list).  */
struct regexp_list
{
  char *regexps;	/* chars representing disjunction of the regexps */
  size_t len;		/* chars used in `regexps' */
  size_t size;		/* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* pattern buffer handed to re_compile_pattern */
};
55
/* Forward declarations of file-local functions that are called
   before the point where they are defined.  */
static int compare_files (struct comparison const *parent,
			  char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
			   char const *option);
static void try_help (char const *reason_msgid, char const *operand)
     __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
64
65/* If comparing directories, compare their common subdirectories
66   recursively.  */
67static bool recursive;
68
69/* In context diffs, show previous lines that match these regexps.  */
70static struct regexp_list function_regexp_list;
71
72/* Ignore changes affecting only lines that match these regexps.  */
73static struct regexp_list ignore_regexp_list;
74
75#if HAVE_SETMODE_DOS
76/* Use binary I/O when reading and writing data (--binary).
77   On POSIX hosts, this has no effect.  */
78static bool binary;
79#else
80enum { binary = true };
81#endif
82
83/* When comparing directories, if a file appears only in one
84   directory, treat it as present but empty in the other (-N).
85   Then `patch' would create the file with appropriate contents.  */
86static bool new_file;
87
88/* When comparing directories, if a file appears only in the second
89   directory of the two, treat it as present but empty in the other
90   (--unidirectional-new-file).
91   Then `patch' would create the file with appropriate contents.  */
92static bool unidirectional_new_file;
93
94/* Report files compared that are the same (-s).
95   Normally nothing is output when that happens.  */
96static bool report_identical_files;
97
98
/* Build one string holding the command options diff was invoked with.

   OPTIONVEC is a vector of separate ARGV-elements and COUNT is the
   number of elements in it.  Every element is preceded by a single
   space and written via quote_system_arg, so the result begins with a
   space, has none at the end, and is the empty string when COUNT is
   zero.  The result is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;		/* the terminating null byte */
  char *buffer;
  char *out;
  int k;

  /* Measuring pass: a null destination makes quote_system_arg report
     the length without storing anything.  Add one per element for
     the separating space.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  buffer = xmalloc (needed);
  out = buffer;

  /* Writing pass.  */
  for (k = 0; k < count; k++)
    {
      *out++ = ' ';
      out += quote_system_arg (out, optionvec[k]);
    }

  *out = 0;
  return buffer;
}
129
130
131/* Return an option value suitable for add_exclude.  */
132
133static int
134exclude_options (void)
135{
136  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
137}
138
/* Single-letter options accepted by getopt_long.  A letter followed
   by `:' takes an argument.  The digits are listed so that main can
   assemble the obsolete `-NUM' context length one digit at a time.  */
static char const shortopts[] =
  "0123456789"
  "abBcC:dD:eEfF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y";
141
/* Values for long options that do not have single-letter equivalents.
   They start at CHAR_MAX + 1, i.e. above every value a single
   character option can have.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION from the option code and uses the
     result to index line_format[] and line_format_option[].  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION from the option code and uses the
     result to index group_format[] and group_format_option[].  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
172
/* Spelling of each --*-group-format option, indexed by the option
   code minus UNCHANGED_GROUP_FORMAT_OPTION.  main passes the entry to
   specify_value as the option name.  Each row is as wide as the
   longest name, including its terminating null byte.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Likewise for the --*-line-format options, indexed by the option
   code minus UNCHANGED_LINE_FORMAT_OPTION.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
187
188static struct option const longopts[] =
189{
190  {"binary", 0, 0, BINARY_OPTION},
191  {"brief", 0, 0, 'q'},
192  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
193  {"context", 2, 0, 'C'},
194  {"ed", 0, 0, 'e'},
195  {"exclude", 1, 0, 'x'},
196  {"exclude-from", 1, 0, 'X'},
197  {"expand-tabs", 0, 0, 't'},
198  {"forward-ed", 0, 0, 'f'},
199  {"from-file", 1, 0, FROM_FILE_OPTION},
200  {"help", 0, 0, HELP_OPTION},
201  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
202  {"ifdef", 1, 0, 'D'},
203  {"ignore-all-space", 0, 0, 'w'},
204  {"ignore-blank-lines", 0, 0, 'B'},
205  {"ignore-case", 0, 0, 'i'},
206  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
207  {"ignore-matching-lines", 1, 0, 'I'},
208  {"ignore-space-change", 0, 0, 'b'},
209  {"ignore-tab-expansion", 0, 0, 'E'},
210  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
211  {"initial-tab", 0, 0, 'T'},
212  {"label", 1, 0, 'L'},
213  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
214  {"line-format", 1, 0, LINE_FORMAT_OPTION},
215  {"minimal", 0, 0, 'd'},
216  {"new-file", 0, 0, 'N'},
217  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
218  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
219  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
220  {"normal", 0, 0, NORMAL_OPTION},
221  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
222  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
223  {"paginate", 0, 0, 'l'},
224  {"rcs", 0, 0, 'n'},
225  {"recursive", 0, 0, 'r'},
226  {"report-identical-files", 0, 0, 's'},
227  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
228  {"show-c-function", 0, 0, 'p'},
229  {"show-function-line", 1, 0, 'F'},
230  {"side-by-side", 0, 0, 'y'},
231  {"speed-large-files", 0, 0, 'H'},
232  {"starting-file", 1, 0, 'S'},
233  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
234  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
235  {"tabsize", 1, 0, TABSIZE_OPTION},
236  {"text", 0, 0, 'a'},
237  {"to-file", 1, 0, TO_FILE_OPTION},
238  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
239  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
240  {"unidirectional-new-file", 0, 0, 'P'},
241  {"unified", 2, 0, 'U'},
242  {"version", 0, 0, 'v'},
243  {"width", 1, 0, 'W'},
244  {0, 0, 0, 0}
245};
246
/* Program entry point.

   Initialize, let the DIFF_OPTIONS environment variable contribute
   options, decode all options, derive the settings that depend on
   several options at once, and then compare the operands.

   With --from-file or --to-file every operand is compared against
   that file and the largest status returned by compare_files is kept;
   otherwise exactly two operands are required.  The process ends via
   exit with that status.  --help and -v return EXIT_SUCCESS directly
   after producing their output.  */
int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;		/* option code handled on the previous iteration */
  lin ocontext = -1;		/* context length from obsolete -NUM; negative if none */
  bool explicit_context = false;	/* set once -C or -U has been handled */
  size_t width = 0;		/* -W value; 0 means not given yet */
  bool show_c_function = false;	/* set by -p */
  char const *from_file = 0;
  char const *to_file = 0;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = 2;
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  textdomain (PACKAGE);
  c_stack_action (0);
  /* Tie each regexp list to the pattern buffer its patterns are
     compiled into.  */
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP);
  excluded = new_exclude ();

  /* NOTE(review): judging by its name, this places the options found in
     DIFF_OPTIONS ahead of the command-line ones; confirm in prepargs.  */
  prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
    {
      switch (c)
	{
	case 0:
	  break;

	/* Obsolete -NUM context length, delivered one digit per
	   iteration.  A digit starts a new number unless the previous
	   option was a digit too; on overflow the value saturates at
	   LIN_MAX.  */
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	  if (! ISDIGIT (prev))
	    ocontext = c - '0';
	  else if (LIN_MAX / 10 < ocontext
		   || ((ocontext = 10 * ocontext + c - '0') < 0))
	    ocontext = LIN_MAX;
	  break;

	case 'a':
	  text = true;
	  break;

	/* The white-space settings only ever get stronger: a weaker
	   request never lowers a level that was already selected.  */
	case 'b':
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
	    ignore_white_space = IGNORE_SPACE_CHANGE;
	  break;

	case 'B':
	  ignore_blank_lines = true;
	  break;

	case 'C':
	case 'U':
	  {
	    /* The argument is optional for the long forms (--context,
	       --unified); without one the length is 3.  Values above
	       LIN_MAX are clamped.  */
	    if (optarg)
	      {
		numval = strtoumax (optarg, &numend, 10);
		if (*numend)
		  try_help ("invalid context length `%s'", optarg);
		if (LIN_MAX < numval)
		  numval = LIN_MAX;
	      }
	    else
	      numval = 3;

	    specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
	    /* Context is only raised here, never lowered.  */
	    if (context < numval)
	      context = numval;
	    explicit_context = true;
	  }
	  break;

	case 'c':
	  specify_style (OUTPUT_CONTEXT);
	  if (context < 3)
	    context = 3;
	  break;

	case 'd':
	  minimal = true;
	  break;

	case 'D':
	  specify_style (OUTPUT_IFDEF);
	  {
	    /* Each %c below is given a 0 argument, so sprintf stores a
	       null byte there and the buffer ends up holding four
	       consecutive null-terminated strings, one per group
	       format.  The allocation accounts for the seven %s
	       expansions and for the bytes saved by %% and %c.  */
	    static char const C_ifdef_group_formats[] =
	      "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
	    char *b = xmalloc (sizeof C_ifdef_group_formats
			       + 7 * strlen (optarg) - 14 /* 7*"%s" */
			       - 8 /* 5*"%%" + 3*"%c" */);
	    sprintf (b, C_ifdef_group_formats,
		     0,
		     optarg, optarg, 0,
		     optarg, optarg, 0,
		     optarg, optarg, optarg);
	    /* Hand out the strings in order, stepping over each
	       terminating null byte.  */
	    for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
	      {
		specify_value (&group_format[i], b, "-D");
		b += strlen (b) + 1;
	      }
	  }
	  break;

	case 'e':
	  specify_style (OUTPUT_ED);
	  break;

	case 'E':
	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
	    ignore_white_space = IGNORE_TAB_EXPANSION;
	  break;

	case 'f':
	  specify_style (OUTPUT_FORWARD_ED);
	  break;

	case 'F':
	  add_regexp (&function_regexp_list, optarg);
	  break;

	case 'h':
	  /* Split the files into chunks for faster processing.
	     Usually does not change the result.

	     This currently has no effect.  */
	  break;

	case 'H':
	  speed_large_files = true;
	  break;

	case 'i':
	  ignore_case = true;
	  break;

	case 'I':
	  add_regexp (&ignore_regexp_list, optarg);
	  break;

	case 'l':
	  /* An empty pr_program means no paginator is available.  */
	  if (!pr_program[0])
	    try_help ("pagination not supported on this host", 0);
	  paginate = true;
#ifdef SIGCHLD
	  /* Pagination requires forking and waiting, and
	     System V fork+wait does not work if SIGCHLD is ignored.  */
	  signal (SIGCHLD, SIG_DFL);
#endif
	  break;

	case 'L':
	  /* The first -L fills label 0, the second label 1; a third
	     is an error.  */
	  if (!file_label[0])
	    file_label[0] = optarg;
	  else if (!file_label[1])
	    file_label[1] = optarg;
	  else
	    fatal ("too many file label options");
	  break;

	case 'n':
	  specify_style (OUTPUT_RCS);
	  break;

	case 'N':
	  new_file = true;
	  break;

	case 'o':
	  /* Output in the old traditional style.  */
	  specify_style (OUTPUT_NORMAL);
	  break;

	case 'p':
	  /* -p is -F with a built-in regexp.  The output style it
	     implies is chosen after the loop, and only if no style
	     was requested.  */
	  show_c_function = true;
	  add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
	  break;

	case 'P':
	  unidirectional_new_file = true;
	  break;

	case 'q':
	  brief = true;
	  break;

	case 'r':
	  recursive = true;
	  break;

	case 's':
	  report_identical_files = true;
	  break;

	case 'S':
	  specify_value (&starting_file, optarg, "-S");
	  break;

	case 't':
	  expand_tabs = true;
	  break;

	case 'T':
	  initial_tab = true;
	  break;

	case 'u':
	  specify_style (OUTPUT_UNIFIED);
	  if (context < 3)
	    context = 3;
	  break;

	case 'v':
	  /* Print the version, make sure it reached stdout, and stop.  */
	  version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION,
		       "Paul Eggert", "Mike Haertel", "David Hayes",
		       "Richard Stallman", "Len Tower", (char *) 0);
	  check_stdout ();
	  return EXIT_SUCCESS;

	case 'w':
	  ignore_white_space = IGNORE_ALL_SPACE;
	  break;

	case 'x':
	  add_exclude (excluded, optarg, exclude_options ());
	  break;

	case 'X':
	  if (add_exclude_file (add_exclude, excluded, optarg,
				exclude_options (), '\n'))
	    pfatal_with_name (optarg);
	  break;

	case 'y':
	  specify_style (OUTPUT_SDIFF);
	  break;

	case 'W':
	  /* The width must be a positive number that fits in size_t
	     with nothing after it.  Repeating the same width is
	     accepted; a different one is a conflict.  */
	  numval = strtoumax (optarg, &numend, 10);
	  if (! (0 < numval && numval <= SIZE_MAX) || *numend)
	    try_help ("invalid width `%s'", optarg);
	  if (width != numval)
	    {
	      if (width)
		fatal ("conflicting width options");
	      width = numval;
	    }
	  break;

	case BINARY_OPTION:
#if HAVE_SETMODE_DOS
	  binary = true;
	  set_binary_mode (STDOUT_FILENO, true);
#endif
	  break;

	case FROM_FILE_OPTION:
	  specify_value (&from_file, optarg, "--from-file");
	  break;

	case HELP_OPTION:
	  usage ();
	  check_stdout ();
	  return EXIT_SUCCESS;

	case HORIZON_LINES_OPTION:
	  numval = strtoumax (optarg, &numend, 10);
	  if (*numend)
	    try_help ("invalid horizon length `%s'", optarg);
	  /* Clamp to LIN_MAX and keep the largest value seen.  */
	  horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
	  break;

	case IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = true;
	  break;

	case INHIBIT_HUNK_MERGE_OPTION:
	  /* This option is obsolete, but accept it for backward
             compatibility.  */
	  break;

	case LEFT_COLUMN_OPTION:
	  left_column = true;
	  break;

	case LINE_FORMAT_OPTION:
	  /* --line-format sets every line format to the same value.  */
	  specify_style (OUTPUT_IFDEF);
	  for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
	    specify_value (&line_format[i], optarg, "--line-format");
	  break;

	case NO_IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = false;
	  break;

	case NORMAL_OPTION:
	  specify_style (OUTPUT_NORMAL);
	  break;

	case SDIFF_MERGE_ASSIST_OPTION:
	  specify_style (OUTPUT_SDIFF);
	  sdiff_merge_assist = true;
	  break;

	case STRIP_TRAILING_CR_OPTION:
	  strip_trailing_cr = true;
	  break;

	case SUPPRESS_COMMON_LINES_OPTION:
	  suppress_common_lines = true;
	  break;

	case TABSIZE_OPTION:
	  /* Validated the same way as -W.  */
	  numval = strtoumax (optarg, &numend, 10);
	  if (! (0 < numval && numval <= SIZE_MAX) || *numend)
	    try_help ("invalid tabsize `%s'", optarg);
	  if (tabsize != numval)
	    {
	      if (tabsize)
		fatal ("conflicting tabsize options");
	      tabsize = numval;
	    }
	  break;

	case TO_FILE_OPTION:
	  specify_value (&to_file, optarg, "--to-file");
	  break;

	case UNCHANGED_LINE_FORMAT_OPTION:
	case OLD_LINE_FORMAT_OPTION:
	case NEW_LINE_FORMAT_OPTION:
	  specify_style (OUTPUT_IFDEF);
	  /* Turn the option code into an index; this relies on the
	     three codes being consecutive.  */
	  c -= UNCHANGED_LINE_FORMAT_OPTION;
	  specify_value (&line_format[c], optarg, line_format_option[c]);
	  break;

	case UNCHANGED_GROUP_FORMAT_OPTION:
	case OLD_GROUP_FORMAT_OPTION:
	case NEW_GROUP_FORMAT_OPTION:
	case CHANGED_GROUP_FORMAT_OPTION:
	  specify_style (OUTPUT_IFDEF);
	  /* Likewise for the four consecutive group format codes.  */
	  c -= UNCHANGED_GROUP_FORMAT_OPTION;
	  specify_value (&group_format[c], optarg, group_format_option[c]);
	  break;

	default:
	  try_help (0, 0);
	}
      /* Remember this option so that the digit cases can tell whether
	 a digit continues a number.  Note that the format cases above
	 have turned C into an index by now.  */
      prev = c;
    }

  /* No style was requested: -p alone implies a context diff (with 3
     lines of context unless -NUM was given); otherwise use the
     normal style.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
	{
	  specify_style (OUTPUT_CONTEXT);
	  if (ocontext < 0)
	    context = 3;
	}
      else
	specify_style (OUTPUT_NORMAL);
    }

  /* Choose the time stamp format: the numeric form is used unless the
     output is a context diff and hard_locale (LC_TIME) is false.  */
  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    {
#ifdef ST_MTIM_NSEC
      time_format = "%Y-%m-%d %H:%M:%S.%N %z";
#else
      time_format = "%Y-%m-%d %H:%M:%S %z";
#endif
    }
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Deal with an obsolete -NUM option.  When posix2_version reports
     200112 or later it is rejected with a hint; otherwise it sets the
     context for context and unified diffs, unless an explicit -C/-U
     already asked for at least as much.  */
  if (0 <= ocontext)
    {
      bool modern_usage = 200112 <= posix2_version ();

      if ((output_style == OUTPUT_CONTEXT
	   || output_style == OUTPUT_UNIFIED)
	  && (context < ocontext
	      || (ocontext < context && ! explicit_context)))
	{
	  if (modern_usage)
	    {
	      error (0, 0,
		     _("`-%ld' option is obsolete; use `-%c %ld'"),
		     (long int) ocontext,
		     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
		     (long int) ocontext);
	      try_help (0, 0);
	    }
	  context = ocontext;
	}
      else
	{
	  if (modern_usage)
	    {
	      error (0, 0, _("`-%ld' option is obsolete; omit it"),
		     (long int) ocontext);
	      try_help (0, 0);
	    }
	}
    }

  /* Apply the defaults for options that were not given.  */
  if (! tabsize)
    tabsize = 8;
  if (! width)
    width = 130;

  {
    /* Maximize first the half line width, and then the gutter width,
       according to the following constraints:

	1.  Two half lines plus a gutter must fit in a line.
	2.  If the half line width is nonzero:
	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
	    b.  If tabs are not expanded to spaces,
		a half line plus a gutter is an integral number of tabs,
		so that tabs in the right column line up.  */

    intmax_t t = expand_tabs ? 1 : tabsize;
    intmax_t w = width;
    intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t)  *  t;
    /* The next line ends with a comma operator, so the two assignments
       form a single expression statement; they are still evaluated in
       order.  */
    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in the formats that were not specified.  OLD and NEW fall
     back to an explicitly given CHANGED format; if CHANGED was not
     given it becomes the concatenation of OLD and NEW.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
	if (!line_format[i])
	  line_format[i] = "%l\n";
      if (!group_format[OLD])
	group_format[OLD]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
	group_format[NEW]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
	group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
	group_format[CHANGED] = concat (group_format[OLD],
					group_format[NEW], "");
    }

  /* For the ifdef style this is true when the unchanged group format
     is empty, or is "%=" with an empty unchanged line format.  For
     every other style it is true unless the style is side-by-side
     without --suppress-common-lines.  */
  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (strcmp (group_format[UNCHANGED], "%=") == 0
	   && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* True only for --brief with binary I/O and none of the ignore
     options.  NOTE(review): the bitwise & and ~ here rely on every
     operand being 0 or 1; confirm the flags are declared bool.  */
  files_can_be_treated_as_binary =
    (brief & binary
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
	  | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Record the options (everything before the first operand).  */
  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
	fatal ("--from-file and --to-file both specified");
      else
	/* Compare FROM_FILE to every operand, keeping the largest
	   status.  */
	for (; optind < argc; optind++)
	  {
	    int status = compare_files ((struct comparison *) 0,
					from_file, argv[optind]);
	    if (exit_status < status)
	      exit_status = status;
	  }
    }
  else
    {
      if (to_file)
	/* Compare every operand to TO_FILE, keeping the largest
	   status.  */
	for (; optind < argc; optind++)
	  {
	    int status = compare_files ((struct comparison *) 0,
					argv[optind], to_file);
	    if (exit_status < status)
	      exit_status = status;
	  }
      else
	{
	  /* The plain form takes exactly two operands.  */
	  if (argc - optind != 2)
	    {
	      if (argc - optind < 2)
		try_help ("missing operand after `%s'", argv[argc - 1]);
	      else
		try_help ("extra operand `%s'", argv[optind + 2]);
	    }

	  exit_status = compare_files ((struct comparison *) 0,
				       argv[optind], argv[optind + 1]);
	}
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  /* exit does not return, so the return statement below is never
     reached.  */
  exit (exit_status);
  return exit_status;
}
780
781/* Append to REGLIST the regexp PATTERN.  */
782
783static void
784add_regexp (struct regexp_list *reglist, char const *pattern)
785{
786  size_t patlen = strlen (pattern);
787  char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
788
789  if (m != 0)
790    error (0, 0, "%s: %s", pattern, m);
791  else
792    {
793      char *regexps = reglist->regexps;
794      size_t len = reglist->len;
795      bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
796      size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
797      size_t size = reglist->size;
798
799      if (size <= newlen)
800	{
801	  if (!size)
802	    size = 1;
803
804	  do size *= 2;
805	  while (size <= newlen);
806
807	  reglist->size = size;
808	  reglist->regexps = regexps = xrealloc (regexps, size);
809	}
810      if (multiple_regexps)
811	{
812	  regexps[len++] = '\\';
813	  regexps[len++] = '|';
814	}
815      memcpy (regexps + len, pattern, patlen + 1);
816    }
817}
818
819/* Ensure that REGLIST represents the disjunction of its regexps.
820   This is done here, rather than earlier, to avoid O(N^2) behavior.  */
821
822static void
823summarize_regexp_list (struct regexp_list *reglist)
824{
825  if (reglist->regexps)
826    {
827      /* At least one regexp was specified.  Allocate a fastmap for it.  */
828      reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
829      if (reglist->multiple_regexps)
830	{
831	  /* Compile the disjunction of the regexps.
832	     (If just one regexp was specified, it is already compiled.)  */
833	  char const *m = re_compile_pattern (reglist->regexps, reglist->len,
834					      reglist->buf);
835	  if (m != 0)
836	    error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
837	}
838    }
839}
840
841static void
842try_help (char const *reason_msgid, char const *operand)
843{
844  if (reason_msgid)
845    error (0, 0, _(reason_msgid), operand);
846  error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
847	 program_name);
848  abort ();
849}
850
/* Verify that everything written to standard output got there.
   A pending stream error is reported through fatal; otherwise the
   stream is closed and a failure to close is reported through
   pfatal_with_name.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    {
      pfatal_with_name (_("standard output"));
    }
}
859
/* The --help text, one entry per message, terminated by a null
   pointer.  usage walks this table: an empty string produces a blank
   line, every other entry is translated with _() and printed.
   Entries may span several lines; usage indents each line by two
   spaces, except that a final line starting with neither a space nor
   `-' is printed flush left.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  /* Options that control what counts as a difference.  */
  N_("-i  --ignore-case  Ignore case differences in file contents."),
  N_("--ignore-file-name-case  Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case  Consider case when comparing file names."),
  N_("-E  --ignore-tab-expansion  Ignore changes due to tab expansion."),
  N_("-b  --ignore-space-change  Ignore changes in the amount of white space."),
  N_("-w  --ignore-all-space  Ignore all white space."),
  N_("-B  --ignore-blank-lines  Ignore changes whose lines are all blank."),
  N_("-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr  Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary  Read and write data in binary mode."),
#endif
  N_("-a  --text  Treat all files as text."),
  "",
  /* Output styles and their formats.  */
  N_("-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.\n\
-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.\n\
  --label LABEL  Use LABEL instead of file name.\n\
  -p  --show-c-function  Show which C function each change is in.\n\
  -F RE  --show-function-line=RE  Show the most recent line matching RE."),
  N_("-q  --brief  Output only whether files differ."),
  N_("-e  --ed  Output an ed script."),
  N_("--normal  Output a normal diff."),
  N_("-n  --rcs  Output an RCS format diff."),
  N_("-y  --side-by-side  Output in two columns.\n\
  -W NUM  --width=NUM  Output at most NUM (default 130) print columns.\n\
  --left-column  Output only the left column of common lines.\n\
  --suppress-common-lines  Do not output common lines."),
  N_("-D NAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT  Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT."),
  N_("  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'."),
  N_("  GFMT may contain:\n\
    %<  lines from FILE1\n\
    %>  lines from FILE2\n\
    %=  lines common to FILE1 and FILE2\n\
    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
      LETTERs are as follows for new group, lower case for old group:\n\
        F  first line number\n\
        L  last line number\n\
        N  number of lines = L-F+1\n\
        E  F-1\n\
        M  L+1"),
  N_("  LFMT may contain:\n\
    %L  contents of line\n\
    %l  contents of line, excluding any trailing newline\n\
    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number"),
  N_("  Either GFMT or LFMT may contain:\n\
    %%  %\n\
    %c'C'  the single character C\n\
    %c'\\OOO'  the character with octal code OOO"),
  "",
  /* Layout of the output.  */
  N_("-l  --paginate  Pass the output through `pr' to paginate it."),
  N_("-t  --expand-tabs  Expand tabs to spaces in output."),
  N_("-T  --initial-tab  Make tabs line up by prepending a tab."),
  N_("--tabsize=NUM  Tab stops are every NUM (default 8) print columns."),
  "",
  /* Directory comparison and operand selection.  */
  N_("-r  --recursive  Recursively compare any subdirectories found."),
  N_("-N  --new-file  Treat absent files as empty."),
  N_("--unidirectional-new-file  Treat absent first files as empty."),
  N_("-s  --report-identical-files  Report when two files are the same."),
  N_("-x PAT  --exclude=PAT  Exclude files that match PAT."),
  N_("-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE."),
  N_("-S FILE  --starting-file=FILE  Start with FILE when comparing directories."),
  N_("--from-file=FILE1  Compare FILE1 to all operands.  FILE1 can be a directory."),
  N_("--to-file=FILE2  Compare all operands to FILE2.  FILE2 can be a directory."),
  "",
  /* Tuning of the comparison algorithm.  */
  N_("--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix."),
  N_("-d  --minimal  Try hard to find a smaller set of changes."),
  N_("--speed-large-files  Assume large files and many scattered small changes."),
  "",
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  "",
  /* Closing notes; usage prints these flush left.  */
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
945
946static void
947usage (void)
948{
949  char const * const *p;
950
951  printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
952
953  for (p = option_help_msgid;  *p;  p++)
954    {
955      if (!**p)
956	putchar ('\n');
957      else
958	{
959	  char const *msg = _(*p);
960	  char const *nl;
961	  while ((nl = strchr (msg, '\n')))
962	    {
963	      int msglen = nl + 1 - msg;
964	      printf ("  %.*s", msglen, msg);
965	      msg = nl + 1;
966	    }
967
968	  printf ("  %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
969	}
970    }
971}
972
/* Store VALUE in *VAR on behalf of the option named OPTION.
   If *VAR already holds a string that differs from VALUE, the two
   settings conflict: report that, naming OPTION and VALUE, and give
   up via try_help.  Setting the same value again is accepted.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;
  bool conflict = previous != 0 && strcmp (previous, value) != 0;

  if (conflict)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
985
986/* Set the output style to STYLE, diagnosing conflicts.  */
987static void
988specify_style (enum output_style style)
989{
990  if (output_style != style)
991    {
992      output_style = style;
993    }
994}
995
/* Store the current time as the last-modified time of *ST.

   When the stat structure has a nanosecond member (ST_MTIM_NSEC is
   defined), clock_gettime and then gettimeofday are tried first,
   where available, so that the sub-second part is filled in as well.
   If neither is available or both fail, only the seconds are set,
   from time.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, 0) == 0)
      {
	/* Microseconds to nanoseconds.  */
	st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
	st->st_mtime = now.tv_sec;
	return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Fallback with one-second resolution.  */
  st->st_mtime = time (0);
}
1024
1025/* Compare two files (or dirs) with parent comparison PARENT
1026   and names NAME0 and NAME1.
1027   (If PARENT is 0, then the first name is just NAME0, etc.)
1028   This is self-contained; it opens the files and closes them.
1029
1030   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1031   different, EXIT_TROUBLE if there is a problem opening them.  */
1032
1033static int
1034compare_files (struct comparison const *parent,
1035	       char const *name0,
1036	       char const *name1)
1037{
1038  struct comparison cmp;
1039#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1040  register int f;
1041  int status = EXIT_SUCCESS;
1042  bool same_files;
1043  char *free0, *free1;
1044
1045  /* If this is directory comparison, perhaps we have a file
1046     that exists only in one of the directories.
1047     If so, just print a message to that effect.  */
1048
1049  if (! ((name0 && name1)
1050	 || (unidirectional_new_file && name1)
1051	 || new_file))
1052    {
1053      char const *name = name0 == 0 ? name1 : name0;
1054      char const *dir = parent->file[name0 == 0].name;
1055
1056      /* See POSIX 1003.1-2001 for this format.  */
1057      message ("Only in %s: %s\n", dir, name);
1058
1059      /* Return EXIT_FAILURE so that diff_dirs will return
1060	 EXIT_FAILURE ("some files differ").  */
1061      return EXIT_FAILURE;
1062    }
1063
1064  memset (cmp.file, 0, sizeof cmp.file);
1065  cmp.parent = parent;
1066
1067  /* cmp.file[f].desc markers */
1068#define NONEXISTENT (-1) /* nonexistent file */
1069#define UNOPENED (-2) /* unopened file (e.g. directory) */
1070#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1071
1072#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1073
1074  cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1075  cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1076
1077  /* Now record the full name of each file, including nonexistent ones.  */
1078
1079  if (name0 == 0)
1080    name0 = name1;
1081  if (name1 == 0)
1082    name1 = name0;
1083
1084  if (!parent)
1085    {
1086      free0 = 0;
1087      free1 = 0;
1088      cmp.file[0].name = name0;
1089      cmp.file[1].name = name1;
1090    }
1091  else
1092    {
1093      cmp.file[0].name = free0
1094	= dir_file_pathname (parent->file[0].name, name0);
1095      cmp.file[1].name = free1
1096	= dir_file_pathname (parent->file[1].name, name1);
1097    }
1098
1099  /* Stat the files.  */
1100
1101  for (f = 0; f < 2; f++)
1102    {
1103      if (cmp.file[f].desc != NONEXISTENT)
1104	{
1105	  if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1106	    {
1107	      cmp.file[f].desc = cmp.file[0].desc;
1108	      cmp.file[f].stat = cmp.file[0].stat;
1109	    }
1110	  else if (strcmp (cmp.file[f].name, "-") == 0)
1111	    {
1112	      cmp.file[f].desc = STDIN_FILENO;
1113	      if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1114		cmp.file[f].desc = ERRNO_ENCODE (errno);
1115	      else
1116		{
1117		  if (S_ISREG (cmp.file[f].stat.st_mode))
1118		    {
1119		      off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1120		      if (pos < 0)
1121			cmp.file[f].desc = ERRNO_ENCODE (errno);
1122		      else
1123			cmp.file[f].stat.st_size =
1124			  MAX (0, cmp.file[f].stat.st_size - pos);
1125		    }
1126
1127		  /* POSIX 1003.1-2001 requires current time for
1128		     stdin.  */
1129		  set_mtime_to_now (&cmp.file[f].stat);
1130		}
1131	    }
1132	  else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1133	    cmp.file[f].desc = ERRNO_ENCODE (errno);
1134	}
1135    }
1136
1137  /* Mark files as nonexistent as needed for -N and -P, if they are
1138     inaccessible empty regular files (the kind of files that 'patch'
1139     creates to indicate nonexistent backups), or if they are
1140     top-level files that do not exist but their counterparts do
1141     exist.  */
1142  for (f = 0; f < 2; f++)
1143    if ((new_file || (f == 0 && unidirectional_new_file))
1144	&& (cmp.file[f].desc == UNOPENED
1145	    ? (S_ISREG (cmp.file[f].stat.st_mode)
1146	       && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1147	       && cmp.file[f].stat.st_size == 0)
1148	    : (cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1149	       && ! parent
1150	       && cmp.file[1 - f].desc == UNOPENED)))
1151      cmp.file[f].desc = NONEXISTENT;
1152
1153  for (f = 0; f < 2; f++)
1154    if (cmp.file[f].desc == NONEXISTENT)
1155      {
1156	memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1157	cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1158      }
1159
1160  for (f = 0; f < 2; f++)
1161    {
1162      int e = ERRNO_DECODE (cmp.file[f].desc);
1163      if (0 <= e)
1164	{
1165	  errno = e;
1166	  perror_with_name (cmp.file[f].name);
1167	  status = EXIT_TROUBLE;
1168	}
1169    }
1170
1171  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1172    {
1173      /* If one is a directory, and it was specified in the command line,
1174	 use the file in that dir with the other file's basename.  */
1175
1176      int fnm_arg = DIR_P (0);
1177      int dir_arg = 1 - fnm_arg;
1178      char const *fnm = cmp.file[fnm_arg].name;
1179      char const *dir = cmp.file[dir_arg].name;
1180      char const *filename = cmp.file[dir_arg].name = free0
1181	= dir_file_pathname (dir, base_name (fnm));
1182
1183      if (strcmp (fnm, "-") == 0)
1184	fatal ("cannot compare `-' to a directory");
1185
1186      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1187	{
1188	  perror_with_name (filename);
1189	  status = EXIT_TROUBLE;
1190	}
1191    }
1192
1193  if (status != EXIT_SUCCESS)
1194    {
1195      /* One of the files should exist but does not.  */
1196    }
1197  else if (cmp.file[0].desc == NONEXISTENT
1198	   && cmp.file[1].desc == NONEXISTENT)
1199    {
1200      /* Neither file "exists", so there's nothing to compare.  */
1201    }
1202  else if ((same_files
1203	    = (cmp.file[0].desc != NONEXISTENT
1204	       && cmp.file[1].desc != NONEXISTENT
1205	       && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1206	       && same_file_attributes (&cmp.file[0].stat,
1207					&cmp.file[1].stat)))
1208	   && no_diff_means_no_output)
1209    {
1210      /* The two named files are actually the same physical file.
1211	 We know they are identical without actually reading them.  */
1212    }
1213  else if (DIR_P (0) & DIR_P (1))
1214    {
1215      if (output_style == OUTPUT_IFDEF)
1216	fatal ("-D option not supported with directories");
1217
1218      /* If both are directories, compare the files in them.  */
1219
1220      if (parent && !recursive)
1221	{
1222	  /* But don't compare dir contents one level down
1223	     unless -r was specified.
1224	     See POSIX 1003.1-2001 for this format.  */
1225	  message ("Common subdirectories: %s and %s\n",
1226		   cmp.file[0].name, cmp.file[1].name);
1227	}
1228      else
1229	status = diff_dirs (&cmp, compare_files);
1230    }
1231  else if ((DIR_P (0) | DIR_P (1))
1232	   || (parent
1233	       && (! S_ISREG (cmp.file[0].stat.st_mode)
1234		   || ! S_ISREG (cmp.file[1].stat.st_mode))))
1235    {
1236      if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1237	{
1238	  /* We have a subdirectory that exists only in one directory.  */
1239
1240	  if ((DIR_P (0) | DIR_P (1))
1241	      && recursive
1242	      && (new_file
1243		  || (unidirectional_new_file
1244		      && cmp.file[0].desc == NONEXISTENT)))
1245	    status = diff_dirs (&cmp, compare_files);
1246	  else
1247	    {
1248	      char const *dir
1249		= parent->file[cmp.file[0].desc == NONEXISTENT].name;
1250
1251	      /* See POSIX 1003.1-2001 for this format.  */
1252	      message ("Only in %s: %s\n", dir, name0);
1253
1254	      status = EXIT_FAILURE;
1255	    }
1256	}
1257      else
1258	{
1259	  /* We have two files that are not to be compared.  */
1260
1261	  /* See POSIX 1003.1-2001 for this format.  */
1262	  message5 ("File %s is a %s while file %s is a %s\n",
1263		    file_label[0] ? file_label[0] : cmp.file[0].name,
1264		    file_type (&cmp.file[0].stat),
1265		    file_label[1] ? file_label[1] : cmp.file[1].name,
1266		    file_type (&cmp.file[1].stat));
1267
1268	  /* This is a difference.  */
1269	  status = EXIT_FAILURE;
1270	}
1271    }
1272  else if (files_can_be_treated_as_binary
1273	   && S_ISREG (cmp.file[0].stat.st_mode)
1274	   && S_ISREG (cmp.file[1].stat.st_mode)
1275	   && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
1276    {
1277      message ("Files %s and %s differ\n",
1278	       file_label[0] ? file_label[0] : cmp.file[0].name,
1279	       file_label[1] ? file_label[1] : cmp.file[1].name);
1280      status = EXIT_FAILURE;
1281    }
1282  else
1283    {
1284      /* Both exist and neither is a directory.  */
1285
1286      /* Open the files and record their descriptors.  */
1287
1288      if (cmp.file[0].desc == UNOPENED)
1289	if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1290	  {
1291	    perror_with_name (cmp.file[0].name);
1292	    status = EXIT_TROUBLE;
1293	  }
1294      if (cmp.file[1].desc == UNOPENED)
1295	{
1296	  if (same_files)
1297	    cmp.file[1].desc = cmp.file[0].desc;
1298	  else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1299		   < 0)
1300	    {
1301	      perror_with_name (cmp.file[1].name);
1302	      status = EXIT_TROUBLE;
1303	    }
1304	}
1305
1306#if HAVE_SETMODE_DOS
1307      if (binary)
1308	for (f = 0; f < 2; f++)
1309	  if (0 <= cmp.file[f].desc)
1310	    set_binary_mode (cmp.file[f].desc, true);
1311#endif
1312
1313      /* Compare the files, if no error was found.  */
1314
1315      if (status == EXIT_SUCCESS)
1316	status = diff_2_files (&cmp);
1317
1318      /* Close the file descriptors.  */
1319
1320      if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1321	{
1322	  perror_with_name (cmp.file[0].name);
1323	  status = EXIT_TROUBLE;
1324	}
1325      if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1326	  && close (cmp.file[1].desc) != 0)
1327	{
1328	  perror_with_name (cmp.file[1].name);
1329	  status = EXIT_TROUBLE;
1330	}
1331    }
1332
1333  /* Now the comparison has been done, if no error prevented it,
1334     and STATUS is the value this function will return.  */
1335
1336  if (status == EXIT_SUCCESS)
1337    {
1338      if (report_identical_files && !DIR_P (0))
1339	message ("Files %s and %s are identical\n",
1340		 file_label[0] ? file_label[0] : cmp.file[0].name,
1341		 file_label[1] ? file_label[1] : cmp.file[1].name);
1342    }
1343  else
1344    {
1345      /* Flush stdout so that the user sees differences immediately.
1346	 This can hurt performance, unfortunately.  */
1347      if (fflush (stdout) != 0)
1348	pfatal_with_name (_("standard output"));
1349    }
1350
1351  if (free0)
1352    free (free0);
1353  if (free1)
1354    free (free1);
1355
1356  return status;
1357}
1358