1/* Support routines for GNU DIFF.
2
3   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4   2004 Free Software Foundation, Inc.
5
6   This file is part of GNU DIFF.
7
8   GNU DIFF is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   GNU DIFF is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#include "diff.h"
24#include <dirname.h>
25#include <error.h>
26#include <quotesys.h>
27#include <xalloc.h>
28
/* File name of the subsidiary `pr' program that begin_output runs when
   paginating; the value comes from the PR_PROGRAM macro.  */
char const pr_program[] = PR_PROGRAM;
30
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  struct msg *next; /* Next queued message, or null for the last one.  */
  char args[1]; /* Format + 4 args, each '\0' terminated, concatenated.
		   Declared with one element only as a placeholder:
		   message5 allocates offsetof (struct msg, args) plus
		   the total size of the five strings.  */
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the pointer
   that the next queued message must be stored through.  It is
   &msg_chain while the chain is empty.  */

static struct msg **msg_chain_end = &msg_chain;
47
/* Report a failed system call without exiting.
   NAME, normally the file name involved, is printed together with
   the message that corresponds to the current value of errno.  */

void
perror_with_name (char const *name)
{
  int const failure_code = errno;

  /* Status 0 asks error () to report and then return.  */
  error (0, failure_code, "%s", name);
}
56
57/* Use when a system call returns non-zero status and that is fatal.  */
58
59void
60pfatal_with_name (char const *name)
61{
62  int e = errno;
63  print_message_queue ();
64  error (EXIT_TROUBLE, e, "%s", name);
65  abort ();
66}
67
68/* Print an error message containing MSGID, then exit.  */
69
70void
71fatal (char const *msgid)
72{
73  print_message_queue ();
74  error (EXIT_TROUBLE, 0, "%s", _(msgid));
75  abort ();
76}
77
/* Two-argument convenience form of message5: like printf, except that
   when paginating (-l) the message is saved and printed later.
   This is used for things like "Only in ...".  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const no_arg = 0;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
86
87void
88message5 (char const *format_msgid, char const *arg1, char const *arg2,
89	  char const *arg3, char const *arg4)
90{
91  if (paginate)
92    {
93      char *p;
94      char const *arg[5];
95      int i;
96      size_t size[5];
97      size_t total_size = offsetof (struct msg, args);
98      struct msg *new;
99
100      arg[0] = format_msgid;
101      arg[1] = arg1;
102      arg[2] = arg2;
103      arg[3] = arg3 ? arg3 : "";
104      arg[4] = arg4 ? arg4 : "";
105
106      for (i = 0;  i < 5;  i++)
107	total_size += size[i] = strlen (arg[i]) + 1;
108
109      new = xmalloc (total_size);
110
111      for (i = 0, p = new->args;  i < 5;  p += size[i++])
112	memcpy (p, arg[i], size[i]);
113
114      *msg_chain_end = new;
115      new->next = 0;
116      msg_chain_end = &new->next;
117    }
118  else
119    {
120      if (sdiff_merge_assist)
121	putchar (' ');
122      printf (_(format_msgid), arg1, arg2, arg3, arg4);
123    }
124}
125
126/* Output all the messages that were saved up by calls to `message'.  */
127
128void
129print_message_queue (void)
130{
131  char const *arg[5];
132  int i;
133  struct msg *m = msg_chain;
134
135  while (m)
136    {
137      struct msg *next = m->next;
138      arg[0] = m->args;
139      for (i = 0;  i < 4;  i++)
140	arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
141      printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
142      free (m);
143      m = next;
144    }
145}
146
147/* Call before outputting the results of comparing files NAME0 and NAME1
148   to set up OUTFILE, the stdio stream for the output to go to.
149
150   Usually, OUTFILE is just stdout.  But when -l was specified
151   we fork off a `pr' and make OUTFILE a pipe to it.
152   `pr' then outputs to our stdout.  */
153
154static char const *current_name0;
155static char const *current_name1;
156static bool currently_recursive;
157
158void
159setup_output (char const *name0, char const *name1, bool recursive)
160{
161  current_name0 = name0;
162  current_name1 = name1;
163  currently_recursive = recursive;
164  outfile = 0;
165}
166
167#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
168static pid_t pr_pid;
169#endif
170
171void
172begin_output (void)
173{
174  char *name;
175
176  if (outfile != 0)
177    return;
178
179  /* Construct the header of this piece of diff.  */
180  name = xmalloc (strlen (current_name0) + strlen (current_name1)
181		  + strlen (switch_string) + 7);
182
183  /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
184     the standard: it says that we must print only the last component
185     of the pathnames, and it requires two spaces after "diff" if
186     there are no options.  These requirements are silly and do not
187     match historical practice.  */
188  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
189
190  if (paginate)
191    {
192      if (fflush (stdout) != 0)
193	pfatal_with_name (_("write failed"));
194
195      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
196      {
197#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
198	int pipes[2];
199
200	if (pipe (pipes) != 0)
201	  pfatal_with_name ("pipe");
202
203	pr_pid = vfork ();
204	if (pr_pid < 0)
205	  pfatal_with_name ("fork");
206
207	if (pr_pid == 0)
208	  {
209	    close (pipes[1]);
210	    if (pipes[0] != STDIN_FILENO)
211	      {
212		if (dup2 (pipes[0], STDIN_FILENO) < 0)
213		  pfatal_with_name ("dup2");
214		close (pipes[0]);
215	      }
216
217	    execl (pr_program, pr_program, "-h", name, (char *) 0);
218	    _exit (errno == ENOENT ? 127 : 126);
219	  }
220	else
221	  {
222	    close (pipes[0]);
223	    outfile = fdopen (pipes[1], "w");
224	    if (!outfile)
225	      pfatal_with_name ("fdopen");
226	  }
227#else
228	char *command = xmalloc (sizeof pr_program - 1 + 7
229				 + quote_system_arg ((char *) 0, name) + 1);
230	char *p;
231	sprintf (command, "%s -f -h ", pr_program);
232	p = command + sizeof pr_program - 1 + 7;
233	p += quote_system_arg (p, name);
234	*p = 0;
235	errno = 0;
236	outfile = popen (command, "w");
237	if (!outfile)
238	  pfatal_with_name (command);
239	free (command);
240#endif
241      }
242    }
243  else
244    {
245
246      /* If -l was not specified, output the diff straight to `stdout'.  */
247
248      outfile = stdout;
249
250      /* If handling multiple files (because scanning a directory),
251	 print which files the following output is about.  */
252      if (currently_recursive)
253	printf ("%s\n", name);
254    }
255
256  free (name);
257
258  /* A special header is needed at the beginning of context output.  */
259  switch (output_style)
260    {
261    case OUTPUT_CONTEXT:
262      print_context_header (files, false);
263      break;
264
265    case OUTPUT_UNIFIED:
266      print_context_header (files, true);
267      break;
268
269    default:
270      break;
271    }
272}
273
274/* Call after the end of output of diffs for one file.
275   Close OUTFILE and get rid of the `pr' subfork.  */
276
277void
278finish_output (void)
279{
280  if (outfile != 0 && outfile != stdout)
281    {
282      int status;
283      int wstatus;
284      int werrno = 0;
285      if (ferror (outfile))
286	fatal ("write failed");
287#if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
288      wstatus = pclose (outfile);
289      if (wstatus == -1)
290	werrno = errno;
291#else
292      if (fclose (outfile) != 0)
293	pfatal_with_name (_("write failed"));
294      if (waitpid (pr_pid, &wstatus, 0) < 0)
295	pfatal_with_name ("waitpid");
296#endif
297      status = (! werrno && WIFEXITED (wstatus)
298		? WEXITSTATUS (wstatus)
299		: INT_MAX);
300      if (status)
301	error (EXIT_TROUBLE, werrno,
302	       _(status == 126
303		 ? "subsidiary program `%s' could not be invoked"
304		 : status == 127
305		 ? "subsidiary program `%s' not found"
306		 : status == INT_MAX
307		 ? "subsidiary program `%s' failed"
308		 : "subsidiary program `%s' failed (exit status %d)"),
309	       pr_program, status);
310    }
311
312  outfile = 0;
313}
314
315/* Compare two lines (typically one from each input file)
316   according to the command line options.
317   For efficiency, this is invoked only when the lines do not match exactly
318   but an option like -i might cause us to ignore the difference.
319   Return nonzero if the lines differ.  */
320
321bool
322lines_differ (char const *s1, char const *s2)
323{
324  register char const *t1 = s1;
325  register char const *t2 = s2;
326  size_t column = 0;
327
328  while (1)
329    {
330      register unsigned char c1 = *t1++;
331      register unsigned char c2 = *t2++;
332
333      /* Test for exact char equality first, since it's a common case.  */
334      if (c1 != c2)
335	{
336	  switch (ignore_white_space)
337	    {
338	    case IGNORE_ALL_SPACE:
339	      /* For -w, just skip past any white space.  */
340	      while (isspace (c1) && c1 != '\n') c1 = *t1++;
341	      while (isspace (c2) && c2 != '\n') c2 = *t2++;
342	      break;
343
344	    case IGNORE_SPACE_CHANGE:
345	      /* For -b, advance past any sequence of white space in
346		 line 1 and consider it just one space, or nothing at
347		 all if it is at the end of the line.  */
348	      if (isspace (c1))
349		{
350		  while (c1 != '\n')
351		    {
352		      c1 = *t1++;
353		      if (! isspace (c1))
354			{
355			  --t1;
356			  c1 = ' ';
357			  break;
358			}
359		    }
360		}
361
362	      /* Likewise for line 2.  */
363	      if (isspace (c2))
364		{
365		  while (c2 != '\n')
366		    {
367		      c2 = *t2++;
368		      if (! isspace (c2))
369			{
370			  --t2;
371			  c2 = ' ';
372			  break;
373			}
374		    }
375		}
376
377	      if (c1 != c2)
378		{
379		  /* If we went too far when doing the simple test
380		     for equality, go back to the first non-white-space
381		     character in both sides and try again.  */
382		  if (c2 == ' ' && c1 != '\n'
383		      && s1 + 1 < t1
384		      && isspace ((unsigned char) t1[-2]))
385		    {
386		      --t1;
387		      continue;
388		    }
389		  if (c1 == ' ' && c2 != '\n'
390		      && s2 + 1 < t2
391		      && isspace ((unsigned char) t2[-2]))
392		    {
393		      --t2;
394		      continue;
395		    }
396		}
397
398	      break;
399
400	    case IGNORE_TAB_EXPANSION:
401	      if ((c1 == ' ' && c2 == '\t')
402		  || (c1 == '\t' && c2 == ' '))
403		{
404		  size_t column2 = column;
405		  for (;; c1 = *t1++)
406		    {
407		      if (c1 == ' ')
408			column++;
409		      else if (c1 == '\t')
410			column += tabsize - column % tabsize;
411		      else
412			break;
413		    }
414		  for (;; c2 = *t2++)
415		    {
416		      if (c2 == ' ')
417			column2++;
418		      else if (c2 == '\t')
419			column2 += tabsize - column2 % tabsize;
420		      else
421			break;
422		    }
423		  if (column != column2)
424		    return true;
425		}
426	      break;
427
428	    case IGNORE_NO_WHITE_SPACE:
429	      break;
430	    }
431
432	  /* Lowercase all letters if -i is specified.  */
433
434	  if (ignore_case)
435	    {
436	      c1 = tolower (c1);
437	      c2 = tolower (c2);
438	    }
439
440	  if (c1 != c2)
441	    break;
442	}
443      if (c1 == '\n')
444	return false;
445
446      column += c1 == '\t' ? tabsize - column % tabsize : 1;
447    }
448
449  return true;
450}
451
/* Hunk-boundary function for print_script: return the last link of
   the group of changes that starts at START.  This implementation
   does no grouping and returns START itself, so every change forms
   a hunk of its own.  */

struct change *
find_change (struct change *start)
{
  struct change *hunk_end = start;

  return hunk_end;
}
460
/* Counterpart of find_change with the same contract: it returns
   START unchanged, so each change is treated as its own hunk.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *hunk_end = start;

  return hunk_end;
}
466
467/* Divide SCRIPT into pieces by calling HUNKFUN and
468   print each piece with PRINTFUN.
469   Both functions take one arg, an edit script.
470
471   HUNKFUN is called with the tail of the script
472   and returns the last link that belongs together with the start
473   of the tail.
474
475   PRINTFUN takes a subscript which belongs together (with a null
476   link at the end) and prints it.  */
477
478void
479print_script (struct change *script,
480	      struct change * (*hunkfun) (struct change *),
481	      void (*printfun) (struct change *))
482{
483  struct change *next = script;
484
485  while (next)
486    {
487      struct change *this, *end;
488
489      /* Find a set of changes that belong together.  */
490      this = next;
491      end = (*hunkfun) (next);
492
493      /* Disconnect them from the rest of the changes,
494	 making them a hunk, and remember the rest for next iteration.  */
495      next = end->link;
496      end->link = 0;
497#ifdef DEBUG
498      debug_script (this);
499#endif
500
501      /* Print this hunk.  */
502      (*printfun) (this);
503
504      /* Reconnect the script so it will all be freed properly.  */
505      end->link = next;
506    }
507}
508
509/* Print the text of a single line LINE,
510   flagging it with the characters in LINE_FLAG (which say whether
511   the line is inserted, deleted, changed, etc.).  */
512
513void
514print_1_line (char const *line_flag, char const *const *line)
515{
516  char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
517  FILE *out = outfile; /* Help the compiler some more.  */
518  char const *flag_format = 0;
519
520  /* If -T was specified, use a Tab between the line-flag and the text.
521     Otherwise use a Space (as Unix diff does).
522     Print neither space nor tab if line-flags are empty.  */
523
524  if (line_flag && *line_flag)
525    {
526      flag_format = initial_tab ? "%s\t" : "%s ";
527      fprintf (out, flag_format, line_flag);
528    }
529
530  output_1_line (base, limit, flag_format, line_flag);
531
532  if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
533    fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
534}
535
536/* Output a line from BASE up to LIMIT.
537   With -t, expand white space characters to spaces, and if FLAG_FORMAT
538   is nonzero, output it with argument LINE_FLAG after every
539   internal carriage return, so that tab stops continue to line up.  */
540
541void
542output_1_line (char const *base, char const *limit, char const *flag_format,
543	       char const *line_flag)
544{
545  if (!expand_tabs)
546    fwrite (base, sizeof (char), limit - base, outfile);
547  else
548    {
549      register FILE *out = outfile;
550      register unsigned char c;
551      register char const *t = base;
552      register size_t column = 0;
553      size_t tab_size = tabsize;
554
555      while (t < limit)
556	switch ((c = *t++))
557	  {
558	  case '\t':
559	    {
560	      size_t spaces = tab_size - column % tab_size;
561	      column += spaces;
562	      do
563		putc (' ', out);
564	      while (--spaces);
565	    }
566	    break;
567
568	  case '\r':
569	    putc (c, out);
570	    if (flag_format && t < limit && *t != '\n')
571	      fprintf (out, flag_format, line_flag);
572	    column = 0;
573	    break;
574
575	  case '\b':
576	    if (column == 0)
577	      continue;
578	    column--;
579	    putc (c, out);
580	    break;
581
582	  default:
583	    column += isprint (c) != 0;
584	    putc (c, out);
585	    break;
586	  }
587    }
588}
589
/* Letters 'd', 'a' and 'c' at indexes 1, 2 and 3; index 0 holds '\0'.
   NOTE(review): presumably indexed by `enum changes' (UNCHANGED, OLD,
   NEW, CHANGED) -- confirm against the enum's declaration.  */
char const change_letter[] = { '\0', 'd', 'a', 'c' };
591
592/* Translate an internal line number (an index into diff's table of lines)
593   into an actual line number in the input file.
594   The internal line number is I.  FILE points to the data on the file.
595
596   Internal line numbers count from 0 starting after the prefix.
597   Actual line numbers count from 1 within the entire file.  */
598
599lin
600translate_line_number (struct file_data const *file, lin i)
601{
602  return i + file->prefix_lines + 1;
603}
604
605/* Translate a line number range.  This is always done for printing,
606   so for convenience translate to long int rather than lin, so that the
607   caller can use printf with "%ld" without casting.  */
608
609void
610translate_range (struct file_data const *file,
611		 lin a, lin b,
612		 long int *aptr, long int *bptr)
613{
614  *aptr = translate_line_number (file, a - 1) + 1;
615  *bptr = translate_line_number (file, b + 1) - 1;
616}
617
618/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
619   If the two numbers are identical, print just one number.
620
621   Args A and B are internal line numbers.
622   We print the translated (real) line numbers.  */
623
624void
625print_number_range (char sepchar, struct file_data *file, lin a, lin b)
626{
627  long int trans_a, trans_b;
628  translate_range (file, a, b, &trans_a, &trans_b);
629
630  /* Note: we can have B < A in the case of a range of no lines.
631     In this case, we should print the line number before the range,
632     which is B.  */
633  if (trans_b > trans_a)
634    fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
635  else
636    fprintf (outfile, "%ld", trans_b);
637}
638
639/* Look at a hunk of edit script and report the range of lines in each file
640   that it applies to.  HUNK is the start of the hunk, which is a chain
641   of `struct change'.  The first and last line numbers of file 0 are stored in
642   *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
643   Note that these are internal line numbers that count from 0.
644
645   If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
646
647   Return UNCHANGED if only ignorable lines are inserted or deleted,
648   OLD if lines of file 0 are deleted,
649   NEW if lines of file 1 are inserted,
650   and CHANGED if both kinds of changes are found. */
651
652enum changes
653analyze_hunk (struct change *hunk,
654	      lin *first0, lin *last0,
655	      lin *first1, lin *last1)
656{
657  struct change *next;
658  lin l0, l1;
659  lin show_from, show_to;
660  lin i;
661  bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
662  size_t trivial_length = ignore_blank_lines - 1;
663    /* If 0, ignore zero-length lines;
664       if SIZE_MAX, do not ignore lines just because of their length.  */
665  bool skip_leading_white_space =
666    (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
667
668  char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
669  char const * const *linbuf1 = files[1].linbuf;
670
671  show_from = show_to = 0;
672
673  *first0 = hunk->line0;
674  *first1 = hunk->line1;
675
676  next = hunk;
677  do
678    {
679      l0 = next->line0 + next->deleted - 1;
680      l1 = next->line1 + next->inserted - 1;
681      show_from += next->deleted;
682      show_to += next->inserted;
683
684      for (i = next->line0; i <= l0 && trivial; i++)
685	{
686	  char const *line = linbuf0[i];
687	  char const *newline = linbuf0[i + 1] - 1;
688	  size_t len = newline - line;
689	  char const *p = line;
690	  if (skip_leading_white_space)
691	    while (isspace ((unsigned char) *p) && *p != '\n')
692	      p++;
693	  if (newline - p != trivial_length
694	      && (! ignore_regexp.fastmap
695		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
696	    trivial = 0;
697	}
698
699      for (i = next->line1; i <= l1 && trivial; i++)
700	{
701	  char const *line = linbuf1[i];
702	  char const *newline = linbuf1[i + 1] - 1;
703	  size_t len = newline - line;
704	  char const *p = line;
705	  if (skip_leading_white_space)
706	    while (isspace ((unsigned char) *p) && *p != '\n')
707	      p++;
708	  if (newline - p != trivial_length
709	      && (! ignore_regexp.fastmap
710		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
711	    trivial = 0;
712	}
713    }
714  while ((next = next->link) != 0);
715
716  *last0 = l0;
717  *last1 = l1;
718
719  /* If all inserted or deleted lines are ignorable,
720     tell the caller to ignore this hunk.  */
721
722  if (trivial)
723    return UNCHANGED;
724
725  return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
726}
727
/* Return a newly allocated string holding S1, S2 and S3 joined in
   that order.  The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *joined = xmalloc (len1 + len2 + len3 + 1);
  char *dest = joined;

  memcpy (dest, s1, len1);
  dest += len1;
  memcpy (dest, s2, len2);
  dest += len2;
  /* The last copy also carries S3's terminating '\0'.  */
  memcpy (dest, s3, len3 + 1);

  return joined;
}
737
/* Return a block of SIZE bytes obtained from xmalloc, with every
   byte set to zero.  */

void *
zalloc (size_t size)
{
  void *block = xmalloc (size);

  return memset (block, 0, size);
}
747
/* Return the newly allocated pathname of the file in DIR whose
   filename is FILE.  A slash is put between the two unless the
   result of base_name (DIR) is empty or already ends in a slash.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last_component = base_name (dir);
  size_t last_len = strlen (last_component);
  char const *separator = "/";

  if (last_len == 0 || last_component[last_len - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
758
759void
760debug_script (struct change *sp)
761{
762  fflush (stdout);
763
764  for (; sp; sp = sp->link)
765    {
766      long int line0 = sp->line0;
767      long int line1 = sp->line1;
768      long int deleted = sp->deleted;
769      long int inserted = sp->inserted;
770      fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
771	       line0, line1, deleted, inserted);
772    }
773
774  fflush (stderr);
775}
776