/* Shared definitions for GNU DIFF

   Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include "system.h"
#include <regex.h>
#include <stdio.h>
#include <unlocked-io.h>

/* What kind of changes a hunk contains.

   The enumerators are also used as array bounds in this header
   (group_format has CHANGED + 1 elements, line_format has NEW + 1),
   so their order must not be changed.  */
enum changes
{
  /* No changes: lines common to both files.  */
  UNCHANGED,

  /* Deletes only: lines taken from just the first file.  */
  OLD,

  /* Inserts only: lines taken from just the second file.  */
  NEW,

  /* Both deletes and inserts: a hunk containing both old and new lines.  */
  CHANGED
};

/* Variables for command line options.

   Each variable below is declared with XTERN.  When GDIFF_MAIN is not
   defined, XTERN expands to `extern' and the line is a pure declaration.
   When GDIFF_MAIN is defined, XTERN expands to nothing and the same line
   becomes the definition of the variable.  Presumably exactly one source
   file defines GDIFF_MAIN before including this header.  */

#ifndef GDIFF_MAIN
# define XTERN extern
#else
# define XTERN
#endif

/* The output formats diff can produce.  */
enum output_style
{
  /* No output style specified.  */
  OUTPUT_UNSPECIFIED,

  /* Default output style.  */
  OUTPUT_NORMAL,

  /* Output the differences with lines of context before and after (-c).  */
  OUTPUT_CONTEXT,

  /* Output the differences in a unified context diff format (-u).  */
  OUTPUT_UNIFIED,

  /* Output the differences as commands suitable for `ed' (-e).  */
  OUTPUT_ED,

  /* Output the diff as a forward ed script (-f).  */
  OUTPUT_FORWARD_ED,

  /* Like -f, but output a count of changed lines in each "command" (-n).  */
  OUTPUT_RCS,

  /* Output merged #ifdef'd file (-D).  */
  OUTPUT_IFDEF,

  /* Output sdiff style (-y).  */
  OUTPUT_SDIFF
};

/* True for output styles that are robust,
   i.e. can handle a file that ends in a non-newline.
   Every style except OUTPUT_ED and OUTPUT_FORWARD_ED qualifies.
   S is evaluated twice, so it must not have side effects.  */
#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED)

86170754SdelphijXTERN enum output_style output_style;
87170754Sdelphij
88170754Sdelphij/* Nonzero if output cannot be generated for identical files.  */
89170754SdelphijXTERN bool no_diff_means_no_output;
90170754Sdelphij
91170754Sdelphij/* Number of lines of context to show in each set of diffs.
92170754Sdelphij   This is zero when context is not to be shown.  */
93170754SdelphijXTERN lin context;
94170754Sdelphij
95170754Sdelphij/* Consider all files as text files (-a).
96170754Sdelphij   Don't interpret codes over 0177 as implying a "binary file".  */
97170754SdelphijXTERN bool text;
98170754Sdelphij
99170754Sdelphij/* Number of lines to keep in identical prefix and suffix.  */
100170754SdelphijXTERN lin horizon_lines;
101170754Sdelphij
102170754Sdelphij/* The significance of white space during comparisons.  */
103170754SdelphijXTERN enum
104170754Sdelphij{
105170754Sdelphij  /* All white space is significant (the default).  */
106170754Sdelphij  IGNORE_NO_WHITE_SPACE,
107170754Sdelphij
108170754Sdelphij  /* Ignore changes due to tab expansion (-E).  */
109170754Sdelphij  IGNORE_TAB_EXPANSION,
110170754Sdelphij
111170754Sdelphij  /* Ignore changes in horizontal white space (-b).  */
112170754Sdelphij  IGNORE_SPACE_CHANGE,
113170754Sdelphij
114170754Sdelphij  /* Ignore all horizontal white space (-w).  */
115170754Sdelphij  IGNORE_ALL_SPACE
116170754Sdelphij} ignore_white_space;
117170754Sdelphij
118170754Sdelphij/* Ignore changes that affect only blank lines (-B).  */
119170754SdelphijXTERN bool ignore_blank_lines;
120170754Sdelphij
121170754Sdelphij/* Files can be compared byte-by-byte, as if they were binary.
122170754Sdelphij   This depends on various options.  */
123170754SdelphijXTERN bool files_can_be_treated_as_binary;
124170754Sdelphij
125170754Sdelphij/* Ignore differences in case of letters (-i).  */
126170754SdelphijXTERN bool ignore_case;
127170754Sdelphij
128170754Sdelphij/* Ignore differences in case of letters in file names.  */
129170754SdelphijXTERN bool ignore_file_name_case;
130170754Sdelphij
131170754Sdelphij/* File labels for `-c' output headers (--label).  */
132170754SdelphijXTERN char *file_label[2];
133170754Sdelphij
134170754Sdelphij/* Regexp to identify function-header lines (-F).  */
135170754SdelphijXTERN struct re_pattern_buffer function_regexp;
136170754Sdelphij
137170754Sdelphij/* Ignore changes that affect only lines matching this regexp (-I).  */
138170754SdelphijXTERN struct re_pattern_buffer ignore_regexp;
139170754Sdelphij
140170754Sdelphij/* Say only whether files differ, not how (-q).  */
141170754SdelphijXTERN bool brief;
142170754Sdelphij
143170754Sdelphij/* Expand tabs in the output so the text lines up properly
144170754Sdelphij   despite the characters added to the front of each line (-t).  */
145170754SdelphijXTERN bool expand_tabs;
146170754Sdelphij
147170754Sdelphij/* Number of columns between tab stops.  */
148170754SdelphijXTERN size_t tabsize;
149170754Sdelphij
150170754Sdelphij/* Use a tab in the output, rather than a space, before the text of an
151170754Sdelphij   input line, so as to keep the proper alignment in the input line
152170754Sdelphij   without changing the characters in it (-T).  */
153170754SdelphijXTERN bool initial_tab;
154170754Sdelphij
155170754Sdelphij/* Remove trailing carriage returns from input.  */
156170754SdelphijXTERN bool strip_trailing_cr;
157170754Sdelphij
158170754Sdelphij/* In directory comparison, specify file to start with (-S).
159170754Sdelphij   This is used for resuming an aborted comparison.
160170754Sdelphij   All file names less than this name are ignored.  */
161170754SdelphijXTERN char const *starting_file;
162170754Sdelphij
163170754Sdelphij/* Pipe each file's output through pr (-l).  */
164170754SdelphijXTERN bool paginate;
165170754Sdelphij
166170754Sdelphij/* Line group formats for unchanged, old, new, and changed groups.  */
167170754SdelphijXTERN char const *group_format[CHANGED + 1];
168170754Sdelphij
169170754Sdelphij/* Line formats for unchanged, old, and new lines.  */
170170754SdelphijXTERN char const *line_format[NEW + 1];
171170754Sdelphij
172170754Sdelphij/* If using OUTPUT_SDIFF print extra information to help the sdiff filter.  */
173170754SdelphijXTERN bool sdiff_merge_assist;
174170754Sdelphij
175170754Sdelphij/* Tell OUTPUT_SDIFF to show only the left version of common lines.  */
176170754SdelphijXTERN bool left_column;
177170754Sdelphij
178170754Sdelphij/* Tell OUTPUT_SDIFF to not show common lines.  */
179170754SdelphijXTERN bool suppress_common_lines;
180170754Sdelphij
181170754Sdelphij/* The half line width and column 2 offset for OUTPUT_SDIFF.  */
182170754SdelphijXTERN size_t sdiff_half_width;
183170754SdelphijXTERN size_t sdiff_column2_offset;
184170754Sdelphij
185170754Sdelphij/* String containing all the command options diff received,
186170754Sdelphij   with spaces between and at the beginning but none at the end.
187170754Sdelphij   If there were no options given, this string is empty.  */
188170754SdelphijXTERN char *switch_string;
189170754Sdelphij
190170754Sdelphij/* Use heuristics for better speed with large files with a small
191170754Sdelphij   density of changes.  */
192170754SdelphijXTERN bool speed_large_files;
193170754Sdelphij
194170754Sdelphij/* Patterns that match file names to be excluded.  */
195170754SdelphijXTERN struct exclude *excluded;
196170754Sdelphij
197170754Sdelphij/* Don't discard lines.  This makes things slower (sometimes much
198170754Sdelphij   slower) but will find a guaranteed minimal set of changes.  */
199170754SdelphijXTERN bool minimal;
200170754Sdelphij
201170754Sdelphij/* Name of program the user invoked (for error messages).  */
202170754SdelphijXTERN char *program_name;
203170754Sdelphij
204170754Sdelphij/* The strftime format to use for time strings.  */
205170754SdelphijXTERN char const *time_format;
206170754Sdelphij
207170754Sdelphij/* The result of comparison is an "edit script": a chain of `struct change'.
208170754Sdelphij   Each `struct change' represents one place where some lines are deleted
209170754Sdelphij   and some are inserted.
210170754Sdelphij
211170754Sdelphij   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
212170754Sdelphij   DELETED is the number of lines deleted here from file 0.
213170754Sdelphij   INSERTED is the number of lines inserted here in file 1.
214170754Sdelphij
215170754Sdelphij   If DELETED is 0 then LINE0 is the number of the line before
216170754Sdelphij   which the insertion was done; vice versa for INSERTED and LINE1.  */
217170754Sdelphij
218170754Sdelphijstruct change
219170754Sdelphij{
220170754Sdelphij  struct change *link;		/* Previous or next edit command  */
221170754Sdelphij  lin inserted;			/* # lines of file 1 changed here.  */
222170754Sdelphij  lin deleted;			/* # lines of file 0 changed here.  */
223170754Sdelphij  lin line0;			/* Line number of 1st deleted line.  */
224170754Sdelphij  lin line1;			/* Line number of 1st inserted line.  */
225170754Sdelphij  bool ignore;			/* Flag used in context.c.  */
226170754Sdelphij};
227170754Sdelphij
228170754Sdelphij/* Structures that describe the input files.  */
229170754Sdelphij
230170754Sdelphij/* Data on one input file being compared.  */
231170754Sdelphij
232170754Sdelphijstruct file_data {
233170754Sdelphij    int             desc;	/* File descriptor  */
234170754Sdelphij    char const      *name;	/* File name  */
235170754Sdelphij    struct stat     stat;	/* File status */
236170754Sdelphij
237170754Sdelphij    /* Buffer in which text of file is read.  */
238170754Sdelphij    word *buffer;
239170754Sdelphij
240170754Sdelphij    /* Allocated size of buffer, in bytes.  Always a multiple of
241170754Sdelphij       sizeof *buffer.  */
242170754Sdelphij    size_t bufsize;
243170754Sdelphij
244170754Sdelphij    /* Number of valid bytes now in the buffer.  */
245170754Sdelphij    size_t buffered;
246170754Sdelphij
247170754Sdelphij    /* Array of pointers to lines in the file.  */
248170754Sdelphij    char const **linbuf;
249170754Sdelphij
250170754Sdelphij    /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
251170754Sdelphij       linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
252170754Sdelphij       linebuf[linbuf_base ... valid_lines - 1] contain valid data.
253170754Sdelphij       linebuf[linbuf_base ... alloc_lines - 1] are allocated.  */
254170754Sdelphij    lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
255170754Sdelphij
256170754Sdelphij    /* Pointer to end of prefix of this file to ignore when hashing.  */
257170754Sdelphij    char const *prefix_end;
258170754Sdelphij
259170754Sdelphij    /* Count of lines in the prefix.
260170754Sdelphij       There are this many lines in the file before linbuf[0].  */
261170754Sdelphij    lin prefix_lines;
262170754Sdelphij
263170754Sdelphij    /* Pointer to start of suffix of this file to ignore when hashing.  */
264170754Sdelphij    char const *suffix_begin;
265170754Sdelphij
266170754Sdelphij    /* Vector, indexed by line number, containing an equivalence code for
267170754Sdelphij       each line.  It is this vector that is actually compared with that
268170754Sdelphij       of another file to generate differences.  */
269170754Sdelphij    lin *equivs;
270170754Sdelphij
271170754Sdelphij    /* Vector, like the previous one except that
272170754Sdelphij       the elements for discarded lines have been squeezed out.  */
273170754Sdelphij    lin *undiscarded;
274170754Sdelphij
275170754Sdelphij    /* Vector mapping virtual line numbers (not counting discarded lines)
276170754Sdelphij       to real ones (counting those lines).  Both are origin-0.  */
277170754Sdelphij    lin *realindexes;
278170754Sdelphij
279170754Sdelphij    /* Total number of nondiscarded lines.  */
280170754Sdelphij    lin nondiscarded_lines;
281170754Sdelphij
282170754Sdelphij    /* Vector, indexed by real origin-0 line number,
283170754Sdelphij       containing 1 for a line that is an insertion or a deletion.
284170754Sdelphij       The results of comparison are stored here.  */
285170754Sdelphij    char *changed;
286170754Sdelphij
287170754Sdelphij    /* 1 if file ends in a line with no final newline.  */
288170754Sdelphij    bool missing_newline;
289170754Sdelphij
290170754Sdelphij    /* 1 if at end of file.  */
291170754Sdelphij    bool eof;
292170754Sdelphij
293170754Sdelphij    /* 1 more than the maximum equivalence value used for this or its
294170754Sdelphij       sibling file.  */
295170754Sdelphij    lin equiv_max;
296170754Sdelphij};
297170754Sdelphij
/* The file buffer, considered as an array of bytes rather than
   as an array of words.  F is a pointer to an object with a `buffer'
   member (a struct file_data); the result has type `char *'.  */
#define FILE_BUFFER(f) ((char *) (f)->buffer)

302170754Sdelphij/* Data on two input files being compared.  */
303170754Sdelphij
304170754Sdelphijstruct comparison
305170754Sdelphij  {
306170754Sdelphij    struct file_data file[2];
307170754Sdelphij    struct comparison const *parent;  /* parent, if a recursive comparison */
308170754Sdelphij  };
309170754Sdelphij
310170754Sdelphij/* Describe the two files currently being compared.  */
311170754Sdelphij
312170754SdelphijXTERN struct file_data files[2];
313170754Sdelphij
314170754Sdelphij/* Stdio stream to output diffs to.  */
315170754Sdelphij
316170754SdelphijXTERN FILE *outfile;
317170754Sdelphij
318170754Sdelphij/* Declare various functions.  */
319170754Sdelphij
320170754Sdelphij/* analyze.c */
321170754Sdelphijint diff_2_files (struct comparison *);
322170754Sdelphij
323170754Sdelphij/* context.c */
324170754Sdelphijvoid print_context_header (struct file_data[], bool);
325170754Sdelphijvoid print_context_script (struct change *, bool);
326170754Sdelphij
327170754Sdelphij/* dir.c */
328170754Sdelphijint diff_dirs (struct comparison const *, int (*) (struct comparison const *, char const *, char const *));
329170754Sdelphij
330170754Sdelphij/* ed.c */
331170754Sdelphijvoid print_ed_script (struct change *);
332170754Sdelphijvoid pr_forward_ed_script (struct change *);
333170754Sdelphij
334170754Sdelphij/* ifdef.c */
335170754Sdelphijvoid print_ifdef_script (struct change *);
336170754Sdelphij
337170754Sdelphij/* io.c */
338170754Sdelphijvoid file_block_read (struct file_data *, size_t);
339170754Sdelphijbool read_files (struct file_data[], bool);
340170754Sdelphij
341170754Sdelphij/* normal.c */
342170754Sdelphijvoid print_normal_script (struct change *);
343170754Sdelphij
344170754Sdelphij/* rcs.c */
345170754Sdelphijvoid print_rcs_script (struct change *);
346170754Sdelphij
347170754Sdelphij/* side.c */
348170754Sdelphijvoid print_sdiff_script (struct change *);
349170754Sdelphij
350170754Sdelphij/* util.c */
351170754Sdelphijextern char const change_letter[4];
352170754Sdelphijextern char const pr_program[];
353170754Sdelphijchar *concat (char const *, char const *, char const *);
354170754Sdelphijchar *dir_file_pathname (char const *, char const *);
355170754Sdelphijbool lines_differ (char const *, char const *);
356170754Sdelphijlin translate_line_number (struct file_data const *, lin);
357170754Sdelphijstruct change *find_change (struct change *);
358170754Sdelphijstruct change *find_reverse_change (struct change *);
359170754Sdelphijvoid *zalloc (size_t);
360170754Sdelphijenum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *);
361170754Sdelphijvoid begin_output (void);
362170754Sdelphijvoid debug_script (struct change *);
363170754Sdelphijvoid fatal (char const *) __attribute__((noreturn));
364170754Sdelphijvoid finish_output (void);
365170754Sdelphijvoid message (char const *, char const *, char const *);
366170754Sdelphijvoid message5 (char const *, char const *, char const *, char const *, char const *);
367170754Sdelphijvoid output_1_line (char const *, char const *, char const *, char const *);
368170754Sdelphijvoid perror_with_name (char const *);
369170754Sdelphijvoid pfatal_with_name (char const *) __attribute__((noreturn));
370170754Sdelphijvoid print_1_line (char const *, char const * const *);
371170754Sdelphijvoid print_message_queue (void);
372170754Sdelphijvoid print_number_range (char, struct file_data *, lin, lin);
373170754Sdelphijvoid print_script (struct change *, struct change * (*) (struct change *), void (*) (struct change *));
374170754Sdelphijvoid setup_output (char const *, char const *, bool);
375170754Sdelphijvoid translate_range (struct file_data const *, lin, lin, long int *, long int *);
376