/* Shared definitions for GNU DIFF

   Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include "system.h"
#include <regex.h>
#include <stdio.h>
#include <unlocked-io.h>

/* What kind of changes a hunk contains.
   The enumerators rely on their implicit values 0..3: group_format is
   sized CHANGED + 1 and line_format is sized NEW + 1, so the order of
   the enumerators must not be changed.  */
enum changes
{
  /* No changes: lines common to both files.  */
  UNCHANGED,

  /* Deletes only: lines taken from just the first file.  */
  OLD,

  /* Inserts only: lines taken from just the second file.  */
  NEW,

  /* Both deletes and inserts: a hunk containing both old and new lines.  */
  CHANGED
};

/* Variables for command line options */

/* XTERN prefixes the global variables declared in this header.
   It expands to `extern' unless GDIFF_MAIN is defined; when GDIFF_MAIN
   is defined it expands to nothing, so the same declarations act as
   the definitions of those variables.  Presumably exactly one source
   file defines GDIFF_MAIN before including this header -- confirm.  */
#ifndef GDIFF_MAIN
# define XTERN extern
#else
# define XTERN
#endif

/* The output formats diff can produce; the option letter that selects
   each one is noted beside it.  */
enum output_style
{
  /* No output style specified.  */
  OUTPUT_UNSPECIFIED,

  /* Default output style.  */
  OUTPUT_NORMAL,

  /* Output the differences with lines of context before and after (-c).  */
  OUTPUT_CONTEXT,

  /* Output the differences in a unified context diff format (-u).  */
  OUTPUT_UNIFIED,

  /* Output the differences as commands suitable for `ed' (-e).  */
  OUTPUT_ED,

  /* Output the diff as a forward ed script (-f).  */
  OUTPUT_FORWARD_ED,

  /* Like -f, but output a count of changed lines in each "command" (-n).  */
  OUTPUT_RCS,

  /* Output merged #ifdef'd file (-D).  */
  OUTPUT_IFDEF,

  /* Output sdiff style (-y).  */
  OUTPUT_SDIFF
};

/* True for output styles that are robust,
   i.e. can handle a file that ends in a non-newline.
   Every style except the two ed-script styles qualifies.
   The argument S is evaluated twice, so it must be free of side
   effects.  */
#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED)

861553SrgrimesXTERN enum output_style output_style;
871553Srgrimes
881553Srgrimes/* Nonzero if output cannot be generated for identical files.  */
891553SrgrimesXTERN bool no_diff_means_no_output;
902860Srgrimes
912860Srgrimes/* Number of lines of context to show in each set of diffs.
922860Srgrimes   This is zero when context is not to be shown.  */
931553SrgrimesXTERN lin context;
941553Srgrimes
951553Srgrimes/* Consider all files as text files (-a).
961553Srgrimes   Don't interpret codes over 0177 as implying a "binary file".  */
971553SrgrimesXTERN bool text;
981553Srgrimes
991553Srgrimes/* Number of lines to keep in identical prefix and suffix.  */
1001553SrgrimesXTERN lin horizon_lines;
1011553Srgrimes
1021553Srgrimes/* The significance of white space during comparisons.  */
1031553SrgrimesXTERN enum
1042860Srgrimes{
1052860Srgrimes  /* All white space is significant (the default).  */
1062860Srgrimes  IGNORE_NO_WHITE_SPACE,
1071553Srgrimes
1081553Srgrimes  /* Ignore changes due to tab expansion (-E).  */
1091553Srgrimes  IGNORE_TAB_EXPANSION,
1101553Srgrimes
1111553Srgrimes  /* Ignore changes in horizontal white space (-b).  */
1121553Srgrimes  IGNORE_SPACE_CHANGE,
1131553Srgrimes
1141553Srgrimes  /* Ignore all horizontal white space (-w).  */
1151553Srgrimes  IGNORE_ALL_SPACE
1161553Srgrimes} ignore_white_space;
1171553Srgrimes
1183468Srgrimes/* Ignore changes that affect only blank lines (-B).  */
1193468SrgrimesXTERN bool ignore_blank_lines;
1203468Srgrimes
1213468Srgrimes/* Files can be compared byte-by-byte, as if they were binary.
1221553Srgrimes   This depends on various options.  */
1231553SrgrimesXTERN bool files_can_be_treated_as_binary;
1241553Srgrimes
1251553Srgrimes/* Ignore differences in case of letters (-i).  */
1261553SrgrimesXTERN bool ignore_case;
1271553Srgrimes
1281553Srgrimes/* Ignore differences in case of letters in file names.  */
1291553SrgrimesXTERN bool ignore_file_name_case;
1301553Srgrimes
1311553Srgrimes/* File labels for `-c' output headers (--label).  */
1321553SrgrimesXTERN char *file_label[2];
1331553Srgrimes
1341553Srgrimes/* Regexp to identify function-header lines (-F).  */
1351553SrgrimesXTERN struct re_pattern_buffer function_regexp;
1361553Srgrimes
1371553Srgrimes/* Ignore changes that affect only lines matching this regexp (-I).  */
1381553SrgrimesXTERN struct re_pattern_buffer ignore_regexp;
1391553Srgrimes
1401553Srgrimes/* Say only whether files differ, not how (-q).  */
1411553SrgrimesXTERN bool brief;
1421553Srgrimes
1431553Srgrimes/* Expand tabs in the output so the text lines up properly
1441553Srgrimes   despite the characters added to the front of each line (-t).  */
1451553SrgrimesXTERN bool expand_tabs;
1461553Srgrimes
1471553Srgrimes/* Number of columns between tab stops.  */
1483468SrgrimesXTERN size_t tabsize;
1493468Srgrimes
1503468Srgrimes/* Use a tab in the output, rather than a space, before the text of an
1513468Srgrimes   input line, so as to keep the proper alignment in the input line
1521553Srgrimes   without changing the characters in it (-T).  */
1531553SrgrimesXTERN bool initial_tab;
1541553Srgrimes
1551553Srgrimes/* Remove trailing carriage returns from input.  */
1561553SrgrimesXTERN bool strip_trailing_cr;
1571553Srgrimes
1583468Srgrimes/* In directory comparison, specify file to start with (-S).
1591553Srgrimes   This is used for resuming an aborted comparison.
1601553Srgrimes   All file names less than this name are ignored.  */
161XTERN char const *starting_file;
162
163/* Pipe each file's output through pr (-l).  */
164XTERN bool paginate;
165
166/* Line group formats for unchanged, old, new, and changed groups.  */
167XTERN char const *group_format[CHANGED + 1];
168
169/* Line formats for unchanged, old, and new lines.  */
170XTERN char const *line_format[NEW + 1];
171
172/* If using OUTPUT_SDIFF print extra information to help the sdiff filter.  */
173XTERN bool sdiff_merge_assist;
174
175/* Tell OUTPUT_SDIFF to show only the left version of common lines.  */
176XTERN bool left_column;
177
178/* Tell OUTPUT_SDIFF to not show common lines.  */
179XTERN bool suppress_common_lines;
180
181/* The half line width and column 2 offset for OUTPUT_SDIFF.  */
182XTERN size_t sdiff_half_width;
183XTERN size_t sdiff_column2_offset;
184
185/* String containing all the command options diff received,
186   with spaces between and at the beginning but none at the end.
187   If there were no options given, this string is empty.  */
188XTERN char *switch_string;
189
190/* Use heuristics for better speed with large files with a small
191   density of changes.  */
192XTERN bool speed_large_files;
193
194/* Patterns that match file names to be excluded.  */
195XTERN struct exclude *excluded;
196
197/* Don't discard lines.  This makes things slower (sometimes much
198   slower) but will find a guaranteed minimal set of changes.  */
199XTERN bool minimal;
200
201/* Name of program the user invoked (for error messages).  */
202XTERN char *program_name;
203
204/* The strftime format to use for time strings.  */
205XTERN char const *time_format;
206
207/* The result of comparison is an "edit script": a chain of `struct change'.
208   Each `struct change' represents one place where some lines are deleted
209   and some are inserted.
210
211   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
212   DELETED is the number of lines deleted here from file 0.
213   INSERTED is the number of lines inserted here in file 1.
214
215   If DELETED is 0 then LINE0 is the number of the line before
216   which the insertion was done; vice versa for INSERTED and LINE1.  */
217
218struct change
219{
220  struct change *link;		/* Previous or next edit command  */
221  lin inserted;			/* # lines of file 1 changed here.  */
222  lin deleted;			/* # lines of file 0 changed here.  */
223  lin line0;			/* Line number of 1st deleted line.  */
224  lin line1;			/* Line number of 1st inserted line.  */
225  bool ignore;			/* Flag used in context.c.  */
226};
227
228/* Structures that describe the input files.  */
229
230/* Data on one input file being compared.  */
231
232struct file_data {
233    int             desc;	/* File descriptor  */
234    char const      *name;	/* File name  */
235    struct stat     stat;	/* File status */
236
237    /* Buffer in which text of file is read.  */
238    word *buffer;
239
240    /* Allocated size of buffer, in bytes.  Always a multiple of
241       sizeof *buffer.  */
242    size_t bufsize;
243
244    /* Number of valid bytes now in the buffer.  */
245    size_t buffered;
246
247    /* Array of pointers to lines in the file.  */
248    char const **linbuf;
249
250    /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
251       linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
252       linebuf[linbuf_base ... valid_lines - 1] contain valid data.
253       linebuf[linbuf_base ... alloc_lines - 1] are allocated.  */
254    lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
255
256    /* Pointer to end of prefix of this file to ignore when hashing.  */
257    char const *prefix_end;
258
259    /* Count of lines in the prefix.
260       There are this many lines in the file before linbuf[0].  */
261    lin prefix_lines;
262
263    /* Pointer to start of suffix of this file to ignore when hashing.  */
264    char const *suffix_begin;
265
266    /* Vector, indexed by line number, containing an equivalence code for
267       each line.  It is this vector that is actually compared with that
268       of another file to generate differences.  */
269    lin *equivs;
270
271    /* Vector, like the previous one except that
272       the elements for discarded lines have been squeezed out.  */
273    lin *undiscarded;
274
275    /* Vector mapping virtual line numbers (not counting discarded lines)
276       to real ones (counting those lines).  Both are origin-0.  */
277    lin *realindexes;
278
279    /* Total number of nondiscarded lines.  */
280    lin nondiscarded_lines;
281
282    /* Vector, indexed by real origin-0 line number,
283       containing 1 for a line that is an insertion or a deletion.
284       The results of comparison are stored here.  */
285    char *changed;
286
287    /* 1 if file ends in a line with no final newline.  */
288    bool missing_newline;
289
290    /* 1 if at end of file.  */
291    bool eof;
292
293    /* 1 more than the maximum equivalence value used for this or its
294       sibling file.  */
295    lin equiv_max;
296};
297
/* The file buffer, considered as an array of bytes rather than
   as an array of words.  F is a pointer to an object with a `buffer'
   member (such as a struct file_data); the result is that member cast
   to `char *'.  */
#define FILE_BUFFER(f) ((char *) (f)->buffer)

302/* Data on two input files being compared.  */
303
304struct comparison
305  {
306    struct file_data file[2];
307    struct comparison const *parent;  /* parent, if a recursive comparison */
308  };
309
310/* Describe the two files currently being compared.  */
311
312XTERN struct file_data files[2];
313
314/* Stdio stream to output diffs to.  */
315
316XTERN FILE *outfile;
317
318/* Declare various functions.  */
319
320/* analyze.c */
321int diff_2_files (struct comparison *);
322
323/* context.c */
324void print_context_header (struct file_data[], bool);
325void print_context_script (struct change *, bool);
326
327/* dir.c */
328int diff_dirs (struct comparison const *, int (*) (struct comparison const *, char const *, char const *));
329
330/* ed.c */
331void print_ed_script (struct change *);
332void pr_forward_ed_script (struct change *);
333
334/* ifdef.c */
335void print_ifdef_script (struct change *);
336
337/* io.c */
338void file_block_read (struct file_data *, size_t);
339bool read_files (struct file_data[], bool);
340
341/* normal.c */
342void print_normal_script (struct change *);
343
344/* rcs.c */
345void print_rcs_script (struct change *);
346
347/* side.c */
348void print_sdiff_script (struct change *);
349
350/* util.c */
351extern char const change_letter[4];
352extern char const pr_program[];
353char *concat (char const *, char const *, char const *);
354char *dir_file_pathname (char const *, char const *);
355bool lines_differ (char const *, char const *);
356lin translate_line_number (struct file_data const *, lin);
357struct change *find_change (struct change *);
358struct change *find_reverse_change (struct change *);
359void *zalloc (size_t);
360enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *);
361void begin_output (void);
362void debug_script (struct change *);
363void fatal (char const *) __attribute__((noreturn));
364void finish_output (void);
365void message (char const *, char const *, char const *);
366void message5 (char const *, char const *, char const *, char const *, char const *);
367void output_1_line (char const *, char const *, char const *, char const *);
368void perror_with_name (char const *);
369void pfatal_with_name (char const *) __attribute__((noreturn));
370void print_1_line (char const *, char const * const *);
371void print_message_queue (void);
372void print_number_range (char, struct file_data *, lin, lin);
373void print_script (struct change *, struct change * (*) (struct change *), void (*) (struct change *));
374void setup_output (char const *, char const *, bool);
375void translate_range (struct file_data const *, lin, lin, long int *, long int *);
376