1/*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2
3/*
4 * Written by Raymond Lai <ray@cyth.net>.
5 * Public domain.
6 */
7
8#include <sys/param.h>
9#include <sys/queue.h>
10#include <sys/stat.h>
11#include <sys/wait.h>
12
13#include <ctype.h>
14#include <err.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <getopt.h>
18#include <limits.h>
19#include <paths.h>
20#include <stdbool.h>
21#include <stdint.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <unistd.h>
26
27#include "extern.h"
28
/* Program executed to produce the differences unless overridden. */
static char diff_path[] = "/usr/bin/diff";

enum {
	/* Total output width used when -w is not given. */
	WIDTH = 126,

	/*
	 * Narrowest width accepted by -w: one character for each of the
	 * two columns plus the three-character gutter between them
	 * (space, divider, space).
	 */
	WIDTH_MIN = 5,

	/*
	 * Upper bound used by istextfile() on how far into a file it
	 * looks for a NUL byte before deciding the file is text.
	 */
	MAX_CHECK = 768
};
40
/*
 * One row of side-by-side output.  Rows are linked on a tail queue
 * (diffhead) by enqueue() and released by freediff().
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;				/* left column text, may be NULL */
	char	 div;				/* divider character for this row */
	char	*right;				/* right column text, may be NULL */
};
48
49static void astrcat(char **, const char *);
50static void enqueue(char *, char, char *);
51static char *mktmpcpy(const char *);
52static int istextfile(FILE *);
53static int bindiff(FILE *, char *, FILE *, char *);
54static void freediff(struct diffline *);
55static void int_usage(void);
56static int parsecmd(FILE *, FILE *, FILE *);
57static void printa(FILE *, size_t);
58static void printc(FILE *, size_t, FILE *, size_t);
59static void printcol(const char *, size_t *, const size_t);
60static void printd(FILE *, size_t);
61static void println(const char *, const char, const char *);
62static void processq(void);
63static void prompt(const char *, const char *);
64static void usage(void) __dead2;
65static char *xfgets(FILE *);
66
/* Rows queued by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);

/* Output geometry, computed in main() from the -w value. */
static size_t line_width;	/* two columns plus the divider */
static size_t width;		/* a single column */

/* Next line number to be read from each input file. */
static size_t file1ln;
static size_t file2ln;

/* Command-line switches. */
static bool Iflag;		/* -I: ignore sets matching regexp */
static bool lflag;		/* -l: left column only for identical lines */
static bool sflag;		/* -s: skip identical lines */
static bool tflag;		/* -t: expand tabs */
static int tabsize = 8;		/* --tabsize: tab stop distance */

/* Shared with other translation units. */
FILE *outfp;			/* file to save changes to (-o) */
const char *tmpdir;		/* $TMPDIR, or _PATH_TMP when unset/empty */
78
/*
 * Values returned by getopt_long() for options that only have a long
 * form.  They are numbered upward from CHAR_MAX + 1 so that none of them
 * can equal a short option character.
 */
enum {
	HELP_OPT		= CHAR_MAX + 1,
	NORMAL_OPT		= CHAR_MAX + 2,
	FCASE_SENSITIVE_OPT	= CHAR_MAX + 3,
	FCASE_IGNORE_OPT	= CHAR_MAX + 4,
	STRIPCR_OPT		= CHAR_MAX + 5,
	TSIZE_OPT		= CHAR_MAX + 6,
	DIFFPROG_OPT		= CHAR_MAX + 7,
};
88
89static struct option longopts[] = {
90	/* options only processed in sdiff */
91	{ "suppress-common-lines",	no_argument,		NULL,	's' },
92	{ "width",			required_argument,	NULL,	'w' },
93
94	{ "output",			required_argument,	NULL,	'o' },
95	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },
96
97	/* Options processed by diff. */
98	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
99	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
100	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
101	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
102	{ "help",			no_argument,		NULL,	HELP_OPT },
103	{ "text",			no_argument,		NULL,	'a' },
104	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
105	{ "ignore-space-change",	no_argument,		NULL,	'b' },
106	{ "minimal",			no_argument,		NULL,	'd' },
107	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
108	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
109	{ "ignore-case",		no_argument,		NULL,	'i' },
110	{ "left-column",		no_argument,		NULL,	'l' },
111	{ "expand-tabs",		no_argument,		NULL,	't' },
112	{ "speed-large-files",		no_argument,		NULL,	'H' },
113	{ "ignore-all-space",		no_argument,		NULL,	'W' },
114
115	{ NULL,				0,			NULL,	'\0'}
116};
117
/*
 * Text printed line by line for --help; the list ends with NULL.
 * Option names here must match the short option string and the
 * longopts table used by main().
 */
static const char *help_msg[] = {
	"usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-B, --ignore-blank-lines: ignore changes that only add or remove blank lines.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-E, --ignore-tab-expansion: ignore changes due to tab expansion.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t-H, --speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
141
142/*
143 * Create temporary file if source_file is not a regular file.
144 * Returns temporary file name if one was malloced, NULL if unnecessary.
145 */
146static char *
147mktmpcpy(const char *source_file)
148{
149	struct stat sb;
150	ssize_t rcount;
151	int ifd, ofd;
152	u_char buf[BUFSIZ];
153	char *target_file;
154
155	/* Open input and output. */
156	ifd = open(source_file, O_RDONLY, 0);
157	/* File was opened successfully. */
158	if (ifd != -1) {
159		if (fstat(ifd, &sb) == -1)
160			err(2, "error getting file status from %s", source_file);
161
162		/* Regular file. */
163		if (S_ISREG(sb.st_mode)) {
164			close(ifd);
165			return (NULL);
166		}
167	} else {
168		/* If ``-'' does not exist the user meant stdin. */
169		if (errno == ENOENT && strcmp(source_file, "-") == 0)
170			ifd = STDIN_FILENO;
171		else
172			err(2, "error opening %s", source_file);
173	}
174
175	/* Not a regular file, so copy input into temporary file. */
176	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
177		err(2, "asprintf");
178	if ((ofd = mkstemp(target_file)) == -1) {
179		warn("error opening %s", target_file);
180		goto FAIL;
181	}
182	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
183	    rcount != 0) {
184		ssize_t wcount;
185
186		wcount = write(ofd, buf, (size_t)rcount);
187		if (-1 == wcount || rcount != wcount) {
188			warn("error writing to %s", target_file);
189			goto FAIL;
190		}
191	}
192	if (rcount == -1) {
193		warn("error reading from %s", source_file);
194		goto FAIL;
195	}
196
197	close(ifd);
198	close(ofd);
199
200	return (target_file);
201
202FAIL:
203	unlink(target_file);
204	exit(2);
205}
206
207int
208main(int argc, char **argv)
209{
210	FILE *diffpipe, *file1, *file2;
211	size_t diffargc = 0, flagc = 0, wval = WIDTH;
212	int ch, fd[2], i, ret, status;
213	pid_t pid;
214	const char *errstr, *outfile = NULL;
215	char **diffargv, *diffprog = diff_path, *flagv;
216	char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
217	char I_arg[] = "-I";
218	char speed_lf[] = "--speed-large-files";
219
220	/*
221	 * Process diff flags.
222	 */
223	/*
224	 * Allocate memory for diff arguments and NULL.
225	 * Each flag has at most one argument, so doubling argc gives an
226	 * upper limit of how many diff args can be passed.  argv[0],
227	 * file1, and file2 won't have arguments so doubling them will
228	 * waste some memory; however we need an extra space for the
229	 * NULL at the end, so it sort of works out.
230	 */
231	if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
232		err(2, NULL);
233
234	/* Add first argument, the program name. */
235	diffargv[diffargc++] = diffprog;
236
237	/* create a dynamic string for merging single-character options */
238	if ((flagv = malloc(flagc + 2)) == NULL)
239		err(2, NULL);
240	flagv[flagc] = '-';
241	flagv[flagc + 1] = '\0';
242	diffargv[diffargc++] = flagv;
243
244	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
245	    longopts, NULL)) != -1) {
246		switch (ch) {
247		/* only compatible --long-name-form with diff */
248		case FCASE_IGNORE_OPT:
249		case FCASE_SENSITIVE_OPT:
250		case STRIPCR_OPT:
251		case 'S':
252		break;
253		/* combine no-arg single switches */
254		case 'a':
255		case 'B':
256		case 'b':
257		case 'd':
258		case 'E':
259		case 'i':
260		case 'W':
261			flagc++;
262			flagv = realloc(flagv, flagc + 2);
263			/*
264			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
265			 */
266			flagv[flagc] = ch == 'W' ? 'w' : ch;
267			flagv[flagc + 1] = '\0';
268			break;
269		case 'H':
270			diffargv[diffargc++] = speed_lf;
271			break;
272		case DIFFPROG_OPT:
273			diffargv[0] = diffprog = optarg;
274			break;
275		case 'I':
276			Iflag = true;
277			diffargv[diffargc++] = I_arg;
278			diffargv[diffargc++] = optarg;
279			break;
280		case 'l':
281			lflag = true;
282			break;
283		case 'o':
284			outfile = optarg;
285			break;
286		case 's':
287			sflag = true;
288			break;
289		case 't':
290			tflag = true;
291			break;
292		case 'w':
293			wval = strtonum(optarg, WIDTH_MIN,
294			    INT_MAX, &errstr);
295			if (errstr)
296				errx(2, "width is %s: %s", errstr, optarg);
297			break;
298		case HELP_OPT:
299			for (i = 0; help_msg[i] != NULL; i++)
300				printf("%s\n", help_msg[i]);
301			exit(0);
302			break;
303		case TSIZE_OPT:
304			tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
305			if (errstr)
306				errx(2, "tabsize is %s: %s", errstr, optarg);
307			break;
308		default:
309			usage();
310			break;
311		}
312	}
313
314	/* no single-character options were used */
315	if (flagc == 0) {
316		memmove(diffargv + 1, diffargv + 2,
317		    sizeof(char *) * (diffargc - 2));
318		diffargc--;
319		free(flagv);
320	}
321
322	argc -= optind;
323	argv += optind;
324
325	if (argc != 2)
326		usage();
327
328	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
329		err(2, "could not open: %s", optarg);
330
331	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
332		tmpdir = _PATH_TMP;
333
334	filename1 = argv[0];
335	filename2 = argv[1];
336
337	/*
338	 * Create temporary files for diff and sdiff to share if file1
339	 * or file2 are not regular files.  This allows sdiff and diff
340	 * to read the same inputs if one or both inputs are stdin.
341	 *
342	 * If any temporary files were created, their names would be
343	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
344	 */
345	tmp1 = tmp2 = NULL;
346	/* file1 and file2 are the same, so copy to same temp file. */
347	if (strcmp(filename1, filename2) == 0) {
348		if ((tmp1 = mktmpcpy(filename1)))
349			filename1 = filename2 = tmp1;
350	/* Copy file1 and file2 into separate temp files. */
351	} else {
352		if ((tmp1 = mktmpcpy(filename1)))
353			filename1 = tmp1;
354		if ((tmp2 = mktmpcpy(filename2)))
355			filename2 = tmp2;
356	}
357
358	if ((file1 = fopen(filename1, "r")) == NULL)
359		err(2, "could not open %s", filename1);
360	if ((file2 = fopen(filename2, "r")) == NULL)
361		err(2, "could not open %s", filename2);
362	if (!istextfile(file1) || !istextfile(file2)) {
363		ret = bindiff(file1, filename1, file2, filename2);
364		goto done;
365	}
366
367	diffargv[diffargc++] = filename1;
368	diffargv[diffargc++] = filename2;
369	/* Add NULL to end of array to indicate end of array. */
370	diffargv[diffargc++] = NULL;
371
372	/* Subtract column divider and divide by two. */
373	width = (wval - 3) / 2;
374	/* Make sure line_width can fit in size_t. */
375	if (width > (SIZE_MAX - 3) / 2)
376		errx(2, "width is too large: %zu", width);
377	line_width = width * 2 + 3;
378
379	if (pipe(fd))
380		err(2, "pipe");
381
382	if ((pid = fork()) < 0)
383		err(1, "fork()");
384	if (pid == 0) {
385		/* child */
386		/* We don't read from the pipe. */
387		close(fd[0]);
388		if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
389			_exit(2);
390		/* Free unused descriptor. */
391		close(fd[1]);
392		execvp(diffprog, diffargv);
393		_exit(2);
394	}
395
396	/* parent */
397	/* We don't write to the pipe. */
398	close(fd[1]);
399
400	/* Open pipe to diff command. */
401	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
402		err(2, "could not open diff pipe");
403
404	/* Line numbers start at one. */
405	file1ln = file2ln = 1;
406
407	/* Read and parse diff output. */
408	while (parsecmd(diffpipe, file1, file2) != EOF)
409		;
410	fclose(diffpipe);
411
412	/* Wait for diff to exit. */
413	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
414	    WEXITSTATUS(status) >= 2)
415		errx(2, "diff exited abnormally");
416	ret = WEXITSTATUS(status);
417
418	/* No more diffs, so enqueue common lines. */
419	if (lflag)
420		while ((s1 = xfgets(file1)))
421			enqueue(s1, ' ', NULL);
422	else
423		for (;;) {
424			s1 = xfgets(file1);
425			s2 = xfgets(file2);
426			if (s1 || s2)
427				enqueue(s1, ' ', s2);
428			else
429				break;
430		}
431	fclose(file1);
432	fclose(file2);
433	/* Process unmodified lines. */
434	processq();
435
436done:
437	/* Delete and free unneeded temporary files. */
438	if (tmp1 != NULL) {
439		if (unlink(tmp1) != 0)
440			warn("failed to delete %s", tmp1);
441		free(tmp1);
442	}
443	if (tmp2 != NULL) {
444		if (unlink(tmp2) != 0)
445			warn("failed to delete %s", tmp2);
446		free(tmp2);
447	}
448
449	/* Return diff exit status. */
450	free(diffargv);
451	if (flagc > 0)
452		free(flagv);
453	return (ret);
454}
455
/*
 * Byte-wise comparison used when an input looks binary.
 *
 * Reads f1 and f2 in lock step from their current positions until the
 * bytes differ or either stream ends.  Returns 0 when both streams
 * ended together, 1 (after printing a "Binary files ... differ"
 * notice) when they do not match, and 2 (after a warning naming fn1 or
 * fn2) when a read error is pending on that stream.
 */
static int
bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
{
	int c1, c2;

	flockfile(f1);
	flockfile(f2);
	for (;;) {
		c1 = getc_unlocked(f1);
		c2 = getc_unlocked(f2);
		if (c1 == EOF || c2 == EOF || c1 != c2)
			break;
	}
	funlockfile(f2);
	funlockfile(f1);

	/* A read error on either side outranks any comparison result. */
	if (ferror(f1)) {
		warn("%s", fn1);
		return (2);
	}
	if (ferror(f2)) {
		warn("%s", fn2);
		return (2);
	}

	/* Identical only if both streams hit end-of-file together. */
	if (c1 == EOF && c2 == EOF)
		return (0);

	printf("Binary files %s and %s differ\n", fn1, fn2);
	return (1);
}
486
487/*
488 * Checks whether a file appears to be a text file.
489 */
490static int
491istextfile(FILE *f)
492{
493	int	ch, i;
494
495	if (f == NULL)
496		return (1);
497	rewind(f);
498	for (i = 0; i <= MAX_CHECK; i++) {
499		ch = fgetc(f);
500		if (ch == '\0') {
501			rewind(f);
502			return (0);
503		}
504		if (ch == EOF)
505			break;
506	}
507	rewind(f);
508	return (1);
509}
510
511/*
512 * Prints an individual column (left or right), taking into account
513 * that tabs are variable-width.  Takes a string, the current column
514 * the cursor is on the screen, and the maximum value of the column.
515 * The column value is updated as we go along.
516 */
517static void
518printcol(const char *s, size_t *col, const size_t col_max)
519{
520
521	for (; *s && *col < col_max; ++s) {
522		size_t new_col;
523
524		switch (*s) {
525		case '\t':
526			/*
527			 * If rounding to next multiple of eight causes
528			 * an integer overflow, just return.
529			 */
530			if (*col > SIZE_MAX - tabsize)
531				return;
532
533			/* Round to next multiple of eight. */
534			new_col = (*col / tabsize + 1) * tabsize;
535
536			/*
537			 * If printing the tab goes past the column
538			 * width, don't print it and just quit.
539			 */
540			if (new_col > col_max)
541				return;
542
543			if (tflag) {
544				do {
545					putchar(' ');
546				} while (++*col < new_col);
547			} else {
548				putchar(*s);
549				*col = new_col;
550			}
551			break;
552		default:
553			++*col;
554			putchar(*s);
555		}
556	}
557}
558
559/*
560 * Prompts user to either choose between two strings or edit one, both,
561 * or neither.
562 */
563static void
564prompt(const char *s1, const char *s2)
565{
566	char *cmd;
567
568	/* Print command prompt. */
569	putchar('%');
570
571	/* Get user input. */
572	for (; (cmd = xfgets(stdin)); free(cmd)) {
573		const char *p;
574
575		/* Skip leading whitespace. */
576		for (p = cmd; isspace((unsigned char)*p); ++p)
577			;
578		switch (*p) {
579		case 'e':
580			/* Skip `e'. */
581			++p;
582			if (eparse(p, s1, s2) == -1)
583				goto USAGE;
584			break;
585		case 'l':
586		case '1':
587			/* Choose left column as-is. */
588			if (s1 != NULL)
589				fprintf(outfp, "%s\n", s1);
590			/* End of command parsing. */
591			break;
592		case 'q':
593			goto QUIT;
594		case 'r':
595		case '2':
596			/* Choose right column as-is. */
597			if (s2 != NULL)
598				fprintf(outfp, "%s\n", s2);
599			/* End of command parsing. */
600			break;
601		case 's':
602			sflag = true;
603			goto PROMPT;
604		case 'v':
605			sflag = false;
606			/* FALLTHROUGH */
607		default:
608			/* Interactive usage help. */
609USAGE:
610			int_usage();
611PROMPT:
612			putchar('%');
613
614			/* Prompt user again. */
615			continue;
616		}
617		free(cmd);
618		return;
619	}
620
621	/*
622	 * If there was no error, we received an EOF from stdin, so we
623	 * should quit.
624	 */
625QUIT:
626	fclose(outfp);
627	exit(0);
628}
629
630/*
631 * Takes two strings, separated by a column divider.  NULL strings are
632 * treated as empty columns.  If the divider is the ` ' character, the
633 * second column is not printed (-l flag).  In this case, the second
634 * string must be NULL.  When the second column is NULL, the divider
635 * does not print the trailing space following the divider character.
636 *
637 * Takes into account that tabs can take multiple columns.
638 */
639static void
640println(const char *s1, const char divider, const char *s2)
641{
642	size_t col;
643
644	/* Print first column.  Skips if s1 == NULL. */
645	col = 0;
646	if (s1) {
647		/* Skip angle bracket and space. */
648		printcol(s1, &col, width);
649
650	}
651
652	/* Otherwise, we pad this column up to width. */
653	for (; col < width; ++col)
654		putchar(' ');
655
656	/* Only print left column. */
657	if (divider == ' ' && !s2) {
658		printf(" (\n");
659		return;
660	}
661
662	/*
663	 * Print column divider.  If there is no second column, we don't
664	 * need to add the space for padding.
665	 */
666	if (!s2) {
667		printf(" %c\n", divider);
668		return;
669	}
670	printf(" %c ", divider);
671	col += 3;
672
673	/* Skip angle bracket and space. */
674	printcol(s2, &col, line_width);
675
676	putchar('\n');
677}
678
/*
 * Read the next line from file.
 *
 * Returns a malloc'd string with the trailing newline (if any) removed;
 * the caller must free it.  Returns NULL at end of file.  A read error
 * terminates the program with status 2.
 *
 * Any pending EOF/error indicator is cleared first, so the stream can
 * be read again after an earlier end-of-file.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t len;
	char *s = NULL;

	clearerr(file);

	if ((len = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * read nothing; release it so EOF does not leak memory.
		 */
		free(s);
		return (NULL);
	}

	/* Drop the line terminator; the final line may lack one. */
	if (len > 0 && s[len - 1] == '\n')
		s[len - 1] = '\0';

	return (s);
}
705
706/*
707 * Parse ed commands from diffpipe and print lines from file1 (lines
708 * to change or delete) or file2 (lines to add or change).
709 * Returns EOF or 0.
710 */
711static int
712parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
713{
714	size_t file1start, file1end, file2start, file2end, n;
715	/* ed command line and pointer to characters in line */
716	char *line, *p, *q;
717	const char *errstr;
718	char c, cmd;
719
720	/* Read ed command. */
721	if (!(line = xfgets(diffpipe)))
722		return (EOF);
723
724	p = line;
725	/* Go to character after line number. */
726	while (isdigit((unsigned char)*p))
727		++p;
728	c = *p;
729	*p++ = 0;
730	file1start = strtonum(line, 0, INT_MAX, &errstr);
731	if (errstr)
732		errx(2, "file1 start is %s: %s", errstr, line);
733
734	/* A range is specified for file1. */
735	if (c == ',') {
736		q = p;
737		/* Go to character after file2end. */
738		while (isdigit((unsigned char)*p))
739			++p;
740		c = *p;
741		*p++ = 0;
742		file1end = strtonum(q, 0, INT_MAX, &errstr);
743		if (errstr)
744			errx(2, "file1 end is %s: %s", errstr, line);
745		if (file1start > file1end)
746			errx(2, "invalid line range in file1: %s", line);
747	} else
748		file1end = file1start;
749
750	cmd = c;
751	/* Check that cmd is valid. */
752	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
753		errx(2, "ed command not recognized: %c: %s", cmd, line);
754
755	q = p;
756	/* Go to character after line number. */
757	while (isdigit((unsigned char)*p))
758		++p;
759	c = *p;
760	*p++ = 0;
761	file2start = strtonum(q, 0, INT_MAX, &errstr);
762	if (errstr)
763		errx(2, "file2 start is %s: %s", errstr, line);
764
765	/*
766	 * There should either be a comma signifying a second line
767	 * number or the line should just end here.
768	 */
769	if (c != ',' && c != '\0')
770		errx(2, "invalid line range in file2: %c: %s", c, line);
771
772	if (c == ',') {
773
774		file2end = strtonum(p, 0, INT_MAX, &errstr);
775		if (errstr)
776			errx(2, "file2 end is %s: %s", errstr, line);
777		if (file2start >= file2end)
778			errx(2, "invalid line range in file2: %s", line);
779	} else
780		file2end = file2start;
781
782	/* Appends happen _after_ stated line. */
783	if (cmd == 'a') {
784		if (file1start != file1end)
785			errx(2, "append cannot have a file1 range: %s",
786			    line);
787		if (file1start == SIZE_MAX)
788			errx(2, "file1 line range too high: %s", line);
789		file1start = ++file1end;
790	}
791	/*
792	 * I'm not sure what the deal is with the line numbers for
793	 * deletes, though.
794	 */
795	else if (cmd == 'd') {
796		if (file2start != file2end)
797			errx(2, "delete cannot have a file2 range: %s",
798			    line);
799		if (file2start == SIZE_MAX)
800			errx(2, "file2 line range too high: %s", line);
801		file2start = ++file2end;
802	}
803
804	/*
805	 * Continue reading file1 and file2 until we reach line numbers
806	 * specified by diff.  Should only happen with -I flag.
807	 */
808	for (; file1ln < file1start && file2ln < file2start;
809	    ++file1ln, ++file2ln) {
810		char *s1, *s2;
811
812		if (!(s1 = xfgets(file1)))
813			errx(2, "file1 shorter than expected");
814		if (!(s2 = xfgets(file2)))
815			errx(2, "file2 shorter than expected");
816
817		/* If the -l flag was specified, print only left column. */
818		if (lflag) {
819			free(s2);
820			/*
821			 * XXX - If -l and -I are both specified, all
822			 * unchanged or ignored lines are shown with a
823			 * `(' divider.  This matches GNU sdiff, but I
824			 * believe it is a bug.  Just check out:
825			 * gsdiff -l -I '^$' samefile samefile.
826			 */
827			if (Iflag)
828				enqueue(s1, '(', NULL);
829			else
830				enqueue(s1, ' ', NULL);
831		} else
832			enqueue(s1, ' ', s2);
833	}
834	/* Ignore deleted lines. */
835	for (; file1ln < file1start; ++file1ln) {
836		char *s;
837
838		if (!(s = xfgets(file1)))
839			errx(2, "file1 shorter than expected");
840
841		enqueue(s, '(', NULL);
842	}
843	/* Ignore added lines. */
844	for (; file2ln < file2start; ++file2ln) {
845		char *s;
846
847		if (!(s = xfgets(file2)))
848			errx(2, "file2 shorter than expected");
849
850		/* If -l flag was given, don't print right column. */
851		if (lflag)
852			free(s);
853		else
854			enqueue(NULL, ')', s);
855	}
856
857	/* Process unmodified or skipped lines. */
858	processq();
859
860	switch (cmd) {
861	case 'a':
862		printa(file2, file2end);
863		n = file2end - file2start + 1;
864		break;
865	case 'c':
866		printc(file1, file1end, file2, file2end);
867		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
868		break;
869	case 'd':
870		printd(file1, file1end);
871		n = file1end - file1start + 1;
872		break;
873	default:
874		errx(2, "invalid diff command: %c: %s", cmd, line);
875	}
876	free(line);
877
878	/* Skip to next ed line. */
879	while (n--) {
880		if (!(line = xfgets(diffpipe)))
881			errx(2, "diff ended early");
882		free(line);
883	}
884
885	return (0);
886}
887
888/*
889 * Queues up a diff line.
890 */
891static void
892enqueue(char *left, char divider, char *right)
893{
894	struct diffline *diffp;
895
896	if (!(diffp = malloc(sizeof(struct diffline))))
897		err(2, "enqueue");
898	diffp->left = left;
899	diffp->div = divider;
900	diffp->right = right;
901	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
902}
903
904/*
905 * Free a diffline structure and its elements.
906 */
907static void
908freediff(struct diffline *diffp)
909{
910
911	free(diffp->left);
912	free(diffp->right);
913	free(diffp);
914}
915
/*
 * Grow the malloc'd string *s by a newline followed by `append'.  When
 * *s is NULL it simply becomes a copy of `append' (no newline added).
 *
 * The address and length of the string handled by the previous call are
 * remembered, so a run of appends to the same string does not have to
 * measure it again each time.  Allocation failure exits with status 2.
 */
static void
astrcat(char **s, const char *append)
{
	/* String seen on the previous call and its length. */
	static const char *oldstr = NULL;
	static size_t offset = 0;
	size_t applen, newsiz;
	char *grown;

	/* Nothing to append to yet: start the string. */
	if (*s == NULL) {
		*s = strdup(append);
		if (*s == NULL)
			err(2, "astrcat");

		offset = strlen(*s);
		oldstr = *s;
		return;
	}

	/* A different string than last time: its length must be measured. */
	if (*s != oldstr)
		offset = strlen(*s);

	/* Old text + '\n' + appended text + terminating NUL. */
	applen = strlen(append);
	newsiz = offset + 1 + applen + 1;

	grown = realloc(*s, newsiz);
	if (grown == NULL)
		err(2, "astrcat");

	/* Write the separator, then the new text with its terminator. */
	grown[offset] = '\n';
	memcpy(grown + offset + 1, append, applen + 1);
	*s = grown;

	/* Remember the result for the next call. */
	offset = newsiz - 1;
	oldstr = grown;
}
980
981/*
982 * Process diff set queue, printing, prompting, and saving each diff
983 * line stored in queue.
984 */
985static void
986processq(void)
987{
988	struct diffline *diffp;
989	char divc, *left, *right;
990
991	/* Don't process empty queue. */
992	if (STAILQ_EMPTY(&diffhead))
993		return;
994
995	/* Remember the divider. */
996	divc = STAILQ_FIRST(&diffhead)->div;
997
998	left = NULL;
999	right = NULL;
1000	/*
1001	 * Go through set of diffs, concatenating each line in left or
1002	 * right column into two long strings, `left' and `right'.
1003	 */
1004	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1005		/*
1006		 * Print changed lines if -s was given,
1007		 * print all lines if -s was not given.
1008		 */
1009		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1010		    diffp->div == '>')
1011			println(diffp->left, diffp->div, diffp->right);
1012
1013		/* Append new lines to diff set. */
1014		if (diffp->left)
1015			astrcat(&left, diffp->left);
1016		if (diffp->right)
1017			astrcat(&right, diffp->right);
1018	}
1019
1020	/* Empty queue and free each diff line and its elements. */
1021	while (!STAILQ_EMPTY(&diffhead)) {
1022		diffp = STAILQ_FIRST(&diffhead);
1023		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1024		freediff(diffp);
1025	}
1026
1027	/* Write to outfp, prompting user if lines are different. */
1028	if (outfp)
1029		switch (divc) {
1030		case ' ': case '(': case ')':
1031			fprintf(outfp, "%s\n", left);
1032			break;
1033		case '|': case '<': case '>':
1034			prompt(left, right);
1035			break;
1036		default:
1037			errx(2, "invalid divider: %c", divc);
1038		}
1039
1040	/* Free left and right. */
1041	free(left);
1042	free(right);
1043}
1044
1045/*
1046 * Print lines following an (a)ppend command.
1047 */
1048static void
1049printa(FILE *file, size_t line2)
1050{
1051	char *line;
1052
1053	for (; file2ln <= line2; ++file2ln) {
1054		if (!(line = xfgets(file)))
1055			errx(2, "append ended early");
1056		enqueue(NULL, '>', line);
1057	}
1058	processq();
1059}
1060
1061/*
1062 * Print lines following a (c)hange command, from file1ln to file1end
1063 * and from file2ln to file2end.
1064 */
1065static void
1066printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1067{
1068	struct fileline {
1069		STAILQ_ENTRY(fileline)	 fileentries;
1070		char			*line;
1071	};
1072	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1073
1074	/* Read lines to be deleted. */
1075	for (; file1ln <= file1end; ++file1ln) {
1076		struct fileline *linep;
1077		char *line1;
1078
1079		/* Read lines from both. */
1080		if (!(line1 = xfgets(file1)))
1081			errx(2, "error reading file1 in delete in change");
1082
1083		/* Add to delete queue. */
1084		if (!(linep = malloc(sizeof(struct fileline))))
1085			err(2, "printc");
1086		linep->line = line1;
1087		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1088	}
1089
1090	/* Process changed lines.. */
1091	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1092	    ++file2ln) {
1093		struct fileline *del;
1094		char *add;
1095
1096		/* Get add line. */
1097		if (!(add = xfgets(file2)))
1098			errx(2, "error reading add in change");
1099
1100		del = STAILQ_FIRST(&delqhead);
1101		enqueue(del->line, '|', add);
1102		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1103		/*
1104		 * Free fileline structure but not its elements since
1105		 * they are queued up.
1106		 */
1107		free(del);
1108	}
1109	processq();
1110
1111	/* Process remaining lines to add. */
1112	for (; file2ln <= file2end; ++file2ln) {
1113		char *add;
1114
1115		/* Get add line. */
1116		if (!(add = xfgets(file2)))
1117			errx(2, "error reading add in change");
1118
1119		enqueue(NULL, '>', add);
1120	}
1121	processq();
1122
1123	/* Process remaining lines to delete. */
1124	while (!STAILQ_EMPTY(&delqhead)) {
1125		struct fileline *filep;
1126
1127		filep = STAILQ_FIRST(&delqhead);
1128		enqueue(filep->line, '<', NULL);
1129		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1130		free(filep);
1131	}
1132	processq();
1133}
1134
1135/*
1136 * Print deleted lines from file, from file1ln to file1end.
1137 */
1138static void
1139printd(FILE *file1, size_t file1end)
1140{
1141	char *line1;
1142
1143	/* Print out lines file1ln to line2. */
1144	for (; file1ln <= file1end; ++file1ln) {
1145		if (!(line1 = xfgets(file1)))
1146			errx(2, "file1 ended early in delete");
1147		enqueue(line1, '<', NULL);
1148	}
1149	processq();
1150}
1151
/*
 * Print the summary of interactive merge commands on standard output.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1169
/*
 * Print the command synopsis on standard error and exit with status 2.
 * The flag list mirrors the short option string accepted by main().
 */
static void
usage(void)
{

	fprintf(stderr,
	    "usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n");
	exit(2);
}
1179