1/*
2 * Copyright (c) 2018 Martin Pieuchot
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <sys/types.h>
19#include <sys/capsicum.h>
20#ifndef DIFF_NO_MMAP
21#include <sys/mman.h>
22#endif
23#include <sys/stat.h>
24
25#include <capsicum_helpers.h>
26#include <err.h>
27#include <fcntl.h>
28#include <stdbool.h>
29#include <stdint.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <time.h>
34#include <unistd.h>
35
36#include "diff.h"
37#include <arraylist.h>
38#include <diff_main.h>
39#include <diff_output.h>
40
/* Builds the "path<TAB>mtime zone" label used when no label was supplied. */
const char *format_label(const char *oldlabel, struct stat *stb);
42
/*
 * Selects which of the diff_config tables in this file diffreg_new() uses.
 * Enumerators take consecutive values starting at 0.
 */
enum diffreg_algo {
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE,	/* Myers, then Myers divide */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,	/* Myers, then Patience */
	DIFFREG_ALGO_PATIENCE,			/* Patience first */
	DIFFREG_ALGO_NONE,			/* no algorithm configured */
};
49
/* Entry points defined further down in this file. */
int	 diffreg_new(char *file1, char *file2, int flags, int capsicum);
FILE	*openfile(const char *path, char **p, struct stat *st);
52
53static const struct diff_algo_config myers_then_patience;
54static const struct diff_algo_config myers_then_myers_divide;
55static const struct diff_algo_config patience;
56static const struct diff_algo_config myers_divide;
57
58static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
59	.impl = diff_algo_myers,
60	.permitted_state_size = 1024 * 1024 * sizeof(int),
61	.fallback_algo = &patience,
62};
63
64static const struct diff_algo_config myers_then_myers_divide =
65	(struct diff_algo_config){
66	.impl = diff_algo_myers,
67	.permitted_state_size = 1024 * 1024 * sizeof(int),
68	.fallback_algo = &myers_divide,
69};
70
71static const struct diff_algo_config patience = (struct diff_algo_config){
72	.impl = diff_algo_patience,
73	/* After subdivision, do Patience again: */
74	.inner_algo = &patience,
75	/* If subdivision failed, do Myers Divide et Impera: */
76	.fallback_algo = &myers_then_myers_divide,
77};
78
79static const struct diff_algo_config myers_divide = (struct diff_algo_config){
80	.impl = diff_algo_myers_divide,
81	/* When division succeeded, start from the top: */
82	.inner_algo = &myers_then_myers_divide,
83	/* (fallback_algo = NULL implies diff_algo_none). */
84};
85
86static const struct diff_algo_config no_algo = (struct diff_algo_config){
87	.impl = diff_algo_none,
88};
89
90/* If the state for a forward-Myers is small enough, use Myers, otherwise first
91 * do a Myers-divide. */
92static const struct diff_config diff_config_myers_then_myers_divide = {
93	.atomize_func = diff_atomize_text_by_line,
94	.algo = &myers_then_myers_divide,
95};
96
97/* If the state for a forward-Myers is small enough, use Myers, otherwise first
98 * do a Patience. */
99static const struct diff_config diff_config_myers_then_patience = {
100	.atomize_func = diff_atomize_text_by_line,
101	.algo = &myers_then_patience,
102};
103
104/* Directly force Patience as a first divider of the source file. */
105static const struct diff_config diff_config_patience = {
106	.atomize_func = diff_atomize_text_by_line,
107	.algo = &patience,
108};
109
110/* Directly force Patience as a first divider of the source file. */
111static const struct diff_config diff_config_no_algo = {
112	.atomize_func = diff_atomize_text_by_line,
113};
114
/*
 * Build the default header label for a file: the path, a tab, the
 * modification time with nanoseconds, a space and the UTC offset, e.g.
 * "path\t2024-01-02 03:04:05.000000006 +0000".  The label has no trailing
 * newline.
 *
 * oldlabel is the path to show; stb supplies the modification time.
 *
 * Returns a heap-allocated string that can be released with free().  If the
 * modification time cannot be converted to local time, a copy of oldlabel
 * alone is returned.  Allocation failure is fatal (exit status 2).
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	static const char time_format[] = "%Y-%m-%d %H:%M:%S";
	char timebuf[256] = "";
	char zonebuf[10] = "";
	char *newlabel;
	struct tm tm;
	size_t newlabellen;
	long nsec = stb->st_mtim.tv_nsec;

	if (localtime_r(&stb->st_mtime, &tm) == NULL) {
		/* No usable timestamp: label the file with its path only. */
		newlabel = strdup(oldlabel);
		if (newlabel == NULL)
			err(2, "unable to allocate label");
		return newlabel;
	}

	/* strftime() leaves the buffer unspecified when it returns 0. */
	if (strftime(timebuf, sizeof(timebuf), time_format, &tm) == 0)
		timebuf[0] = '\0';
	if (strftime(zonebuf, sizeof(zonebuf), "%z", &tm) == 0)
		zonebuf[0] = '\0';

	/*
	 * Room for the path, the time, the zone, 9 nanosecond digits and
	 * 4 more bytes: the tab, the period, the space and the final NUL.
	 */
	newlabellen = strlen(oldlabel) + strlen(timebuf) + strlen(zonebuf) +
	    9 + 4;
	newlabel = calloc(newlabellen, sizeof(*newlabel));
	if (newlabel == NULL)
		err(2, "unable to allocate label");

	snprintf(newlabel, newlabellen, "%s\t%s.%.9ld %s",
	    oldlabel, timebuf, nsec, zonebuf);

	return newlabel;
}
143
144int
145diffreg_new(char *file1, char *file2, int flags, int capsicum)
146{
147	char *str1, *str2;
148	FILE *f1, *f2;
149	struct stat st1, st2;
150	struct diff_input_info info;
151	struct diff_data left = {}, right = {};
152	struct diff_result *result = NULL;
153	bool force_text, have_binary;
154	int rc, atomizer_flags, rflags, diff_flags = 0;
155	int context_lines = diff_context;
156	const struct diff_config *cfg;
157	enum diffreg_algo algo;
158	cap_rights_t rights_ro;
159
160	algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
161
162	switch (algo) {
163	default:
164	case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
165		cfg = &diff_config_myers_then_myers_divide;
166		break;
167	case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
168		cfg = &diff_config_myers_then_patience;
169		break;
170	case DIFFREG_ALGO_PATIENCE:
171		cfg = &diff_config_patience;
172		break;
173	case DIFFREG_ALGO_NONE:
174		cfg = &diff_config_no_algo;
175		break;
176	}
177
178	f1 = openfile(file1, &str1, &st1);
179	f2 = openfile(file2, &str2, &st2);
180
181	if (capsicum) {
182		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
183		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
184			err(2, "unable to limit rights on: %s", file1);
185		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
186			err(2, "unable to limit rights on: %s", file2);
187		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
188			/* stdin has already been limited */
189			if (caph_limit_stderr() == -1)
190				err(2, "unable to limit stderr");
191			if (caph_limit_stdout() == -1)
192				err(2, "unable to limit stdout");
193		} else if (caph_limit_stdio() == -1)
194				err(2, "unable to limit stdio");
195		caph_cache_catpages();
196		caph_cache_tzdata();
197		if (caph_enter() < 0)
198			err(2, "unable to enter capability mode");
199	}
200	/*
201	 * If we have been given a label use that for the paths, if not format
202	 * the path with the files modification time.
203	 */
204	info.flags = 0;
205	info.left_path = (label[0] != NULL) ?
206		label[0] : format_label(file1, &stb1);
207	info.right_path = (label[1] != NULL) ?
208		label[1] : format_label(file2, &stb2);
209
210	if (flags & D_FORCEASCII)
211		diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
212	if (flags & D_IGNOREBLANKS)
213		diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
214	if (flags & D_PROTOTYPE)
215		diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
216
217	if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) {
218		rc = D_ERROR;
219		goto done;
220	}
221	if (left.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
222		warnx("%s truncated", file1);
223	if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) {
224		rc = D_ERROR;
225		goto done;
226	}
227	if (right.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
228		warnx("%s truncated", file2);
229
230	result = diff_main(cfg, &left, &right);
231	if (result->rc != DIFF_RC_OK) {
232		rc = D_ERROR;
233		status |= 2;
234		goto done;
235	}
236	/*
237	 * If there wasn't an error, but we don't have any printable chunks
238	 * then the files must match.
239	 */
240	if (!diff_result_contains_printable_chunks(result)) {
241		rc = D_SAME;
242		goto done;
243	}
244
245	atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
246	rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
247	force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
248	have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
249
250	if (have_binary && !force_text) {
251		rc = D_BINARY;
252		status |= 1;
253		goto done;
254	}
255
256	if (diff_format == D_NORMAL) {
257		rc = diff_output_plain(NULL, stdout, &info, result, false);
258	} else if (diff_format == D_EDIT) {
259		rc = diff_output_edscript(NULL, stdout, &info, result);
260	} else {
261		rc = diff_output_unidiff(NULL, stdout, &info, result,
262		    context_lines);
263	}
264	if (rc != DIFF_RC_OK) {
265		rc = D_ERROR;
266		status |= 2;
267	} else {
268		rc = D_DIFFER;
269		status |= 1;
270	}
271done:
272	diff_result_free(result);
273	diff_data_free(&left);
274	diff_data_free(&right);
275#ifndef DIFF_NO_MMAP
276	if (str1)
277		munmap(str1, st1.st_size);
278	if (str2)
279		munmap(str2, st2.st_size);
280#endif
281	fclose(f1);
282	fclose(f2);
283
284	return rc;
285}
286
/*
 * Open path for reading ("-" selects stdin), fill *st with its fstat() data
 * and, unless DIFF_NO_MMAP is defined, try to map the whole file read-only.
 *
 * On return *p is the mapping, or NULL when no mapping is available and the
 * caller has to fall back on file I/O.  Failure to open or stat the file
 * terminates the program with exit status 2.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;

	fp = (strcmp(path, "-") != 0) ? fopen(path, "r") : stdin;
	if (fp == NULL || fstat(fileno(fp), st) == -1)
		err(2, "%s", path);

	*p = NULL; /* fall back on file I/O */
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE,
		    fileno(fp), 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return fp;
}
311
312bool
313can_libdiff(int flags)
314{
315	/* We can't use fifos with libdiff yet */
316	if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode))
317		return false;
318
319	/* Is this one of the supported input/output modes for diffreg_new? */
320	if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
321		ignore_pats == NULL && (
322		diff_format == D_NORMAL ||
323#if 0
324		diff_format == D_EDIT ||
325#endif
326		diff_format == D_UNIFIED) &&
327		(diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
328		return true;
329	}
330
331	/* Fallback to using stone. */
332	return false;
333}
334