mkctm.c revision 124993
1/* $FreeBSD: head/usr.sbin/ctm/mkCTM/mkctm.c 124993 2004-01-26 04:08:43Z mckay $ */
2
3/* Still missing:
4 *
5 * mkctm
6 *	-B regex	Bogus
7 *	-I regex	Ignore
8 *      -D int		Damage
9 *	-q		decrease verbosity
10 *	-v		increase verbosity
11 *      -l file		logfile
12 *	name		cvs-cur
13 *	prefix		src/secure
14 *	dir1		"Soll"
15 *	dir2		"Ist"
16 *
17 * $FreeBSD: head/usr.sbin/ctm/mkCTM/mkctm.c 124993 2004-01-26 04:08:43Z mckay $
18 */
19
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/mman.h>
23#include <sys/wait.h>
24#include <dirent.h>
25#include <regex.h>
26#include <stdio.h>
27#include <fcntl.h>
28#include <string.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <md5.h>
32#include <err.h>
33#include <paths.h>
34#include <signal.h>
35
/*
 * Default patterns for -I and -B.  In this file they are referenced only
 * from a region of main() that is compiled out with #if 0.
 */
#define DEFAULT_IGNORE	"/CVS$|/\\.#|00_TRANS\\.TBL$"
#define DEFAULT_BOGUS	"\\.core$|\\.orig$|\\.rej$|\\.o$"

/* Compiled -I / -B patterns; flag_* is nonzero while the regex is usable. */
regex_t	reg_ignore;
regex_t	reg_bogus;
int	flag_ignore;
int	flag_bogus;

/* Verbosity level: raised by -v, lowered by -q. */
int	verbose;

/*
 * damage counts deletions and "bogus" names; processing stops once it
 * exceeds damage_limit (set with -D, 0 means no limit).
 */
int	damage;
int	damage_limit;

/* Number of additions, deletions and modified files seen so far. */
int	change;

/* Log stream: the -l file, or the null device when -l is not given. */
FILE	*logf;

/* Name filtering statistics; s1_* is for dir1, s2_* for dir2. */
u_long	s1_ignored, s2_ignored;
u_long	s1_bogus, s2_bogus;
u_long	s1_wrong, s2_wrong;

/* Delta statistics, in the order print_stat() reports them. */
u_long	s_new_dirs, s_new_files, s_new_bytes;
u_long	s_del_dirs, s_del_files, s_del_bytes;
u_long	s_files_chg, s_bytes_add, s_bytes_del;
u_long	s_same_dirs, s_same_files, s_same_bytes;

/* How changed files were encoded: as an edit (diff) or a substitution. */
u_long	s_edit_files, s_edit_bytes, s_edit_saves;
u_long	s_sub_files, s_sub_bytes;
56
/*
 * Print the command synopsis and the list of options to stderr.
 *
 * The option list mirrors the getopt() string used by main(); -l was
 * accepted there but missing from this help text.
 */
void
Usage(void)
{
	fprintf(stderr,
		"usage: mkctm [-options] name number timestamp prefix dir1 dir2\n");
	fprintf(stderr, "options:\n");
	fprintf(stderr, "\t\t-B bogus_regexp\n");
	fprintf(stderr, "\t\t-D damage_limit\n");
	fprintf(stderr, "\t\t-I ignore_regexp\n");
	fprintf(stderr, "\t\t-l logfile\n");
	fprintf(stderr, "\t\t-q\n");
	fprintf(stderr, "\t\t-v\n");
}
69
70void
71print_stat(FILE *fd, char *pre)
72{
73    fprintf(fd, "%sNames:\n", pre);
74    fprintf(fd, "%s  ignore:  %5lu ref   %5lu target\n",
75	    pre, s1_ignored, s2_ignored);
76    fprintf(fd, "%s  bogus:   %5lu ref   %5lu target\n",
77	    pre, s1_bogus, s2_bogus);
78    fprintf(fd, "%s  wrong:   %5lu ref   %5lu target\n",
79	    pre, s1_wrong, s2_wrong);
80    fprintf(fd, "%sDelta:\n", pre);
81    fprintf(fd, "%s  new:     %5lu dirs  %5lu files  %9lu plus\n",
82	    pre, s_new_dirs, s_new_files, s_new_bytes);
83    fprintf(fd, "%s  del:     %5lu dirs  %5lu files                   %9lu minus\n",
84	    pre, s_del_dirs, s_del_files, s_del_bytes);
85    fprintf(fd, "%s  chg:                 %5lu files  %9lu plus   %9lu minus\n",
86	    pre, s_files_chg, s_bytes_add, s_bytes_del);
87    fprintf(fd, "%s  same:    %5lu dirs  %5lu files  %9lu bytes\n",
88	    pre, s_same_dirs, s_same_files, s_same_bytes);
89    fprintf(fd, "%sMethod:\n", pre);
90    fprintf(fd, "%s  edit:                %5lu files  %9lu bytes  %9lu saved\n",
91	    pre, s_edit_files, s_edit_bytes, s_edit_saves);
92    fprintf(fd, "%s  sub:                 %5lu files  %9lu bytes\n",
93	    pre, s_sub_files, s_sub_bytes);
94
95}
96
97void
98stat_info(int foo)
99{
100	signal(SIGINFO, stat_info);
101	print_stat(stderr, "INFO: ");
102}
103
/* Defined further down; needed here because Equ/Add/Del recurse into it. */
void DoDir(const char *dir1, const char *dir2, const char *name);

/* Result buffer shared by every StatFile() call. */
static struct stat st;

/*
 * lstat() the given path and return a pointer to the shared result buffer.
 * The returned data is overwritten by the next call.  Exits with status 1
 * if the path cannot be examined.
 */
static __inline struct stat *
StatFile(char *name)
{
	int rc;

	rc = lstat(name, &st);
	if (rc < 0)
		err(1, "couldn't stat %s", name);
	return (&st);
}
114
/*
 * scandir() filter: returns 0 for the "." and ".." entries so they are
 * skipped, and 1 for every other name.
 */
int
dirselect(struct dirent *de)
{
	const char *entry = de->d_name;
	int is_self = (strcmp(entry, ".") == 0);
	int is_parent = (strcmp(entry, "..") == 0);

	return (is_self || is_parent) ? 0 : 1;
}
122
123void
124name_stat(const char *pfx, const char *dir, const char *name, struct dirent *de)
125{
126	char *buf = alloca(strlen(dir) + strlen(name) +
127		strlen(de->d_name) + 3);
128	struct stat *st;
129
130	strcpy(buf, dir);
131		strcat(buf, "/"); strcat(buf, name);
132		strcat(buf, "/"); strcat(buf, de->d_name);
133	st = StatFile(buf);
134	printf("%s %s%s %u %u %o",
135	    pfx, name, de->d_name,
136	    st->st_uid, st->st_gid, st->st_mode & ~S_IFMT);
137	fprintf(logf, "%s %s%s\n", pfx, name, de->d_name);
138	if (verbose > 1) {
139		fprintf(stderr, "%s %s%s\n", pfx, name, de->d_name);
140	}
141}
142
143void
144Equ(const char *dir1, const char *dir2, const char *name, struct dirent *de)
145{
146	if (de->d_type == DT_DIR) {
147		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
148
149		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
150		DoDir(dir1, dir2, p);
151		s_same_dirs++;
152	} else {
153		char *buf1 = alloca(strlen(dir1) + strlen(name) +
154			strlen(de->d_name) + 3);
155		char *buf2 = alloca(strlen(dir2) + strlen(name) +
156			strlen(de->d_name) + 3);
157		char *m1, md5_1[33], *m2, md5_2[33];
158		u_char *p1, *p2;
159		int fd1, fd2;
160		struct stat s1, s2;
161
162		strcpy(buf1, dir1);
163			strcat(buf1, "/"); strcat(buf1, name);
164			strcat(buf1, "/"); strcat(buf1, de->d_name);
165		fd1 = open(buf1, O_RDONLY);
166		if(fd1 < 0) { err(3, "%s", buf1); }
167		fstat(fd1, &s1);
168		strcpy(buf2, dir2);
169			strcat(buf2, "/"); strcat(buf2, name);
170			strcat(buf2, "/"); strcat(buf2, de->d_name);
171		fd2 = open(buf2, O_RDONLY);
172		if(fd2 < 0) { err(3, "%s", buf2); }
173		fstat(fd2, &s2);
174#if 0
175		/* XXX if we could just trust the size to change... */
176		if (s1.st_size == s2.st_size) {
177			s_same_files++;
178			s_same_bytes += s1.st_size;
179			close(fd1);
180			close(fd2);
181			goto finish;
182		}
183#endif
184		p1=mmap(0, s1.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
185		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf1); }
186		close(fd1);
187
188		p2=mmap(0, s2.st_size, PROT_READ, MAP_PRIVATE, fd2, 0);
189		if (p2 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
190		close(fd2);
191
192		/* If identical, we're done. */
193		if((s1.st_size == s2.st_size) && !memcmp(p1, p2, s1.st_size)) {
194			s_same_files++;
195			s_same_bytes += s1.st_size;
196			goto finish;
197		}
198
199		s_files_chg++;
200		change++;
201		if (s1.st_size > s2.st_size)
202			s_bytes_del += (s1.st_size - s2.st_size);
203		else
204			s_bytes_add += (s2.st_size - s1.st_size);
205
206		m1 = MD5Data(p1, s1.st_size, md5_1);
207		m2 = MD5Data(p2, s2.st_size, md5_2);
208
209		/* Just a curiosity... */
210		if(!strcmp(m1, m2)) {
211			if (s1.st_size != s2.st_size)
212				fprintf(stderr,
213		"Notice: MD5 same for files of diffent size:\n\t%s\n\t%s\n",
214					buf1, buf2);
215			goto finish;
216		}
217
218		{
219			u_long l = s2.st_size + 2;
220			u_char *cmd = alloca(strlen(buf1)+strlen(buf2)+100);
221			u_char *ob = alloca(l), *p;
222			int j;
223			FILE *F;
224
225			if (s1.st_size && p1[s1.st_size-1] != '\n') {
226				if (verbose > 0)
227					fprintf(stderr,
228					    "last char != \\n in %s\n",
229					     buf1);
230				goto subst;
231			}
232
233			if (s2.st_size && p2[s2.st_size-1] != '\n') {
234				if (verbose > 0)
235					fprintf(stderr,
236					    "last char != \\n in %s\n",
237					     buf2);
238				goto subst;
239			}
240
241			for (p=p1; p<p1+s1.st_size; p++)
242				if (!*p) {
243					if (verbose > 0)
244						fprintf(stderr,
245						    "NULL char in %s\n",
246						     buf1);
247					goto subst;
248				}
249
250			for (p=p2; p<p2+s2.st_size; p++)
251				if (!*p) {
252					if (verbose > 0)
253						fprintf(stderr,
254						    "NULL char in %s\n",
255						     buf2);
256					goto subst;
257				}
258
259			strcpy(cmd, "diff -n ");
260			strcat(cmd, buf1);
261			strcat(cmd, " ");
262			strcat(cmd, buf2);
263			F = popen(cmd, "r");
264			for (j = 1, l = 0; l < s2.st_size; ) {
265				j = fread(ob+l, 1, s2.st_size - l, F);
266				if (j < 1)
267					break;
268				l += j;
269				continue;
270			}
271			if (j) {
272				l = 0;
273				while (EOF != fgetc(F))
274					continue;
275			}
276			pclose(F);
277
278			if (l && l < s2.st_size) {
279				name_stat("CTMFN", dir2, name, de);
280				printf(" %s %s %d\n", m1, m2, (unsigned)l);
281				fwrite(ob, 1, l, stdout);
282				putchar('\n');
283				s_edit_files++;
284				s_edit_bytes += l;
285				s_edit_saves += (s2.st_size - l);
286			} else {
287			subst:
288				name_stat("CTMFS", dir2, name, de);
289				printf(" %s %s %u\n", m1, m2, (unsigned)s2.st_size);
290				fwrite(p2, 1, s2.st_size, stdout);
291				putchar('\n');
292				s_sub_files++;
293				s_sub_bytes += s2.st_size;
294			}
295		}
296	    finish:
297		munmap(p1, s1.st_size);
298		munmap(p2, s2.st_size);
299	}
300}
301
302void
303Add(const char *dir1, const char *dir2, const char *name, struct dirent *de)
304{
305	change++;
306	if (de->d_type == DT_DIR) {
307		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
308		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
309		name_stat("CTMDM", dir2, name, de);
310		putchar('\n');
311		s_new_dirs++;
312		DoDir(dir1, dir2, p);
313	} else if (de->d_type == DT_REG) {
314		char *buf2 = alloca(strlen(dir2) + strlen(name) +
315			strlen(de->d_name) + 3);
316		char *m2, md5_2[33];
317		u_char *p1;
318		struct stat st;
319		int fd1;
320
321		strcpy(buf2, dir2);
322			strcat(buf2, "/"); strcat(buf2, name);
323			strcat(buf2, "/"); strcat(buf2, de->d_name);
324		fd1 = open(buf2, O_RDONLY);
325		if (fd1 < 0) { err(3, "%s", buf2); }
326		fstat(fd1, &st);
327		p1=mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
328		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
329		close(fd1);
330		m2 = MD5Data(p1, st.st_size, md5_2);
331		name_stat("CTMFM", dir2, name, de);
332		printf(" %s %u\n", m2, (unsigned)st.st_size);
333		fwrite(p1, 1, st.st_size, stdout);
334		putchar('\n');
335		munmap(p1, st.st_size);
336		s_new_files++;
337		s_new_bytes += st.st_size;
338	}
339}
340
341void
342Del (const char *dir1, const char *dir2, const char *name, struct dirent *de)
343{
344	damage++;
345	change++;
346	if (de->d_type == DT_DIR) {
347		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
348		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
349		DoDir(dir1, dir2, p);
350		printf("CTMDR %s%s\n", name, de->d_name);
351		fprintf(logf, "CTMDR %s%s\n", name, de->d_name);
352		if (verbose > 1) {
353			fprintf(stderr, "CTMDR %s%s\n", name, de->d_name);
354		}
355		s_del_dirs++;
356	} else if (de->d_type == DT_REG) {
357		char *buf1 = alloca(strlen(dir1) + strlen(name) +
358			strlen(de->d_name) + 3);
359		char *m1, md5_1[33];
360		strcpy(buf1, dir1);
361			strcat(buf1, "/"); strcat(buf1, name);
362			strcat(buf1, "/"); strcat(buf1, de->d_name);
363		m1 = MD5File(buf1, md5_1);
364		printf("CTMFR %s%s %s\n", name, de->d_name, m1);
365		fprintf(logf, "CTMFR %s%s %s\n", name, de->d_name, m1);
366		if (verbose > 1) {
367			fprintf(stderr, "CTMFR %s%s\n", name, de->d_name);
368		}
369		s_del_files++;
370		s_del_bytes += StatFile(buf1)->st_size;
371	}
372}
373
374void
375GetNext(int *i, int *n, struct dirent **nl, const char *dir, const char *name, u_long *ignored, u_long *bogus, u_long *wrong)
376{
377	char buf[BUFSIZ];
378	char buf1[BUFSIZ];
379
380	for (;;) {
381		for (;;) {
382			(*i)++;
383			if (*i >= *n)
384				return;
385			strcpy(buf1, name);
386			if (buf1[strlen(buf1)-1] != '/')
387				strcat(buf1, "/");
388			strcat(buf1, nl[*i]->d_name);
389			if (flag_ignore &&
390			    !regexec(&reg_ignore, buf1, 0, 0, 0)) {
391				(*ignored)++;
392				fprintf(logf, "Ignore %s\n", buf1);
393				if (verbose > 2) {
394					fprintf(stderr, "Ignore %s\n", buf1);
395				}
396			} else if (flag_bogus &&
397			    !regexec(&reg_bogus, buf1, 0, 0, 0)) {
398				(*bogus)++;
399				fprintf(logf, "Bogus %s\n", buf1);
400				fprintf(stderr, "Bogus %s\n", buf1);
401				damage++;
402			} else {
403				*buf = 0;
404				if (*dir != '/')
405					strcat(buf, "/");
406				strcat(buf, dir);
407				if (buf[strlen(buf)-1] != '/')
408					strcat(buf, "/");
409				strcat(buf, buf1);
410				break;
411			}
412			free(nl[*i]); nl[*i] = 0;
413		}
414		/* If the filesystem didn't tell us, find type */
415		if (nl[*i]->d_type == DT_UNKNOWN)
416			nl[*i]->d_type = IFTODT(StatFile(buf)->st_mode);
417		if (nl[*i]->d_type == DT_REG || nl[*i]->d_type == DT_DIR)
418			break;
419		(*wrong)++;
420		if (verbose > 0)
421			fprintf(stderr, "Wrong %s\n", buf);
422		free(nl[*i]); nl[*i] = 0;
423	}
424}
425
426void
427DoDir(const char *dir1, const char *dir2, const char *name)
428{
429	int i1, i2, n1, n2, i;
430	struct dirent **nl1, **nl2;
431	char *buf1 = alloca(strlen(dir1) + strlen(name) + 4);
432	char *buf2 = alloca(strlen(dir2) + strlen(name) + 4);
433
434	strcpy(buf1, dir1); strcat(buf1, "/"); strcat(buf1, name);
435	strcpy(buf2, dir2); strcat(buf2, "/"); strcat(buf2, name);
436	n1 = scandir(buf1, &nl1, dirselect, alphasort);
437	n2 = scandir(buf2, &nl2, dirselect, alphasort);
438	i1 = i2 = -1;
439	GetNext(&i1, &n1, nl1, dir1, name, &s1_ignored, &s1_bogus, &s1_wrong);
440	GetNext(&i2, &n2, nl2, dir2, name, &s2_ignored, &s2_bogus, &s2_wrong);
441	for (;i1 < n1 || i2 < n2;) {
442
443		if (damage_limit && damage > damage_limit)
444			break;
445
446		/* Get next item from list 1 */
447		if (i1 < n1 && !nl1[i1])
448			GetNext(&i1, &n1, nl1, dir1, name,
449				&s1_ignored, &s1_bogus, &s1_wrong);
450
451		/* Get next item from list 2 */
452		if (i2 < n2 && !nl2[i2])
453			GetNext(&i2, &n2, nl2, dir2, name,
454				&s2_ignored, &s2_bogus, &s2_wrong);
455
456		if (i1 >= n1 && i2 >= n2) {
457			/* Done */
458			break;
459		} else if (i1 >= n1 && i2 < n2) {
460			/* end of list 1, add anything left on list 2 */
461			Add(dir1, dir2, name, nl2[i2]);
462			free(nl2[i2]); nl2[i2] = 0;
463		} else if (i1 < n1 && i2 >= n2) {
464			/* end of list 2, delete anything left on list 1 */
465			Del(dir1, dir2, name, nl1[i1]);
466			free(nl1[i1]); nl1[i1] = 0;
467		} else if (!(i = strcmp(nl1[i1]->d_name, nl2[i2]->d_name))) {
468			/* Identical names */
469			if (nl1[i1]->d_type == nl2[i2]->d_type) {
470				/* same type */
471				Equ(dir1, dir2, name, nl1[i1]);
472			} else {
473				/* different types */
474				Del(dir1, dir2, name, nl1[i1]);
475				Add(dir1, dir2, name, nl2[i2]);
476			}
477			free(nl1[i1]); nl1[i1] = 0;
478			free(nl2[i2]); nl2[i2] = 0;
479		} else if (i < 0) {
480			/* Something extra in list 1, delete it */
481			Del(dir1, dir2, name, nl1[i1]);
482			free(nl1[i1]); nl1[i1] = 0;
483		} else {
484			/* Something extra in list 2, add it */
485			Add(dir1, dir2, name, nl2[i2]);
486			free(nl2[i2]); nl2[i2] = 0;
487		}
488	}
489	if (n1 >= 0)
490		free(nl1);
491	if (n2 >= 0)
492		free(nl2);
493}
494
495int
496main(int argc, char **argv)
497{
498	int i;
499
500	setbuf(stderr, NULL);
501
502#if 0
503	if (regcomp(&reg_bogus, DEFAULT_BOGUS, REG_EXTENDED | REG_NEWLINE))
504		/* XXX use regerror to explain it */
505		errx(1, "default regular expression argument to -B is botched");
506	flag_bogus = 1;
507
508	if (regcomp(&reg_ignore, DEFAULT_IGNORE, REG_EXTENDED | REG_NEWLINE))
509		/* XXX use regerror to explain it */
510		errx(1, "default regular expression argument to -I is botched");
511	flag_ignore = 1;
512#endif
513
514	while ((i = getopt(argc, argv, "D:I:B:l:qv")) != -1)
515		switch (i) {
516		case 'D':
517			damage_limit = strtol(optarg, 0, 0);
518			if (damage_limit < 0)
519				errx(1, "damage limit must be positive");
520			break;
521		case 'I':
522			if (flag_ignore)
523				regfree(&reg_ignore);
524			flag_ignore = 0;
525			if (!*optarg)
526				break;
527			if (regcomp(&reg_ignore, optarg,
528			    REG_EXTENDED | REG_NEWLINE))
529				/* XXX use regerror to explain it */
530				errx(1, "regular expression argument to -I is botched");
531			flag_ignore = 1;
532			break;
533		case 'B':
534			if (flag_bogus)
535				regfree(&reg_bogus);
536			flag_bogus = 0;
537			if (!*optarg)
538				break;
539			if (regcomp(&reg_bogus, optarg,
540			    REG_EXTENDED | REG_NEWLINE))
541				/* XXX use regerror to explain it */
542				errx(1, "regular expression argument to -B is botched");
543			flag_bogus = 1;
544			break;
545		case 'l':
546			logf = fopen(optarg, "w");
547			if (!logf)
548				err(1, "%s", optarg);
549			setlinebuf(logf);
550			break;
551		case 'q':
552			verbose--;
553			break;
554		case 'v':
555			verbose++;
556			break;
557		case '?':
558		default:
559			Usage();
560			return (1);
561		}
562	argc -= optind;
563	argv += optind;
564
565	if (!logf)
566		logf = fopen(_PATH_DEVNULL, "w");
567
568	setbuf(stdout, 0);
569
570	if (argc != 6) {
571		Usage();
572		return (1);
573	}
574
575	signal(SIGINFO, stat_info);
576
577	fprintf(stderr, "CTM_BEGIN 2.0 %s %s %s %s\n",
578		argv[0], argv[1], argv[2], argv[3]);
579	fprintf(logf, "CTM_BEGIN 2.0 %s %s %s %s\n",
580		argv[0], argv[1], argv[2], argv[3]);
581	printf("CTM_BEGIN 2.0 %s %s %s %s\n",
582		argv[0], argv[1], argv[2], argv[3]);
583	DoDir(argv[4], argv[5], "");
584	if (damage_limit && damage > damage_limit) {
585		print_stat(stderr, "DAMAGE: ");
586		errx(1, "damage of %d would exceed %d files",
587			damage, damage_limit);
588	} else if (change < 2) {
589		errx(4, "no changes");
590	} else {
591		printf("CTM_END ");
592		fprintf(logf, "CTM_END\n");
593		print_stat(stderr, "END: ");
594	}
595	exit(0);
596}
597