/* mkctm.c revision 69793 */
/* $FreeBSD: head/usr.sbin/ctm/mkCTM/mkctm.c 69793 2000-12-09 09:35:55Z obrien $ */
2
/* Still missing:
 *
 * mkctm
 *	-B regex	Bogus
 *	-I regex	Ignore
 *	-D int		Damage
 *	-q		decrease verbosity
 *	-v		increase verbosity
 *	-l file		logfile
 *	name		cvs-cur
 *	prefix		src/secure
 *	dir1		"Soll"
 *	dir2		"Ist"
 *
 */
18
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>

#include <dirent.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <md5.h>
#include <paths.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
34
/* Default patterns for the -I (ignore) and -B (bogus) regular expressions. */
#define DEFAULT_IGNORE	"/CVS$|/\\.#|00_TRANS\\.TBL$"
#define DEFAULT_BOGUS	"\\.core$|\\.orig$|\\.rej$|\\.o$"

/* Compiled -I / -B expressions; each is in use only while its flag is set. */
regex_t	reg_ignore;
regex_t	reg_bogus;
int	flag_ignore;
int	flag_bogus;

int	verbose;	/* raised by -v, lowered by -q */
int	damage;		/* deletions and bogus names seen so far */
int	damage_limit;	/* -D value; 0 means the limit is never checked */
int	change;		/* entries added, deleted or changed so far */

/*
 * Log stream (-l file).
 * NOTE(review): the name is shared with the C99 math function logf().
 */
FILE	*logf;

/*
 * Counters reported by print_stat().  The s1_ counters are printed in the
 * "ref" column and the s2_ counters in the "target" column.
 */
unsigned long	s1_ignored, s2_ignored;
unsigned long	s1_bogus, s2_bogus;
unsigned long	s1_wrong, s2_wrong;
unsigned long	s_new_dirs, s_new_files, s_new_bytes;
unsigned long	s_del_dirs, s_del_files, s_del_bytes;
unsigned long	s_files_chg, s_bytes_add, s_bytes_del;
unsigned long	s_same_dirs, s_same_files, s_same_bytes;
unsigned long	s_edit_files, s_edit_bytes, s_edit_saves;
unsigned long	s_sub_files, s_sub_bytes;
55
/*
 * Usage -- print the command-line synopsis to stderr.
 *
 * One line is printed per option letter; -l was previously accepted by the
 * option parser but missing from this list.
 */
void
Usage(void)
{
	static const char *const lines[] = {
		"usage: mkctm [-options] name number timestamp prefix dir1 dir2\n",
		"options:\n",
		"\t\t-B bogus_regexp\n",
		"\t\t-D damage_limit\n",
		"\t\t-I ignore_regexp\n",
		"\t\t-l logfile\n",
		"\t\t-q\n",
		"\t\t-v\n",
	};
	size_t k;

	for (k = 0; k < sizeof lines / sizeof lines[0]; k++)
		fputs(lines[k], stderr);
}
68
69void
70print_stat(FILE *fd, char *pre)
71{
72    fprintf(fd, "%sNames:\n", pre);
73    fprintf(fd, "%s  ignore:  %5lu ref   %5lu target\n",
74	    pre, s1_ignored, s2_ignored);
75    fprintf(fd, "%s  bogus:   %5lu ref   %5lu target\n",
76	    pre, s1_bogus, s2_bogus);
77    fprintf(fd, "%s  wrong:   %5lu ref   %5lu target\n",
78	    pre, s1_wrong, s2_wrong);
79    fprintf(fd, "%sDelta:\n", pre);
80    fprintf(fd, "%s  new:     %5lu dirs  %5lu files  %9lu plus\n",
81	    pre, s_new_dirs, s_new_files, s_new_bytes);
82    fprintf(fd, "%s  del:     %5lu dirs  %5lu files                   %9lu minus\n",
83	    pre, s_del_dirs, s_del_files, s_del_bytes);
84    fprintf(fd, "%s  chg:                 %5lu files  %9lu plus   %9lu minus\n",
85	    pre, s_files_chg, s_bytes_add, s_bytes_del);
86    fprintf(fd, "%s  same:    %5lu dirs  %5lu files  %9lu bytes\n",
87	    pre, s_same_dirs, s_same_files, s_same_bytes);
88    fprintf(fd, "%sMethod:\n", pre);
89    fprintf(fd, "%s  edit:                %5lu files  %9lu bytes  %9lu saved\n",
90	    pre, s_edit_files, s_edit_bytes, s_edit_saves);
91    fprintf(fd, "%s  sub:                 %5lu files  %9lu bytes\n",
92	    pre, s_sub_files, s_sub_bytes);
93
94}
95
96void
97stat_info(int foo)
98{
99	signal(SIGINFO, stat_info);
100	print_stat(stderr, "INFO: ");
101}
102
/* Forward declaration: Equ(), Add() and Del() call DoDir() before its definition. */
void DoDir(const char *dir1, const char *dir2, const char *name);

/* Result buffer shared by every StatFile() call. */
static struct stat st;

/*
 * StatFile -- lstat(2) `name` and return a pointer to the result.
 *
 * The result lives in the file-scope `st` buffer, so it is overwritten by
 * the next call.  If lstat() fails the program exits through err(1, ...).
 */
static __inline struct stat *
StatFile(char *name)
{
	int rc;

	rc = lstat(name, &st);
	if (rc < 0)
		err(1, "couldn't stat %s", name);
	return &st;
}
113
/*
 * dirselect -- scandir(3) selection callback.
 *
 * Returns 0 for the entries "." and "..", and 1 for every other name.
 */
int
dirselect(struct dirent *de)
{
	const char *n = de->d_name;

	if (n[0] == '.') {
		if (n[1] == '\0')
			return 0;		/* "." */
		if (n[1] == '.' && n[2] == '\0')
			return 0;		/* ".." */
	}
	return 1;
}
121
122void
123name_stat(const char *pfx, const char *dir, const char *name, struct dirent *de)
124{
125	char *buf = alloca(strlen(dir) + strlen(name) +
126		strlen(de->d_name) + 3);
127	struct stat *st;
128
129	strcpy(buf, dir);
130		strcat(buf, "/"); strcat(buf, name);
131		strcat(buf, "/"); strcat(buf, de->d_name);
132	st = StatFile(buf);
133	printf("%s %s%s %u %u %o",
134	    pfx, name, de->d_name,
135	    st->st_uid, st->st_gid, st->st_mode & ~S_IFMT);
136	fprintf(logf, "%s %s%s\n", pfx, name, de->d_name);
137	if (verbose > 1) {
138		fprintf(stderr, "%s %s%s\n", pfx, name, de->d_name);
139	}
140}
141
142void
143Equ(const char *dir1, const char *dir2, const char *name, struct dirent *de)
144{
145	if (de->d_type == DT_DIR) {
146		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
147
148		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
149		DoDir(dir1, dir2, p);
150		s_same_dirs++;
151	} else {
152		char *buf1 = alloca(strlen(dir1) + strlen(name) +
153			strlen(de->d_name) + 3);
154		char *buf2 = alloca(strlen(dir2) + strlen(name) +
155			strlen(de->d_name) + 3);
156		char *m1, md5_1[33], *m2, md5_2[33];
157		u_char *p1, *p2;
158		int fd1, fd2;
159		struct stat s1, s2;
160
161		strcpy(buf1, dir1);
162			strcat(buf1, "/"); strcat(buf1, name);
163			strcat(buf1, "/"); strcat(buf1, de->d_name);
164		fd1 = open(buf1, O_RDONLY);
165		if(fd1 < 0) { err(3, "%s", buf1); }
166		fstat(fd1, &s1);
167		strcpy(buf2, dir2);
168			strcat(buf2, "/"); strcat(buf2, name);
169			strcat(buf2, "/"); strcat(buf2, de->d_name);
170		fd2 = open(buf2, O_RDONLY);
171		if(fd2 < 0) { err(3, "%s", buf2); }
172		fstat(fd2, &s2);
173#if 0
174		/* XXX if we could just trust the size to change... */
175		if (s1.st_size == s2.st_size) {
176			s_same_files++;
177			s_same_bytes += s1.st_size;
178			close(fd1);
179			close(fd2);
180			goto finish;
181		}
182#endif
183		p1=mmap(0, s1.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
184		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf1); }
185		close(fd1);
186
187		p2=mmap(0, s2.st_size, PROT_READ, MAP_PRIVATE, fd2, 0);
188		if (p2 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
189		close(fd2);
190
191		/* If identical, we're done. */
192		if((s1.st_size == s2.st_size) && !memcmp(p1, p2, s1.st_size)) {
193			s_same_files++;
194			s_same_bytes += s1.st_size;
195			goto finish;
196		}
197
198		s_files_chg++;
199		change++;
200		if (s1.st_size > s2.st_size)
201			s_bytes_del += (s1.st_size - s2.st_size);
202		else
203			s_bytes_add += (s2.st_size - s1.st_size);
204
205		m1 = MD5Data(p1, s1.st_size, md5_1);
206		m2 = MD5Data(p2, s2.st_size, md5_2);
207
208		/* Just a curiosity... */
209		if(!strcmp(m1, m2)) {
210			if (s1.st_size != s2.st_size)
211				fprintf(stderr,
212		"Notice: MD5 same for files of diffent size:\n\t%s\n\t%s\n",
213					buf1, buf2);
214			goto finish;
215		}
216
217		{
218			u_long l = s2.st_size + 2;
219			u_char *cmd = alloca(strlen(buf1)+strlen(buf2)+100);
220			u_char *ob = alloca(l), *p;
221			int j;
222			FILE *F;
223
224			if (s1.st_size && p1[s1.st_size-1] != '\n') {
225				if (verbose > 0)
226					fprintf(stderr,
227					    "last char != \\n in %s\n",
228					     buf1);
229				goto subst;
230			}
231
232			if (s2.st_size && p2[s2.st_size-1] != '\n') {
233				if (verbose > 0)
234					fprintf(stderr,
235					    "last char != \\n in %s\n",
236					     buf2);
237				goto subst;
238			}
239
240			for (p=p1; p<p1+s1.st_size; p++)
241				if (!*p) {
242					if (verbose > 0)
243						fprintf(stderr,
244						    "NULL char in %s\n",
245						     buf1);
246					goto subst;
247				}
248
249			for (p=p2; p<p2+s2.st_size; p++)
250				if (!*p) {
251					if (verbose > 0)
252						fprintf(stderr,
253						    "NULL char in %s\n",
254						     buf2);
255					goto subst;
256				}
257
258			strcpy(cmd, "diff -n ");
259			strcat(cmd, buf1);
260			strcat(cmd, " ");
261			strcat(cmd, buf2);
262			F = popen(cmd, "r");
263			for (j = 1, l = 0; l < s2.st_size; ) {
264				j = fread(ob+l, 1, s2.st_size - l, F);
265				if (j < 1)
266					break;
267				l += j;
268				continue;
269			}
270			if (j) {
271				l = 0;
272				while (EOF != fgetc(F))
273					continue;
274			}
275			pclose(F);
276
277			if (l && l < s2.st_size) {
278				name_stat("CTMFN", dir2, name, de);
279				printf(" %s %s %d\n", m1, m2, (unsigned)l);
280				fwrite(ob, 1, l, stdout);
281				putchar('\n');
282				s_edit_files++;
283				s_edit_bytes += l;
284				s_edit_saves += (s2.st_size - l);
285			} else {
286			subst:
287				name_stat("CTMFS", dir2, name, de);
288				printf(" %s %s %u\n", m1, m2, (unsigned)s2.st_size);
289				fwrite(p2, 1, s2.st_size, stdout);
290				putchar('\n');
291				s_sub_files++;
292				s_sub_bytes += s2.st_size;
293			}
294		}
295	    finish:
296		munmap(p1, s1.st_size);
297		munmap(p2, s2.st_size);
298	}
299}
300
301void
302Add(const char *dir1, const char *dir2, const char *name, struct dirent *de)
303{
304	change++;
305	if (de->d_type == DT_DIR) {
306		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
307		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
308		name_stat("CTMDM", dir2, name, de);
309		putchar('\n');
310		s_new_dirs++;
311		DoDir(dir1, dir2, p);
312	} else if (de->d_type == DT_REG) {
313		char *buf2 = alloca(strlen(dir2) + strlen(name) +
314			strlen(de->d_name) + 3);
315		char *m2, md5_2[33];
316		u_char *p1;
317		struct stat st;
318		int fd1;
319
320		strcpy(buf2, dir2);
321			strcat(buf2, "/"); strcat(buf2, name);
322			strcat(buf2, "/"); strcat(buf2, de->d_name);
323		fd1 = open(buf2, O_RDONLY);
324		if (fd1 < 0) { err(3, "%s", buf2); }
325		fstat(fd1, &st);
326		p1=mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
327		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
328		close(fd1);
329		m2 = MD5Data(p1, st.st_size, md5_2);
330		name_stat("CTMFM", dir2, name, de);
331		printf(" %s %u\n", m2, (unsigned)st.st_size);
332		fwrite(p1, 1, st.st_size, stdout);
333		putchar('\n');
334		munmap(p1, st.st_size);
335		s_new_files++;
336		s_new_bytes += st.st_size;
337	}
338}
339
340void
341Del (const char *dir1, const char *dir2, const char *name, struct dirent *de)
342{
343	damage++;
344	change++;
345	if (de->d_type == DT_DIR) {
346		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
347		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
348		DoDir(dir1, dir2, p);
349		printf("CTMDR %s%s\n", name, de->d_name);
350		fprintf(logf, "CTMDR %s%s\n", name, de->d_name);
351		if (verbose > 1) {
352			fprintf(stderr, "CTMDR %s%s\n", name, de->d_name);
353		}
354		s_del_dirs++;
355	} else if (de->d_type == DT_REG) {
356		char *buf1 = alloca(strlen(dir1) + strlen(name) +
357			strlen(de->d_name) + 3);
358		char *m1, md5_1[33];
359		strcpy(buf1, dir1);
360			strcat(buf1, "/"); strcat(buf1, name);
361			strcat(buf1, "/"); strcat(buf1, de->d_name);
362		m1 = MD5File(buf1, md5_1);
363		printf("CTMFR %s%s %s\n", name, de->d_name, m1);
364		fprintf(logf, "CTMFR %s%s %s\n", name, de->d_name, m1);
365		if (verbose > 1) {
366			fprintf(stderr, "CTMFR %s%s\n", name, de->d_name);
367		}
368		s_del_files++;
369		s_del_bytes += StatFile(buf1)->st_size;
370	}
371}
372
373void
374GetNext(int *i, int *n, struct dirent **nl, const char *dir, const char *name, u_long *ignored, u_long *bogus, u_long *wrong)
375{
376	char buf[BUFSIZ];
377	char buf1[BUFSIZ];
378
379	for (;;) {
380		for (;;) {
381			(*i)++;
382			if (*i >= *n)
383				return;
384			strcpy(buf1, name);
385			if (buf1[strlen(buf1)-1] != '/')
386				strcat(buf1, "/");
387			strcat(buf1, nl[*i]->d_name);
388			if (flag_ignore &&
389			    !regexec(&reg_ignore, buf1, 0, 0, 0)) {
390				(*ignored)++;
391				fprintf(logf, "Ignore %s\n", buf1);
392				if (verbose > 2) {
393					fprintf(stderr, "Ignore %s\n", buf1);
394				}
395			} else if (flag_bogus &&
396			    !regexec(&reg_bogus, buf1, 0, 0, 0)) {
397				(*bogus)++;
398				fprintf(logf, "Bogus %s\n", buf1);
399				fprintf(stderr, "Bogus %s\n", buf1);
400				damage++;
401			} else {
402				*buf = 0;
403				if (*dir != '/')
404					strcat(buf, "/");
405				strcat(buf, dir);
406				if (buf[strlen(buf)-1] != '/')
407					strcat(buf, "/");
408				strcat(buf, buf1);
409				break;
410			}
411			free(nl[*i]); nl[*i] = 0;
412		}
413		/* If the filesystem didn't tell us, find type */
414		if (nl[*i]->d_type == DT_UNKNOWN)
415			nl[*i]->d_type = IFTODT(StatFile(buf)->st_mode);
416		if (nl[*i]->d_type == DT_REG || nl[*i]->d_type == DT_DIR)
417			break;
418		(*wrong)++;
419		if (verbose > 0)
420			fprintf(stderr, "Wrong %s\n", buf);
421		free(nl[*i]); nl[*i] = 0;
422	}
423}
424
425void
426DoDir(const char *dir1, const char *dir2, const char *name)
427{
428	int i1, i2, n1, n2, i;
429	struct dirent **nl1, **nl2;
430	char *buf1 = alloca(strlen(dir1) + strlen(name) + 4);
431	char *buf2 = alloca(strlen(dir2) + strlen(name) + 4);
432
433	strcpy(buf1, dir1); strcat(buf1, "/"); strcat(buf1, name);
434	strcpy(buf2, dir2); strcat(buf2, "/"); strcat(buf2, name);
435	n1 = scandir(buf1, &nl1, dirselect, alphasort);
436	n2 = scandir(buf2, &nl2, dirselect, alphasort);
437	i1 = i2 = -1;
438	GetNext(&i1, &n1, nl1, dir1, name, &s1_ignored, &s1_bogus, &s1_wrong);
439	GetNext(&i2, &n2, nl2, dir2, name, &s2_ignored, &s2_bogus, &s2_wrong);
440	for (;i1 < n1 || i2 < n2;) {
441
442		if (damage_limit && damage > damage_limit)
443			break;
444
445		/* Get next item from list 1 */
446		if (i1 < n1 && !nl1[i1])
447			GetNext(&i1, &n1, nl1, dir1, name,
448				&s1_ignored, &s1_bogus, &s1_wrong);
449
450		/* Get next item from list 2 */
451		if (i2 < n2 && !nl2[i2])
452			GetNext(&i2, &n2, nl2, dir2, name,
453				&s2_ignored, &s2_bogus, &s2_wrong);
454
455		if (i1 >= n1 && i2 >= n2) {
456			/* Done */
457			break;
458		} else if (i1 >= n1 && i2 < n2) {
459			/* end of list 1, add anything left on list 2 */
460			Add(dir1, dir2, name, nl2[i2]);
461			free(nl2[i2]); nl2[i2] = 0;
462		} else if (i1 < n1 && i2 >= n2) {
463			/* end of list 2, delete anything left on list 1 */
464			Del(dir1, dir2, name, nl1[i1]);
465			free(nl1[i1]); nl1[i1] = 0;
466		} else if (!(i = strcmp(nl1[i1]->d_name, nl2[i2]->d_name))) {
467			/* Identical names */
468			if (nl1[i1]->d_type == nl2[i2]->d_type) {
469				/* same type */
470				Equ(dir1, dir2, name, nl1[i1]);
471			} else {
472				/* different types */
473				Del(dir1, dir2, name, nl1[i1]);
474				Add(dir1, dir2, name, nl2[i2]);
475			}
476			free(nl1[i1]); nl1[i1] = 0;
477			free(nl2[i2]); nl2[i2] = 0;
478		} else if (i < 0) {
479			/* Something extra in list 1, delete it */
480			Del(dir1, dir2, name, nl1[i1]);
481			free(nl1[i1]); nl1[i1] = 0;
482		} else {
483			/* Something extra in list 2, add it */
484			Add(dir1, dir2, name, nl2[i2]);
485			free(nl2[i2]); nl2[i2] = 0;
486		}
487	}
488	if (n1 >= 0)
489		free(nl1);
490	if (n2 >= 0)
491		free(nl2);
492}
493
494int
495main(int argc, char **argv)
496{
497	int i;
498	extern char *optarg;
499	extern int optind;
500
501	setbuf(stderr, NULL);
502
503#if 0
504	if (regcomp(&reg_bogus, DEFAULT_BOGUS, REG_EXTENDED | REG_NEWLINE))
505		/* XXX use regerror to explain it */
506		errx(1, "default regular expression argument to -B is botched");
507	flag_bogus = 1;
508
509	if (regcomp(&reg_ignore, DEFAULT_IGNORE, REG_EXTENDED | REG_NEWLINE))
510		/* XXX use regerror to explain it */
511		errx(1, "default regular expression argument to -I is botched");
512	flag_ignore = 1;
513#endif
514
515	while ((i = getopt(argc, argv, "D:I:B:l:qv")) != -1)
516		switch (i) {
517		case 'D':
518			damage_limit = strtol(optarg, 0, 0);
519			if (damage_limit < 0)
520				errx(1, "damage limit must be positive");
521			break;
522		case 'I':
523			if (flag_ignore)
524				regfree(&reg_ignore);
525			flag_ignore = 0;
526			if (!*optarg)
527				break;
528			if (regcomp(&reg_ignore, optarg,
529			    REG_EXTENDED | REG_NEWLINE))
530				/* XXX use regerror to explain it */
531				errx(1, "regular expression argument to -I is botched");
532			flag_ignore = 1;
533			break;
534		case 'B':
535			if (flag_bogus)
536				regfree(&reg_bogus);
537			flag_bogus = 0;
538			if (!*optarg)
539				break;
540			if (regcomp(&reg_bogus, optarg,
541			    REG_EXTENDED | REG_NEWLINE))
542				/* XXX use regerror to explain it */
543				errx(1, "regular expression argument to -B is botched");
544			flag_bogus = 1;
545			break;
546		case 'l':
547			logf = fopen(optarg, "w");
548			if (!logf)
549				err(1, "%s", optarg);
550			break;
551		case 'q':
552			verbose--;
553			break;
554		case 'v':
555			verbose++;
556			break;
557		case '?':
558		default:
559			Usage();
560			return (1);
561		}
562	argc -= optind;
563	argv += optind;
564
565	if (!logf)
566		logf = fopen(_PATH_DEVNULL, "w");
567
568	setbuf(stdout, 0);
569
570	if (argc != 6) {
571		Usage();
572		return (1);
573	}
574
575	signal(SIGINFO, stat_info);
576
577	fprintf(stderr, "CTM_BEGIN 2.0 %s %s %s %s\n",
578		argv[0], argv[1], argv[2], argv[3]);
579	fprintf(logf, "CTM_BEGIN 2.0 %s %s %s %s\n",
580		argv[0], argv[1], argv[2], argv[3]);
581	printf("CTM_BEGIN 2.0 %s %s %s %s\n",
582		argv[0], argv[1], argv[2], argv[3]);
583	DoDir(argv[4], argv[5], "");
584	if (damage_limit && damage > damage_limit) {
585		print_stat(stderr, "DAMAGE: ");
586		errx(1, "damage of %d would exceed %d files",
587			damage, damage_limit);
588	} else if (change < 2) {
589		errx(4, "no changes");
590	} else {
591		printf("CTM_END ");
592		fprintf(logf, "CTM_END\n");
593		print_stat(stderr, "END: ");
594	}
595	exit(0);
596}
597