1206156Sume/*	$NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $	*/
2206156Sume
3206156Sume/*-
4206156Sume * Copyright (c) 2007 The NetBSD Foundation, Inc.
5206156Sume * All rights reserved.
6206156Sume *
7206156Sume * This code is derived from software contributed to The NetBSD Foundation
8206156Sume * by Christos Zoulas.
9206156Sume *
10206156Sume * Redistribution and use in source and binary forms, with or without
11206156Sume * modification, are permitted provided that the following conditions
12206156Sume * are met:
13206156Sume * 1. Redistributions of source code must retain the above copyright
14206156Sume *    notice, this list of conditions and the following disclaimer.
15206156Sume * 2. Redistributions in binary form must reproduce the above copyright
16206156Sume *    notice, this list of conditions and the following disclaimer in the
17206156Sume *    documentation and/or other materials provided with the distribution.
18206156Sume *
19206156Sume * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20206156Sume * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21206156Sume * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22206156Sume * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23206156Sume * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24206156Sume * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25206156Sume * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26206156Sume * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27206156Sume * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28206156Sume * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29206156Sume * POSSIBILITY OF SUCH DAMAGE.
30206156Sume */
31206156Sume#include <sys/cdefs.h>
32206156Sume__FBSDID("$FreeBSD$");
33206156Sume
34206156Sume#include <stdio.h>
35206156Sume#include <string.h>
36206156Sume#include <stdlib.h>
37206156Sume#include <db.h>
38206156Sume#include <err.h>
39206156Sume#include <libutil.h>
40206156Sume#include <ctype.h>
41206156Sume#include <fcntl.h>
42206156Sume
43241777Sed#include "extern.h"
44206156Sume
/* Forward declaration: comp() is defined further down, after uniq() which calls it. */
45206156Sumestatic int comp(const char *, char **, size_t *);
46206156Sume
47206156Sume/*
48206156Sume * Preserve only unique content lines in a file. Input lines that have
49206156Sume * content [alphanumeric characters before a comment] are white-space
50206156Sume * normalized and have their comments removed. Then they are placed
51206156Sume * in a hash table, and only the first instance of them is printed.
52206156Sume * Comment lines without any alphanumeric content are always printed
53206156Sume * since they are there to make the file "pretty". Comment lines with
54206156Sume * alphanumeric content are also placed into the hash table and only
55206156Sume * printed once.
56206156Sume */
57206156Sumevoid
58206156Sumeuniq(const char *fname)
59206156Sume{
60206156Sume	DB *db;
61206156Sume	DBT key;
62206156Sume	static const DBT data = { NULL, 0 };
63206156Sume	FILE *fp;
64206156Sume	char *line;
65206156Sume	size_t len;
66206156Sume
67206156Sume	if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
68206156Sume		err(1, "Cannot create in memory database");
69206156Sume
70206156Sume	if ((fp = fopen(fname, "r")) == NULL)
71206156Sume		err(1, "Cannot open `%s'", fname);
72206156Sume	while ((line = fgetln(fp, &len)) != NULL) {
73206156Sume		size_t complen = len;
74206156Sume		char *compline;
75206156Sume		if (!comp(line, &compline, &complen)) {
76206156Sume			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
77206156Sume			    line);
78206156Sume			continue;
79206156Sume		}
80206156Sume		key.data = compline;
81206156Sume		key.size = complen;
82206156Sume		switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
83206156Sume		case 0:
84206156Sume			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
85206156Sume			    line);
86206156Sume			break;
87206156Sume		case 1:
88206156Sume			break;
89206156Sume		case -1:
90206156Sume			err(1, "put");
91206156Sume		default:
92206156Sume			abort();
93206156Sume			break;
94206156Sume		}
95206156Sume	}
96206156Sume	(void)fflush(stdout);
97206156Sume	exit(0);
98206156Sume}
99206156Sume
/*
 * Normalize whitespace in the original line and place a new string
 * with every run of whitespace converted to a single space in compline;
 * leading and trailing whitespace is dropped. If the line contains just
 * a comment, the comment is preserved. If it contains data followed by
 * a comment, the comment is removed.
 *
 * origline need not be NUL-terminated; at most *len bytes of it are read
 * (scanning also stops at a NUL byte).
 *
 * Returns 1 if the line had actual contents. In that case *compline is
 * a NUL-terminated, malloc(3)ed string that the caller must free(3) and
 * *len is its length. Returns 0 if the line was empty or was just a
 * comment without alphanumeric characters; then no buffer is handed out,
 * *compline is NULL and *len is 0. Allocation failure is fatal (err(3)).
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	*compline = NULL;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;

	/* Nothing but whitespace: bail out before allocating anything. */
	if (l == 0 || *p == '\0') {
		*len = 0;
		return 0;
	}

	/* The normalized line can never be longer than what is left. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Comment after real data: drop the rest. */
				if (hasalnum)
					break;
				iscomment = 1;
			} else if (isalnum(*p))
				hasalnum = 1;
			*q++ = *p;
			complen++;
		}
	}

	/* No alphanumeric content: nothing to hand out, so do not leak. */
	if (!hasalnum) {
		free(cline);
		*len = 0;
		return 0;
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';
	*compline = cline;
	*len = complen;
	return 1;
}
159