1206156Sume/* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */ 2206156Sume 3206156Sume/*- 4206156Sume * Copyright (c) 2007 The NetBSD Foundation, Inc. 5206156Sume * All rights reserved. 6206156Sume * 7206156Sume * This code is derived from software contributed to The NetBSD Foundation 8206156Sume * by Christos Zoulas. 9206156Sume * 10206156Sume * Redistribution and use in source and binary forms, with or without 11206156Sume * modification, are permitted provided that the following conditions 12206156Sume * are met: 13206156Sume * 1. Redistributions of source code must retain the above copyright 14206156Sume * notice, this list of conditions and the following disclaimer. 15206156Sume * 2. Redistributions in binary form must reproduce the above copyright 16206156Sume * notice, this list of conditions and the following disclaimer in the 17206156Sume * documentation and/or other materials provided with the distribution. 18206156Sume * 19206156Sume * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20206156Sume * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21206156Sume * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22206156Sume * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23206156Sume * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24206156Sume * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25206156Sume * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26206156Sume * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27206156Sume * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28206156Sume * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29206156Sume * POSSIBILITY OF SUCH DAMAGE. 
30206156Sume */ 31206156Sume#include <sys/cdefs.h> 32206156Sume__FBSDID("$FreeBSD$"); 33206156Sume 34206156Sume#include <stdio.h> 35206156Sume#include <string.h> 36206156Sume#include <stdlib.h> 37206156Sume#include <db.h> 38206156Sume#include <err.h> 39206156Sume#include <libutil.h> 40206156Sume#include <ctype.h> 41206156Sume#include <fcntl.h> 42206156Sume 43241777Sed#include "extern.h" 44206156Sume 45206156Sumestatic int comp(const char *, char **, size_t *); 46206156Sume 47206156Sume/* 48206156Sume * Preserve only unique content lines in a file. Input lines that have 49206156Sume * content [alphanumeric characters before a comment] are white-space 50206156Sume * normalized and have their comments removed. Then they are placed 51206156Sume * in a hash table, and only the first instance of them is printed. 52206156Sume * Comment lines without any alphanumeric content are always printed 53206156Sume * since they are there to make the file "pretty". Comment lines with 54206156Sume * alphanumeric content are also placed into the hash table and only 55206156Sume * printed once. 
56206156Sume */ 57206156Sumevoid 58206156Sumeuniq(const char *fname) 59206156Sume{ 60206156Sume DB *db; 61206156Sume DBT key; 62206156Sume static const DBT data = { NULL, 0 }; 63206156Sume FILE *fp; 64206156Sume char *line; 65206156Sume size_t len; 66206156Sume 67206156Sume if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 68206156Sume err(1, "Cannot create in memory database"); 69206156Sume 70206156Sume if ((fp = fopen(fname, "r")) == NULL) 71206156Sume err(1, "Cannot open `%s'", fname); 72206156Sume while ((line = fgetln(fp, &len)) != NULL) { 73206156Sume size_t complen = len; 74206156Sume char *compline; 75206156Sume if (!comp(line, &compline, &complen)) { 76206156Sume (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 77206156Sume line); 78206156Sume continue; 79206156Sume } 80206156Sume key.data = compline; 81206156Sume key.size = complen; 82206156Sume switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 83206156Sume case 0: 84206156Sume (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 85206156Sume line); 86206156Sume break; 87206156Sume case 1: 88206156Sume break; 89206156Sume case -1: 90206156Sume err(1, "put"); 91206156Sume default: 92206156Sume abort(); 93206156Sume break; 94206156Sume } 95206156Sume } 96206156Sume (void)fflush(stdout); 97206156Sume exit(0); 98206156Sume} 99206156Sume 100206156Sume/* 101206156Sume * normalize whitespace in the original line and place a new string 102206156Sume * with whitespace converted to a single space in compline. If the line 103206156Sume * contains just comments, we preserve them. If it contains data and 104206156Sume * comments, we kill the comments. Return 1 if the line had actual 105206156Sume * contents, or 0 if it was just a comment without alphanumeric characters. 
/*
 * comp: build the normalized form of one input line, used as its lookup key.
 *
 * origline  Start of the line; it does not have to be NUL terminated.
 * compline  Receives a malloc()ed, NUL-terminated normalized copy that the
 *           caller owns and must free(); set to NULL for a blank line.
 * len       In:  number of bytes available at origline.
 *           Out: length of the normalized copy, not counting the NUL.
 *
 * Normalization strips leading and trailing white space and collapses each
 * interior run of white space into a single space.  A '#' met after
 * alphanumeric data starts a trailing comment, which is dropped.  A '#' met
 * before any alphanumeric data makes the line a comment, which is kept.
 * Scanning also stops at an embedded NUL byte.
 *
 * Returns 1 if the line has alphanumeric content, or 0 if it is blank or a
 * comment without alphanumeric characters.  Exits through err() if memory
 * cannot be allocated.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Eat leading space */
	while (l != 0 && *p != '\0' && isspace(*p)) {
		p++;
		l--;
	}

	/*
	 * Nothing left: report the blank line before allocating, so that no
	 * buffer is leaked and the caller's outputs are always defined.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The normalized text can never be longer than what remains. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	for (q = (unsigned char *)cline; l != 0 && *p != '\0'; p++, l--) {
		if (isspace(*p)) {
			/* Only the first of a run of white space is kept. */
			if (complen != 0 && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}
		if (!iscomment && *p == '#') {
			/* Comment following real data: drop the rest. */
			if (hasalnum)
				break;
			iscomment = 1;
		} else if (isalnum(*p)) {
			/* Store 0/1 rather than isalnum()'s arbitrary value. */
			hasalnum = 1;
		}
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';
	*compline = cline;
	*len = complen;
	return hasalnum;
}