locate.c revision 330897
1/* 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40#ifndef lint 41static const char copyright[] = 42"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 43@(#) Copyright (c) 1989, 1993\n\ 44 The Regents of the University of California. All rights reserved.\n"; 45#endif /* not lint */ 46 47#ifndef lint 48#if 0 49static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 50#endif 51static const char rcsid[] = 52 "$FreeBSD: stable/11/usr.bin/locate/locate/locate.c 330897 2018-03-14 03:19:51Z eadler $"; 53#endif /* not lint */ 54 55/* 56 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 57 * 58 * Locate scans a file list for the full pathname of a file given only part 59 * of the name. The list has been processed with with "front-compression" 60 * and bigram coding. Front compression reduces space by a factor of 4-5, 61 * bigram coding by a further 20-25%. 62 * 63 * The codes are: 64 * 65 * 0-28 likeliest differential counts + offset to make nonnegative 66 * 30 switch code for out-of-range count to follow in next word 67 * 31 an 8 bit char followed 68 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 69 * 32-127 single character (printable) ascii residue (ie, literal) 70 * 71 * A novel two-tiered string search technique is employed: 72 * 73 * First, a metacharacter-free subpattern and partial pathname is matched 74 * BACKWARDS to avoid full expansion of the pathname list. The time savings 75 * is 40-50% over forward matching, which cannot efficiently handle 76 * overlapped search patterns and compressed path residue. 77 * 78 * Then, the actual shell glob-style regular expression (if in this form) is 79 * matched against the candidate pathnames using the slower routines provided 80 * in the standard 'find'. 81 */ 82 83#include <sys/param.h> 84#include <ctype.h> 85#include <err.h> 86#include <fnmatch.h> 87#include <locale.h> 88#include <stdio.h> 89#include <stdlib.h> 90#include <string.h> 91#include <unistd.h> 92 93#ifdef MMAP 94# include <sys/types.h> 95# include <sys/stat.h> 96# include <sys/mman.h> 97# include <fcntl.h> 98#endif 99 100 101#include "locate.h" 102#include "pathnames.h" 103 104#ifdef DEBUG 105# include <sys/time.h> 106# include <sys/types.h> 107# include <sys/resource.h> 108#endif 109 110int f_mmap; /* use mmap */ 111int f_icase; /* ignore case */ 112int f_stdin; /* read database from stdin */ 113int f_statistic; /* print statistic */ 114int f_silent; /* suppress output, show only count of matches */ 115int f_limit; /* limit number of output lines, 0 == infinite */ 116u_int counter; /* counter for matches [-c] */ 117char separator='\n'; /* line separator */ 118 119 120void usage(void); 121void statistic(FILE *, char *); 122void fastfind(FILE *, char *, char *); 123void fastfind_icase(FILE *, char *, char *); 124void fastfind_mmap(char *, caddr_t, int, char *); 125void fastfind_mmap_icase(char *, caddr_t, int, char *); 126void search_mmap(char *, char **); 127void search_fopen(char *, char **); 128unsigned long cputime(void); 129 130extern char **colon(char **, char*, char*); 131extern void print_matches(u_int); 132extern int getwm(caddr_t); 133extern int getwf(FILE *); 134extern u_char *tolower_word(u_char *); 135extern int check_bigram_char(int); 136extern char *patprep(char *); 137 138int 139main(argc, argv) 140 int argc; 141 char **argv; 142{ 143 register int ch; 144 char **dbv = NULL; 145 char *path_fcodes; /* locate database */ 146#ifdef MMAP 147 f_mmap = 1; /* mmap is default */ 148#endif 149 (void) setlocale(LC_ALL, ""); 150 151 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 152 switch(ch) { 153 case '0': /* 'find -print0' style */ 154 separator = '\0'; 155 break; 156 case 'S': /* statistic lines */ 157 f_statistic = 1; 158 break; 159 case 'l': /* limit number of output lines, 0 == infinite */ 160 f_limit = atoi(optarg); 161 break; 162 case 'd': /* database */ 163 dbv = colon(dbv, optarg, _PATH_FCODES); 164 break; 165 case 'i': /* ignore case */ 166 f_icase = 1; 167 break; 168 case 'm': /* mmap */ 169#ifdef MMAP 170 f_mmap = 1; 171#else 172 warnx("mmap(2) not implemented"); 173#endif 174 break; 175 case 's': /* stdio lib */ 176 f_mmap = 0; 177 break; 178 case 'c': /* suppress output, show only count of matches */ 179 f_silent = 1; 180 break; 181 default: 182 usage(); 183 } 184 argv += optind; 185 argc -= optind; 186 187 /* to few arguments */ 188 if (argc < 1 && !(f_statistic)) 189 usage(); 190 191 /* no (valid) database as argument */ 192 if (dbv == NULL || *dbv == NULL) { 193 /* try to read database from environment */ 194 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 195 *path_fcodes == '\0') 196 /* use default database */ 197 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 198 else /* $LOCATE_PATH */ 199 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 200 } 201 202 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 203 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 204 myctype[ch] = tolower(ch); 205 206 /* foreach database ... */ 207 while((path_fcodes = *dbv) != NULL) { 208 dbv++; 209 210 if (!strcmp(path_fcodes, "-")) 211 f_stdin = 1; 212 else 213 f_stdin = 0; 214 215#ifndef MMAP 216 f_mmap = 0; /* be paranoid */ 217#endif 218 if (!f_mmap || f_stdin || f_statistic) 219 search_fopen(path_fcodes, argv); 220 else 221 search_mmap(path_fcodes, argv); 222 } 223 224 if (f_silent) 225 print_matches(counter); 226 exit(0); 227} 228 229 230void 231search_fopen(db, s) 232 char *db; /* database */ 233 char **s; /* search strings */ 234{ 235 FILE *fp; 236#ifdef DEBUG 237 long t0; 238#endif 239 240 /* can only read stdin once */ 241 if (f_stdin) { 242 fp = stdin; 243 if (*(s+1) != NULL) { 244 warnx("read database from stdin, use only `%s' as pattern", *s); 245 *(s+1) = NULL; 246 } 247 } 248 else if ((fp = fopen(db, "r")) == NULL) 249 err(1, "`%s'", db); 250 251 /* count only chars or lines */ 252 if (f_statistic) { 253 statistic(fp, db); 254 (void)fclose(fp); 255 return; 256 } 257 258 /* foreach search string ... */ 259 while(*s != NULL) { 260#ifdef DEBUG 261 t0 = cputime(); 262#endif 263 if (!f_stdin && 264 fseek(fp, (long)0, SEEK_SET) == -1) 265 err(1, "fseek to begin of ``%s''\n", db); 266 267 if (f_icase) 268 fastfind_icase(fp, *s, db); 269 else 270 fastfind(fp, *s, db); 271#ifdef DEBUG 272 warnx("fastfind %ld ms", cputime () - t0); 273#endif 274 s++; 275 } 276 (void)fclose(fp); 277} 278 279#ifdef MMAP 280void 281search_mmap(db, s) 282 char *db; /* database */ 283 char **s; /* search strings */ 284{ 285 struct stat sb; 286 int fd; 287 caddr_t p; 288 off_t len; 289#ifdef DEBUG 290 long t0; 291#endif 292 if ((fd = open(db, O_RDONLY)) == -1 || 293 fstat(fd, &sb) == -1) 294 err(1, "`%s'", db); 295 len = sb.st_size; 296 if (len < (2*NBG)) 297 errx(1, 298 "database too small: %s\nRun /usr/libexec/locate.updatedb", 299 db); 300 301 if ((p = mmap((caddr_t)0, (size_t)len, 302 PROT_READ, MAP_SHARED, 303 fd, (off_t)0)) == MAP_FAILED) 304 err(1, "mmap ``%s''", db); 305 306 /* foreach search string ... */ 307 while (*s != NULL) { 308#ifdef DEBUG 309 t0 = cputime(); 310#endif 311 if (f_icase) 312 fastfind_mmap_icase(*s, p, (int)len, db); 313 else 314 fastfind_mmap(*s, p, (int)len, db); 315#ifdef DEBUG 316 warnx("fastfind %ld ms", cputime () - t0); 317#endif 318 s++; 319 } 320 321 if (munmap(p, (size_t)len) == -1) 322 warn("munmap %s\n", db); 323 324 (void)close(fd); 325} 326#endif /* MMAP */ 327 328#ifdef DEBUG 329unsigned long 330cputime () 331{ 332 struct rusage rus; 333 334 getrusage(RUSAGE_SELF, &rus); 335 return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); 336} 337#endif /* DEBUG */ 338 339void 340usage () 341{ 342 (void)fprintf(stderr, 343 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 344 (void)fprintf(stderr, 345 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 346 exit(1); 347} 348 349 350/* load fastfind functions */ 351 352/* statistic */ 353/* fastfind_mmap, fastfind_mmap_icase */ 354#ifdef MMAP 355#undef FF_MMAP 356#undef FF_ICASE 357 358#define FF_MMAP 359#include "fastfind.c" 360#define FF_ICASE 361#include "fastfind.c" 362#endif /* MMAP */ 363 364/* fopen */ 365/* fastfind, fastfind_icase */ 366#undef FF_MMAP 367#undef FF_ICASE 368#include "fastfind.c" 369#define FF_ICASE 370#include "fastfind.c" 371