1/*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
2/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
3
4/*-
5 * SPDX-License-Identifier: BSD-2-Clause
6 *
 * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
8 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
9 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/param.h>
35#include <sys/mman.h>
36#include <sys/stat.h>
37#include <sys/types.h>
38
39#include <err.h>
40#include <errno.h>
41#include <fcntl.h>
42#include <stddef.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <wchar.h>
47#include <wctype.h>
48
49#include "grep.h"
50
/* Number of bytes requested per read() and size of the malloc'ed read buffer. */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra room asked for when the line buffer is grown for a partial line. */
#define	LNBUFBUMP	80

/*
 * Read buffer state shared by every file processed:
 *   buffer - start of the read buffer (heap block or file mapping)
 *   bufpos - next unconsumed byte inside the read buffer
 *   bufrem - number of unconsumed bytes starting at bufpos
 *   fsiz   - length handed to mmap(), reused for the matching munmap()
 */
static char *buffer, *bufpos;
static size_t bufrem, fsiz;

/* Line buffer lines are copied into, together with its current capacity. */
static char *lnbuf;
static size_t lnbuflen;
61
62static inline int
63grep_refill(struct file *f)
64{
65	ssize_t nr;
66
67	if (filebehave == FILE_MMAP)
68		return (0);
69
70	bufpos = buffer;
71	bufrem = 0;
72
73	nr = read(f->fd, buffer, MAXBUFSIZ);
74	if (nr < 0 && errno == EISDIR)
75		nr = 0;
76	if (nr < 0)
77		return (-1);
78
79	bufrem = nr;
80	return (0);
81}
82
83static inline int
84grep_lnbufgrow(size_t newlen)
85{
86
87	if (lnbuflen < newlen) {
88		lnbuf = grep_realloc(lnbuf, newlen);
89		lnbuflen = newlen;
90	}
91
92	return (0);
93}
94
95char *
96grep_fgetln(struct file *f, struct parsec *pc)
97{
98	char *p;
99	size_t len;
100	size_t off;
101	ptrdiff_t diff;
102
103	/* Fill the buffer, if necessary */
104	if (bufrem == 0 && grep_refill(f) != 0)
105		goto error;
106
107	if (bufrem == 0) {
108		/* Return zero length to indicate EOF */
109		pc->ln.len= 0;
110		return (bufpos);
111	}
112
113	/* Look for a newline in the remaining part of the buffer */
114	if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) {
115		++p; /* advance over newline */
116		len = p - bufpos;
117		if (grep_lnbufgrow(len + 1))
118			goto error;
119		memcpy(lnbuf, bufpos, len);
120		bufrem -= len;
121		bufpos = p;
122		pc->ln.len = len;
123		lnbuf[len] = '\0';
124		return (lnbuf);
125	}
126
127	/* We have to copy the current buffered data to the line buffer */
128	for (len = bufrem, off = 0; ; len += bufrem) {
129		/* Make sure there is room for more data */
130		if (grep_lnbufgrow(len + LNBUFBUMP))
131			goto error;
132		memcpy(lnbuf + off, bufpos, len - off);
133		/* With FILE_MMAP, this is EOF; there's no more to refill */
134		if (filebehave == FILE_MMAP) {
135			bufrem -= len;
136			break;
137		}
138		off = len;
139		/* Fetch more to try and find EOL/EOF */
140		if (grep_refill(f) != 0)
141			goto error;
142		if (bufrem == 0)
143			/* EOF: return partial line */
144			break;
145		if ((p = memchr(bufpos, fileeol, bufrem)) == NULL)
146			continue;
147		/* got it: finish up the line (like code above) */
148		++p;
149		diff = p - bufpos;
150		len += diff;
151		if (grep_lnbufgrow(len + 1))
152		    goto error;
153		memcpy(lnbuf + off, bufpos, diff);
154		bufrem -= diff;
155		bufpos = p;
156		break;
157	}
158	pc->ln.len = len;
159	lnbuf[len] = '\0';
160	return (lnbuf);
161
162error:
163	pc->ln.len = 0;
164	return (NULL);
165}
166
167/*
168 * Opens a file for processing.
169 */
170struct file *
171grep_open(const char *path)
172{
173	struct file *f;
174
175	f = grep_malloc(sizeof *f);
176	memset(f, 0, sizeof *f);
177	if (path == NULL) {
178		/* Processing stdin implies --line-buffered. */
179		lbflag = true;
180		f->fd = STDIN_FILENO;
181	} else if ((f->fd = open(path, O_RDONLY)) == -1)
182		goto error1;
183
184	if (filebehave == FILE_MMAP) {
185		struct stat st;
186
187		if (fstat(f->fd, &st) == -1 || !S_ISREG(st.st_mode))
188			filebehave = FILE_STDIO;
189		else {
190			int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
191#ifdef MAP_PREFAULT_READ
192			flags |= MAP_PREFAULT_READ;
193#endif
194			fsiz = st.st_size;
195			buffer = mmap(NULL, fsiz, PROT_READ, flags,
196			     f->fd, (off_t)0);
197			if (buffer == MAP_FAILED)
198				filebehave = FILE_STDIO;
199			else {
200				bufrem = st.st_size;
201				bufpos = buffer;
202				madvise(buffer, st.st_size, MADV_SEQUENTIAL);
203			}
204		}
205	}
206
207	if ((buffer == NULL) || (buffer == MAP_FAILED))
208		buffer = grep_malloc(MAXBUFSIZ);
209
210	/* Fill read buffer, also catches errors early */
211	if (bufrem == 0 && grep_refill(f) != 0)
212		goto error2;
213
214	/* Check for binary stuff, if necessary */
215	if (binbehave != BINFILE_TEXT && fileeol != '\0' &&
216	    memchr(bufpos, '\0', bufrem) != NULL)
217		f->binary = true;
218
219	return (f);
220
221error2:
222	close(f->fd);
223error1:
224	free(f);
225	return (NULL);
226}
227
228/*
229 * Closes a file.
230 */
231void
232grep_close(struct file *f)
233{
234
235	close(f->fd);
236
237	/* Reset read buffer and line buffer */
238	if (filebehave == FILE_MMAP) {
239		munmap(buffer, fsiz);
240		buffer = NULL;
241	}
242	bufpos = buffer;
243	bufrem = 0;
244
245	free(lnbuf);
246	lnbuf = NULL;
247	lnbuflen = 0;
248}
249