1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/mman.h>
46#include <sys/proc.h>
47#include <sys/resourcevar.h>
48#include <sys/rwlock.h>
49#include <sys/sched.h>
50#include <sys/sysctl.h>
51#include <sys/vnode.h>
52
53#include <vm/vm.h>
54#include <vm/vm_param.h>
55#include <vm/vm_extern.h>
56#include <vm/vm_page.h>
57#include <vm/vm_pageout.h>
58#include <vm/vm_map.h>
59
/*
 * Read-only (CTLFLAG_RD) integer sysctl "iov_max" under the _kern node,
 * reporting the compile-time constant UIO_MAXIOV.
 */
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

/*
 * Shared implementation behind uiomove() and uiomove_nofault(); the last
 * argument selects the no-fault behaviour.
 */
static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
64
/*
 * Copy len bytes from the user address udaddr to the kernel address kaddr
 * while page faults are disabled.  The previous fault-handling state is
 * put back before the result of copyin() is returned.
 */
int
copyin_nofault(const void *udaddr, void *kaddr, size_t len)
{
	int prev_state, rv;

	prev_state = vm_fault_disable_pagefaults();
	rv = copyin(udaddr, kaddr, len);
	vm_fault_enable_pagefaults(prev_state);

	return (rv);
}
75
/*
 * Copy len bytes from the kernel address kaddr to the user address udaddr
 * while page faults are disabled.  The previous fault-handling state is
 * put back before the result of copyout() is returned.
 */
int
copyout_nofault(const void *kaddr, void *udaddr, size_t len)
{
	int prev_state, rv;

	prev_state = vm_fault_disable_pagefaults();
	rv = copyout(kaddr, udaddr, len);
	vm_fault_enable_pagefaults(prev_state);

	return (rv);
}
86
/*
 * Number of vm_page_t slots used for a transfer of len bytes: len rounded
 * up to whole pages, plus one.
 * NOTE(review): the extra slot presumably covers a transfer that does not
 * start on a page boundary and therefore touches one more page -- confirm.
 */
#define	PHYS_PAGE_COUNT(len)	(howmany(len, PAGE_SIZE) + 1)
88
89int
90physcopyin(void *src, vm_paddr_t dst, size_t len)
91{
92	vm_page_t m[PHYS_PAGE_COUNT(len)];
93	struct iovec iov[1];
94	struct uio uio;
95	int i;
96
97	iov[0].iov_base = src;
98	iov[0].iov_len = len;
99	uio.uio_iov = iov;
100	uio.uio_iovcnt = 1;
101	uio.uio_offset = 0;
102	uio.uio_resid = len;
103	uio.uio_segflg = UIO_SYSSPACE;
104	uio.uio_rw = UIO_WRITE;
105	for (i = 0; i < PHYS_PAGE_COUNT(len); i++, dst += PAGE_SIZE)
106		m[i] = PHYS_TO_VM_PAGE(dst);
107	return (uiomove_fromphys(m, dst & PAGE_MASK, len, &uio));
108}
109
110int
111physcopyout(vm_paddr_t src, void *dst, size_t len)
112{
113	vm_page_t m[PHYS_PAGE_COUNT(len)];
114	struct iovec iov[1];
115	struct uio uio;
116	int i;
117
118	iov[0].iov_base = dst;
119	iov[0].iov_len = len;
120	uio.uio_iov = iov;
121	uio.uio_iovcnt = 1;
122	uio.uio_offset = 0;
123	uio.uio_resid = len;
124	uio.uio_segflg = UIO_SYSSPACE;
125	uio.uio_rw = UIO_READ;
126	for (i = 0; i < PHYS_PAGE_COUNT(len); i++, src += PAGE_SIZE)
127		m[i] = PHYS_TO_VM_PAGE(src);
128	return (uiomove_fromphys(m, src & PAGE_MASK, len, &uio));
129}
130
131#undef PHYS_PAGE_COUNT
132
/*
 * Move up to n bytes between the kernel buffer cp and the space described
 * by uio, with the no-fault option of uiomove_faultflag() switched off.
 */
int
uiomove(void *cp, int n, struct uio *uio)
{
	const int nofault = 0;

	return (uiomove_faultflag(cp, n, uio, nofault));
}
139
/*
 * Move up to n bytes between the kernel buffer cp and the space described
 * by uio, with the no-fault option of uiomove_faultflag() switched on.
 */
int
uiomove_nofault(void *cp, int n, struct uio *uio)
{
	const int nofault = 1;

	return (uiomove_faultflag(cp, n, uio, nofault));
}
146
/*
 * Move up to n bytes between the kernel buffer cp and the iovec array
 * described by uio.
 *
 * The direction comes from uio->uio_rw: UIO_READ copies from cp into the
 * iovecs, UIO_WRITE copies from the iovecs into cp.  uio->uio_segflg picks
 * the copy primitive: copyout()/copyin() for UIO_USERSPACE, bcopy() for
 * UIO_SYSSPACE, and no data movement at all for UIO_NOCOPY (the
 * bookkeeping below is still advanced).
 *
 * When nofault is non-zero the WITNESS sleep warning is skipped and, for
 * UIO_USERSPACE, TDP_NOFAULTING | TDP_RESETSPUR are added to the thread
 * flags for the duration of the transfer.
 *
 * Returns 0, or the error reported by copyin()/copyout().  On error the
 * chunk that failed is not accounted for in uio; earlier chunks are.
 */
static int
uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
{
	struct thread *td;
	struct iovec *iov;
	size_t cnt;
	int error, newflags, save;

	td = curthread;
	error = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
	    ("uiomove proc"));
	if (!nofault)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
		    "Calling uiomove()");

	/* XXX does it make sense to set TDP_DEADLKTREAT for UIO_SYSSPACE? */
	newflags = TDP_DEADLKTREAT;
	if (uio->uio_segflg == UIO_USERSPACE && nofault) {
		/*
		 * Fail if a non-spurious page fault occurs.
		 */
		newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
	}
	/* Remember the previous flag state so it can be restored at "out". */
	save = curthread_pflags_set(newflags);

	/* Stop once the caller's count or the uio's residual is exhausted. */
	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			/* Current segment is used up; step to the next one. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		/* Never move more than the caller asked for. */
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			maybe_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			/* Bail out before the accounting below is touched. */
			if (error)
				goto out;
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		/* Advance the segment, the uio totals and the kernel cursor. */
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp = (char *)cp + cnt;
		n -= cnt;
	}
out:
	curthread_pflags_restore(save);
	return (error);
}
219
220/*
221 * Wrapper for uiomove() that validates the arguments against a known-good
222 * kernel buffer.  Currently, uiomove accepts a signed (n) argument, which
223 * is almost definitely a bad thing, so we catch that here as well.  We
224 * return a runtime failure, but it might be desirable to generate a runtime
225 * assertion failure instead.
226 */
227int
228uiomove_frombuf(void *buf, int buflen, struct uio *uio)
229{
230	size_t offset, n;
231
232	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
233	    (offset = uio->uio_offset) != uio->uio_offset)
234		return (EINVAL);
235	if (buflen <= 0 || offset >= buflen)
236		return (0);
237	if ((n = buflen - offset) > IOSIZE_MAX)
238		return (EINVAL);
239	return (uiomove((char *)buf + offset, n, uio));
240}
241
242/*
243 * Give next character to user as result of read.
244 */
245int
246ureadc(int c, struct uio *uio)
247{
248	struct iovec *iov;
249	char *iov_base;
250
251	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
252	    "Calling ureadc()");
253
254again:
255	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
256		panic("ureadc");
257	iov = uio->uio_iov;
258	if (iov->iov_len == 0) {
259		uio->uio_iovcnt--;
260		uio->uio_iov++;
261		goto again;
262	}
263	switch (uio->uio_segflg) {
264
265	case UIO_USERSPACE:
266		if (subyte(iov->iov_base, c) < 0)
267			return (EFAULT);
268		break;
269
270	case UIO_SYSSPACE:
271		iov_base = iov->iov_base;
272		*iov_base = c;
273		break;
274
275	case UIO_NOCOPY:
276		break;
277	}
278	iov->iov_base = (char *)iov->iov_base + 1;
279	iov->iov_len--;
280	uio->uio_resid--;
281	uio->uio_offset++;
282	return (0);
283}
284
285int
286copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
287    int seg)
288{
289	int error = 0;
290
291	switch (seg) {
292	case UIO_USERSPACE:
293		error = copyin(src, dst, len);
294		break;
295	case UIO_SYSSPACE:
296		bcopy(src, dst, len);
297		break;
298	default:
299		panic("copyinfrom: bad seg %d\n", seg);
300	}
301	return (error);
302}
303
304int
305copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
306    size_t * __restrict copied, int seg)
307{
308	int error = 0;
309
310	switch (seg) {
311	case UIO_USERSPACE:
312		error = copyinstr(src, dst, len, copied);
313		break;
314	case UIO_SYSSPACE:
315		error = copystr(src, dst, len, copied);
316		break;
317	default:
318		panic("copyinstrfrom: bad seg %d\n", seg);
319	}
320	return (error);
321}
322
323int
324copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
325{
326	u_int iovlen;
327
328	*iov = NULL;
329	if (iovcnt > UIO_MAXIOV)
330		return (error);
331	iovlen = iovcnt * sizeof (struct iovec);
332	*iov = malloc(iovlen, M_IOV, M_WAITOK);
333	error = copyin(iovp, *iov, iovlen);
334	if (error) {
335		free(*iov, M_IOV);
336		*iov = NULL;
337	}
338	return (error);
339}
340
341int
342copyinuio(const struct iovec *iovp, u_int iovcnt, struct uio **uiop)
343{
344	struct iovec *iov;
345	struct uio *uio;
346	u_int iovlen;
347	int error, i;
348
349	*uiop = NULL;
350	if (iovcnt > UIO_MAXIOV)
351		return (EINVAL);
352	iovlen = iovcnt * sizeof (struct iovec);
353	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
354	iov = (struct iovec *)(uio + 1);
355	error = copyin(iovp, iov, iovlen);
356	if (error) {
357		free(uio, M_IOV);
358		return (error);
359	}
360	uio->uio_iov = iov;
361	uio->uio_iovcnt = iovcnt;
362	uio->uio_segflg = UIO_USERSPACE;
363	uio->uio_offset = -1;
364	uio->uio_resid = 0;
365	for (i = 0; i < iovcnt; i++) {
366		if (iov->iov_len > IOSIZE_MAX - uio->uio_resid) {
367			free(uio, M_IOV);
368			return (EINVAL);
369		}
370		uio->uio_resid += iov->iov_len;
371		iov++;
372	}
373	*uiop = uio;
374	return (0);
375}
376
377struct uio *
378cloneuio(struct uio *uiop)
379{
380	struct uio *uio;
381	int iovlen;
382
383	iovlen = uiop->uio_iovcnt * sizeof (struct iovec);
384	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
385	*uio = *uiop;
386	uio->uio_iov = (struct iovec *)(uio + 1);
387	bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
388	return (uio);
389}
390
391/*
392 * Map some anonymous memory in user space of size sz, rounded up to the page
393 * boundary.
394 */
395int
396copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
397{
398	struct vmspace *vms;
399	int error;
400	vm_size_t size;
401
402	vms = td->td_proc->p_vmspace;
403
404	/*
405	 * Map somewhere after heap in process memory.
406	 */
407	PROC_LOCK(td->td_proc);
408	*addr = round_page((vm_offset_t)vms->vm_daddr +
409	    lim_max(td->td_proc, RLIMIT_DATA));
410	PROC_UNLOCK(td->td_proc);
411
412	/* round size up to page boundry */
413	size = (vm_size_t)round_page(sz);
414
415	error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
416	    VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
417
418	return (error);
419}
420
421/*
422 * Unmap memory in user space.
423 */
424int
425copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
426{
427	vm_map_t map;
428	vm_size_t size;
429
430	if (sz == 0)
431		return (0);
432
433	map = &td->td_proc->p_vmspace->vm_map;
434	size = (vm_size_t)round_page(sz);
435
436	if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
437		return (EINVAL);
438
439	return (0);
440}
441