/*
 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	File:	ubc_subr.c
 *	Author:	Umesh Vaishampayan [umeshv@apple.com]
 *		05-Aug-1999	umeshv	Created.
 *
 *	Functions related to Unified Buffer cache.
 *
 * Caller of UBC functions MUST have a valid reference on the vnode.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/ubc_internal.h>
#include <sys/ucred.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/codesign.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_control.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/upl.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h> /* last */

#include <libkern/crypto/sha1.h>

#include <security/mac_framework.h>

/* XXX These should be in a BSD accessible Mach header, but aren't. */
extern kern_return_t memory_object_pages_resident(memory_object_control_t,
							boolean_t *);
extern kern_return_t	memory_object_signed(memory_object_control_t control,
					     boolean_t is_signed);
extern boolean_t	memory_object_is_slid(memory_object_control_t	control);

extern void Debugger(const char *message);


/* XXX no one uses this interface! */
kern_return_t ubc_page_op_with_control(
	memory_object_control_t	 control,
	off_t		         f_offset,
	int		         ops,
	ppnum_t	                 *phys_entryp,
	int		         *flagsp);


#if DIAGNOSTIC
#if defined(assert)
#undef assert
#endif
#define assert(cond)    \
    ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
#else
#include <kern/assert.h>
#endif /* DIAGNOSTIC */

static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
static int ubc_umcallback(vnode_t, void *);
static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
static void ubc_cs_free(struct ubc_info *uip);

struct zone	*ubc_info_zone;


/*
 * CODESIGNING
 * Routines to navigate code signing data structures in the kernel...
 */

extern int cs_debug;

static boolean_t
cs_valid_range(
	const void *start,
	const void *end,
	const void *lower_bound,
	const void *upper_bound)
{
	if (upper_bound < lower_bound ||
	    end < start) {
		return FALSE;
	}

	if (start < lower_bound ||
	    end > upper_bound) {
		return FALSE;
	}

	return TRUE;
}

/*
 * Magic numbers used by Code Signing
 */
enum {
	CSMAGIC_REQUIREMENT = 0xfade0c00,		/* single Requirement blob */
	CSMAGIC_REQUIREMENTS = 0xfade0c01,		/* Requirements vector (internal requirements) */
	CSMAGIC_CODEDIRECTORY = 0xfade0c02,		/* CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02,	/* XXX */
	CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171,	/* embedded entitlements */
	CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */

	CSSLOT_CODEDIRECTORY = 0,				/* slot index for CodeDirectory */
	CSSLOT_ENTITLEMENTS = 5
};

static const uint32_t supportsScatter = 0x20100;	// first version to support scatter option

/*
 * Structure of an embedded-signature SuperBlob
 */
typedef struct __BlobIndex {
	uint32_t type;					/* type of entry */
	uint32_t offset;				/* offset of entry */
} CS_BlobIndex;

typedef struct __SuperBlob {
	uint32_t magic;					/* magic number */
	uint32_t length;				/* total length of SuperBlob */
	uint32_t count;					/* number of index entries following */
	CS_BlobIndex index[];			/* (count) entries */
	/* followed by Blobs in no particular order as indicated by offsets in index */
} CS_SuperBlob;

typedef struct __GenericBlob {
	uint32_t magic;				/* magic number */
	uint32_t length;			/* total length of blob */
	char data[];
} CS_GenericBlob;

struct Scatter {
	uint32_t count;			// number of pages; zero for sentinel (only)
	uint32_t base;			// first page number
	uint64_t targetOffset;		// offset in target
	uint64_t spare;			// reserved
};

/*
 * C form of a CodeDirectory.
 */
typedef struct __CodeDirectory {
	uint32_t magic;					/* magic number (CSMAGIC_CODEDIRECTORY) */
	uint32_t length;				/* total length of CodeDirectory blob */
	uint32_t version;				/* compatibility version */
	uint32_t flags;					/* setup and mode flags */
	uint32_t hashOffset;			/* offset of hash slot element at index zero */
	uint32_t identOffset;			/* offset of identifier string */
	uint32_t nSpecialSlots;			/* number of special hash slots */
	uint32_t nCodeSlots;			/* number of ordinary (code) hash slots */
	uint32_t codeLimit;				/* limit to main image signature range */
	uint8_t hashSize;				/* size of each hash in bytes */
	uint8_t hashType;				/* type of hash (cdHashType* constants) */
	uint8_t spare1;					/* unused (must be zero) */
	uint8_t	pageSize;				/* log2(page size in bytes); 0 => infinite */
	uint32_t spare2;				/* unused (must be zero) */
	/* Version 0x20100 */
	uint32_t scatterOffset;				/* offset of optional scatter vector */
	/* followed by dynamic content as located by offset fields above */
} CS_CodeDirectory;
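
/*
 * Note: all multi-byte fields in the blob structures above are stored
 * big-endian on disk, so every in-kernel read goes through ntohl(), as
 * the routines below do.  A minimal illustrative sketch (not itself a
 * kernel interface):
 *
 *	const CS_CodeDirectory *cd = ...;	// validated elsewhere
 *	uint32_t nslots = ntohl(cd->nCodeSlots);
 */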


/*
 * Locate the CodeDirectory from an embedded signature blob
 */
static const
CS_CodeDirectory *findCodeDirectory(
	const CS_SuperBlob *embedded,
	char *lower_bound,
	char *upper_bound)
{
	const CS_CodeDirectory *cd = NULL;

	if (embedded &&
	    cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
	    ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
		const CS_BlobIndex *limit;
		const CS_BlobIndex *p;

		limit = &embedded->index[ntohl(embedded->count)];
		if (!cs_valid_range(&embedded->index[0], limit,
				    lower_bound, upper_bound)) {
			return NULL;
		}
		for (p = embedded->index; p < limit; ++p) {
			if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
				const unsigned char *base;

				base = (const unsigned char *)embedded;
				cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
				break;
			}
		}
	} else {
		/*
		 * Detached signatures come as a bare CS_CodeDirectory,
		 * without a blob.
		 */
		cd = (const CS_CodeDirectory *) embedded;
	}

	if (cd &&
	    cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd +
			   ntohl(cd->hashOffset) +
			   (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
			   lower_bound, upper_bound) &&

	    ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
		return cd;
	}

	// not found or not a valid code directory
	return NULL;
}


/*
 * Locating a page hash
 */
static const unsigned char *
hashes(
	const CS_CodeDirectory *cd,
	unsigned page,
	char *lower_bound,
	char *upper_bound)
{
	const unsigned char *base, *top, *hash;
	uint32_t nCodeSlots = ntohl(cd->nCodeSlots);

	assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));

	if ((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
		/* Get first scatter struct */
		const struct Scatter *scatter = (const struct Scatter *)
			((const char *)cd + ntohl(cd->scatterOffset));
		uint32_t hashindex = 0, scount, sbase = 0;
		/* iterate all scatter structs */
		do {
			if ((const char *)scatter > (const char *)cd + ntohl(cd->length)) {
				if (cs_debug) {
					printf("CODE SIGNING: Scatter extends past Code Directory\n");
				}
				return NULL;
			}

			scount = ntohl(scatter->count);
			uint32_t new_base = ntohl(scatter->base);

			/* last scatter? */
			if (scount == 0) {
				return NULL;
			}

			if ((hashindex > 0) && (new_base <= sbase)) {
				if (cs_debug) {
					printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n",
					       sbase, new_base);
				}
				return NULL;	/* unordered scatter array */
			}
			sbase = new_base;

			/* this scatter beyond page we're looking for? */
			if (sbase > page) {
				return NULL;
			}

			if (sbase + scount >= page) {
				/* Found the scatter struct that is
				 * referencing our page */

				/* base = address of first hash covered by scatter */
				base = (const unsigned char *)cd + ntohl(cd->hashOffset) +
					hashindex * SHA1_RESULTLEN;
				/* top = address of first hash after this scatter */
				top = base + scount * SHA1_RESULTLEN;
				if (!cs_valid_range(base, top, lower_bound,
						    upper_bound) ||
				    hashindex > nCodeSlots) {
					return NULL;
				}

				break;
			}

			/* this scatter struct is before the page we're looking
			 * for. Iterate. */
			hashindex += scount;
			scatter++;
		} while (1);

		hash = base + (page - sbase) * SHA1_RESULTLEN;
	} else {
		base = (const unsigned char *)cd + ntohl(cd->hashOffset);
		top = base + nCodeSlots * SHA1_RESULTLEN;
		if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
		    page > nCodeSlots) {
			return NULL;
		}
		assert(page < nCodeSlots);

		hash = base + page * SHA1_RESULTLEN;
	}

	if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
			    lower_bound, upper_bound)) {
		hash = NULL;
	}

	return hash;
}
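
/*
 * Illustrative sketch (not one of this file's interfaces) of how a caller
 * would derive the page index passed to hashes(); this assumes the usual
 * case where cd->pageSize holds the log2 of the signing page size (e.g.
 * 12 for 4 KiB pages):
 *
 *	unsigned page = (unsigned)(file_offset >> cd->pageSize);
 *	const unsigned char *h = hashes(cd, page, lower_bound, upper_bound);
 *	// h is NULL, or points at SHA1_RESULTLEN bytes of hash for that page
 */
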
/*
 * CODESIGNING
 * End of routines to navigate code signing data structures in the kernel.
 */

/*
 * ENTITLEMENTS
 * Routines to navigate entitlements in the kernel.
 */

/* Retrieve the entitlements blob for a process.
 * Returns:
 *   EINVAL	no text vnode associated with the process
 *   EBADEXEC   invalid code signing data
 *   ENOMEM	you should reboot
 *   0		no error occurred
 *
 * On success, out_start and out_length will point to the
 * entitlements blob if found; or will be set to NULL/zero
 * if there were no entitlements.
 */
int
cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
{
	SHA1_CTX context;			/* XXX hash agility */
	int error = 0;
	struct cs_blob *blob_list_entry;
	CS_SuperBlob *super_blob;
	CS_BlobIndex *blob_index;
	CS_GenericBlob *blob;
	CS_CodeDirectory *code_dir;
	unsigned char *computed_hash = NULL;
	unsigned char *embedded_hash = NULL;
	void *start = NULL;
	size_t length = 0;
	size_t hash_size = 0;
	unsigned int i, count;

	if (NULL == p->p_textvp) {
		error = EINVAL;
		goto out;
	}
	if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1,
	    p->p_textoff)))
		goto out;
	super_blob = (void *)blob_list_entry->csb_mem_kaddr;
	if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) {
		error = EBADEXEC;
		goto out;
	}
	count = ntohl(super_blob->count);
	for (i = 0; i < count; ++i) {
		blob_index = &super_blob->index[i];
		blob = (void *)((char *)super_blob + ntohl(blob_index->offset));
		switch (ntohl(blob_index->type)) {
		case CSSLOT_CODEDIRECTORY:
			if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic))
				break;
			code_dir = (void *)blob;
			hash_size = code_dir->hashSize;
			if (CSSLOT_ENTITLEMENTS <=
			    ntohl(code_dir->nSpecialSlots)) {
				embedded_hash = (void *)((char *)code_dir +
				    ntohl(code_dir->hashOffset) -
				    (hash_size * CSSLOT_ENTITLEMENTS));
			}
			break;
		case CSSLOT_ENTITLEMENTS:
			if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic))
				break;
			start = (void *)blob;
			length = ntohl(blob->length);
			break;
		default:
			break;
		}
	}
	if (NULL == start && NULL == embedded_hash) {
		error = 0;
		goto out;
	} else if (NULL == start || NULL == embedded_hash) {
		error = EBADEXEC;
		goto out;
	}
	if (NULL == (computed_hash = kalloc(hash_size))) {
		error = ENOMEM;
		goto out;
	}
#ifndef __arm__
	SHA1Init(&context);
	SHA1Update(&context, start, length);
	SHA1Final(computed_hash, &context);
	if (0 != memcmp(computed_hash, embedded_hash, hash_size)) {
		error = EBADEXEC;
		goto out;
	}
#endif
	error = 0;
out:
	if (NULL != computed_hash)
		kfree(computed_hash, hash_size);
	if (0 == error) {
		*out_start = start;
		*out_length = length;
	}
	return error;
}
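
/*
 * Illustrative caller sketch (hypothetical; p is a proc_t the caller
 * already holds a reference on):
 *
 *	void *ents;
 *	size_t ents_len;
 *	if (0 == cs_entitlements_blob_get(p, &ents, &ents_len) &&
 *	    ents != NULL) {
 *		// ents points at a CS_GenericBlob of ents_len bytes,
 *		// fields in network byte order; it is valid only while
 *		// the process text vnode's cs_blob stays attached.
 *	}
 */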

/*
 * ENTITLEMENTS
 * End of routines to navigate entitlements in the kernel.
 */



/*
 * ubc_init
 *
 * Initialization of the zone for Unified Buffer Cache.
 *
 * Parameters:	(void)
 *
 * Returns:	(void)
 *
 * Implicit returns:
 *		ubc_info_zone(global)	initialized for subsequent allocations
 */
__private_extern__ void
ubc_init(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct ubc_info);

	ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");

	zone_change(ubc_info_zone, Z_NOENCRYPT, TRUE);
}


/*
 * ubc_info_init
 *
 * Allocate and attach an empty ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 *
 */
int
ubc_info_init(struct vnode *vp)
{
	return(ubc_info_init_internal(vp, 0, 0));
}


/*
 * ubc_info_init_withsize
 *
 * Allocate and attach a sized ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		filesize		The size of the file
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 */
int
ubc_info_init_withsize(struct vnode *vp, off_t filesize)
{
	return(ubc_info_init_internal(vp, 1, filesize));
}


/*
 * ubc_info_init_internal
 *
 * Allocate and attach a ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		withfsize{0,1}		Zero if the size should be obtained
 *					from the vnode; otherwise, use filesize
 *		filesize		The size of the file, if withfsize == 1
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 *
 * Notes:	We call a blocking zalloc(), and the zone was created as an
 *		expandable and collectable zone, so if no memory is available,
 *		it is possible for zalloc() to block indefinitely.  zalloc()
 *		may also panic if the zone of zones is exhausted, since it's
 *		NOT expandable.
 *
 *		We unconditionally call vnode_pager_setup(), even if this is
 *		a reuse of a ubc_info; in that case, we should probably assert
 *		that it does not already have a pager association, but do not.
 *
 *		Since memory_object_create_named() can only fail from receiving
 *		an invalid pager argument, the explicit check and panic is
 *		merely precautionary.
 */
static int
ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
{
	register struct ubc_info	*uip;
	void *  pager;
	int error = 0;
	kern_return_t kret;
	memory_object_control_t control;

	uip = vp->v_ubcinfo;

	/*
	 * If there is not already a ubc_info attached to the vnode, we
	 * attach one; otherwise, we will reuse the one that's there.
	 */
	if (uip == UBC_INFO_NULL) {

		uip = (struct ubc_info *) zalloc(ubc_info_zone);
		bzero((char *)uip, sizeof(struct ubc_info));

		uip->ui_vnode = vp;
		uip->ui_flags = UI_INITED;
		uip->ui_ucred = NOCRED;
	}
	assert(uip->ui_flags != UI_NONE);
	assert(uip->ui_vnode == vp);

	/* now set this ubc_info in the vnode */
	vp->v_ubcinfo = uip;

	/*
	 * Allocate a pager object for this vnode
	 *
	 * XXX The value of the pager parameter is currently ignored.
	 * XXX Presumably, this API changed to avoid the race between
	 * XXX setting the pager and the UI_HASPAGER flag.
	 */
	pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
	assert(pager);

	/*
	 * Explicitly set the pager into the ubc_info, after setting the
	 * UI_HASPAGER flag.
	 */
	SET(uip->ui_flags, UI_HASPAGER);
	uip->ui_pager = pager;

	/*
	 * Note: We cannot use VNOP_GETATTR() to get an accurate
	 * value of ui_size because this may be an NFS vnode, and
	 * nfs_getattr() can call vinvalbuf(); if this happens,
	 * ubc_info is not set up to deal with that event.
	 * So use bogus size.
	 */

	/*
	 * create a vnode - vm_object association
	 * memory_object_create_named() creates a "named" reference on the
	 * memory object; we hold this reference as long as the vnode is
	 * "alive."  Since memory_object_create_named() took its own reference
	 * on the vnode pager we passed it, we can drop the reference
	 * vnode_pager_setup() returned here.
	 */
	kret = memory_object_create_named(pager,
		(memory_object_size_t)uip->ui_size, &control);
	vnode_pager_deallocate(pager);
	if (kret != KERN_SUCCESS)
		panic("ubc_info_init: memory_object_create_named returned %d", kret);

	assert(control);
	uip->ui_control = control;	/* cache the value of the mo control */
	SET(uip->ui_flags, UI_HASOBJREF);	/* with a named reference */

	if (withfsize == 0) {
		/* initialize the size */
		error = vnode_size(vp, &uip->ui_size, vfs_context_current());
		if (error)
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
	vp->v_lflag |= VNAMED_UBC;	/* vnode has a named ubc reference */

	return (error);
}


/*
 * ubc_info_free
 *
 * Free a ubc_info structure
 *
 * Parameters:	uip			A pointer to the ubc_info to free
 *
 * Returns:	(void)
 *
 * Notes:	If there is a credential that has subsequently been associated
 *		with the ubc_info via a call to ubc_setcred(), the reference
 *		to the credential is dropped.
 *
 *		It's actually impossible for a ubc_info.ui_control to take the
 *		value MEMORY_OBJECT_CONTROL_NULL.
 */
static void
ubc_info_free(struct ubc_info *uip)
{
	if (IS_VALID_CRED(uip->ui_ucred)) {
		kauth_cred_unref(&uip->ui_ucred);
	}

	if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
		memory_object_control_deallocate(uip->ui_control);

	cluster_release(uip);
	ubc_cs_free(uip);

	zfree(ubc_info_zone, uip);
	return;
}


void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
}


/*
 * ubc_setsize
 *
 * Tell the VM that the size of the file represented by the vnode has
 * changed
 *
 * Parameters:	vp			The vp whose backing file size is
 *					being changed
 *		nsize			The new size of the backing file
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	This function will indicate failure if the new size that's
 *		being attempted to be set is negative.
 *
 *		This function will fail if there is no ubc_info currently
 *		associated with the vnode.
 *
 *		This function will indicate success if the new size is the
 *		same or larger than the old size (in this case, the remainder
 *		of the file will require modification or use of an existing upl
 *		to access successfully).
 *
 *		This function will fail if the new file size is smaller and
 *		the memory region being invalidated could not actually be
 *		invalidated, and/or if the last page could not be flushed when
 *		the new size is not aligned to a page boundary.  This is
 *		usually indicative of an I/O error.
 */
int
ubc_setsize(struct vnode *vp, off_t nsize)
{
	off_t osize;	/* ui_size before change */
	off_t lastpg, olastpgend, lastoff;
	struct ubc_info *uip;
	memory_object_control_t control;
	kern_return_t kret = KERN_SUCCESS;

	if (nsize < (off_t)0)
		return (0);

	if (!UBCINFOEXISTS(vp))
		return (0);

	uip = vp->v_ubcinfo;
	osize = uip->ui_size;
	/*
	 * Update the size before flushing the VM
	 */
	uip->ui_size = nsize;

	if (nsize >= osize) {	/* Nothing more to do */
		if (nsize > osize) {
			lock_vnode_and_post(vp, NOTE_EXTEND);
		}

		return (1);		/* return success */
	}

	/*
	 * When the file shrinks, invalidate the pages beyond the
	 * new size. Also get rid of garbage beyond nsize on the
	 * last page. The ui_size already has the nsize, so any
	 * subsequent page-in will zero-fill the tail properly
	 */
	lastpg = trunc_page_64(nsize);
	olastpgend = round_page_64(osize);
	control = uip->ui_control;
	assert(control);
	lastoff = (nsize & PAGE_MASK_64);

	if (lastoff) {
		upl_t		upl;
		upl_page_info_t	*pl;


		/*
		 * new EOF ends up in the middle of a page
		 * zero the tail of this page if it's currently
		 * present in the cache
		 */
		kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);

		if (kret != KERN_SUCCESS)
			panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);

		if (upl_valid_page(pl, 0))
			cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);

		ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

		lastpg += PAGE_SIZE_64;
	}
	if (olastpgend > lastpg) {
		int	flags;

		if (lastpg == 0)
			flags = MEMORY_OBJECT_DATA_FLUSH_ALL;
		else
			flags = MEMORY_OBJECT_DATA_FLUSH;
		/*
		 * invalidate the pages beyond the new EOF page
		 *
		 */
		kret = memory_object_lock_request(control,
						  (memory_object_offset_t)lastpg,
						  (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
						  MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE);
		if (kret != KERN_SUCCESS)
			printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
	}
	return ((kret == KERN_SUCCESS) ? 1 : 0);
}
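
/*
 * Illustrative use from a filesystem truncate path (hypothetical caller;
 * grounded only in the documented return semantics above):
 *
 *	if (!ubc_setsize(vp, new_size)) {
 *		// treat as an I/O error: pages beyond new_size may not
 *		// have been invalidated, so don't commit the new size
 *	}
 */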


/*
 * ubc_getsize
 *
 * Get the size of the file associated with the specified vnode
 *
 * Parameters:	vp			The vnode whose size is of interest
 *
 * Returns:	0			There is no ubc_info associated with
 *					this vnode, or the size is zero
 *		!0			The size of the file
 *
 * Notes:	Using this routine, it is not possible for a caller to
 *		successfully distinguish between a vnode associated with a
 *		zero-length file and a vnode with no associated ubc_info.  The
 *		caller therefore needs to not care, or needs to ensure that
 *		they have previously successfully called ubc_info_init() or
 *		ubc_info_init_withsize().
 */
off_t
ubc_getsize(struct vnode *vp)
{
	/* people depend on the side effect of this working this way
	 * as they call this for directories
	 */
	if (!UBCINFOEXISTS(vp))
		return ((off_t)0);
	return (vp->v_ubcinfo->ui_size);
}


/*
 * ubc_umount
 *
 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
 * mount point
 *
 * Parameters:	mp			The mount point
 *
 * Returns:	0			Success
 *
 * Notes:	There is no failure indication for this function.
 *
 *		This function is used in the unmount path; since it may block
 *		I/O indefinitely, it should not be used in the forced unmount
 *		path, since a device unavailability could also block that
 *		indefinitely.
 *
 *		Because there is no device ejection interlock on USB, FireWire,
 *		or similar devices, it's possible that an ejection that begins
 *		subsequent to the vnode_iterate() completing, either on one of
 *		those devices, or a network mount for which the server quits
 *		responding, etc., may cause the caller to block indefinitely.
 */
__private_extern__ int
ubc_umount(struct mount *mp)
{
	vnode_iterate(mp, 0, ubc_umcallback, 0);
	return(0);
}


/*
 * ubc_umcallback
 *
 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
 * and vnode_iterate() for details of implementation.
 */
static int
ubc_umcallback(vnode_t vp, __unused void * args)
{

	if (UBCINFOEXISTS(vp)) {

		(void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
	}
	return (VNODE_RETURNED);
}


/*
 * ubc_getcred
 *
 * Get the credentials currently active for the ubc_info associated with the
 * vnode.
 *
 * Parameters:	vp			The vnode whose ubc_info credentials
 *					are to be retrieved
 *
 * Returns:	!NOCRED			The credentials
 *		NOCRED			If there is no ubc_info for the vnode,
 *					or if there is one, but it has not had
 *					any credentials associated with it via
 *					a call to ubc_setcred()
 */
kauth_cred_t
ubc_getcred(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_ucred);

	return (NOCRED);
}


/*
 * ubc_setthreadcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the supplied thread; otherwise leave
 * them alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used, if not running on an assumed
 *					credential
 *		thread			The thread whose credentials are to
 *					be used
 *
 * Returns:	1			This vnode has no associated ubc_info
 *		0			Success
 *
 * Notes:	This function takes a proc parameter to account for bootstrap
 *		issues where a task or thread may call this routine, either
 *		before credentials have been initialized by bsd_init(), or if
 *		there is no BSD info associated with a Mach thread yet.  This
 *		is known to happen in both the initial swap and memory mapping
 *		calls.
 *
 *		This function is generally used only in the following cases:
 *
 *		o	a memory mapped file via the mmap() system call
 *		o	a memory mapped file via the deprecated map_fd() call
 *		o	a swap store backing file
 *		o	subsequent to a successful write via vn_write()
 *
 *		The information is then used by the NFS client in order to
 *		cons up a wire message in either the page-in or page-out path.
 *
 *		There are two potential problems with the use of this API:
 *
 *		o	Because the write path only sets it on a successful
 *			write, there is a race window between setting the
 *			credential and its use to evict the pages to the
 *			remote file server
 *
 *		o	Because a page-in may occur prior to a write, the
 *			credential may not be set at this time, if the page-in
 *			is not the result of a mapping established via mmap()
 *			or map_fd().
 *
 *		In both these cases, this will be triggered from the paging
 *		path, which will instead use the credential of the current
 *		process, which in this case is either the dynamic_pager or
 *		the kernel task, both of which utilize "root" credentials.
 *
 *		This may potentially permit operations to occur which should
 *		be denied, or it may cause operations which should be
 *		permitted to be denied, depending on the configuration of the
 *		NFS server.
 */
int
ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
{
	struct ubc_info *uip;
	kauth_cred_t credp;
	struct uthread  *uthread = get_bsdthread_info(thread);

	if (!UBCINFOEXISTS(vp))
		return (1);

	vnode_lock(vp);

	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;

	if (!IS_VALID_CRED(credp)) {
		/* use per-thread cred, if assumed identity, else proc cred */
		if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
			uip->ui_ucred = kauth_cred_proc_ref(p);
		} else {
			uip->ui_ucred = uthread->uu_ucred;
			kauth_cred_ref(uip->ui_ucred);
		}
	}
	vnode_unlock(vp);

	return (0);
}


/*
 * ubc_setcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the process; otherwise leave them
 * alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used
 *
 * Returns:	0			This vnode has no associated ubc_info
 *		1			Success
 *
 * Notes:	The return values for this function are inverted from nearly
 *		all other uses in the kernel.
 *
 *		See also ubc_setthreadcred(), above.
 *
 *		This function is considered deprecated, and generally should
 *		not be used, as it is incompatible with per-thread credentials;
 *		it exists for legacy KPI reasons.
 *
 * DEPRECATION:	ubc_setcred() is being deprecated. Please use
 *		ubc_setthreadcred() instead.
 */
int
ubc_setcred(struct vnode *vp, proc_t p)
{
	struct ubc_info *uip;
	kauth_cred_t credp;

	/* If there is no ubc_info, deny the operation */
	if ( !UBCINFOEXISTS(vp))
		return (0);

	/*
	 * Check to see if there is already a credential reference in the
	 * ubc_info; if there is not, take one on the supplied credential.
	 */
	vnode_lock(vp);
	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;
	if (!IS_VALID_CRED(credp)) {
		uip->ui_ucred = kauth_cred_proc_ref(p);
	}
	vnode_unlock(vp);

	return (1);
}

/*
 * ubc_getpager
 *
 * Get the pager associated with the ubc_info associated with the vnode.
 *
 * Parameters:	vp			The vnode to obtain the pager from
 *
 * Returns:	!VNODE_PAGER_NULL	The memory_object_t for the pager
 *		VNODE_PAGER_NULL	There is no ubc_info for this vnode
 *
 * Notes:	For each vnode that has a ubc_info associated with it, that
 *		ubc_info SHALL have a pager associated with it, so in the
 *		normal case, it's impossible to return VNODE_PAGER_NULL for
 *		a vnode with an associated ubc_info.
 */
__private_extern__ memory_object_t
ubc_getpager(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_pager);

	return (0);
}


/*
 * ubc_getobject
 *
 * Get the memory object control associated with the ubc_info associated with
 * the vnode
 *
 * Parameters:	vp			The vnode to obtain the memory object
 *					from
 *		flags			DEPRECATED
 *
 * Returns:	!MEMORY_OBJECT_CONTROL_NULL
 *		MEMORY_OBJECT_CONTROL_NULL
 *
 * Notes:	Historically, if the flags were not "do not reactivate", this
 *		function would look up the memory object using the pager if
 *		it did not exist (this could be the case if the vnode had
 *		been previously reactivated).  The flags would also permit a
 *		hold to be requested, which would have created an object
 *		reference, if one had not already existed.  This usage is
 *		deprecated, as it would permit a race between finding and
 *		taking the reference vs. a single reference being dropped in
 *		another thread.
 */
memory_object_control_t
ubc_getobject(struct vnode *vp, __unused int flags)
{
	if (UBCINFOEXISTS(vp))
		return ((vp->v_ubcinfo->ui_control));

	return (MEMORY_OBJECT_CONTROL_NULL);
}

boolean_t
ubc_strict_uncached_IO(struct vnode *vp)
{
	boolean_t result = FALSE;

	if (UBCINFOEXISTS(vp)) {
		result = memory_object_is_slid(vp->v_ubcinfo->ui_control);
	}
	return result;
}

/*
 * ubc_blktooff
 *
 * Convert a given block number to a memory backing object (file) offset for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the block is located
 *		blkno			The block number to convert
 *
 * Returns:	!-1			The offset into the backing object
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the block to an
 *					offset; the most likely cause is that
 *					the caller specified a block past the
 *					end of the file, but this could also be
 *					any other error from VNOP_BLKTOOFF().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid offset, since an off_t of -1 is normally
 *		used to represent EOF.  If we had a more reliable constant in
 *		our header files for it (i.e. explicitly cast to an off_t), we
 *		would use it here instead.
 */
off_t
ubc_blktooff(vnode_t vp, daddr64_t blkno)
{
	off_t file_offset = -1;
	int error;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
		if (error)
			file_offset = -1;
	}

	return (file_offset);
}


/*
 * ubc_offtoblk
 *
 * Convert a given offset in a memory backing object into a block number for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the offset is
 *					located
 *		offset			The offset into the backing object
 *
 * Returns:	!-1			The returned block number
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the offset to a
 *					block; the most likely cause is that
 *					the caller specified an offset past the
 *					end of the file, but this could also be
 *					any other error from VNOP_OFFTOBLK().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid block number, since block numbers exceed
 *		the valid range for offsets, due to their relative sizes.  If
 *		we had a more reliable constant than -1 in our header files
 *		for it (i.e. explicitly cast to a daddr64_t), we would use it
 *		here instead.
 */
daddr64_t
ubc_offtoblk(vnode_t vp, off_t offset)
{
	daddr64_t blkno = -1;
	int error = 0;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_OFFTOBLK(vp, offset, &blkno);
		if (error)
			blkno = -1;
	}

	return (blkno);
}
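
/*
 * Sketch (illustrative only): round-tripping an offset through the FS
 * block mapping for a vnode known to have a ubc_info:
 *
 *	daddr64_t blk = ubc_offtoblk(vp, off);
 *	off_t back = (blk == -1) ? (off_t)-1 : ubc_blktooff(vp, blk);
 */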


/*
 * ubc_pages_resident
 *
 * Determine whether or not a given vnode has pages resident via the memory
 * object control associated with the ubc_info associated with the vnode
 *
 * Parameters:	vp			The vnode we want to know about
 *
 * Returns:	1			Yes
 *		0			No
 */
int
ubc_pages_resident(vnode_t vp)
{
	kern_return_t		kret;
	boolean_t			has_pages_resident;

	if (!UBCINFOEXISTS(vp))
		return (0);

	/*
	 * The following call may fail if an invalid ui_control is specified,
	 * or if there is no VM object associated with the control object.  In
	 * either case, reacting to it as if there were no pages resident will
	 * result in correct behavior.
	 */
	kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);

	if (kret != KERN_SUCCESS)
		return (0);

	if (has_pages_resident == TRUE)
		return (1);

	return (0);
}


/*
 * ubc_sync_range
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		flags			See ubc_msync_internal()
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 * DEPRECATED:	This interface is obsolete due to a failure to return error
 *		information needed in order to correct failures.  The currently
 *		recommended interface is ubc_msync().
 */
int
ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
{
	return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
}


/*
 * ubc_msync
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			See ubc_msync_internal()
 *
 * Returns:	0			Success
 *		!0			Failure; an errno is returned
 *
 * Implicit Returns:
 *		*resid_off, modified	If non-NULL, the contents are ALWAYS
 *					modified; they are initialized to the
 *					beg_off, and in case of an I/O error,
 *					the difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.  If no error is returned, the
 *					value of the resid_off is undefined; do
 *					NOT use it in place of end_off if you
 *					intend to increment from the end of the
 *					last call and call iteratively.
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 */
errno_t
ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
{
	int retval;
	int io_errno = 0;

	if (resid_off)
		*resid_off = beg_off;

	retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);

	if (retval == 0 && io_errno == 0)
		return (EINVAL);
	return (io_errno);
}
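
/*
 * Illustrative caller sketch (hypothetical): push all dirty pages and
 * wait for the I/O, noting how far we got on failure.  This is grounded
 * only in the documented resid_off semantics above:
 *
 *	off_t resid;
 *	errno_t err = ubc_msync(vp, (off_t)0, ubc_getsize(vp), &resid,
 *				UBC_PUSHDIRTY | UBC_SYNC);
 *	if (err) {
 *		// bytes in [0, resid) were pushed before the error;
 *		// resid is meaningful only in this error case
 *	}
 */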


/*
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			MUST contain at least one of the flags
 *					UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *					UBC_PUSHALL; if UBC_PUSHDIRTY is used,
 *					UBC_SYNC may also be specified to cause
 *					this function to block until the
 *					operation is complete.  The behavior
 *					of UBC_SYNC is otherwise undefined.
 *		io_errno		The address of an int to contain the
 *					errno from a failed I/O operation, if
 *					one occurs; may be set to NULL to
 *					ignore
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Implicit Returns:
 *		*resid_off, modified	The contents of this offset MAY be
 *					modified; in case of an I/O error, the
 *					difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.
 *		*io_errno, modified	The contents of this offset are set to
 *					an errno, if an error occurs; if the
 *					caller supplies an io_errno parameter,
 *					they should be careful to initialize it
 *					to 0 before calling this function to
 *					enable them to distinguish an error
 *					with a valid *resid_off from an invalid
 *					one, and to avoid potentially falsely
 *					reporting an error, depending on use.
 *
 * Notes:	If there is no ubc_info associated with the vnode supplied,
 *		this function immediately returns success.
 *
 *		If the value of end_off is less than or equal to beg_off, this
 *		function immediately returns success; that is, end_off is NOT
 *		inclusive.
 *
 *		IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *		UBC_PUSHALL MUST be specified; that is, it is NOT possible to
 *		attempt to block on in-progress I/O by calling this function
 *		with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
 *		in order to block pending on the I/O already in progress.
 *
 *		The start offset is truncated to the page boundary and the
 *		size is adjusted to include the last page in the range; that
 *		is, end_off on exactly a page boundary will not change if it
 *		is rounded, and the range of bytes written will be from the
 *		truncated beg_off to the rounded (end_off - 1).
 */
static int
ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
{
	memory_object_size_t	tsize;
	kern_return_t		kret;
	int request_flags = 0;
	int flush_flags   = MEMORY_OBJECT_RETURN_NONE;

	if ( !UBCINFOEXISTS(vp))
		return (0);
	if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
		return (0);
	if (end_off <= beg_off)
		return (1);

	if (flags & UBC_INVALIDATE)
		/*
		 * discard the resident pages
		 */
		request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);

	if (flags & UBC_SYNC)
		/*
		 * wait for all the I/O to complete before returning
		 */
		request_flags |= MEMORY_OBJECT_IO_SYNC;

	if (flags & UBC_PUSHDIRTY)
		/*
		 * we only return the dirty pages in the range
		 */
		flush_flags = MEMORY_OBJECT_RETURN_DIRTY;

	if (flags & UBC_PUSHALL)
		/*
		 * then return all the interesting pages in the range (both
		 * dirty and precious) to the pager
		 */
		flush_flags = MEMORY_OBJECT_RETURN_ALL;

	beg_off = trunc_page_64(beg_off);
	end_off = round_page_64(end_off);
	tsize   = (memory_object_size_t)end_off - beg_off;

	/* flush and/or invalidate pages in the range requested */
	kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
					  beg_off, tsize,
					  (memory_object_offset_t *)resid_off,
					  io_errno, flush_flags, request_flags,
					  VM_PROT_NO_CHANGE);

	return ((kret == KERN_SUCCESS) ? 1 : 0);
}


/*
 * ubc_map
 *
 * Explicitly map a vnode that has an associated ubc_info, and add a reference
 * to it for the ubc system, if there isn't one already, so it will not be
 * recycled while it's in use, and set flags on the ubc_info to indicate that
 * we have done this
 *
 * Parameters:	vp			The vnode to map
 *		flags			The mapping flags for the vnode; this
 *					will be a combination of one or more of
 *					PROT_READ, PROT_WRITE, and PROT_EXEC
 *
 * Returns:	0			Success
 *		EPERM			Permission was denied
 *
 * Notes:	An I/O reference on the vnode must already be held on entry
 *
 *		If there is no ubc_info associated with the vnode, this function
 *		will return success.
 *
 *		If a permission error occurs, this function will return
 *		failure; all other failures will cause this function to return
 *		success.
 *
 *		IMPORTANT: This is an internal use function, and its symbols
 *		are not exported, hence its error checking is not very robust.
 *		It is primarily used by:
 *
 *		o	mmap(), when mapping a file
 *		o	The deprecated map_fd() interface, when mapping a file
 *		o	When mapping a shared file (a shared library in the
 *			shared segment region)
 *		o	When loading a program image during the exec process
 *
 *		...all of these uses ignore the return code, and any fault that
 *		results later because of a failure is handled in the fix-up path
 *		of the fault handler.  The interface exists primarily as a
 *		performance hint.
 *
 *		Given that third-party implementations of the types of
 *		interfaces that would use this function, such as alternative
 *		executable formats, etc., are unsupported, this function is
 *		not exported for general use.
 *
 *		The extra reference is held until the VM system unmaps the
 *		vnode from its own context to maintain a vnode reference in
 *		cases like open()/mmap()/close(), which leave the backing
 *		object referenced by a mapped memory region in a process
 *		address space.
 */
__private_extern__ int
ubc_map(vnode_t vp, int flags)
{
	struct ubc_info *uip;
	int error = 0;
	int need_ref = 0;
	int need_wakeup = 0;

	if (UBCINFOEXISTS(vp)) {

		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_map", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);
		vnode_unlock(vp);

		error = VNOP_MMAP(vp, flags, vfs_context_current());

		if (error != EPERM)
			error = 0;

		vnode_lock_spin(vp);

		if (error == 0) {
			if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
				need_ref = 1;
			SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
		}
		CLR(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

		if (need_ref)
			vnode_ref(vp);
	}
	return (error);
}


/*
 * ubc_destroy_named
 *
 * Destroy the named memory object associated with the ubc_info control object
 * associated with the designated vnode, if there is a ubc_info associated
 * with the vnode, and a control object is associated with it
 *
 * Parameters:	vp			The designated vnode
 *
 * Returns:	(void)
 *
 * Notes:	This function is called on vnode termination for all vnodes,
 *		and must therefore not assume that there is a ubc_info that is
 *		associated with the vnode, nor that there is a control object
 *		associated with the ubc_info.
 *
 *		If all the conditions necessary are present, this function
 *		calls memory_object_destroy(), which will in turn end up
 *		calling ubc_unmap() to release any vnode references that were
 *		established via ubc_map().
 *
 *		IMPORTANT: This is an internal use function that is used
 *		exclusively by the internal use function vclean().
 */
__private_extern__ void
ubc_destroy_named(vnode_t vp)
{
	memory_object_control_t control;
	struct ubc_info *uip;
	kern_return_t kret;

	if (UBCINFOEXISTS(vp)) {
		uip = vp->v_ubcinfo;

		/* Terminate the memory object  */
		control = ubc_getobject(vp, UBC_HOLDOBJECT);
		if (control != MEMORY_OBJECT_CONTROL_NULL) {
			kret = memory_object_destroy(control, 0);
			if (kret != KERN_SUCCESS)
				panic("ubc_destroy_named: memory_object_destroy failed");
		}
	}
}


/*
 * ubc_isinuse
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	Because the vnode is only held locked while actually asking
 *		the use count, this function only represents a snapshot of the
 *		current state of the vnode.  If more accurate information is
 *		required, an additional busycount should be held by the caller
 *		and a non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	if ( !UBCINFOEXISTS(vp))
		return (0);
	return(ubc_isinuse_locked(vp, busycount, 0));
}


/*
 * ubc_isinuse_locked
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *		locked			True if the vnode is already locked by
 *					the caller
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	If the vnode is not locked on entry, it is locked while
 *		actually asking the use count.  If this is the case, this
 *		function only represents a snapshot of the current state of
 *		the vnode.  If more accurate information is required, the
 *		vnode lock should be held by the caller, otherwise an
 *		additional busycount should be held by the caller and a
 *		non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
{
	int retval = 0;


	if (!locked)
		vnode_lock_spin(vp);

	if ((vp->v_usecount - vp->v_kusecount) > busycount)
		retval = 1;

	if (!locked)
		vnode_unlock(vp);
	return (retval);
}
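
/*
 * Illustrative use from a filesystem close path (hypothetical caller):
 * the caller's own iocount biases the check, per the busycount parameter
 * documented above:
 *
 *	if (ubc_isinuse(vp, 1)) {
 *		// still mapped or otherwise busy beyond this open
 *	}
 */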


/*
 * ubc_unmap
 *
 * Reverse the effects of a ubc_map() call for a given vnode
 *
 * Parameters:	vp			vnode to unmap from ubc
 *
 * Returns:	(void)
 *
 * Notes:	This is an internal use function used by vnode_pager_unmap().
 *		It will attempt to obtain a reference on the supplied vnode,
 *		and if it can do so, and there is an associated ubc_info, and
 *		the flags indicate that it was mapped via ubc_map(), then the
 *		flag is cleared, the mapping removed, and the reference taken
 *		by ubc_map() is released.
 *
 *		IMPORTANT: This MUST only be called by the VM
 *		to prevent race conditions.
 */
__private_extern__ void
ubc_unmap(struct vnode *vp)
{
	struct ubc_info *uip;
	int	need_rele = 0;
	int	need_wakeup = 0;

	if (vnode_getwithref(vp))
		return;

	if (UBCINFOEXISTS(vp)) {
		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_unmap", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
			CLR(uip->ui_flags, UI_ISMAPPED);
			need_rele = 1;
		}
		vnode_unlock(vp);

		if (need_rele) {
			(void)VNOP_MNOMAP(vp, vfs_context_current());
			vnode_rele(vp);
		}

		vnode_lock_spin(vp);

		CLR(uip->ui_flags, UI_MAPBUSY);
		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

	}
	/*
	 * the drop of the vnode ref will cleanup
	 */
	vnode_put(vp);
}
1733
1734
1735/*
1736 * ubc_page_op
1737 *
1738 * Manipulate individual page state for a vnode with an associated ubc_info
1739 * with an associated memory object control.
1740 *
1741 * Parameters:	vp			The vnode backing the page
1742 *		f_offset		A file offset interior to the page
1743 *		ops			The operations to perform, as a bitmap
1744 *					(see below for more information)
1745 *		phys_entryp		The address of a ppnum_t; may be NULL
1746 *					to ignore
1747 *		flagsp			A pointer to an int to contain flags;
1748 *					may be NULL to ignore
1749 *
1750 * Returns:	KERN_SUCCESS		Success
1751 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
1752 *					object associated
1753 *		KERN_INVALID_OBJECT	If UPL_POP_PHYSICAL and the object is
1754 *					not physically contiguous
1755 *		KERN_INVALID_OBJECT	If !UPL_POP_PHYSICAL and the object is
1756 *					physically contiguous
1757 *		KERN_FAILURE		If the page cannot be looked up
1758 *
1759 * Implicit Returns:
1760 *		*phys_entryp (modified)	If phys_entryp is non-NULL and
1761 *					UPL_POP_PHYSICAL
1762 *		*flagsp (modified)	If flagsp is non-NULL and there was
1763 *					!UPL_POP_PHYSICAL and a KERN_SUCCESS
1764 *
1765 * Notes:	For object boundaries, it is considerably more efficient to
1766 *		ensure that f_offset is in fact on a page boundary, as this
1767 *		will avoid internal use of the hash table to identify the
1768 *		page, and would therefore skip a number of early optimizations.
1769 *		Since this is a page operation anyway, the caller should try
1770 *		to pass only a page aligned offset because of this.
1771 *
1772 *		*flagsp may be modified even if this function fails.  If it is
1773 *		modified, it will contain the condition of the page before the
1774 *		requested operation was attempted; these will only include the
1775 *		bitmap flags, and not the PL_POP_PHYSICAL, UPL_POP_DUMP,
1776 *		UPL_POP_SET, or UPL_POP_CLR bits.
1777 *
1778 *		The flags field may contain a specific operation, such as
1779 *		UPL_POP_PHYSICAL or UPL_POP_DUMP:
1780 *
1781 *		o	UPL_POP_PHYSICAL	Fail if not contiguous; if
1782 *						*phys_entryp and successful, set
1783 *						*phys_entryp
1784 *		o	UPL_POP_DUMP		Dump the specified page
1785 *
1786 *		Otherwise, it is treated as a bitmap of one or more page
1787 *		operations to perform on the final memory object; allowable
1788 *		bit values are:
1789 *
1790 *		o	UPL_POP_DIRTY		The page is dirty
1791 *		o	UPL_POP_PAGEOUT		The page is paged out
1792 *		o	UPL_POP_PRECIOUS	The page is precious
1793 *		o	UPL_POP_ABSENT		The page is absent
1794 *		o	UPL_POP_BUSY		The page is busy
1795 *
 *		If the page status is only being queried and not modified, then
 *		no other bits should be specified.  However, if it is being
 *		modified, exactly ONE of the following bits should be set:
 *
 *		o	UPL_POP_SET		Set the current bitmap bits
 *		o	UPL_POP_CLR		Clear the current bitmap bits
 *
 *		Thus to effect a combination of setting and clearing, it may be
 *		necessary to call this function twice.  If this is done, the
 *		set should be used before the clear, since clearing may trigger
 *		a wakeup on the destination page, and if the page is backed by
 *		an encrypted swap file, setting will trigger the decryption
 *		needed before the wakeup occurs.
 */
kern_return_t
ubc_page_op(
	struct vnode 	*vp,
	off_t		f_offset,
	int		ops,
	ppnum_t	*phys_entryp,
	int		*flagsp)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_page_op(control,
				      (memory_object_offset_t)f_offset,
				      ops,
				      phys_entryp,
				      flagsp));
}
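
/*
 * Example (illustrative sketch, not a caller in this file; assumes "vp"
 * is a vnode the caller holds a reference on and "f_offset" is a page
 * aligned file offset):
 *
 *	int page_state = 0;
 *	kern_return_t kr;
 *
 *	// query only: no UPL_POP_SET or UPL_POP_CLR, so nothing changes
 *	kr = ubc_page_op(vp, f_offset, UPL_POP_DIRTY, NULL, &page_state);
 *
 *	if (kr == KERN_SUCCESS && (page_state & UPL_POP_DIRTY)) {
 *		// modify: exactly one of UPL_POP_SET / UPL_POP_CLR
 *		kr = ubc_page_op(vp, f_offset,
 *				 UPL_POP_BUSY | UPL_POP_SET, NULL, NULL);
 *	}
 */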


/*
 * ubc_range_op
 *
 * Manipulate page state for a range of memory for a vnode with an associated
 * ubc_info, which in turn has an associated memory object control, when page
 * level state is not required to be returned from the call (i.e. there are no
 * phys_entryp or flagsp parameters to this call, and it takes a range which
 * may contain multiple pages, rather than an offset interior to a single
 * page).
 *
 * Parameters:	vp			The vnode backing the page
 *		f_offset_beg		A file offset interior to the start page
 *		f_offset_end		A file offset interior to the end page
 *		ops			The operations to perform, as a bitmap
 *					(see below for more information)
 *		range			The address of an int; may be NULL to
 *					ignore
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
 *					object associated
 *		KERN_INVALID_OBJECT	If the object is physically contiguous
 *
 * Implicit Returns:
 *		*range (modified)	If range is non-NULL, its contents will
 *					be modified to contain the number of
 *					bytes successfully operated upon.
 *
 * Notes:	IMPORTANT: This function cannot be used on a range that
 *		consists of physically contiguous pages.
 *
 *		It is considerably more efficient to ensure that f_offset_beg
 *		and f_offset_end are in fact on page boundaries; otherwise the
 *		hash table must be used internally to identify the pages,
 *		which skips a number of early optimizations.  Since this is an
 *		operation on a set of pages anyway, the caller should try to
 *		pass only page aligned offsets.
 *
 *		*range will be modified only if this function succeeds.
 *
 *		The flags field MUST contain a specific operation; allowable
 *		values are:
 *
 *		o	UPL_ROP_ABSENT	Returns the extent of the range
 *					presented which is absent, starting
 *					with the start address presented
 *
 *		o	UPL_ROP_PRESENT	Returns the extent of the range
 *					presented which is present (resident),
 *					starting with the start address
 *					presented
 *		o	UPL_ROP_DUMP	Dump the pages which are found in the
 *					target object for the target range.
 *
 *		IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT, if there are
 *		multiple regions in the range, only the first matching region
 *		is returned.
 */
kern_return_t
ubc_range_op(
	struct vnode 	*vp,
	off_t		f_offset_beg,
	off_t		f_offset_end,
	int             ops,
	int             *range)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_range_op(control,
				      (memory_object_offset_t)f_offset_beg,
				      (memory_object_offset_t)f_offset_end,
				      ops,
				      range));
}
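
/*
 * Example (illustrative sketch; assumes "vp" is a referenced vnode):
 *
 *	int covered = 0;
 *	kern_return_t kr;
 *
 *	// measure how much of the first 16 pages is resident; only the
 *	// first matching region is reported, per the IMPORTANT note above
 *	kr = ubc_range_op(vp, 0, 16 * PAGE_SIZE, UPL_ROP_PRESENT, &covered);
 *	// on KERN_SUCCESS, "covered" holds that region's length in bytes
 */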


/*
 * ubc_create_upl
 *
 * Given a vnode, cause the population of a portion of the vm_object; based on
 * the nature of the request, the pages returned may contain valid data, or
 * they may be uninitialized.
 *
 * Parameters:	vp			The vnode from which to create the upl
 *		f_offset		The start offset into the backing store
 *					represented by the vnode
 *		bufsize			The size of the upl to create
 *		uplp			Pointer to the upl_t to receive the
 *					created upl; MUST NOT be NULL
 *		plp			Pointer to receive the internal page
 *					list for the created upl; MAY be NULL
 *					to ignore
 *
 * Returns:	KERN_SUCCESS		The requested upl has been created
 *		KERN_INVALID_ARGUMENT	The bufsize argument is not an even
 *					multiple of the page size
 *		KERN_INVALID_ARGUMENT	There is no ubc_info associated with
 *					the vnode, or there is no memory object
 *					control associated with the ubc_info
 *	memory_object_upl_request:KERN_INVALID_VALUE
 *					The supplied upl_flags argument is
 *					invalid
 * Implicit Returns:
 *		*uplp (modified)
 *		*plp (modified)		If non-NULL, the value of *plp will be
 *					modified to point to the internal page
 *					list; this modification may occur even
 *					if this function is unsuccessful, in
 *					which case the contents may be invalid
 *
 * Note:	If successful, the returned *uplp MUST subsequently be freed
 *		via a call to ubc_upl_commit(), ubc_upl_commit_range(),
 *		ubc_upl_abort(), or ubc_upl_abort_range().
 */
kern_return_t
ubc_create_upl(
	struct vnode	*vp,
	off_t 		f_offset,
	int		bufsize,
	upl_t		*uplp,
	upl_page_info_t	**plp,
	int		uplflags)
{
	memory_object_control_t		control;
	kern_return_t			kr;

	if (plp != NULL)
		*plp = NULL;
	*uplp = NULL;

	if (bufsize & 0xfff)
		return KERN_INVALID_ARGUMENT;

	if (bufsize > MAX_UPL_SIZE * PAGE_SIZE)
		return KERN_INVALID_ARGUMENT;

	if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {

		if (uplflags & UPL_UBC_MSYNC) {
			uplflags &= UPL_RET_ONLY_DIRTY;

			uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
				    UPL_SET_INTERNAL | UPL_SET_LITE;

		} else if (uplflags & UPL_UBC_PAGEOUT) {
			uplflags &= UPL_RET_ONLY_DIRTY;

			if (uplflags & UPL_RET_ONLY_DIRTY)
				uplflags |= UPL_NOBLOCK;

			uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				    UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
		} else {
			uplflags |= UPL_RET_ONLY_ABSENT |
				    UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
				    UPL_SET_INTERNAL | UPL_SET_LITE;

			/*
			 * if the requested size == PAGE_SIZE, we don't want to set
			 * the UPL_NOBLOCK since we may be trying to recover from a
			 * previous partial pagein I/O that occurred because we were low
			 * on memory and bailed early in order to honor the UPL_NOBLOCK...
			 * since we're only asking for a single page, we can block w/o fear
			 * of tying up pages while waiting for more to become available
			 */
			if (bufsize > PAGE_SIZE)
				uplflags |= UPL_NOBLOCK;
		}
	} else {
		uplflags &= ~UPL_FOR_PAGEOUT;

		if (uplflags & UPL_WILL_BE_DUMPED) {
			uplflags &= ~UPL_WILL_BE_DUMPED;
			uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
		} else
			uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
	}
	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags);

	if (kr == KERN_SUCCESS && plp != NULL)
		*plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
	return kr;
}
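
/*
 * Example (illustrative sketch of a single-page pagein setup; "vp" and
 * "f_offset" are assumed to be supplied by the caller):
 *
 *	upl_t upl = NULL;
 *	upl_page_info_t *pl = NULL;
 *	kern_return_t kr;
 *
 *	kr = ubc_create_upl(vp, f_offset, PAGE_SIZE, &upl, &pl,
 *			    UPL_UBC_PAGEIN);
 *	if (kr == KERN_SUCCESS) {
 *		// ... fill the absent pages described by "pl" ...
 *		// the upl MUST then be released via ubc_upl_commit*()
 *		// or ubc_upl_abort*(), per the Note above
 *	}
 */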


/*
 * ubc_upl_maxbufsize
 *
 * Return the maximum bufsize ubc_create_upl( ) will take.
 *
 * Parameters:	none
 *
 * Returns:	maximum size buffer (in bytes) ubc_create_upl( ) will take.
 */
upl_size_t
ubc_upl_maxbufsize(
	void)
{
	return(MAX_UPL_SIZE * PAGE_SIZE);
}

/*
 * ubc_upl_map
 *
 * Map the page list associated with the supplied upl into the kernel virtual
 * address space at the virtual address indicated by the dst_addr argument;
 * the entire upl is mapped
 *
 * Parameters:	upl			The upl to map
 *		dst_addr		The address at which to map the upl
 *
 * Returns:	KERN_SUCCESS		The upl has been mapped
 *		KERN_INVALID_ARGUMENT	The upl is UPL_NULL
 *		KERN_FAILURE		The upl is already mapped
 *	vm_map_enter:KERN_INVALID_ARGUMENT
 *					A failure code from vm_map_enter() due
 *					to an invalid argument
 */
kern_return_t
ubc_upl_map(
	upl_t		upl,
	vm_offset_t	*dst_addr)
{
	return (vm_upl_map(kernel_map, upl, dst_addr));
}


/*
 * ubc_upl_unmap
 *
 * Unmap the page list associated with the supplied upl from the kernel virtual
 * address space; the entire upl is unmapped.
 *
 * Parameters:	upl			The upl to unmap
 *
 * Returns:	KERN_SUCCESS		The upl has been unmapped
 *		KERN_FAILURE		The upl is not currently mapped
 *		KERN_INVALID_ARGUMENT	If the upl is UPL_NULL
 */
kern_return_t
ubc_upl_unmap(
	upl_t	upl)
{
	return(vm_upl_unmap(kernel_map, upl));
}
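
/*
 * Example (illustrative sketch of the map/unmap pairing; "upl" is assumed
 * to be a valid, currently unmapped upl of at least one page):
 *
 *	vm_offset_t kva = 0;
 *
 *	if (ubc_upl_map(upl, &kva) == KERN_SUCCESS) {
 *		// the upl's pages are now addressable through "kva"
 *		bzero((void *)kva, PAGE_SIZE);
 *		(void) ubc_upl_unmap(upl);
 *	}
 */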


/*
 * ubc_upl_commit
 *
 * Commit the contents of the upl to the backing store
 *
 * Parameters:	upl			The upl to commit
 *
 * Returns:	KERN_SUCCESS		The upl has been committed
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	In practice, the only return value for this function should be
 *		KERN_SUCCESS, unless there has been data structure corruption;
 *		since the upl is deallocated regardless of success or failure,
 *		there's really nothing to do about this other than panic.
 *
 *		IMPORTANT: Use of this function should not be mixed with use of
 *		ubc_upl_commit_range(), due to the unconditional deallocation
 *		by this function.
 */
kern_return_t
ubc_upl_commit(
	upl_t 			upl)
{
	upl_page_info_t	*pl;
	kern_return_t 	kr;

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	kr = upl_commit(upl, pl, MAX_UPL_SIZE);
	upl_deallocate(upl);
	return kr;
}
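
/*
 * Example (illustrative sketch; note the unconditional deallocation
 * described above):
 *
 *	kern_return_t kr;
 *
 *	kr = ubc_upl_commit(upl);
 *	// "upl" is gone at this point whether or not kr == KERN_SUCCESS;
 *	// do not follow this with any other ubc_upl_* call on the same upl
 */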


/*
 * ubc_upl_commit_range
 *
 * Commit the contents of the specified range of the upl to the backing store
 *
 * Parameters:	upl			The upl to commit
 *		offset			The offset into the upl
 *		size			The size of the region to be committed,
 *					starting at the specified offset
 *		flags			commit type (see below)
 *
 * Returns:	KERN_SUCCESS		The range has been committed
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the commit is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The flags argument is a bitmap of flags on the range of pages in
 *		the upl to be committed; allowable flags are:
 *
 *		o	UPL_COMMIT_FREE_ON_EMPTY	Free the upl when it is
 *							both empty and has been
 *							successfully committed
 *		o	UPL_COMMIT_CLEAR_DIRTY		Clear each page's dirty
 *							bit; will prevent a
 *							later pageout
 *		o	UPL_COMMIT_SET_DIRTY		Set each page's dirty
 *							bit; will cause a later
 *							pageout
 *		o	UPL_COMMIT_INACTIVATE		Clear each page's
 *							reference bit; the page
 *							will not be accessed
 *		o	UPL_COMMIT_ALLOW_ACCESS		Unbusy each page; pages
 *							become busy when an
 *							IOMemoryDescriptor is
 *							mapped or redirected,
 *							and we have to wait for
 *							an IOKit driver
 *
 *		The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
 *		not be specified by the caller.
 *
 *		The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
 *		mutually exclusive, and should not be combined.
 */
kern_return_t
ubc_upl_commit_range(
	upl_t 			upl,
	upl_offset_t		offset,
	upl_size_t		size,
	int				flags)
{
	upl_page_info_t	*pl;
	boolean_t		empty;
	kern_return_t 	kr;

	if (flags & UPL_COMMIT_FREE_ON_EMPTY)
		flags |= UPL_COMMIT_NOTIFY_EMPTY;

	if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
		return KERN_INVALID_ARGUMENT;
	}

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

	kr = upl_commit_range(upl, offset, size, flags,
						  pl, MAX_UPL_SIZE, &empty);

	if ((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}
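
/*
 * Example (illustrative sketch; commits a whole upl of "size" bytes and
 * lets the commit free it, per the UPL_COMMIT_FREE_ON_EMPTY guidance):
 *
 *	kern_return_t kr;
 *
 *	kr = ubc_upl_commit_range(upl, 0, size,
 *				  UPL_COMMIT_FREE_ON_EMPTY |
 *				  UPL_COMMIT_CLEAR_DIRTY);
 *	// offset 0 with the full upl size empties the upl on success,
 *	// so it is deallocated and must not be touched again
 */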


/*
 * ubc_upl_abort_range
 *
 * Abort the contents of the specified range of the specified upl
 *
 * Parameters:	upl			The upl to abort
 *		offset			The offset into the upl
 *		size			The size of the region to be aborted,
 *					starting at the specified offset
 *		abort_flags		abort type (see below)
 *
 * Returns:	KERN_SUCCESS		The range has been aborted
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the abort is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The abort_flags argument is a bitmap of flags on the range of
 *		pages in the upl to be aborted; allowable flags are:
 *
 *		o	UPL_ABORT_FREE_ON_EMPTY	Free the upl when it is both
 *						empty and has been successfully
 *						aborted
 *		o	UPL_ABORT_RESTART	The operation must be restarted
 *		o	UPL_ABORT_UNAVAILABLE	The pages are unavailable
 *		o	UPL_ABORT_ERROR		An I/O error occurred
 *		o	UPL_ABORT_DUMP_PAGES	Just free the pages
 *		o	UPL_ABORT_NOTIFY_EMPTY	RESERVED
 *		o	UPL_ABORT_ALLOW_ACCESS	RESERVED
 *
 *		The UPL_ABORT_NOTIFY_EMPTY flag is for internal use and should
 *		not be specified by the caller.  It is intended to fulfill the
 *		same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *		ubc_upl_commit_range(), but is never referenced internally.
 *
 *		The UPL_ABORT_ALLOW_ACCESS flag is defined, but neither set nor
 *		referenced; do not use it.
 */
kern_return_t
ubc_upl_abort_range(
	upl_t			upl,
	upl_offset_t		offset,
	upl_size_t		size,
	int				abort_flags)
{
	kern_return_t 	kr;
	boolean_t		empty = FALSE;

	if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
		abort_flags |= UPL_ABORT_NOTIFY_EMPTY;

	kr = upl_abort_range(upl, offset, size, abort_flags, &empty);

	if ((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}
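
/*
 * Example (illustrative sketch; aborts a whole upl of "size" bytes after
 * a failed I/O):
 *
 *	(void) ubc_upl_abort_range(upl, 0, size,
 *				   UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
 *	// as with commit, offset 0 plus the full size means the upl is
 *	// deallocated here and must not be referenced afterwards
 */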


/*
 * ubc_upl_abort
 *
 * Abort the contents of the specified upl
 *
 * Parameters:	upl			The upl to abort
 *		abort_type		abort type (see below)
 *
 * Returns:	KERN_SUCCESS		The range has been aborted
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the abort is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The abort_type argument is a bitmap of flags on the range of
 *		pages in the upl to be aborted; allowable flags are:
 *
 *		o	UPL_ABORT_FREE_ON_EMPTY	Free the upl when it is both
 *						empty and has been successfully
 *						aborted
 *		o	UPL_ABORT_RESTART	The operation must be restarted
 *		o	UPL_ABORT_UNAVAILABLE	The pages are unavailable
 *		o	UPL_ABORT_ERROR		An I/O error occurred
 *		o	UPL_ABORT_DUMP_PAGES	Just free the pages
 *		o	UPL_ABORT_NOTIFY_EMPTY	RESERVED
 *		o	UPL_ABORT_ALLOW_ACCESS	RESERVED
 *
 *		The UPL_ABORT_NOTIFY_EMPTY flag is for internal use and should
 *		not be specified by the caller.  It is intended to fulfill the
 *		same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *		ubc_upl_commit_range(), but is never referenced internally.
 *
 *		The UPL_ABORT_ALLOW_ACCESS flag is defined, but neither set nor
 *		referenced; do not use it.
 */
kern_return_t
ubc_upl_abort(
	upl_t			upl,
	int				abort_type)
{
	kern_return_t	kr;

	kr = upl_abort(upl, abort_type);
	upl_deallocate(upl);
	return kr;
}


/*
 * ubc_upl_pageinfo
 *
 *  Retrieve the internal page list for the specified upl
 *
 * Parameters:	upl			The upl to obtain the page list from
 *
 * Returns:	!NULL			The (upl_page_info_t *) for the page
 *					list internal to the upl
 *		NULL			Error/no page list associated
 *
 * Notes:	IMPORTANT: The function is only valid on internal objects
 *		where the list request was made with the UPL_INTERNAL flag.
 *
 *		This function is a utility helper function, since some callers
 *		may not have direct access to the header defining the macro,
 *		due to abstraction layering constraints.
 */
upl_page_info_t *
ubc_upl_pageinfo(
	upl_t			upl)
{
	return (UPL_GET_INTERNAL_PAGE_LIST(upl));
}


int
UBCINFOEXISTS(struct vnode * vp)
{
        return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
}


void
ubc_upl_range_needed(
	upl_t		upl,
	int		index,
	int		count)
{
	upl_range_needed(upl, index, count);
}


/*
 * CODE SIGNING
 */
#define CS_BLOB_PAGEABLE 0
static volatile SInt32 cs_blob_size = 0;
static volatile SInt32 cs_blob_count = 0;
static SInt32 cs_blob_size_peak = 0;
static UInt32 cs_blob_size_max = 0;
static SInt32 cs_blob_count_peak = 0;

int cs_validation = 1;

#ifndef SECURE_KERNEL
SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
#endif
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");

kern_return_t
ubc_cs_blob_allocate(
	vm_offset_t	*blob_addr_p,
	vm_size_t	*blob_size_p)
{
	kern_return_t	kr;

#if CS_BLOB_PAGEABLE
	*blob_size_p = round_page(*blob_size_p);
	kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
#else	/* CS_BLOB_PAGEABLE */
	*blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
	if (*blob_addr_p == 0) {
		kr = KERN_NO_SPACE;
	} else {
		kr = KERN_SUCCESS;
	}
#endif	/* CS_BLOB_PAGEABLE */
	return kr;
}

void
ubc_cs_blob_deallocate(
	vm_offset_t	blob_addr,
	vm_size_t	blob_size)
{
#if CS_BLOB_PAGEABLE
	kmem_free(kernel_map, blob_addr, blob_size);
#else	/* CS_BLOB_PAGEABLE */
	kfree((void *) blob_addr, blob_size);
#endif	/* CS_BLOB_PAGEABLE */
}
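
/*
 * Example (illustrative sketch of a caller's allocate/add flow; the copy
 * of the signature bytes and most error handling are elided, and
 * "sig_size", "cputype" and "base_offset" are hypothetical inputs):
 *
 *	vm_offset_t addr = 0;
 *	vm_size_t size = sig_size;
 *
 *	if (ubc_cs_blob_allocate(&addr, &size) == KERN_SUCCESS) {
 *		// ... copy "size" bytes of signature data to "addr" ...
 *		if (ubc_cs_blob_add(vp, cputype, base_offset, addr, size))
 *			ubc_cs_blob_deallocate(addr, size);
 *		// on success (including the duplicate-blob case handled
 *		// below), ubc_cs_blob_add() has taken ownership of the memory
 *	}
 */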

int
ubc_cs_blob_add(
	struct vnode	*vp,
	cpu_type_t	cputype,
	off_t		base_offset,
	vm_address_t	addr,
	vm_size_t	size)
{
	kern_return_t		kr;
	struct ubc_info		*uip;
	struct cs_blob		*blob, *oblob;
	int			error;
	ipc_port_t		blob_handle;
	memory_object_size_t	blob_size;
	const CS_CodeDirectory *cd;
	off_t			blob_start_offset, blob_end_offset;
	SHA1_CTX		sha1ctxt;

	blob_handle = IPC_PORT_NULL;

	blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
	if (blob == NULL) {
		return ENOMEM;
	}

#if CS_BLOB_PAGEABLE
	/* get a memory entry on the blob */
	blob_size = (memory_object_size_t) size;
	kr = mach_make_memory_entry_64(kernel_map,
				       &blob_size,
				       addr,
				       VM_PROT_READ,
				       &blob_handle,
				       IPC_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		error = ENOMEM;
		goto out;
	}
	if (memory_object_round_page(blob_size) !=
	    (memory_object_size_t) round_page(size)) {
		printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n",
		       blob_size, (size_t)size);
		panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size);
		error = EINVAL;
		goto out;
	}
#else
	blob_size = (memory_object_size_t) size;
	blob_handle = IPC_PORT_NULL;
#endif

	/* fill in the new blob */
	blob->csb_cpu_type = cputype;
	blob->csb_base_offset = base_offset;
	blob->csb_mem_size = size;
	blob->csb_mem_offset = 0;
	blob->csb_mem_handle = blob_handle;
	blob->csb_mem_kaddr = addr;

	/*
	 * Validate the blob's contents
	 */
	cd = findCodeDirectory(
		(const CS_SuperBlob *) addr,
		(char *) addr,
		(char *) addr + blob->csb_mem_size);
	if (cd == NULL) {
		/* no code directory => useless blob ! */
		blob->csb_flags = 0;
		blob->csb_start_offset = 0;
		blob->csb_end_offset = 0;
	} else {
		const unsigned char *sha1_base;
		int sha1_size;

		blob->csb_flags = ntohl(cd->flags) | CS_VALID;
		blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
		if ((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
			const struct Scatter *scatter = (const struct Scatter*)
				((const char*)cd + ntohl(cd->scatterOffset));
			blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE;
		} else {
			blob->csb_start_offset = (blob->csb_end_offset -
						  (ntohl(cd->nCodeSlots) * PAGE_SIZE));
		}
		/* compute the blob's SHA1 hash */
		sha1_base = (const unsigned char *) cd;
		sha1_size = ntohl(cd->length);
#ifdef _NOTYET
		SHA1Init(&sha1ctxt);
		SHA1Update(&sha1ctxt, sha1_base, sha1_size);
		SHA1Final(blob->csb_sha1, &sha1ctxt);
#else
		bzero(blob->csb_sha1, SHA1_RESULTLEN);
#endif
	}

	/*
	 * Let policy module check whether the blob's signature is accepted.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
	if (error)
		goto out;
#endif

	/*
	 * Validate the blob's coverage
	 */
	blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
	blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;

	if (blob_start_offset >= blob_end_offset ||
	    blob_start_offset < 0 ||
	    blob_end_offset <= 0) {
		/* reject empty or backwards blob */
		error = EINVAL;
		goto out;
	}

	vnode_lock(vp);
	if (! UBCINFOEXISTS(vp)) {
		vnode_unlock(vp);
		error = ENOENT;
		goto out;
	}
	uip = vp->v_ubcinfo;

	/* check if this new blob overlaps with an existing blob */
	for (oblob = uip->cs_blobs;
	     oblob != NULL;
	     oblob = oblob->csb_next) {
		 off_t oblob_start_offset, oblob_end_offset;

		 oblob_start_offset = (oblob->csb_base_offset +
				       oblob->csb_start_offset);
		 oblob_end_offset = (oblob->csb_base_offset +
				     oblob->csb_end_offset);
		 if (blob_start_offset >= oblob_end_offset ||
		     blob_end_offset <= oblob_start_offset) {
			 /* no conflict with this existing blob */
		 } else {
			 /* conflict ! */
			 if (blob_start_offset == oblob_start_offset &&
			     blob_end_offset == oblob_end_offset &&
			     blob->csb_mem_size == oblob->csb_mem_size &&
			     blob->csb_flags == oblob->csb_flags &&
			     (blob->csb_cpu_type == CPU_TYPE_ANY ||
			      oblob->csb_cpu_type == CPU_TYPE_ANY ||
			      blob->csb_cpu_type == oblob->csb_cpu_type) &&
			     !bcmp(blob->csb_sha1,
				   oblob->csb_sha1,
				   SHA1_RESULTLEN)) {
				 /*
				  * We already have this blob:
				  * we'll return success but
				  * throw away the new blob.
				  */
				 if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
					 /*
					  * The old blob matches this one
					  * but doesn't have any CPU type.
					  * Update it with whatever the caller
					  * provided this time.
					  */
					 oblob->csb_cpu_type = cputype;
				 }
				 vnode_unlock(vp);
				 error = EAGAIN;
				 goto out;
			 } else {
				 /* different blob: reject the new one */
				 vnode_unlock(vp);
				 error = EALREADY;
				 goto out;
			 }
		 }

	}


	/* mark this vnode's VM object as having "signed pages" */
	kr = memory_object_signed(uip->ui_control, TRUE);
	if (kr != KERN_SUCCESS) {
		vnode_unlock(vp);
		error = ENOENT;
		goto out;
	}

	/*
	 * Add this blob to the list of blobs for this vnode.
	 * We always add at the front of the list and we never remove a
	 * blob from the list, so ubc_cs_get_blobs() can return whatever
	 * the top of the list was and that list will remain valid
	 * while we validate a page, even after we release the vnode's lock.
	 */
	blob->csb_next = uip->cs_blobs;
	uip->cs_blobs = blob;

	OSAddAtomic(+1, &cs_blob_count);
	if (cs_blob_count > cs_blob_count_peak) {
		cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
	}
	OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
	if ((SInt32) cs_blob_size > cs_blob_size_peak) {
		cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? */
	}
	if ((UInt32) blob->csb_mem_size > cs_blob_size_max) {
		cs_blob_size_max = (UInt32) blob->csb_mem_size;
	}

	if (cs_debug > 1) {
		proc_t p;

		p = current_proc();
		printf("CODE SIGNING: proc %d(%s) "
		       "loaded %s signatures for file (%s) "
		       "range 0x%llx:0x%llx flags 0x%x\n",
		       p->p_pid, p->p_comm,
		       blob->csb_cpu_type == -1 ? "detached" : "embedded",
		       vnode_name(vp),
		       blob->csb_base_offset + blob->csb_start_offset,
		       blob->csb_base_offset + blob->csb_end_offset,
		       blob->csb_flags);
	}

	vnode_unlock(vp);

	error = 0;	/* success ! */

out:
	if (error) {
		/* we failed; release what we allocated */
		if (blob) {
			kfree(blob, sizeof (*blob));
			blob = NULL;
		}
		if (blob_handle != IPC_PORT_NULL) {
			mach_memory_entry_port_release(blob_handle);
			blob_handle = IPC_PORT_NULL;
		}
	}

	if (error == EAGAIN) {
		/*
		 * See above:  error is EAGAIN if we were asked
		 * to add an existing blob again.  We cleaned the new
		 * blob and we want to return success.
		 */
		error = 0;
		/*
		 * Since we're not failing, consume the data we received.
		 */
		ubc_cs_blob_deallocate(addr, size);
	}

	return error;
}


struct cs_blob *
ubc_cs_blob_get(
	struct vnode	*vp,
	cpu_type_t	cputype,
	off_t		offset)
{
	struct ubc_info	*uip;
	struct cs_blob	*blob;
	off_t offset_in_blob;

	vnode_lock_spin(vp);

	if (! UBCINFOEXISTS(vp)) {
		blob = NULL;
		goto out;
	}

	uip = vp->v_ubcinfo;
	for (blob = uip->cs_blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		if (cputype != -1 && blob->csb_cpu_type == cputype) {
			break;
		}
		if (offset != -1) {
			offset_in_blob = offset - blob->csb_base_offset;
			if (offset_in_blob >= blob->csb_start_offset &&
			    offset_in_blob < blob->csb_end_offset) {
				/* our offset is covered by this blob */
				break;
			}
		}
	}

out:
	vnode_unlock(vp);

	return blob;
}

static void
ubc_cs_free(
	struct ubc_info	*uip)
{
	struct cs_blob	*blob, *next_blob;

	for (blob = uip->cs_blobs;
	     blob != NULL;
	     blob = next_blob) {
		next_blob = blob->csb_next;
		if (blob->csb_mem_kaddr != 0) {
			ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
					       blob->csb_mem_size);
			blob->csb_mem_kaddr = 0;
		}
		if (blob->csb_mem_handle != IPC_PORT_NULL) {
			mach_memory_entry_port_release(blob->csb_mem_handle);
		}
		blob->csb_mem_handle = IPC_PORT_NULL;
		OSAddAtomic(-1, &cs_blob_count);
		OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
		kfree(blob, sizeof (*blob));
	}
#if CHECK_CS_VALIDATION_BITMAP
	ubc_cs_validation_bitmap_deallocate( uip->ui_vnode );
#endif
	uip->cs_blobs = NULL;
}

struct cs_blob *
ubc_get_cs_blobs(
	struct vnode	*vp)
{
	struct ubc_info	*uip;
	struct cs_blob	*blobs;

	/*
	 * No need to take the vnode lock here.  The caller must be holding
	 * a reference on the vnode (via a VM mapping or open file descriptor),
	 * so the vnode will not go away.  The ubc_info stays until the vnode
	 * goes away.  And we only modify "blobs" by adding to the head of the
	 * list.
	 * The ubc_info could go away entirely if the vnode gets reclaimed as
	 * part of a forced unmount.  In the case of a code-signature validation
	 * during a page fault, the "paging_in_progress" reference on the VM
	 * object guarantees that the vnode pager (and the ubc_info) won't go
	 * away during the fault.
	 * Other callers need to protect against vnode reclaim by holding the
	 * vnode lock, for example.
	 */

	if (! UBCINFOEXISTS(vp)) {
		blobs = NULL;
		goto out;
	}

	uip = vp->v_ubcinfo;
	blobs = uip->cs_blobs;

out:
	return blobs;
}

unsigned long cs_validate_page_no_hash = 0;
unsigned long cs_validate_page_bad_hash = 0;
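
/*
 * cs_validate_page
 *
 * Compare a page's actual hash against the expected hash recorded in the
 * code-signature blob covering it, mapping the blob into the kernel if it
 * is not already mapped.  Returns TRUE if an expected hash was found and
 * the comparison was performed; *tainted is set to TRUE when the hashes
 * do not match.
 */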
boolean_t
cs_validate_page(
	void			*_blobs,
	memory_object_t		pager,
	memory_object_offset_t	page_offset,
	const void		*data,
	boolean_t		*tainted)
{
	SHA1_CTX		sha1ctxt;
	unsigned char		actual_hash[SHA1_RESULTLEN];
	unsigned char		expected_hash[SHA1_RESULTLEN];
	boolean_t		found_hash;
	struct cs_blob		*blobs, *blob;
	const CS_CodeDirectory	*cd;
	const CS_SuperBlob	*embedded;
	const unsigned char	*hash;
	boolean_t		validated;
	off_t			offset;	/* page offset in the file */
	size_t			size;
	off_t			codeLimit = 0;
	char			*lower_bound, *upper_bound;
	vm_offset_t		kaddr, blob_addr;
	vm_size_t		ksize;
	kern_return_t		kr;

	offset = page_offset;

	/* retrieve the expected hash */
	found_hash = FALSE;
	blobs = (struct cs_blob *) _blobs;

	for (blob = blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		offset = page_offset - blob->csb_base_offset;
		if (offset < blob->csb_start_offset ||
		    offset >= blob->csb_end_offset) {
			/* our page is not covered by this blob */
			continue;
		}

		/* map the blob in the kernel address space */
		kaddr = blob->csb_mem_kaddr;
		if (kaddr == 0) {
			ksize = (vm_size_t) (blob->csb_mem_size +
					     blob->csb_mem_offset);
			kr = vm_map(kernel_map,
				    &kaddr,
				    ksize,
				    0,
				    VM_FLAGS_ANYWHERE,
				    blob->csb_mem_handle,
				    0,
				    TRUE,
				    VM_PROT_READ,
				    VM_PROT_READ,
				    VM_INHERIT_NONE);
			if (kr != KERN_SUCCESS) {
				/* XXX FBDP what to do !? */
				printf("cs_validate_page: failed to map blob, "
				       "size=0x%lx kr=0x%x\n",
				       (size_t)blob->csb_mem_size, kr);
				break;
			}
		}
		blob_addr = kaddr + blob->csb_mem_offset;

		lower_bound = CAST_DOWN(char *, blob_addr);
		upper_bound = lower_bound + blob->csb_mem_size;

		embedded = (const CS_SuperBlob *) blob_addr;
		cd = findCodeDirectory(embedded, lower_bound, upper_bound);
		if (cd != NULL) {
			if (cd->pageSize != PAGE_SHIFT ||
			    cd->hashType != 0x1 ||
			    cd->hashSize != SHA1_RESULTLEN) {
				/* bogus blob ? */
				continue;
			}

			offset = page_offset - blob->csb_base_offset;
			if (offset < blob->csb_start_offset ||
			    offset >= blob->csb_end_offset) {
				/* our page is not covered by this blob */
				continue;
			}

			codeLimit = ntohl(cd->codeLimit);
			hash = hashes(cd, atop(offset),
				      lower_bound, upper_bound);
			if (hash != NULL) {
				bcopy(hash, expected_hash,
				      sizeof (expected_hash));
				found_hash = TRUE;
			}

			break;
		}
	}

	if (found_hash == FALSE) {
		/*
		 * We can't verify this page because there is no signature
		 * for it (yet).  It's possible that this part of the object
		 * is not signed, or that signatures for that part have not
		 * been loaded yet.
		 * Report that the page has not been validated and let the
		 * caller decide if it wants to accept it or not.
		 */
		cs_validate_page_no_hash++;
		if (cs_debug > 1) {
			printf("CODE SIGNING: cs_validate_page: "
			       "mobj %p off 0x%llx: no hash to validate !?\n",
			       pager, page_offset);
		}
		validated = FALSE;
		*tainted = FALSE;
	} else {

		size = PAGE_SIZE;
		const uint32_t *asha1, *esha1;
		if ((off_t)(offset + size) > codeLimit) {
			/* partial page at end of segment */
			assert(offset < codeLimit);
			size = (size_t) (codeLimit & PAGE_MASK);
		}
		/* compute the actual page's SHA1 hash */
#ifdef _NOTYET
		SHA1Init(&sha1ctxt);
		SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
		SHA1Final(actual_hash, &sha1ctxt);
#else
		bzero(actual_hash, SHA1_RESULTLEN);
		bzero(expected_hash, SHA1_RESULTLEN);
#endif


		asha1 = (const uint32_t *) actual_hash;
		esha1 = (const uint32_t *) expected_hash;

		if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
			if (cs_debug) {
				printf("CODE SIGNING: cs_validate_page: "
				       "mobj %p off 0x%llx size 0x%lx: "
				       "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
				       "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
				       pager, page_offset, size,
				       asha1[0], asha1[1], asha1[2],
				       asha1[3], asha1[4],
				       esha1[0], esha1[1], esha1[2],
				       esha1[3], esha1[4]);
			}
			cs_validate_page_bad_hash++;
			*tainted = TRUE;
		} else {
			if (cs_debug > 1) {
				printf("CODE SIGNING: cs_validate_page: "
				       "mobj %p off 0x%llx size 0x%lx: "
				       "SHA1 OK\n",
				       pager, page_offset, size);
			}
			*tainted = FALSE;
		}
		validated = TRUE;
	}

	return validated;
}

int
ubc_cs_getcdhash(
	vnode_t		vp,
	off_t		offset,
	unsigned char	*cdhash)
{
	struct cs_blob	*blobs, *blob;
	off_t		rel_offset;
	int		ret;

	vnode_lock(vp);

	blobs = ubc_get_cs_blobs(vp);
	for (blob = blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		/* compute offset relative to this blob */
		rel_offset = offset - blob->csb_base_offset;
		if (rel_offset >= blob->csb_start_offset &&
		    rel_offset < blob->csb_end_offset) {
			/* this blob does cover our "offset" ! */
			break;
		}
	}

	if (blob == NULL) {
		/* we didn't find a blob covering "offset" */
		ret = EBADEXEC; /* XXX any better error ? */
	} else {
		/* get the SHA1 hash of that blob */
		bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
		ret = 0;
	}

	vnode_unlock(vp);

	return ret;
}
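
/*
 * Example (illustrative sketch; fetches the cdhash for the blob covering
 * file offset 0, e.g. for a signature that starts at the file's base):
 *
 *	unsigned char cdhash[SHA1_RESULTLEN];
 *
 *	if (ubc_cs_getcdhash(vp, 0, cdhash) == 0) {
 *		// cdhash now holds the 20-byte SHA1 identifying the blob
 *	}
 */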

#if CHECK_CS_VALIDATION_BITMAP
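/* stob(s): bytes of file size "s" -> bytes of bitmap needed, one bit per page, rounded up to a whole byte */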
#define stob(s)	((atop_64((s)) + 07) >> 3)
extern	boolean_t	root_fs_upgrade_try;

/*
 * Should we use the code-sign bitmap to avoid repeated code-sign validation?
 * Depends:
 * a) Is the target vnode on the root filesystem?
 * b) Has someone tried to mount the root filesystem read-write?
 * If answers are (a) yes AND (b) no, then we can use the bitmap.
 */
#define USE_CODE_SIGN_BITMAP(vp)	( (vp != NULL) && (vp->v_mount != NULL) && (vp->v_mount->mnt_flag & MNT_ROOTFS) && !root_fs_upgrade_try)
kern_return_t
ubc_cs_validation_bitmap_allocate(
	vnode_t		vp)
{
	kern_return_t	kr = KERN_SUCCESS;
	struct ubc_info *uip;
	char		*target_bitmap;
	vm_object_size_t	bitmap_size;

	if ( ! USE_CODE_SIGN_BITMAP(vp) || (! UBCINFOEXISTS(vp))) {
		kr = KERN_INVALID_ARGUMENT;
	} else {
		uip = vp->v_ubcinfo;

		if ( uip->cs_valid_bitmap == NULL ) {
			bitmap_size = stob(uip->ui_size);
			target_bitmap = (char*) kalloc( (vm_size_t)bitmap_size );
			if (target_bitmap == 0) {
				kr = KERN_NO_SPACE;
			} else {
				kr = KERN_SUCCESS;
			}
			if( kr == KERN_SUCCESS ) {
				memset( target_bitmap, 0, (size_t)bitmap_size);
				uip->cs_valid_bitmap = (void*)target_bitmap;
				uip->cs_valid_bitmap_size = bitmap_size;
			}
		}
	}
	return kr;
}

kern_return_t
ubc_cs_check_validation_bitmap (
	vnode_t			vp,
	memory_object_offset_t		offset,
	int			optype)
{
	kern_return_t	kr = KERN_SUCCESS;

	if ( ! USE_CODE_SIGN_BITMAP(vp) || ! UBCINFOEXISTS(vp)) {
		kr = KERN_INVALID_ARGUMENT;
	} else {
		struct ubc_info *uip = vp->v_ubcinfo;
		char		*target_bitmap = uip->cs_valid_bitmap;

		if ( target_bitmap == NULL ) {
		       kr = KERN_INVALID_ARGUMENT;
		} else {
			uint64_t	bit, byte;
			bit = atop_64( offset );
			byte = bit >> 3;

			if ( byte >= uip->cs_valid_bitmap_size ) {
			       kr = KERN_INVALID_ARGUMENT;
			} else {

				if (optype == CS_BITMAP_SET) {
					target_bitmap[byte] |= (1 << (bit & 07));
					kr = KERN_SUCCESS;
				} else if (optype == CS_BITMAP_CLEAR) {
					target_bitmap[byte] &= ~(1 << (bit & 07));
					kr = KERN_SUCCESS;
				} else if (optype == CS_BITMAP_CHECK) {
					if ( target_bitmap[byte] & (1 << (bit & 07))) {
						kr = KERN_SUCCESS;
					} else {
						kr = KERN_FAILURE;
					}
				}
			}
		}
	}
	return kr;
}
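
/*
 * Example (illustrative sketch; records a page as validated, then checks
 * it later; "vp" and "page_offset" are the caller's):
 *
 *	(void) ubc_cs_check_validation_bitmap(vp, page_offset, CS_BITMAP_SET);
 *	...
 *	if (ubc_cs_check_validation_bitmap(vp, page_offset,
 *	    CS_BITMAP_CHECK) == KERN_SUCCESS) {
 *		// page was previously validated; re-validation can be skipped
 *	}
 */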

void
ubc_cs_validation_bitmap_deallocate(
	vnode_t		vp)
{
	struct ubc_info *uip;
	void		*target_bitmap;
	vm_object_size_t	bitmap_size;

	if ( UBCINFOEXISTS(vp)) {
		uip = vp->v_ubcinfo;

		if ( (target_bitmap = uip->cs_valid_bitmap) != NULL ) {
			bitmap_size = uip->cs_valid_bitmap_size;
			kfree( target_bitmap, (vm_size_t) bitmap_size );
			uip->cs_valid_bitmap = NULL;
		}
	}
}
#else
kern_return_t	ubc_cs_validation_bitmap_allocate(__unused vnode_t vp){
	return KERN_INVALID_ARGUMENT;
}

kern_return_t ubc_cs_check_validation_bitmap(
	__unused struct vnode *vp,
	__unused memory_object_offset_t offset,
	__unused int optype){

	return KERN_INVALID_ARGUMENT;
}

void	ubc_cs_validation_bitmap_deallocate(__unused vnode_t vp){
	return;
}
#endif /* CHECK_CS_VALIDATION_BITMAP */