1/*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *	This product includes software developed by the University of
49 *	California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)vfs_lookup.c	8.10 (Berkeley) 5/27/95
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections.  This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/syslimits.h>
78#include <sys/time.h>
79#include <sys/namei.h>
80#include <sys/vm.h>
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/errno.h>
84#include <sys/malloc.h>
85#include <sys/filedesc.h>
86#include <sys/proc_internal.h>
87#include <sys/kdebug.h>
88#include <sys/unistd.h>		/* For _PC_NAME_MAX */
89#include <sys/uio_internal.h>
90#include <sys/kauth.h>
91
92#include <security/audit/audit.h>
93
94#if CONFIG_MACF
95#include <security/mac_framework.h>
96#endif
97
98#if NAMEDRSRCFORK
99#include <sys/xattr.h>
100#endif
/*
 * The minimum length of a volfs-style pathname is 9 characters.
 * Example:  "/.vol/1/2"
 */
105#define VOLFS_MIN_PATH_LEN  9
106
107
/*
 * File-local trace helper; lookup_handle_found_vnode() calls it with the
 * final vnode and component name when kdebug_enable is set.
 */
static	void kdebug_lookup(struct vnode *dp, struct componentname *cnp);

#if CONFIG_VOLFS
/*
 * File-local helper; namei() hands it the text that follows "/.vol/" plus a
 * MAXPATHLEN-sized buffer that receives the translated path.
 */
static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx);
#endif

/*
 * Prototypes for the helper routines that the lookup path is split into.
 * NOTE(review): these are declared without `static`; presumably they are
 * referenced from other files -- confirm before narrowing their linkage.
 */
boolean_t 	lookup_continue_ok(struct nameidata *ndp);
int		lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
int 		lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
int		lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
void		lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
int		lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
int		lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
			int vbusyflags, int *keep_going, int nc_generation,
			int wantparent, int atroot, vfs_context_t ctx);
int 		lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
124
125/*
126 * Convert a pathname into a pointer to a locked inode.
127 *
128 * The FOLLOW flag is set when symbolic links are to be followed
129 * when they occur at the end of the name translation process.
130 * Symbolic links are always followed for all other pathname
131 * components other than the last.
132 *
133 * The segflg defines whether the name is to be copied from user
134 * space or kernel space.
135 *
136 * Overall outline of namei:
137 *
138 *	copy in name
139 *	get starting directory
140 *	while (!done && !error) {
141 *		call lookup to search path.
142 *		if symbolic link, massage name in buffer and continue
143 *	}
144 *
145 * Returns:	0			Success
146 *		ENOENT			No such file or directory
147 *		ELOOP			Too many levels of symbolic links
148 *		ENAMETOOLONG		Filename too long
149 *		copyinstr:EFAULT	Bad address
150 *		copyinstr:ENAMETOOLONG	Filename too long
151 *		lookup:EBADF		Bad file descriptor
152 *		lookup:EROFS
153 *		lookup:EACCES
154 *		lookup:EPERM
155 *		lookup:ERECYCLE	 vnode was recycled from underneath us in lookup.
156 *						 This means we should re-drive lookup from this point.
157 *		lookup: ???
158 *		VNOP_READLINK:???
159 */
160int
161namei(struct nameidata *ndp)
162{
163	struct filedesc *fdp;	/* pointer to file descriptor state */
164	struct vnode *dp;	/* the directory we are searching */
165	struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
166										   	heavy vnode pressure */
167	u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
168	int error;
169	struct componentname *cnp = &ndp->ni_cnd;
170	vfs_context_t ctx = cnp->cn_context;
171	proc_t p = vfs_context_proc(ctx);
172#if CONFIG_AUDIT
173/* XXX ut should be from context */
174	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
175#endif
176
177	fdp = p->p_fd;
178
179#if DIAGNOSTIC
180	if (!vfs_context_ucred(ctx) || !p)
181		panic ("namei: bad cred/proc");
182	if (cnp->cn_nameiop & (~OPMASK))
183		panic ("namei: nameiop contaminated with flags");
184	if (cnp->cn_flags & OPMASK)
185		panic ("namei: flags contaminated with nameiops");
186#endif
187
188	/*
189	 * A compound VNOP found something that needs further processing:
190	 * either a trigger vnode, a covered directory, or a symlink.
191	 */
192	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
193		int rdonly, vbusyflags, keep_going, wantparent;
194
195		rdonly = cnp->cn_flags & RDONLY;
196		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
197		keep_going = 0;
198		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
199
200		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
201
202		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
203				&keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
204		if (error)
205			goto out_drop;
206		if (keep_going) {
207			if ((cnp->cn_flags & ISSYMLINK) == 0) {
208				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
209			}
210			goto continue_symlink;
211		}
212
213		return 0;
214
215	}
216
217vnode_recycled:
218
219	/*
220	 * Get a buffer for the name to be translated, and copy the
221	 * name into the buffer.
222	 */
223	if ((cnp->cn_flags & HASBUF) == 0) {
224		cnp->cn_pnbuf = ndp->ni_pathbuf;
225		cnp->cn_pnlen = PATHBUFLEN;
226	}
227#if LP64_DEBUG
228	if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0)
229		&& (ndp->ni_segflg != UIO_SYSSPACE)
230		&& (ndp->ni_segflg != UIO_SYSSPACE32)) {
231		panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__);
232	}
233#endif /* LP64_DEBUG */
234
235retry_copy:
236	if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
237		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
238			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
239	} else {
240		error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
241			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
242	}
243	if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
244		MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
245		if (cnp->cn_pnbuf == NULL) {
246			error = ENOMEM;
247			goto error_out;
248		}
249
250		cnp->cn_flags |= HASBUF;
251		cnp->cn_pnlen = MAXPATHLEN;
252
253		goto retry_copy;
254	}
255	if (error)
256	        goto error_out;
257
258#if CONFIG_VOLFS
259 	/*
260	 * Check for legacy volfs style pathnames.
261	 *
262	 * For compatibility reasons we currently allow these paths,
263	 * but future versions of the OS may not support them.
264	 */
265	if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
266	    cnp->cn_pnbuf[0] == '/' &&
267	    cnp->cn_pnbuf[1] == '.' &&
268	    cnp->cn_pnbuf[2] == 'v' &&
269	    cnp->cn_pnbuf[3] == 'o' &&
270	    cnp->cn_pnbuf[4] == 'l' &&
271	    cnp->cn_pnbuf[5] == '/' ) {
272		char * realpath;
273		int realpath_err;
274		/* Attempt to resolve a legacy volfs style pathname. */
275		MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
276		if (realpath) {
277			/*
278			 * We only error out on the ENAMETOOLONG cases where we know that
279			 * vfs_getrealpath translation succeeded but the path could not fit into
280			 * MAXPATHLEN characters.  In other failure cases, we may be dealing with a path
281			 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
282			 */
283			if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) {
284				FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI);
285				if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){
286					error = ENAMETOOLONG;
287					goto error_out;
288				}
289			} else {
290				if (cnp->cn_flags & HASBUF) {
291					FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
292				}
293				cnp->cn_pnbuf = realpath;
294				cnp->cn_pnlen = MAXPATHLEN;
295				ndp->ni_pathlen = strlen(realpath) + 1;
296				cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
297			}
298		}
299	}
300#endif /* CONFIG_VOLFS */
301
302#if CONFIG_AUDIT
303	/* If we are auditing the kernel pathname, save the user pathname */
304	if (cnp->cn_flags & AUDITVNPATH1)
305		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
306	if (cnp->cn_flags & AUDITVNPATH2)
307		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
308#endif /* CONFIG_AUDIT */
309
310	/*
311	 * Do not allow empty pathnames
312	 */
313	if (*cnp->cn_pnbuf == '\0') {
314		error = ENOENT;
315		goto error_out;
316	}
317	ndp->ni_loopcnt = 0;
318
319	/*
320	 * determine the starting point for the translation.
321	 */
322	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) {
323	        if ( !(fdp->fd_flags & FD_CHROOT))
324		        ndp->ni_rootdir = rootvnode;
325	}
326	cnp->cn_nameptr = cnp->cn_pnbuf;
327
328	ndp->ni_usedvp = NULLVP;
329
330	if (*(cnp->cn_nameptr) == '/') {
331	        while (*(cnp->cn_nameptr) == '/') {
332		        cnp->cn_nameptr++;
333			ndp->ni_pathlen--;
334		}
335		dp = ndp->ni_rootdir;
336	} else if (cnp->cn_flags & USEDVP) {
337	        dp = ndp->ni_dvp;
338		ndp->ni_usedvp = dp;
339	} else
340	        dp = vfs_context_cwd(ctx);
341
342	if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
343	        error = ENOENT;
344		goto error_out;
345	}
346	ndp->ni_dvp = NULLVP;
347	ndp->ni_vp  = NULLVP;
348
349	for (;;) {
350		ndp->ni_startdir = dp;
351
352		if ( (error = lookup(ndp)) ) {
353			goto error_out;
354		}
355		/*
356		 * Check for symbolic link
357		 */
358		if ((cnp->cn_flags & ISSYMLINK) == 0) {
359			return (0);
360		}
361
362continue_symlink:
363		/* Gives us a new path to process, and a starting dir */
364		error = lookup_handle_symlink(ndp, &dp, ctx);
365		if (error != 0) {
366			break;
367		}
368	}
369	/*
370	 * only come here if we fail to handle a SYMLINK...
371	 * if either ni_dvp or ni_vp is non-NULL, then
372	 * we need to drop the iocount that was picked
373	 * up in the lookup routine
374	 */
375out_drop:
376	if (ndp->ni_dvp)
377	        vnode_put(ndp->ni_dvp);
378	if (ndp->ni_vp)
379	        vnode_put(ndp->ni_vp);
380 error_out:
381	if ( (cnp->cn_flags & HASBUF) ) {
382		cnp->cn_flags &= ~HASBUF;
383		FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
384	}
385	cnp->cn_pnbuf = NULL;
386	ndp->ni_vp = NULLVP;
387	ndp->ni_dvp = NULLVP;
388	if (error == ERECYCLE){
389		/* vnode was recycled underneath us. re-drive lookup to start at
390		   the beginning again, since recycling invalidated last lookup*/
391		ndp->ni_cnd.cn_flags = cnpflags;
392		ndp->ni_dvp = usedvp;
393		goto vnode_recycled;
394	}
395
396
397	return (error);
398}
399
400int
401namei_compound_available(vnode_t dp, struct nameidata *ndp)
402{
403	if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) {
404		return vnode_compound_open_available(dp);
405	}
406
407	return 0;
408}
409int
410lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
411{
412	int error;
413
414	if (!dp_authorized_in_cache) {
415		error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
416		if (error)
417			return error;
418	}
419#if CONFIG_MACF
420	error = mac_vnode_check_lookup(ctx, dp, cnp);
421	if (error)
422		return error;
423#endif /* CONFIG_MACF */
424
425	return 0;
426}
427
428void
429lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation)
430{
431	int isdot_or_dotdot;
432	isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
433
434	if (vp->v_name == NULL || vp->v_parent == NULLVP) {
435		int  update_flags = 0;
436
437		if (isdot_or_dotdot == 0) {
438			if (vp->v_name == NULL)
439				update_flags |= VNODE_UPDATE_NAME;
440			if (dvp != NULLVP && vp->v_parent == NULLVP)
441				update_flags |= VNODE_UPDATE_PARENT;
442
443			if (update_flags)
444				vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
445		}
446	}
447	if ( (cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) {
448		/*
449		 * missing from name cache, but should
450		 * be in it... this can happen if volfs
451		 * causes the vnode to be created or the
452		 * name cache entry got recycled but the
453		 * vnode didn't...
454		 * check to make sure that ni_dvp is valid
455		 * cache_lookup_path may return a NULL
456		 * do a quick check to see if the generation of the
457		 * directory matches our snapshot... this will get
458		 * rechecked behind the name cache lock, but if it
459		 * already fails to match, no need to go any further
460		 */
461		if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot))
462			cache_enter_with_gen(dvp, vp, cnp, nc_generation);
463	}
464
465}
466
467#if NAMEDRSRCFORK
468/*
469 * Can change ni_dvp and ni_vp.  On success, returns with iocounts on stream vnode (always) and
470 * data fork if requested.  On failure, returns with iocount data fork (always) and its parent directory
471 * (if one was provided).
472 */
473int
474lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
475{
476	vnode_t svp = NULLVP;
477	enum nsoperation nsop;
478	int error;
479
480	if (dp->v_type != VREG) {
481		error = ENOENT;
482		goto out;
483	}
484	switch (cnp->cn_nameiop) {
485		case DELETE:
486			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
487				nsop = NS_DELETE;
488			} else {
489				error = EPERM;
490				goto out;
491			}
492			break;
493		case CREATE:
494			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
495				nsop = NS_CREATE;
496			} else {
497				error = EPERM;
498				goto out;
499			}
500			break;
501		case LOOKUP:
502			/* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
503			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
504				nsop = NS_OPEN;
505			} else {
506				error = EPERM;
507				goto out;
508			}
509			break;
510		default:
511			error = EPERM;
512			goto out;
513	}
514	/* Ask the file system for the resource fork. */
515	error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx);
516
517	/* During a create, it OK for stream vnode to be missing. */
518	if (error == ENOATTR || error == ENOENT) {
519		error = (nsop == NS_CREATE) ? 0 : ENOENT;
520	}
521	if (error) {
522		goto out;
523	}
524	/* The "parent" of the stream is the file. */
525	if (wantparent) {
526		if (ndp->ni_dvp) {
527#if CONFIG_VFS_FUNNEL
528			if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) {
529				ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
530				unlock_fsnode(ndp->ni_dvp, NULL);
531			}
532#endif /* CONFIG_VFS_FUNNEL */
533			vnode_put(ndp->ni_dvp);
534		}
535		ndp->ni_dvp = dp;
536	} else {
537		vnode_put(dp);
538	}
539	ndp->ni_vp = svp;  /* on create this may be null */
540
541	/* Restore the truncated pathname buffer (for audits). */
542	if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
543		ndp->ni_next[0] = '/';
544	}
545	cnp->cn_flags  &= ~MAKEENTRY;
546
547	return 0;
548out:
549	return error;
550}
551#endif /* NAMEDRSRCFORK */
552
553/*
554 * iocounts in:
555 * 	--One on ni_vp.  One on ni_dvp if there is more path, or we didn't come through the
556 * 	cache, or we came through the cache and the caller doesn't want the parent.
557 *
558 * iocounts out:
559 *	--Leaves us in the correct state for the next step, whatever that might be.
560 *	--If we find a symlink, returns with iocounts on both ni_vp and ni_dvp.
561 *	--If we are to look up another component, then we have an iocount on ni_vp and
562 *	nothing else.
563 *	--If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags.
564 *	--In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
565 *	was dropped).
566 */
567int
568lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
569		int vbusyflags, int *keep_going, int nc_generation,
570		int wantparent, int atroot, vfs_context_t ctx)
571{
572	vnode_t dp;
573	int error;
574	char *cp;
575
576	dp = ndp->ni_vp;
577	*keep_going = 0;
578
579	if (ndp->ni_vp == NULLVP) {
580		panic("NULL ni_vp in %s\n", __FUNCTION__);
581	}
582
583	if (atroot) {
584		goto nextname;
585	}
586
587#if CONFIG_TRIGGERS
588	if (dp->v_resolve) {
589		error = vnode_trigger_resolve(dp, ndp, ctx);
590		if (error) {
591			goto out;
592		}
593	}
594#endif /* CONFIG_TRIGGERS */
595
596	/*
597	 * Take into account any additional components consumed by
598	 * the underlying filesystem.
599	 */
600	if (cnp->cn_consume > 0) {
601		cnp->cn_nameptr += cnp->cn_consume;
602		ndp->ni_next += cnp->cn_consume;
603		ndp->ni_pathlen -= cnp->cn_consume;
604		cnp->cn_consume = 0;
605	} else {
606		lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation);
607	}
608
609	/*
610	 * Check to see if the vnode has been mounted on...
611	 * if so find the root of the mounted file system.
612	 * Updates ndp->ni_vp.
613	 */
614	error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx);
615	dp = ndp->ni_vp;
616	if (error) {
617		goto out;
618	}
619
620#if CONFIG_MACF
621	if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
622		error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
623		if (error)
624			goto out;
625	}
626#endif
627
628	/*
629	 * Check for symbolic link
630	 */
631	if ((dp->v_type == VLNK) &&
632	    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
633		cnp->cn_flags |= ISSYMLINK;
634		*keep_going = 1;
635		return (0);
636	}
637
638	/*
639	 * Check for bogus trailing slashes.
640	 */
641	if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
642		if (dp->v_type != VDIR) {
643			error = ENOTDIR;
644			goto out;
645		}
646		ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
647	}
648
649#if NAMEDSTREAMS
650	/*
651	 * Deny namei/lookup requests to resolve paths that point to shadow files.
652	 * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP
653	 * directly, and not use lookup/namei
654	 */
655	if (vnode_isshadow (dp)) {
656		error = ENOENT;
657		goto out;
658	}
659#endif
660
661nextname:
662	/*
663	 * Not a symbolic link.  If more pathname,
664	 * continue at next component, else return.
665	 *
666	 * Definitely have a dvp if there's another slash
667	 */
668	if (*ndp->ni_next == '/') {
669		cnp->cn_nameptr = ndp->ni_next + 1;
670		ndp->ni_pathlen--;
671		while (*cnp->cn_nameptr == '/') {
672			cnp->cn_nameptr++;
673			ndp->ni_pathlen--;
674		}
675
676		cp = cnp->cn_nameptr;
677		vnode_put(ndp->ni_dvp);
678		ndp->ni_dvp = NULLVP;
679
680		if (*cp == '\0') {
681			goto emptyname;
682		}
683
684		*keep_going = 1;
685		return 0;
686	}
687
688	/*
689	 * Disallow directory write attempts on read-only file systems.
690	 */
691	if (rdonly &&
692	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
693		error = EROFS;
694		goto out;
695	}
696
697	/* If SAVESTART is set, we should have a dvp */
698	if (cnp->cn_flags & SAVESTART) {
699	        /*
700		 * note that we already hold a reference
701		 * on both dp and ni_dvp, but for some reason
702		 * can't get another one... in this case we
703		 * need to do vnode_put on dp in 'bad2'
704		 */
705	        if ( (vnode_get(ndp->ni_dvp)) ) {
706		        error = ENOENT;
707			goto out;
708		}
709		ndp->ni_startdir = ndp->ni_dvp;
710	}
711	if (!wantparent && ndp->ni_dvp) {
712		vnode_put(ndp->ni_dvp);
713		ndp->ni_dvp = NULLVP;
714	}
715
716	if (cnp->cn_flags & AUDITVNPATH1)
717		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
718	else if (cnp->cn_flags & AUDITVNPATH2)
719		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
720
721#if NAMEDRSRCFORK
722	/*
723	 * Caller wants the resource fork.
724	 */
725	if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
726		error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx);
727		if (error != 0)
728			goto out;
729
730		dp = ndp->ni_vp;
731	}
732#endif
733	if (kdebug_enable)
734	        kdebug_lookup(dp, cnp);
735
736	return 0;
737
738emptyname:
739	error = lookup_handle_emptyname(ndp, cnp, wantparent);
740	if (error != 0)
741		goto out;
742
743	return 0;
744out:
745	return error;
746
747}
748
749/*
750 * Comes in iocount on ni_vp.  May overwrite ni_dvp, but doesn't interpret incoming value.
751 */
752int
753lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
754{
755	vnode_t dp;
756	int error = 0;
757
758	dp = ndp->ni_vp;
759	cnp->cn_namelen = 0;
760	/*
761	 * A degenerate name (e.g. / or "") which is a way of
762	 * talking about a directory, e.g. like "/." or ".".
763	 */
764	if (dp->v_type != VDIR) {
765		error = ENOTDIR;
766		goto out;
767	}
768	if (cnp->cn_nameiop != LOOKUP) {
769		error = EISDIR;
770		goto out;
771	}
772	if (wantparent) {
773	        /*
774		 * note that we already hold a reference
775		 * on dp, but for some reason can't
776		 * get another one... in this case we
777		 * need to do vnode_put on dp in 'bad'
778		 */
779	        if ( (vnode_get(dp)) ) {
780		        error = ENOENT;
781			goto out;
782		}
783		ndp->ni_dvp = dp;
784	}
785	cnp->cn_flags &= ~ISDOTDOT;
786	cnp->cn_flags |= ISLASTCN;
787	ndp->ni_next = cnp->cn_nameptr;
788	ndp->ni_vp = dp;
789
790	if (cnp->cn_flags & AUDITVNPATH1)
791		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
792	else if (cnp->cn_flags & AUDITVNPATH2)
793		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
794	if (cnp->cn_flags & SAVESTART)
795		panic("lookup: SAVESTART");
796
797	return 0;
798out:
799	return error;
800}
801/*
802 * Search a pathname.
803 * This is a very central and rather complicated routine.
804 *
805 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
806 * The starting directory is taken from ni_startdir. The pathname is
807 * descended until done, or a symbolic link is encountered. The variable
808 * ni_more is clear if the path is completed; it is set to one if a
809 * symbolic link needing interpretation is encountered.
810 *
811 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
812 * whether the name is to be looked up, created, renamed, or deleted.
813 * When CREATE, RENAME, or DELETE is specified, information usable in
814 * creating, renaming, or deleting a directory entry may be calculated.
815 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
816 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
817 * returned unlocked. Otherwise the parent directory is not returned. If
818 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
819 * the target is returned locked, otherwise it is returned unlocked.
820 * When creating or renaming and LOCKPARENT is specified, the target may not
821 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
822 *
823 * Overall outline of lookup:
824 *
825 * dirloop:
826 *	identify next component of name at ndp->ni_ptr
827 *	handle degenerate case where name is null string
828 *	if .. and crossing mount points and on mounted filesys, find parent
829 *	call VNOP_LOOKUP routine for next component name
830 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
831 *	    component vnode returned in ni_vp (if it exists), locked.
832 *	if result vnode is mounted on and crossing mount points,
833 *	    find mounted on vnode
834 *	if more components of name, do next level at dirloop
835 *	return the answer in ni_vp, locked if LOCKLEAF set
836 *	    if LOCKPARENT set, return locked parent in ni_dvp
837 *	    if WANTPARENT set, return unlocked parent in ni_dvp
838 *
839 * Returns:	0			Success
840 *		ENOENT			No such file or directory
841 *		EBADF			Bad file descriptor
842 *		ENOTDIR			Not a directory
843 *		EROFS			Read-only file system [CREATE]
844 *		EISDIR			Is a directory [CREATE]
845 *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
846 *		vnode_authorize:EROFS
847 *		vnode_authorize:EACCES
848 *		vnode_authorize:EPERM
849 *		vnode_authorize:???
850 *		VNOP_LOOKUP:ENOENT	No such file or directory
851 *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
852 *		VNOP_LOOKUP:???
853 *		VFS_ROOT:ENOTSUP
854 *		VFS_ROOT:ENOENT
855 *		VFS_ROOT:???
856 */
857int
858lookup(struct nameidata *ndp)
859{
860	char	*cp;		/* pointer into pathname argument */
861	vnode_t		tdp;		/* saved dp */
862	vnode_t		dp;		/* the directory we are searching */
863	int docache = 1;		/* == 0 do not cache last component */
864	int wantparent;			/* 1 => wantparent or lockparent flag */
865	int rdonly;			/* lookup read-only flag bit */
866	int dp_authorized = 0;
867	int error = 0;
868	struct componentname *cnp = &ndp->ni_cnd;
869	vfs_context_t ctx = cnp->cn_context;
870	int vbusyflags = 0;
871	int nc_generation = 0;
872	vnode_t last_dp = NULLVP;
873	int keep_going;
874	int atroot;
875
876	/*
877	 * Setup: break out flag bits into variables.
878	 */
879	if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
880	        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
881		        docache = 0;
882	}
883	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
884	rdonly = cnp->cn_flags & RDONLY;
885	cnp->cn_flags &= ~ISSYMLINK;
886	cnp->cn_consume = 0;
887
888	dp = ndp->ni_startdir;
889	ndp->ni_startdir = NULLVP;
890
891	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
892			vbusyflags = LK_NOWAIT;
893	cp = cnp->cn_nameptr;
894
895	if (*cp == '\0') {
896	        if ( (vnode_getwithref(dp)) ) {
897			dp = NULLVP;
898		        error = ENOENT;
899			goto bad;
900		}
901		ndp->ni_vp = dp;
902		error = lookup_handle_emptyname(ndp, cnp, wantparent);
903		if (error) {
904			goto bad;
905		}
906
907		return 0;
908	}
909dirloop:
910	atroot = 0;
911	ndp->ni_vp = NULLVP;
912
913	if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) {
914		dp = NULLVP;
915		goto bad;
916	}
917	if ((cnp->cn_flags & ISLASTCN)) {
918	        if (docache)
919		        cnp->cn_flags |= MAKEENTRY;
920	} else
921	        cnp->cn_flags |= MAKEENTRY;
922
923	dp = ndp->ni_dvp;
924
925	if (ndp->ni_vp != NULLVP) {
926	        /*
927		 * cache_lookup_path returned a non-NULL ni_vp then,
928		 * we're guaranteed that the dp is a VDIR, it's
929		 * been authorized, and vp is not ".."
930		 *
931		 * make sure we don't try to enter the name back into
932		 * the cache if this vp is purged before we get to that
933		 * check since we won't have serialized behind whatever
934		 * activity is occurring in the FS that caused the purge
935		 */
936	        if (dp != NULLVP)
937		        nc_generation = dp->v_nc_generation - 1;
938
939	        goto returned_from_lookup_path;
940	}
941
942	/*
943	 * Handle "..": two special cases.
944	 * 1. If at root directory (e.g. after chroot)
945	 *    or at absolute root directory
946	 *    then ignore it so can't get out.
947	 * 2. If this vnode is the root of a mounted
948	 *    filesystem, then replace it with the
949	 *    vnode which was mounted on so we take the
950	 *    .. in the other file system.
951	 */
952	if ( (cnp->cn_flags & ISDOTDOT) ) {
953		for (;;) {
954		        if (dp == ndp->ni_rootdir || dp == rootvnode) {
955			        ndp->ni_dvp = dp;
956				ndp->ni_vp = dp;
957				/*
958				 * we're pinned at the root
959				 * we've already got one reference on 'dp'
960				 * courtesy of cache_lookup_path... take
961				 * another one for the ".."
962				 * if we fail to get the new reference, we'll
963				 * drop our original down in 'bad'
964				 */
965				if ( (vnode_get(dp)) ) {
966					error = ENOENT;
967					goto bad;
968				}
969				atroot = 1;
970				goto returned_from_lookup_path;
971			}
972			if ((dp->v_flag & VROOT) == 0 ||
973			    (cnp->cn_flags & NOCROSSMOUNT))
974			        break;
975			if (dp->v_mount == NULL) {	/* forced umount */
976			        error = EBADF;
977				goto bad;
978			}
979			tdp = dp;
980			dp = tdp->v_mount->mnt_vnodecovered;
981
982			vnode_put(tdp);
983
984			if ( (vnode_getwithref(dp)) ) {
985			        dp = NULLVP;
986				error = ENOENT;
987				goto bad;
988			}
989			ndp->ni_dvp = dp;
990			dp_authorized = 0;
991		}
992	}
993
994	/*
995	 * We now have a segment name to search for, and a directory to search.
996	 */
997unionlookup:
998	ndp->ni_vp = NULLVP;
999
1000	if (dp->v_type != VDIR) {
1001	        error = ENOTDIR;
1002	        goto lookup_error;
1003	}
1004	if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) {
1005		error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
1006		if (error) {
1007			goto lookup_error;
1008		}
1009	}
1010
1011	/*
1012	 * Now that we've authorized a lookup, can bail out if the filesystem
1013	 * will be doing a batched operation.  Return an iocount on dvp.
1014	 */
1015#if NAMEDRSRCFORK
1016	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
1017#else
1018	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
1019#endif /* NAMEDRSRCFORK */
1020		ndp->ni_flag |= NAMEI_UNFINISHED;
1021		ndp->ni_ncgeneration = dp->v_nc_generation;
1022		return 0;
1023	}
1024
1025        nc_generation = dp->v_nc_generation;
1026
1027	error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);
1028
1029
1030	if ( error ) {
1031lookup_error:
1032		if ((error == ENOENT) &&
1033		    (dp->v_flag & VROOT) && (dp->v_mount != NULL) &&
1034		    (dp->v_mount->mnt_flag & MNT_UNION)) {
1035#if CONFIG_VFS_FUNNEL
1036		        if ((cnp->cn_flags & FSNODELOCKHELD)) {
1037			        cnp->cn_flags &= ~FSNODELOCKHELD;
1038				unlock_fsnode(dp, NULL);
1039			}
1040#endif /* CONFIG_VFS_FUNNEL */
1041			tdp = dp;
1042			dp = tdp->v_mount->mnt_vnodecovered;
1043
1044			vnode_put(tdp);
1045
1046			if ( (vnode_getwithref(dp)) ) {
1047			        dp = NULLVP;
1048				error = ENOENT;
1049				goto bad;
1050			}
1051			ndp->ni_dvp = dp;
1052			dp_authorized = 0;
1053			goto unionlookup;
1054		}
1055
1056		if (error != EJUSTRETURN)
1057			goto bad;
1058
1059		if (ndp->ni_vp != NULLVP)
1060			panic("leaf should be empty");
1061
1062		error = lookup_validate_creation_path(ndp);
1063		if (error)
1064			goto bad;
1065		/*
1066		 * We return with ni_vp NULL to indicate that the entry
1067		 * doesn't currently exist, leaving a pointer to the
1068		 * referenced directory vnode in ndp->ni_dvp.
1069		 */
1070		if (cnp->cn_flags & SAVESTART) {
1071			if ( (vnode_get(ndp->ni_dvp)) ) {
1072				error = ENOENT;
1073				goto bad;
1074			}
1075			ndp->ni_startdir = ndp->ni_dvp;
1076		}
1077		if (!wantparent)
1078		        vnode_put(ndp->ni_dvp);
1079
1080		if (kdebug_enable)
1081		        kdebug_lookup(ndp->ni_dvp, cnp);
1082		return (0);
1083	}
1084returned_from_lookup_path:
1085	/* We'll always have an iocount on ni_vp when this finishes. */
1086	error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
1087	if (error != 0) {
1088		goto bad2;
1089	}
1090
1091	if (keep_going) {
1092		dp = ndp->ni_vp;
1093
1094		/* namei() will handle symlinks */
1095		if ((dp->v_type == VLNK) &&
1096				((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
1097			return 0;
1098		}
1099
1100		/*
1101		 * Otherwise, there's more path to process.
1102		 * cache_lookup_path is now responsible for dropping io ref on dp
1103		 * when it is called again in the dirloop.  This ensures we hold
1104		 * a ref on dp until we complete the next round of lookup.
1105		 */
1106		last_dp = dp;
1107
1108		goto dirloop;
1109	}
1110
1111	return (0);
1112bad2:
1113#if CONFIG_VFS_FUNNEL
1114	if ((cnp->cn_flags & FSNODELOCKHELD)) {
1115	        cnp->cn_flags &= ~FSNODELOCKHELD;
1116		unlock_fsnode(ndp->ni_dvp, NULL);
1117	}
1118#endif /* CONFIG_VFS_FUNNEL */
1119	if (ndp->ni_dvp)
1120		vnode_put(ndp->ni_dvp);
1121
1122	vnode_put(ndp->ni_vp);
1123	ndp->ni_vp = NULLVP;
1124
1125	if (kdebug_enable)
1126	        kdebug_lookup(dp, cnp);
1127	return (error);
1128
1129bad:
1130#if CONFIG_VFS_FUNNEL
1131	if ((cnp->cn_flags & FSNODELOCKHELD)) {
1132	        cnp->cn_flags &= ~FSNODELOCKHELD;
1133		unlock_fsnode(ndp->ni_dvp, NULL);
1134	}
1135#endif /* CONFIG_VFS_FUNNEL */
1136	if (dp)
1137	        vnode_put(dp);
1138	ndp->ni_vp = NULLVP;
1139
1140	if (kdebug_enable)
1141	        kdebug_lookup(dp, cnp);
1142	return (error);
1143}
1144
1145int
1146lookup_validate_creation_path(struct nameidata *ndp)
1147{
1148	struct componentname *cnp = &ndp->ni_cnd;
1149
1150	/*
1151	 * If creating and at end of pathname, then can consider
1152	 * allowing file to be created.
1153	 */
1154	if (cnp->cn_flags & RDONLY) {
1155		return EROFS;
1156	}
1157	if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
1158		return ENOENT;
1159	}
1160
1161	return 0;
1162}
1163
1164/*
1165 * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
1166 */
1167int
1168lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1169		int vbusyflags, vfs_context_t ctx)
1170{
1171	mount_t mp;
1172	vnode_t tdp;
1173	int error = 0;
1174	uthread_t uth;
1175	uint32_t depth = 0;
1176	int dont_cache_mp = 0;
1177	vnode_t	mounted_on_dp;
1178	int current_mount_generation = 0;
1179
1180	mounted_on_dp = dp;
1181	current_mount_generation = mount_generation;
1182
1183	while ((dp->v_type == VDIR) && dp->v_mountedhere &&
1184			((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
1185#if CONFIG_TRIGGERS
1186		/*
1187		 * For a trigger vnode, call its resolver when crossing its mount (if requested)
1188		 */
1189		if (dp->v_resolve) {
1190			(void) vnode_trigger_resolve(dp, ndp, ctx);
1191		}
1192#endif
1193		vnode_lock(dp);
1194
1195		if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
1196
1197			mp->mnt_crossref++;
1198			vnode_unlock(dp);
1199
1200
1201			if (vfs_busy(mp, vbusyflags)) {
1202				mount_dropcrossref(mp, dp, 0);
1203				if (vbusyflags == LK_NOWAIT) {
1204					error = ENOENT;
1205					goto out;
1206				}
1207
1208				continue;
1209			}
1210
1211
1212			/*
1213			 * XXX - if this is the last component of the
1214			 * pathname, and it's either not a lookup operation
1215			 * or the NOTRIGGER flag is set for the operation,
1216			 * set a uthread flag to let VFS_ROOT() for autofs
1217			 * know it shouldn't trigger a mount.
1218			 */
1219			uth = (struct uthread *)get_bsdthread_info(current_thread());
1220			if ((cnp->cn_flags & ISLASTCN) &&
1221					(cnp->cn_nameiop != LOOKUP ||
1222					 (cnp->cn_flags & NOTRIGGER))) {
1223				uth->uu_notrigger = 1;
1224				dont_cache_mp = 1;
1225			}
1226
1227			error = VFS_ROOT(mp, &tdp, ctx);
1228			/* XXX - clear the uthread flag */
1229			uth->uu_notrigger = 0;
1230
1231			mount_dropcrossref(mp, dp, 0);
1232			vfs_unbusy(mp);
1233
1234			if (error) {
1235				goto out;
1236			}
1237
1238			vnode_put(dp);
1239			ndp->ni_vp = dp = tdp;
1240			depth++;
1241
1242#if CONFIG_TRIGGERS
1243			/*
1244			 * Check if root dir is a trigger vnode
1245			 */
1246			if (dp->v_resolve) {
1247				error = vnode_trigger_resolve(dp, ndp, ctx);
1248				if (error) {
1249					goto out;
1250				}
1251			}
1252#endif
1253
1254		} else {
1255			vnode_unlock(dp);
1256			break;
1257		}
1258	}
1259
1260	if (depth && !dont_cache_mp) {
1261	        mp = mounted_on_dp->v_mountedhere;
1262
1263		if (mp) {
1264		        mount_lock_spin(mp);
1265			mp->mnt_realrootvp_vid = dp->v_id;
1266			mp->mnt_realrootvp = dp;
1267			mp->mnt_generation = current_mount_generation;
1268			mount_unlock(mp);
1269		}
1270	}
1271
1272	return 0;
1273
1274out:
1275	return error;
1276}
1277
1278/*
1279 * Takes ni_vp and ni_dvp non-NULL.  Returns with *new_dp set to the location
1280 * at which to start a lookup with a resolved path, and all other iocounts dropped.
1281 */
1282int
1283lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
1284{
1285	int error;
1286	char *cp;		/* pointer into pathname argument */
1287	uio_t auio;
1288	char uio_buf[ UIO_SIZEOF(1) ];
1289	int need_newpathbuf;
1290	u_int linklen;
1291	struct componentname *cnp = &ndp->ni_cnd;
1292	vnode_t dp;
1293	char *tmppn;
1294
1295#if CONFIG_VFS_FUNNEL
1296	if ((cnp->cn_flags & FSNODELOCKHELD)) {
1297		cnp->cn_flags &= ~FSNODELOCKHELD;
1298		unlock_fsnode(ndp->ni_dvp, NULL);
1299	}
1300#endif /* CONFIG_VFS_FUNNEL */
1301
1302	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
1303		return ELOOP;
1304	}
1305#if CONFIG_MACF
1306	if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0)
1307		return error;
1308#endif /* MAC */
1309	if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF))
1310		need_newpathbuf = 1;
1311	else
1312		need_newpathbuf = 0;
1313
1314	if (need_newpathbuf) {
1315		MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1316		if (cp == NULL) {
1317			return ENOMEM;
1318		}
1319	} else {
1320		cp = cnp->cn_pnbuf;
1321	}
1322	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
1323
1324	uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
1325
1326	error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
1327	if (error) {
1328		if (need_newpathbuf)
1329			FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
1330		return error;
1331	}
1332
1333	/*
1334	 * Safe to set unsigned with a [larger] signed type here
1335	 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
1336	 * is only 1024.
1337	 */
1338	linklen = MAXPATHLEN - (u_int)uio_resid(auio);
1339	if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
1340		if (need_newpathbuf)
1341			FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
1342
1343		return ENAMETOOLONG;
1344	}
1345	if (need_newpathbuf) {
1346		long len = cnp->cn_pnlen;
1347
1348		tmppn = cnp->cn_pnbuf;
1349		bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
1350		cnp->cn_pnbuf = cp;
1351		cnp->cn_pnlen = MAXPATHLEN;
1352
1353		if ( (cnp->cn_flags & HASBUF) )
1354			FREE_ZONE(tmppn, len, M_NAMEI);
1355		else
1356			cnp->cn_flags |= HASBUF;
1357	} else
1358		cnp->cn_pnbuf[linklen] = '\0';
1359
1360	ndp->ni_pathlen += linklen;
1361	cnp->cn_nameptr = cnp->cn_pnbuf;
1362
1363	/*
1364	 * starting point for 'relative'
1365	 * symbolic link path
1366	 */
1367	dp = ndp->ni_dvp;
1368
1369	/*
1370	 * get rid of references returned via 'lookup'
1371	 */
1372	vnode_put(ndp->ni_vp);
1373	vnode_put(ndp->ni_dvp);	/* ALWAYS have a dvp for a symlink */
1374
1375	ndp->ni_vp = NULLVP;
1376	ndp->ni_dvp = NULLVP;
1377
1378	/*
1379	 * Check if symbolic link restarts us at the root
1380	 */
1381	if (*(cnp->cn_nameptr) == '/') {
1382		while (*(cnp->cn_nameptr) == '/') {
1383			cnp->cn_nameptr++;
1384			ndp->ni_pathlen--;
1385		}
1386		if ((dp = ndp->ni_rootdir) == NULLVP) {
1387			return ENOENT;
1388		}
1389	}
1390
1391	*new_dp = dp;
1392
1393	return 0;
1394}
1395
1396/*
1397 * relookup - lookup a path name component
1398 *    Used by lookup to re-aquire things.
1399 */
1400int
1401relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1402{
1403	struct vnode *dp = NULL;		/* the directory we are searching */
1404	int wantparent;			/* 1 => wantparent or lockparent flag */
1405	int rdonly;			/* lookup read-only flag bit */
1406	int error = 0;
1407#ifdef NAMEI_DIAGNOSTIC
1408	int i, newhash;			/* DEBUG: check name hash */
1409	char *cp;			/* DEBUG: check name ptr/len */
1410#endif
1411	vfs_context_t ctx = cnp->cn_context;;
1412
1413	/*
1414	 * Setup: break out flag bits into variables.
1415	 */
1416	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
1417	rdonly = cnp->cn_flags & RDONLY;
1418	cnp->cn_flags &= ~ISSYMLINK;
1419
1420	if (cnp->cn_flags & NOCACHE)
1421	        cnp->cn_flags &= ~MAKEENTRY;
1422	else
1423	        cnp->cn_flags |= MAKEENTRY;
1424
1425	dp = dvp;
1426
1427	/*
1428	 * Check for degenerate name (e.g. / or "")
1429	 * which is a way of talking about a directory,
1430	 * e.g. like "/." or ".".
1431	 */
1432	if (cnp->cn_nameptr[0] == '\0') {
1433		if (cnp->cn_nameiop != LOOKUP || wantparent) {
1434			error = EISDIR;
1435			goto bad;
1436		}
1437		if (dp->v_type != VDIR) {
1438			error = ENOTDIR;
1439			goto bad;
1440		}
1441		if ( (vnode_get(dp)) ) {
1442		        error = ENOENT;
1443			goto bad;
1444		}
1445		*vpp = dp;
1446
1447		if (cnp->cn_flags & SAVESTART)
1448			panic("lookup: SAVESTART");
1449		return (0);
1450	}
1451	/*
1452	 * We now have a segment name to search for, and a directory to search.
1453	 */
1454	if ( (error = VNOP_LOOKUP(dp, vpp, cnp, ctx)) ) {
1455		if (error != EJUSTRETURN)
1456			goto bad;
1457#if DIAGNOSTIC
1458		if (*vpp != NULL)
1459			panic("leaf should be empty");
1460#endif
1461		/*
1462		 * If creating and at end of pathname, then can consider
1463		 * allowing file to be created.
1464		 */
1465		if (rdonly) {
1466			error = EROFS;
1467			goto bad;
1468		}
1469		/*
1470		 * We return with ni_vp NULL to indicate that the entry
1471		 * doesn't currently exist, leaving a pointer to the
1472		 * (possibly locked) directory inode in ndp->ni_dvp.
1473		 */
1474		return (0);
1475	}
1476	dp = *vpp;
1477
1478#if DIAGNOSTIC
1479	/*
1480	 * Check for symbolic link
1481	 */
1482	if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW))
1483		panic ("relookup: symlink found.\n");
1484#endif
1485
1486	/*
1487	 * Disallow directory write attempts on read-only file systems.
1488	 */
1489	if (rdonly &&
1490	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
1491		error = EROFS;
1492		goto bad2;
1493	}
1494	/* ASSERT(dvp == ndp->ni_startdir) */
1495
1496	return (0);
1497
1498bad2:
1499	vnode_put(dp);
1500bad:
1501	*vpp = NULL;
1502
1503	return (error);
1504}
1505
/*
 * If the lookup state records that an fsnode lock is held (FSNODELOCKHELD),
 * clear the flag and unlock ni_dvp.  A no-op when CONFIG_VFS_FUNNEL is not
 * configured.
 */
void
namei_unlock_fsnode(struct nameidata *ndp)
{
#if CONFIG_VFS_FUNNEL
	struct componentname *cnp = &ndp->ni_cnd;

	if ((cnp->cn_flags & FSNODELOCKHELD) == 0) {
		return;
	}

	cnp->cn_flags &= ~FSNODELOCKHELD;
	unlock_fsnode(ndp->ni_dvp, NULL);
#else
	/* Parameter intentionally unused in this configuration. */
	(void)ndp;
#endif /* CONFIG_VFS_FUNNEL */
}
1518
1519/*
1520 * Free pathname buffer
1521 */
1522void
1523nameidone(struct nameidata *ndp)
1524{
1525	namei_unlock_fsnode(ndp);
1526
1527	if (ndp->ni_cnd.cn_flags & HASBUF) {
1528		char *tmp = ndp->ni_cnd.cn_pnbuf;
1529
1530		ndp->ni_cnd.cn_pnbuf = NULL;
1531		ndp->ni_cnd.cn_flags &= ~HASBUF;
1532		FREE_ZONE(tmp, ndp->ni_cnd.cn_pnlen, M_NAMEI);
1533	}
1534}
1535
1536
1537#define NUMPARMS 23
1538
1539/*
1540 * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used
1541 * by fs_usage.  The path up to and including the current component name are
1542 * logged.  Up to NUMPARMS*4 bytes of pathname will be logged.  If the path
1543 * to be logged is longer than that, then the last NUMPARMS*4 bytes are logged.
1544 * That is, the truncation removes the leading portion of the path.
1545 *
1546 * The logging is done via multiple KERNEL_DEBUG_CONSTANT calls.  The first one
1547 * is marked with DBG_FUNC_START.  The last one is marked with DBG_FUNC_END
1548 * (in addition to DBG_FUNC_START if it is also the first).  There may be
1549 * intermediate ones with neither DBG_FUNC_START nor DBG_FUNC_END.
1550 *
1551 * The first KERNEL_DEBUG_CONSTANT passes the vnode pointer and 12 bytes of
1552 * pathname.  The remaining KERNEL_DEBUG_CONSTANT calls add 16 bytes of pathname
1553 * each.  The minimum number of KERNEL_DEBUG_CONSTANT calls required to pass
1554 * the path are used.  Any excess padding in the final KERNEL_DEBUG_CONSTANT
1555 * (because not all of the 12 or 16 bytes are needed for the remainder of the
1556 * path) is set to zero bytes, or '>' if there is more path beyond the
1557 * current component name (usually because an intermediate component was not
1558 * found).
1559 *
1560 * NOTE: If the path length is greater than NUMPARMS*4, or is not of the form
1561 * 12+N*16, there will be no padding.
1562 *
1563 * TODO: If there is more path beyond the current component name, should we
1564 * force some padding?  For example, a lookup for /foo_bar_baz/spam that
1565 * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with
1566 * no '>' padding.  But /foo_bar/spam would log "/foo_bar>>>>".
1567 */
1568#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
1569static void
1570kdebug_lookup(struct vnode *dp, struct componentname *cnp)
1571{
1572	unsigned int i;
1573	int code;
1574	int dbg_namelen;
1575	char *dbg_nameptr;
1576	long dbg_parms[NUMPARMS];
1577
1578	/* Collect the pathname for tracing */
1579	dbg_namelen = (cnp->cn_nameptr - cnp->cn_pnbuf) + cnp->cn_namelen;
1580	dbg_nameptr = cnp->cn_nameptr + cnp->cn_namelen;
1581
1582	if (dbg_namelen > (int)sizeof(dbg_parms))
1583		dbg_namelen = sizeof(dbg_parms);
1584	dbg_nameptr -= dbg_namelen;
1585
1586	/* Copy the (possibly truncated) path itself */
1587	memcpy(dbg_parms, dbg_nameptr, dbg_namelen);
1588
1589	/* Pad with '\0' or '>' */
1590	if (dbg_namelen < (int)sizeof(dbg_parms)) {
1591		memset((char *)dbg_parms + dbg_namelen,
1592		       *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0,
1593		       sizeof(dbg_parms) - dbg_namelen);
1594	}
1595
1596	/*
1597	 * In the event that we collect multiple, consecutive pathname
1598	 * entries, we must mark the start of the path's string and the end.
1599	 */
1600	code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START;
1601
1602	if (dbg_namelen <= 12)
1603		code |= DBG_FUNC_END;
1604
1605	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);
1606
1607	code &= ~DBG_FUNC_START;
1608
1609	for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) {
1610		if (dbg_namelen <= 16)
1611			code |= DBG_FUNC_END;
1612
1613		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
1614	}
1615}
1616#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
1617static void
1618kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
1619{
1620}
1621#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
1622
1623int
1624vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
1625{
1626	mount_t mp;
1627	int error;
1628
1629	mp = mount_lookupby_volfsid(fsid->val[0], 1);
1630	if (mp == NULL) {
1631		return EINVAL;
1632	}
1633
1634	/* Get the target vnode. */
1635	if (ino == 2) {
1636		error = VFS_ROOT(mp, vpp, ctx);
1637	} else {
1638		error = VFS_VGET(mp, ino, vpp, ctx);
1639	}
1640
1641	vfs_unbusy(mp);
1642	return error;
1643}
/*
 * Obtain the real path from a legacy volfs style path.
 *
 * Valid formats of input path:
 *
 *	"555/@"
 *	"555/2"
 *	"555/123456"
 *	"555/123456/foobar"
 *
 * Where:
 *	555 represents the volfs file system id
 *	'@' and '2' are aliases to the root of a file system
 *	123456 represents a file id
 *	"foobar" represents a file name
 *
 * Parameters:
 *	path		volfs style path, in one of the formats above
 *	realpath	output buffer that receives the absolute path
 *	bufsize		size of 'realpath' in bytes
 *	ctx		context passed to VFS_ROOT/VFS_VGET/build_path
 *
 * Returns 0 on success.  EINVAL if the file system id is missing, zero, not
 * followed by '/', or does not name a mount.  ENAMETOOLONG if appending the
 * trailing name would not fit in min(bufsize, MAXPATHLEN) bytes including
 * the terminating NUL.  Otherwise the error from VFS_ROOT, VFS_VGET or
 * build_path.
 */
#if CONFIG_VOLFS
static int
vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx)
{
	vnode_t vp;
	struct mount *mp = NULL;
	char  *str;
	char ch;
	uint32_t  id;
	ino64_t ino;
	int error;
	int length;

	/* Get file system id and move str to next component. */
	id = strtoul(path, &str, 10);
	if (id == 0 || str[0] != '/') {
		return (EINVAL);
	}
	while (*str == '/') {
		str++;
	}
	ch = *str;

	mp = mount_lookupby_volfsid(id, 1);
	if (mp == NULL) {
		return (EINVAL);  /* unexpected failure */
	}
	/* Check for an alias to a file system root. */
	if (ch == '@' && str[1] == '\0') {
		ino = 2;
		str++;
	} else {
		/* Get file id and move str to next component. */
		ino = strtouq(str, &str, 10);
	}

	/* Get the target vnode. */
	if (ino == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, ino, &vp, ctx);
	}
	vfs_unbusy(mp);
	if (error) {
		goto out;
	}
	realpath[0] = '\0';

	/* Get the absolute path to this vnode. */
	error = build_path(vp, realpath, bufsize, &length, 0, ctx);
	vnode_put(vp);

	/* Append whatever follows the file id (e.g. "/foobar"). */
	if (error == 0 && *str != '\0') {
		/*
		 * Never write past the caller's buffer, and keep the historical
		 * MAXPATHLEN ceiling on the resulting path.
		 */
		size_t limit = (bufsize < (size_t)MAXPATHLEN) ? bufsize : (size_t)MAXPATHLEN;

		/*
		 * strlcat() returns the length of the string it tried to
		 * create; a value >= the size passed in means the result was
		 * truncated.
		 */
		if (strlcat(realpath, str, limit) >= limit) {
			error = ENAMETOOLONG;
		}
	}
out:
	return (error);
}
#endif
1722
1723void
1724lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create)
1725{
1726	if (error == 0 && vp == NULLVP) {
1727		panic("NULL vp with error == 0.\n");
1728	}
1729
1730	/*
1731	 * We don't want to do any of this if we didn't use the compound vnop
1732	 * to perform the lookup... i.e. if we're allowing and using the legacy pattern,
1733	 * where we did a full lookup.
1734	 */
1735	if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) {
1736		return;
1737	}
1738
1739	/*
1740	 * If we're going to continue the lookup, we'll handle
1741	 * all lookup-related updates at that time.
1742	 */
1743	if (error == EKEEPLOOKING) {
1744		return;
1745	}
1746
1747	/*
1748	 * Only audit or update cache for *found* vnodes.  For creation
1749	 * neither would happen in the non-compound-vnop case.
1750	 */
1751	if ((vp != NULLVP) && !did_create) {
1752		/*
1753		 * If MAKEENTRY isn't set, and we've done a successful compound VNOP,
1754		 * then we certainly don't want to update cache or identity.
1755		 */
1756		if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) {
1757			lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration);
1758		}
1759		if (ndp->ni_cnd.cn_flags & AUDITVNPATH1)
1760			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1761		else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2)
1762			AUDIT_ARG(vnpath, vp, ARG_VNODE2);
1763	}
1764
1765	/*
1766	 * If you created (whether you opened or not), cut a lookup tracepoint
1767	 * for the parent dir (as would happen without a compound vnop).  Note: we may need
1768	 * a vnode despite failure in this case!
1769	 *
1770	 * If you did not create:
1771	 * 	Found child (succeeded or not): cut a tracepoint for the child.
1772	 * 	Did not find child: cut a tracepoint with the parent.
1773	 */
1774	if (kdebug_enable) {
1775	        kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd);
1776	}
1777}
1778