1139804Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 4. Neither the name of the University nor the names of its contributors
191541Srgrimes *    may be used to endorse or promote products derived from this software
201541Srgrimes *    without specific prior written permission.
211541Srgrimes *
221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
251541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
321541Srgrimes * SUCH DAMAGE.
331541Srgrimes *
341541Srgrimes *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
351541Srgrimes */
361541Srgrimes
37116182Sobrien#include <sys/cdefs.h>
38116182Sobrien__FBSDID("$FreeBSD$");
39116182Sobrien
40224778Srwatson#include "opt_capsicum.h"
41190759Srwatson#include "opt_kdtrace.h"
4213203Swollman#include "opt_ktrace.h"
4313203Swollman
441541Srgrimes#include <sys/param.h>
452112Swollman#include <sys/systm.h>
4669664Speter#include <sys/kernel.h>
47224778Srwatson#include <sys/capability.h>
48177785Skib#include <sys/fcntl.h>
49192895Sjamie#include <sys/jail.h>
5076166Smarkm#include <sys/lock.h>
5189316Salfred#include <sys/mutex.h>
521541Srgrimes#include <sys/namei.h>
531541Srgrimes#include <sys/vnode.h>
541541Srgrimes#include <sys/mount.h>
551541Srgrimes#include <sys/filedesc.h>
561541Srgrimes#include <sys/proc.h>
57190759Srwatson#include <sys/sdt.h>
58141471Sjhb#include <sys/syscallsubr.h>
59144613Sjeff#include <sys/sysctl.h>
601541Srgrimes#ifdef KTRACE
611541Srgrimes#include <sys/ktrace.h>
621541Srgrimes#endif
631541Srgrimes
64155334Srwatson#include <security/audit/audit.h>
65163606Srwatson#include <security/mac/mac_framework.h>
66155334Srwatson
6792751Sjeff#include <vm/uma.h>
6832011Sbde
69155168Sjeff#define	NAMEI_DIAGNOSTIC 1
70138345Sphk#undef NAMEI_DIAGNOSTIC
71138345Sphk
72190759SrwatsonSDT_PROVIDER_DECLARE(vfs);
73260817SavgSDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *",
74190759Srwatson    "unsigned long");
75260817SavgSDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *");
76190759Srwatson
771541Srgrimes/*
7869664Speter * Allocation zone for namei
7969664Speter */
8092751Sjeffuma_zone_t namei_zone;
81166167Skib/*
82166167Skib * Placeholder vnode for mp traversal
83166167Skib */
84166167Skibstatic struct vnode *vp_crossmp;
8569664Speter
8669664Speterstatic void
8769664Speternameiinit(void *dummy __unused)
8869664Speter{
89168138Srwatson
9092654Sjeff	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
9192654Sjeff	    UMA_ALIGN_PTR, 0);
92211531Sjhb	getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
93211531Sjhb	vn_lock(vp_crossmp, LK_EXCLUSIVE);
94176519Sattilio	VN_LOCK_ASHARE(vp_crossmp);
95211531Sjhb	VOP_UNLOCK(vp_crossmp, 0);
9669664Speter}
97177253SrwatsonSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
9869664Speter
99183520Sjhbstatic int lookup_shared = 1;
100144613SjeffSYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0,
101144613Sjeff    "Enables/Disables shared locks for path name translation");
102183519SjhbTUNABLE_INT("vfs.lookup_shared", &lookup_shared);
103144613Sjeff
10469664Speter/*
105161010Srwatson * Convert a pathname into a pointer to a locked vnode.
1061541Srgrimes *
1071541Srgrimes * The FOLLOW flag is set when symbolic links are to be followed
1081541Srgrimes * when they occur at the end of the name translation process.
1091541Srgrimes * Symbolic links are always followed for all other pathname
1101541Srgrimes * components other than the last.
1111541Srgrimes *
1121541Srgrimes * The segflg defines whether the name is to be copied from user
1131541Srgrimes * space or kernel space.
1141541Srgrimes *
1151541Srgrimes * Overall outline of namei:
1161541Srgrimes *
1171541Srgrimes *	copy in name
1181541Srgrimes *	get starting directory
1191541Srgrimes *	while (!done && !error) {
1201541Srgrimes *		call lookup to search path.
1211541Srgrimes *		if symbolic link, massage name in buffer and continue
1221541Srgrimes *	}
1231541Srgrimes */
124273414Sdelphijstatic void
125273414Sdelphijnamei_cleanup_cnp(struct componentname *cnp)
126273414Sdelphij{
127273414Sdelphij	uma_zfree(namei_zone, cnp->cn_pnbuf);
128273414Sdelphij#ifdef DIAGNOSTIC
129273414Sdelphij	cnp->cn_pnbuf = NULL;
130273414Sdelphij	cnp->cn_nameptr = NULL;
131273414Sdelphij#endif
132273414Sdelphij}
133273414Sdelphij
1341541Srgrimesint
135161011Srwatsonnamei(struct nameidata *ndp)
1361541Srgrimes{
137161011Srwatson	struct filedesc *fdp;	/* pointer to file descriptor state */
138161011Srwatson	char *cp;		/* pointer into pathname argument */
139161011Srwatson	struct vnode *dp;	/* the directory we are searching */
1401541Srgrimes	struct iovec aiov;		/* uio for reading symbolic links */
1411541Srgrimes	struct uio auio;
1421541Srgrimes	int error, linklen;
1431541Srgrimes	struct componentname *cnp = &ndp->ni_cnd;
14483366Sjulian	struct thread *td = cnp->cn_thread;
14583366Sjulian	struct proc *p = td->td_proc;
1461541Srgrimes
14791419Sjhb	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
14883366Sjulian	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
14942408Seivind	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
15042453Seivind	    ("namei: nameiop contaminated with flags"));
15142408Seivind	KASSERT((cnp->cn_flags & OPMASK) == 0,
15242453Seivind	    ("namei: flags contaminated with nameiops"));
153144613Sjeff	if (!lookup_shared)
154144613Sjeff		cnp->cn_flags &= ~LOCKSHARED;
15583366Sjulian	fdp = p->p_fd;
1561541Srgrimes
157193028Sdes	/* We will set this ourselves if we need it. */
158193028Sdes	cnp->cn_flags &= ~TRAILINGSLASH;
159193028Sdes
1601541Srgrimes	/*
1611541Srgrimes	 * Get a buffer for the name to be translated, and copy the
1621541Srgrimes	 * name into the buffer.
1631541Srgrimes	 */
1641541Srgrimes	if ((cnp->cn_flags & HASBUF) == 0)
165111119Simp		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
1661541Srgrimes	if (ndp->ni_segflg == UIO_SYSSPACE)
1671541Srgrimes		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
16836735Sdfr			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
1691541Srgrimes	else
1701541Srgrimes		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
17136735Sdfr			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
17220069Sbde
17320069Sbde	/*
17420069Sbde	 * Don't allow empty pathnames.
17520069Sbde	 */
17620069Sbde	if (!error && *cnp->cn_pnbuf == '\0')
17720069Sbde		error = ENOENT;
17820069Sbde
179224810Sjonathan#ifdef CAPABILITY_MODE
180224810Sjonathan	/*
181224810Sjonathan	 * In capability mode, lookups must be "strictly relative" (i.e.
182224810Sjonathan	 * not an absolute path, and not containing '..' components) to
183224810Sjonathan	 * a real file descriptor, not the pseudo-descriptor AT_FDCWD.
184224810Sjonathan	 */
185253969Skib	if (error == 0 && IN_CAPABILITY_MODE(td) &&
186253969Skib	    (cnp->cn_flags & NOCAPCHECK) == 0) {
187224810Sjonathan		ndp->ni_strictrelative = 1;
188226495Sdes		if (ndp->ni_dirfd == AT_FDCWD) {
189226495Sdes#ifdef KTRACE
190226495Sdes			if (KTRPOINT(td, KTR_CAPFAIL))
191255677Spjd				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
192226495Sdes#endif
193224810Sjonathan			error = ECAPMODE;
194226495Sdes		}
195224810Sjonathan	}
196224810Sjonathan#endif
1971541Srgrimes	if (error) {
198273414Sdelphij		namei_cleanup_cnp(cnp);
1991541Srgrimes		ndp->ni_vp = NULL;
2001541Srgrimes		return (error);
2011541Srgrimes	}
2021541Srgrimes	ndp->ni_loopcnt = 0;
2031541Srgrimes#ifdef KTRACE
20497994Sjhb	if (KTRPOINT(td, KTR_NAMEI)) {
20597994Sjhb		KASSERT(cnp->cn_thread == curthread,
20697994Sjhb		    ("namei not using curthread"));
20797994Sjhb		ktrnamei(cnp->cn_pnbuf);
20897994Sjhb	}
2091541Srgrimes#endif
2101541Srgrimes	/*
2111541Srgrimes	 * Get starting point for the translation.
2121541Srgrimes	 */
213168355Srwatson	FILEDESC_SLOCK(fdp);
21433360Sdyson	ndp->ni_rootdir = fdp->fd_rdir;
21551649Sphk	ndp->ni_topdir = fdp->fd_jdir;
21633360Sdyson
217243726Spjd	/*
218243726Spjd	 * If we are auditing the kernel pathname, save the user pathname.
219243726Spjd	 */
220243726Spjd	if (cnp->cn_flags & AUDITVNODE1)
221243746Spjd		AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
222243726Spjd	if (cnp->cn_flags & AUDITVNODE2)
223243746Spjd		AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);
224243726Spjd
225185029Spjd	dp = NULL;
226185029Spjd	if (cnp->cn_pnbuf[0] != '/') {
227185029Spjd		if (ndp->ni_startdir != NULL) {
228185029Spjd			dp = ndp->ni_startdir;
229185029Spjd			error = 0;
230195925Srwatson		} else if (ndp->ni_dirfd != AT_FDCWD) {
231255219Spjd			cap_rights_t rights;
232255219Spjd
233255219Spjd			rights = ndp->ni_rightsneeded;
234255219Spjd			cap_rights_set(&rights, CAP_LOOKUP);
235255219Spjd
236195925Srwatson			if (cnp->cn_flags & AUDITVNODE1)
237195925Srwatson				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
238195925Srwatson			if (cnp->cn_flags & AUDITVNODE2)
239195925Srwatson				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
240224810Sjonathan			error = fgetvp_rights(td, ndp->ni_dirfd,
241255219Spjd			    &rights, &ndp->ni_filecaps, &dp);
242224810Sjonathan#ifdef CAPABILITIES
243224810Sjonathan			/*
244247602Spjd			 * If file descriptor doesn't have all rights,
245247602Spjd			 * all lookups relative to it must also be
246224810Sjonathan			 * strictly relative.
247224810Sjonathan			 */
248255219Spjd			CAP_ALL(&rights);
249255219Spjd			if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights,
250255219Spjd			    &rights) ||
251247602Spjd			    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
252247602Spjd			    ndp->ni_filecaps.fc_nioctls != -1) {
253224810Sjonathan				ndp->ni_strictrelative = 1;
254247602Spjd			}
255224778Srwatson#endif
256195925Srwatson		}
257185029Spjd		if (error != 0 || dp != NULL) {
258185029Spjd			FILEDESC_SUNLOCK(fdp);
259185029Spjd			if (error == 0 && dp->v_type != VDIR) {
260185029Spjd				vrele(dp);
261185029Spjd				error = ENOTDIR;
262185029Spjd			}
263177785Skib		}
264177785Skib		if (error) {
265273414Sdelphij			namei_cleanup_cnp(cnp);
266177785Skib			return (error);
267177785Skib		}
268185029Spjd	}
269185029Spjd	if (dp == NULL) {
270177785Skib		dp = fdp->fd_cdir;
271177785Skib		VREF(dp);
272177785Skib		FILEDESC_SUNLOCK(fdp);
273241896Skib		if (ndp->ni_startdir != NULL)
274185029Spjd			vrele(ndp->ni_startdir);
275177785Skib	}
276190759Srwatson	SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
277190759Srwatson	    cnp->cn_flags, 0, 0);
2781541Srgrimes	for (;;) {
2791541Srgrimes		/*
2801541Srgrimes		 * Check if root directory should replace current directory.
2811541Srgrimes		 * Done at start of translation and after symbolic link.
2821541Srgrimes		 */
2831541Srgrimes		cnp->cn_nameptr = cnp->cn_pnbuf;
2841541Srgrimes		if (*(cnp->cn_nameptr) == '/') {
2851541Srgrimes			vrele(dp);
286226495Sdes			if (ndp->ni_strictrelative != 0) {
287226495Sdes#ifdef KTRACE
288226495Sdes				if (KTRPOINT(curthread, KTR_CAPFAIL))
289255677Spjd					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
290226495Sdes#endif
291273414Sdelphij				namei_cleanup_cnp(cnp);
292224810Sjonathan				return (ENOTCAPABLE);
293226495Sdes			}
2941541Srgrimes			while (*(cnp->cn_nameptr) == '/') {
2951541Srgrimes				cnp->cn_nameptr++;
2961541Srgrimes				ndp->ni_pathlen--;
2971541Srgrimes			}
2981541Srgrimes			dp = ndp->ni_rootdir;
2991541Srgrimes			VREF(dp);
3001541Srgrimes		}
3011541Srgrimes		ndp->ni_startdir = dp;
3023148Sphk		error = lookup(ndp);
3033148Sphk		if (error) {
304273414Sdelphij			namei_cleanup_cnp(cnp);
305190759Srwatson			SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0,
306190759Srwatson			    0, 0);
3071541Srgrimes			return (error);
3081541Srgrimes		}
3091541Srgrimes		/*
310193027Sdes		 * If not a symbolic link, we're done.
3111541Srgrimes		 */
3121541Srgrimes		if ((cnp->cn_flags & ISSYMLINK) == 0) {
313100613Srwatson			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
314273414Sdelphij				namei_cleanup_cnp(cnp);
315100613Srwatson			} else
3161541Srgrimes				cnp->cn_flags |= HASBUF;
31732286Sdyson
318190759Srwatson			SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp,
319190759Srwatson			    0, 0, 0);
3201541Srgrimes			return (0);
3211541Srgrimes		}
3221541Srgrimes		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
3231541Srgrimes			error = ELOOP;
3241541Srgrimes			break;
3251541Srgrimes		}
326101127Srwatson#ifdef MAC
327105479Srwatson		if ((cnp->cn_flags & NOMACCHECK) == 0) {
328172930Srwatson			error = mac_vnode_check_readlink(td->td_ucred,
329105479Srwatson			    ndp->ni_vp);
330105479Srwatson			if (error)
331105479Srwatson				break;
332105479Srwatson		}
333101127Srwatson#endif
3341541Srgrimes		if (ndp->ni_pathlen > 1)
335111119Simp			cp = uma_zalloc(namei_zone, M_WAITOK);
3361541Srgrimes		else
3371541Srgrimes			cp = cnp->cn_pnbuf;
3381541Srgrimes		aiov.iov_base = cp;
3391541Srgrimes		aiov.iov_len = MAXPATHLEN;
3401541Srgrimes		auio.uio_iov = &aiov;
3411541Srgrimes		auio.uio_iovcnt = 1;
3421541Srgrimes		auio.uio_offset = 0;
3431541Srgrimes		auio.uio_rw = UIO_READ;
3441541Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
345247116Sjhb		auio.uio_td = td;
3461541Srgrimes		auio.uio_resid = MAXPATHLEN;
3473148Sphk		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
3483148Sphk		if (error) {
3491541Srgrimes			if (ndp->ni_pathlen > 1)
35092751Sjeff				uma_zfree(namei_zone, cp);
3511541Srgrimes			break;
3521541Srgrimes		}
3531541Srgrimes		linklen = MAXPATHLEN - auio.uio_resid;
35478692Sdillon		if (linklen == 0) {
35578692Sdillon			if (ndp->ni_pathlen > 1)
35692751Sjeff				uma_zfree(namei_zone, cp);
35778692Sdillon			error = ENOENT;
35878692Sdillon			break;
35978692Sdillon		}
3601541Srgrimes		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
3611541Srgrimes			if (ndp->ni_pathlen > 1)
36292751Sjeff				uma_zfree(namei_zone, cp);
3631541Srgrimes			error = ENAMETOOLONG;
3641541Srgrimes			break;
3651541Srgrimes		}
3661541Srgrimes		if (ndp->ni_pathlen > 1) {
3671541Srgrimes			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
36892751Sjeff			uma_zfree(namei_zone, cnp->cn_pnbuf);
3691541Srgrimes			cnp->cn_pnbuf = cp;
3701541Srgrimes		} else
3711541Srgrimes			cnp->cn_pnbuf[linklen] = '\0';
3721541Srgrimes		ndp->ni_pathlen += linklen;
3731541Srgrimes		vput(ndp->ni_vp);
3741541Srgrimes		dp = ndp->ni_dvp;
3751541Srgrimes	}
376273414Sdelphij	namei_cleanup_cnp(cnp);
377144833Sjeff	vput(ndp->ni_vp);
378144833Sjeff	ndp->ni_vp = NULL;
3791541Srgrimes	vrele(ndp->ni_dvp);
380190759Srwatson	SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0);
3811541Srgrimes	return (error);
3821541Srgrimes}
3831541Srgrimes
384162288Smohansstatic int
385240283Skibcompute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
386162288Smohans{
387184597Sjhb
388240283Skib	if (mp == NULL || ((lkflags & LK_SHARED) &&
389240283Skib	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
390240283Skib	    ((cnflags & ISDOTDOT) &&
391240283Skib	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
392162288Smohans		lkflags &= ~LK_SHARED;
393162288Smohans		lkflags |= LK_EXCLUSIVE;
394162288Smohans	}
395184597Sjhb	return (lkflags);
396162288Smohans}
397162288Smohans
398189696Sjhbstatic __inline int
399189696Sjhbneeds_exclusive_leaf(struct mount *mp, int flags)
400189696Sjhb{
401189696Sjhb
402189696Sjhb	/*
403189696Sjhb	 * Intermediate nodes can use shared locks, we only need to
404189696Sjhb	 * force an exclusive lock for leaf nodes.
405189696Sjhb	 */
406189696Sjhb	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
407189696Sjhb		return (0);
408189696Sjhb
409189696Sjhb	/* Always use exclusive locks if LOCKSHARED isn't set. */
410189696Sjhb	if (!(flags & LOCKSHARED))
411189696Sjhb		return (1);
412189696Sjhb
413189696Sjhb	/*
414189696Sjhb	 * For lookups during open(), if the mount point supports
415189696Sjhb	 * extended shared operations, then use a shared lock for the
416189696Sjhb	 * leaf node, otherwise use an exclusive lock.
417189696Sjhb	 */
418259294Skib	if ((flags & ISOPEN) != 0)
419259294Skib		return (!MNT_EXTENDED_SHARED(mp));
420189696Sjhb
421189696Sjhb	/*
422189696Sjhb	 * Lookup requests outside of open() that specify LOCKSHARED
423189696Sjhb	 * only need a shared lock on the leaf vnode.
424189696Sjhb	 */
425189697Sjhb	return (0);
426189696Sjhb}
427189696Sjhb
4281541Srgrimes/*
4291541Srgrimes * Search a pathname.
4301541Srgrimes * This is a very central and rather complicated routine.
4311541Srgrimes *
 * The pathname is pointed to by cnp->cn_nameptr and is of length
 * ni_pathlen.  The starting directory is taken from ni_startdir.  The
 * pathname is descended until done, or a symbolic link is encountered.
 * The ISSYMLINK flag in cn_flags is clear if the path is completed; it
 * is set if a symbolic link needing interpretation is encountered.
4371541Srgrimes *
4381541Srgrimes * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
4391541Srgrimes * whether the name is to be looked up, created, renamed, or deleted.
4401541Srgrimes * When CREATE, RENAME, or DELETE is specified, information usable in
4411541Srgrimes * creating, renaming, or deleting a directory entry may be calculated.
4421541Srgrimes * If flag has LOCKPARENT or'ed into it, the parent directory is returned
4431541Srgrimes * locked. If flag has WANTPARENT or'ed into it, the parent directory is
4441541Srgrimes * returned unlocked. Otherwise the parent directory is not returned. If
4451541Srgrimes * the target of the pathname exists and LOCKLEAF is or'ed into the flag
4461541Srgrimes * the target is returned locked, otherwise it is returned unlocked.
4471541Srgrimes * When creating or renaming and LOCKPARENT is specified, the target may not
4481541Srgrimes * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
4498876Srgrimes *
4501541Srgrimes * Overall outline of lookup:
4511541Srgrimes *
4521541Srgrimes * dirloop:
 *	identify next component of name at cnp->cn_nameptr
4541541Srgrimes *	handle degenerate case where name is null string
4551541Srgrimes *	if .. and crossing mount points and on mounted filesys, find parent
4561541Srgrimes *	call VOP_LOOKUP routine for next component name
4571541Srgrimes *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
4581541Srgrimes *	    component vnode returned in ni_vp (if it exists), locked.
4591541Srgrimes *	if result vnode is mounted on and crossing mount points,
4601541Srgrimes *	    find mounted on vnode
4611541Srgrimes *	if more components of name, do next level at dirloop
4621541Srgrimes *	return the answer in ni_vp, locked if LOCKLEAF set
4631541Srgrimes *	    if LOCKPARENT set, return locked parent in ni_dvp
4641541Srgrimes *	    if WANTPARENT set, return unlocked parent in ni_dvp
4651541Srgrimes */
4661541Srgrimesint
467161011Srwatsonlookup(struct nameidata *ndp)
4681541Srgrimes{
469161011Srwatson	char *cp;		/* pointer into pathname argument */
470161011Srwatson	struct vnode *dp = 0;	/* the directory we are searching */
4711541Srgrimes	struct vnode *tdp;		/* saved dp */
4721541Srgrimes	struct mount *mp;		/* mount table entry */
473192895Sjamie	struct prison *pr;
4741541Srgrimes	int docache;			/* == 0 do not cache last component */
4751541Srgrimes	int wantparent;			/* 1 => wantparent or lockparent flag */
4761541Srgrimes	int rdonly;			/* lookup read-only flag bit */
4771541Srgrimes	int error = 0;
47865805Sbp	int dpunlocked = 0;		/* dp has already been unlocked */
4791541Srgrimes	struct componentname *cnp = &ndp->ni_cnd;
480162288Smohans	int lkflags_save;
481229185Skib	int ni_dvp_unlocked;
482162288Smohans
4831541Srgrimes	/*
4841541Srgrimes	 * Setup: break out flag bits into variables.
4851541Srgrimes	 */
486229185Skib	ni_dvp_unlocked = 0;
4871541Srgrimes	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
488144229Sjeff	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
489144229Sjeff	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
4901541Srgrimes	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
4911541Srgrimes	if (cnp->cn_nameiop == DELETE ||
49222874Sbde	    (wantparent && cnp->cn_nameiop != CREATE &&
49322874Sbde	     cnp->cn_nameiop != LOOKUP))
4941541Srgrimes		docache = 0;
4951541Srgrimes	rdonly = cnp->cn_flags & RDONLY;
496144286Sjeff	cnp->cn_flags &= ~ISSYMLINK;
4971541Srgrimes	ndp->ni_dvp = NULL;
498144286Sjeff	/*
499144286Sjeff	 * We use shared locks until we hit the parent of the last cn then
500144286Sjeff	 * we adjust based on the requesting flags.
501144286Sjeff	 */
502144613Sjeff	if (lookup_shared)
503144613Sjeff		cnp->cn_lkflags = LK_SHARED;
504144613Sjeff	else
505144613Sjeff		cnp->cn_lkflags = LK_EXCLUSIVE;
5061541Srgrimes	dp = ndp->ni_startdir;
5071541Srgrimes	ndp->ni_startdir = NULLVP;
508175202Sattilio	vn_lock(dp,
509240283Skib	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
510240283Skib	    cnp->cn_flags));
5111541Srgrimes
5121541Srgrimesdirloop:
5131541Srgrimes	/*
5141541Srgrimes	 * Search a new directory.
5151541Srgrimes	 *
5161541Srgrimes	 * The last component of the filename is left accessible via
5171541Srgrimes	 * cnp->cn_nameptr for callers that need the name. Callers needing
5181541Srgrimes	 * the name set the SAVENAME flag. When done, they assume
5191541Srgrimes	 * responsibility for freeing the pathname buffer.
5201541Srgrimes	 */
5211541Srgrimes	cnp->cn_consume = 0;
5221541Srgrimes	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
52351906Sphk		continue;
5241541Srgrimes	cnp->cn_namelen = cp - cnp->cn_nameptr;
5251541Srgrimes	if (cnp->cn_namelen > NAME_MAX) {
5261541Srgrimes		error = ENAMETOOLONG;
5271541Srgrimes		goto bad;
5281541Srgrimes	}
5291541Srgrimes#ifdef NAMEI_DIAGNOSTIC
5301541Srgrimes	{ char c = *cp;
5311541Srgrimes	*cp = '\0';
5321541Srgrimes	printf("{%s}: ", cnp->cn_nameptr);
5331541Srgrimes	*cp = c; }
5341541Srgrimes#endif
5351541Srgrimes	ndp->ni_pathlen -= cnp->cn_namelen;
5361541Srgrimes	ndp->ni_next = cp;
5379804Sbde
5389804Sbde	/*
5399804Sbde	 * Replace multiple slashes by a single slash and trailing slashes
5409804Sbde	 * by a null.  This must be done before VOP_LOOKUP() because some
5419804Sbde	 * fs's don't know about trailing slashes.  Remember if there were
5429804Sbde	 * trailing slashes to handle symlinks, existing non-directories
5439804Sbde	 * and non-existing files that won't be directories specially later.
5449804Sbde	 */
5459804Sbde	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
5469804Sbde		cp++;
5479804Sbde		ndp->ni_pathlen--;
5489804Sbde		if (*cp == '\0') {
549193557Sdes			*ndp->ni_next = '\0';
550193028Sdes			cnp->cn_flags |= TRAILINGSLASH;
5519804Sbde		}
5529804Sbde	}
5539804Sbde	ndp->ni_next = cp;
5549804Sbde
5551541Srgrimes	cnp->cn_flags |= MAKEENTRY;
5561541Srgrimes	if (*cp == '\0' && docache == 0)
5571541Srgrimes		cnp->cn_flags &= ~MAKEENTRY;
5581541Srgrimes	if (cnp->cn_namelen == 2 &&
5591541Srgrimes	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
5601541Srgrimes		cnp->cn_flags |= ISDOTDOT;
5611541Srgrimes	else
5621541Srgrimes		cnp->cn_flags &= ~ISDOTDOT;
5631541Srgrimes	if (*ndp->ni_next == 0)
5641541Srgrimes		cnp->cn_flags |= ISLASTCN;
5651541Srgrimes	else
5661541Srgrimes		cnp->cn_flags &= ~ISLASTCN;
5671541Srgrimes
568199137Skib	if ((cnp->cn_flags & ISLASTCN) != 0 &&
569199137Skib	    cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
570199137Skib	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
571199137Skib		error = EINVAL;
572199137Skib		goto bad;
573199137Skib	}
5741541Srgrimes
5751541Srgrimes	/*
5761541Srgrimes	 * Check for degenerate name (e.g. / or "")
5771541Srgrimes	 * which is a way of talking about a directory,
5781541Srgrimes	 * e.g. like "/." or ".".
5791541Srgrimes	 */
5801541Srgrimes	if (cnp->cn_nameptr[0] == '\0') {
58122521Sdyson		if (dp->v_type != VDIR) {
58222521Sdyson			error = ENOTDIR;
58322521Sdyson			goto bad;
58422521Sdyson		}
5851541Srgrimes		if (cnp->cn_nameiop != LOOKUP) {
5861541Srgrimes			error = EISDIR;
5871541Srgrimes			goto bad;
5881541Srgrimes		}
5891541Srgrimes		if (wantparent) {
5901541Srgrimes			ndp->ni_dvp = dp;
5911541Srgrimes			VREF(dp);
5921541Srgrimes		}
5931541Srgrimes		ndp->ni_vp = dp;
594155334Srwatson
595155334Srwatson		if (cnp->cn_flags & AUDITVNODE1)
596195926Srwatson			AUDIT_ARG_VNODE1(dp);
597155334Srwatson		else if (cnp->cn_flags & AUDITVNODE2)
598195926Srwatson			AUDIT_ARG_VNODE2(dp);
599155334Srwatson
6001541Srgrimes		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
601175294Sattilio			VOP_UNLOCK(dp, 0);
60254655Seivind		/* XXX This should probably move to the top of function. */
6031541Srgrimes		if (cnp->cn_flags & SAVESTART)
6041541Srgrimes			panic("lookup: SAVESTART");
605140714Sjeff		goto success;
6061541Srgrimes	}
6071541Srgrimes
6081541Srgrimes	/*
609224810Sjonathan	 * Handle "..": five special cases.
610224810Sjonathan	 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
611224810Sjonathan	 *    fairly conservative design choice, but it's the only one that we
612224810Sjonathan	 *    are satisfied guarantees the property we're looking for).
613154649Struckman	 * 1. Return an error if this is the last component of
614154649Struckman	 *    the name and the operation is DELETE or RENAME.
615154649Struckman	 * 2. If at root directory (e.g. after chroot)
6161541Srgrimes	 *    or at absolute root directory
6171541Srgrimes	 *    then ignore it so can't get out.
618154649Struckman	 * 3. If this vnode is the root of a mounted
6191541Srgrimes	 *    filesystem, then replace it with the
6201541Srgrimes	 *    vnode which was mounted on so we take the
62196755Strhodes	 *    .. in the other filesystem.
622154649Struckman	 * 4. If the vnode is the top directory of
62351649Sphk	 *    the jail or chroot, don't let them out.
6241541Srgrimes	 */
6251541Srgrimes	if (cnp->cn_flags & ISDOTDOT) {
626224810Sjonathan		if (ndp->ni_strictrelative != 0) {
627226495Sdes#ifdef KTRACE
628226495Sdes			if (KTRPOINT(curthread, KTR_CAPFAIL))
629255677Spjd				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
630226495Sdes#endif
631224810Sjonathan			error = ENOTCAPABLE;
632224810Sjonathan			goto bad;
633224810Sjonathan		}
634154649Struckman		if ((cnp->cn_flags & ISLASTCN) != 0 &&
635154649Struckman		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
636154690Struckman			error = EINVAL;
637154649Struckman			goto bad;
638154649Struckman		}
6391541Srgrimes		for (;;) {
640192895Sjamie			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
641192895Sjamie			     pr = pr->pr_parent)
642192895Sjamie				if (dp == pr->pr_root)
643192895Sjamie					break;
64451649Sphk			if (dp == ndp->ni_rootdir ||
64551649Sphk			    dp == ndp->ni_topdir ||
646166744Skib			    dp == rootvnode ||
647192895Sjamie			    pr != NULL ||
648166744Skib			    ((dp->v_vflag & VV_ROOT) != 0 &&
649166744Skib			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
6501541Srgrimes				ndp->ni_dvp = dp;
6511541Srgrimes				ndp->ni_vp = dp;
6521541Srgrimes				VREF(dp);
6531541Srgrimes				goto nextname;
6541541Srgrimes			}
655166744Skib			if ((dp->v_vflag & VV_ROOT) == 0)
6561541Srgrimes				break;
657155385Sjeff			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
658190387Sjhb				error = ENOENT;
65969405Salfred				goto bad;
66069405Salfred			}
6611541Srgrimes			tdp = dp;
662144833Sjeff			dp = dp->v_mount->mnt_vnodecovered;
663144833Sjeff			VREF(dp);
6641541Srgrimes			vput(tdp);
665175202Sattilio			vn_lock(dp,
666175202Sattilio			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
667240283Skib			    LK_RETRY, ISDOTDOT));
6681541Srgrimes		}
6691541Srgrimes	}
6701541Srgrimes
6711541Srgrimes	/*
6721541Srgrimes	 * We now have a segment name to search for, and a directory to search.
6731541Srgrimes	 */
6741541Srgrimesunionlookup:
675101127Srwatson#ifdef MAC
676105479Srwatson	if ((cnp->cn_flags & NOMACCHECK) == 0) {
677191990Sattilio		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
678191990Sattilio		    cnp);
679105479Srwatson		if (error)
680105479Srwatson			goto bad;
681105479Srwatson	}
682101127Srwatson#endif
6831541Srgrimes	ndp->ni_dvp = dp;
68422521Sdyson	ndp->ni_vp = NULL;
68524624Sdfr	ASSERT_VOP_LOCKED(dp, "lookup");
686144286Sjeff	/*
687144286Sjeff	 * If we have a shared lock we may need to upgrade the lock for the
688144286Sjeff	 * last operation.
689144286Sjeff	 */
690166167Skib	if (dp != vp_crossmp &&
691176559Sattilio	    VOP_ISLOCKED(dp) == LK_SHARED &&
692144286Sjeff	    (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
693175202Sattilio		vn_lock(dp, LK_UPGRADE|LK_RETRY);
694248969Skib	if ((dp->v_iflag & VI_DOOMED) != 0) {
695248969Skib		error = ENOENT;
696248969Skib		goto bad;
697248969Skib	}
698144286Sjeff	/*
699144286Sjeff	 * If we're looking up the last component and we need an exclusive
700144286Sjeff	 * lock, adjust our lkflags.
701144286Sjeff	 */
702189696Sjhb	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
703144286Sjeff		cnp->cn_lkflags = LK_EXCLUSIVE;
704138345Sphk#ifdef NAMEI_DIAGNOSTIC
705138345Sphk	vprint("lookup in", dp);
706138345Sphk#endif
707162288Smohans	lkflags_save = cnp->cn_lkflags;
708240283Skib	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
709240283Skib	    cnp->cn_flags);
71043301Sdillon	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
711162288Smohans		cnp->cn_lkflags = lkflags_save;
71242408Seivind		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
7131541Srgrimes#ifdef NAMEI_DIAGNOSTIC
7141541Srgrimes		printf("not found\n");
7151541Srgrimes#endif
7161541Srgrimes		if ((error == ENOENT) &&
717101308Sjeff		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
7181541Srgrimes		    (dp->v_mount->mnt_flag & MNT_UNION)) {
7191541Srgrimes			tdp = dp;
720144833Sjeff			dp = dp->v_mount->mnt_vnodecovered;
721144833Sjeff			VREF(dp);
722144203Sjeff			vput(tdp);
723175202Sattilio			vn_lock(dp,
724175202Sattilio			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
725240283Skib			    LK_RETRY, cnp->cn_flags));
7261541Srgrimes			goto unionlookup;
7271541Srgrimes		}
7281541Srgrimes
7291541Srgrimes		if (error != EJUSTRETURN)
7301541Srgrimes			goto bad;
7311541Srgrimes		/*
732193557Sdes		 * At this point, we know we're at the end of the
733193557Sdes		 * pathname.  If creating / renaming, we can consider
734193557Sdes		 * allowing the file or directory to be created / renamed,
735193557Sdes		 * provided we're not on a read-only filesystem.
7361541Srgrimes		 */
73711644Sdg		if (rdonly) {
7381541Srgrimes			error = EROFS;
7391541Srgrimes			goto bad;
7401541Srgrimes		}
741193557Sdes		/* trailing slash only allowed for directories */
742193557Sdes		if ((cnp->cn_flags & TRAILINGSLASH) &&
743193557Sdes		    !(cnp->cn_flags & WILLBEDIR)) {
7449804Sbde			error = ENOENT;
7459804Sbde			goto bad;
7469804Sbde		}
747144203Sjeff		if ((cnp->cn_flags & LOCKPARENT) == 0)
748175294Sattilio			VOP_UNLOCK(dp, 0);
7491541Srgrimes		/*
7501541Srgrimes		 * We return with ni_vp NULL to indicate that the entry
7511541Srgrimes		 * doesn't currently exist, leaving a pointer to the
752161010Srwatson		 * (possibly locked) directory vnode in ndp->ni_dvp.
7531541Srgrimes		 */
7541541Srgrimes		if (cnp->cn_flags & SAVESTART) {
7551541Srgrimes			ndp->ni_startdir = ndp->ni_dvp;
7561541Srgrimes			VREF(ndp->ni_startdir);
7571541Srgrimes		}
758140714Sjeff		goto success;
759162288Smohans	} else
760162288Smohans		cnp->cn_lkflags = lkflags_save;
7611541Srgrimes#ifdef NAMEI_DIAGNOSTIC
7621541Srgrimes	printf("found\n");
7631541Srgrimes#endif
764144203Sjeff	/*
7651541Srgrimes	 * Take into account any additional components consumed by
7661541Srgrimes	 * the underlying filesystem.
7671541Srgrimes	 */
7681541Srgrimes	if (cnp->cn_consume > 0) {
7691541Srgrimes		cnp->cn_nameptr += cnp->cn_consume;
7701541Srgrimes		ndp->ni_next += cnp->cn_consume;
7711541Srgrimes		ndp->ni_pathlen -= cnp->cn_consume;
7721541Srgrimes		cnp->cn_consume = 0;
7731541Srgrimes	}
7741541Srgrimes
7751541Srgrimes	dp = ndp->ni_vp;
7761541Srgrimes
7771541Srgrimes	/*
7781541Srgrimes	 * Check to see if the vnode has been mounted on;
77996755Strhodes	 * if so find the root of the mounted filesystem.
7801541Srgrimes	 */
7811541Srgrimes	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
7821541Srgrimes	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
783184554Sattilio		if (vfs_busy(mp, 0))
7841541Srgrimes			continue;
785144833Sjeff		vput(dp);
786158094Sjeff		if (dp != ndp->ni_dvp)
787166167Skib			vput(ndp->ni_dvp);
788166167Skib		else
789166167Skib			vrele(ndp->ni_dvp);
790166167Skib		vref(vp_crossmp);
791166167Skib		ndp->ni_dvp = vp_crossmp;
792240283Skib		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
793240283Skib		    cnp->cn_flags), &tdp);
794182542Sattilio		vfs_unbusy(mp);
795175202Sattilio		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
796166167Skib			panic("vp_crossmp exclusively locked or reclaimed");
79765805Sbp		if (error) {
79865805Sbp			dpunlocked = 1;
7991541Srgrimes			goto bad2;
80065805Sbp		}
8011541Srgrimes		ndp->ni_vp = dp = tdp;
8021541Srgrimes	}
8031541Srgrimes
80410219Sdfr	/*
80510219Sdfr	 * Check for symbolic link
80610219Sdfr	 */
80710219Sdfr	if ((dp->v_type == VLNK) &&
808193557Sdes	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
80910219Sdfr	     *ndp->ni_next == '/')) {
81010219Sdfr		cnp->cn_flags |= ISSYMLINK;
811155385Sjeff		if (dp->v_iflag & VI_DOOMED) {
812190387Sjhb			/*
813190387Sjhb			 * We can't know whether the directory was mounted with
814190387Sjhb			 * NOSYMFOLLOW, so we can't follow safely.
815190387Sjhb			 */
816190387Sjhb			error = ENOENT;
81769405Salfred			goto bad2;
81869405Salfred		}
81935105Swosch		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
82035105Swosch			error = EACCES;
82135105Swosch			goto bad2;
82235105Swosch		}
823144833Sjeff		/*
824144833Sjeff		 * Symlink code always expects an unlocked dvp.
825144833Sjeff		 */
826229185Skib		if (ndp->ni_dvp != ndp->ni_vp) {
827175294Sattilio			VOP_UNLOCK(ndp->ni_dvp, 0);
828229185Skib			ni_dvp_unlocked = 1;
829229185Skib		}
830140714Sjeff		goto success;
83110219Sdfr	}
83210219Sdfr
8331541Srgrimesnextname:
8341541Srgrimes	/*
835193557Sdes	 * Not a symbolic link that we will follow.  Continue with the
836193557Sdes	 * next component if there is any; otherwise, we're done.
8371541Srgrimes	 */
838144203Sjeff	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
839144203Sjeff	    ("lookup: invalid path state."));
8401541Srgrimes	if (*ndp->ni_next == '/') {
8411541Srgrimes		cnp->cn_nameptr = ndp->ni_next;
8421541Srgrimes		while (*cnp->cn_nameptr == '/') {
8431541Srgrimes			cnp->cn_nameptr++;
8441541Srgrimes			ndp->ni_pathlen--;
8451541Srgrimes		}
846144833Sjeff		if (ndp->ni_dvp != dp)
847144833Sjeff			vput(ndp->ni_dvp);
848144833Sjeff		else
849144833Sjeff			vrele(ndp->ni_dvp);
8501541Srgrimes		goto dirloop;
8511541Srgrimes	}
8521541Srgrimes	/*
853193028Sdes	 * If we're processing a path with a trailing slash,
854193028Sdes	 * check that the end result is a directory.
855193028Sdes	 */
856193028Sdes	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
857193028Sdes		error = ENOTDIR;
858193028Sdes		goto bad2;
859193028Sdes	}
860193028Sdes	/*
86196755Strhodes	 * Disallow directory write attempts on read-only filesystems.
8621541Srgrimes	 */
86311644Sdg	if (rdonly &&
86411644Sdg	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
86511644Sdg		error = EROFS;
86611644Sdg		goto bad2;
8671541Srgrimes	}
8681541Srgrimes	if (cnp->cn_flags & SAVESTART) {
8691541Srgrimes		ndp->ni_startdir = ndp->ni_dvp;
8701541Srgrimes		VREF(ndp->ni_startdir);
8711541Srgrimes	}
872144833Sjeff	if (!wantparent) {
873229185Skib		ni_dvp_unlocked = 2;
874144833Sjeff		if (ndp->ni_dvp != dp)
875144833Sjeff			vput(ndp->ni_dvp);
876144833Sjeff		else
877144833Sjeff			vrele(ndp->ni_dvp);
878229185Skib	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
879175294Sattilio		VOP_UNLOCK(ndp->ni_dvp, 0);
880229185Skib		ni_dvp_unlocked = 1;
881229185Skib	}
88232071Sdyson
883155334Srwatson	if (cnp->cn_flags & AUDITVNODE1)
884195926Srwatson		AUDIT_ARG_VNODE1(dp);
885155334Srwatson	else if (cnp->cn_flags & AUDITVNODE2)
886195926Srwatson		AUDIT_ARG_VNODE2(dp);
887155334Srwatson
8881541Srgrimes	if ((cnp->cn_flags & LOCKLEAF) == 0)
889175294Sattilio		VOP_UNLOCK(dp, 0);
890140714Sjeffsuccess:
891172274Spjd	/*
892172274Spjd	 * Because of lookup_shared we may have the vnode shared locked, but
893172274Spjd	 * the caller may want it to be exclusively locked.
894172274Spjd	 */
895189696Sjhb	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
896189696Sjhb	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
897175202Sattilio		vn_lock(dp, LK_UPGRADE | LK_RETRY);
898186276Skib		if (dp->v_iflag & VI_DOOMED) {
899186276Skib			error = ENOENT;
900186276Skib			goto bad2;
901186276Skib		}
902172274Spjd	}
9031541Srgrimes	return (0);
9041541Srgrimes
9051541Srgrimesbad2:
906229185Skib	if (ni_dvp_unlocked != 2) {
907229185Skib		if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
908229185Skib			vput(ndp->ni_dvp);
909229185Skib		else
910229185Skib			vrele(ndp->ni_dvp);
911229185Skib	}
9121541Srgrimesbad:
913144833Sjeff	if (!dpunlocked)
91465805Sbp		vput(dp);
9151541Srgrimes	ndp->ni_vp = NULL;
9161541Srgrimes	return (error);
9171541Srgrimes}
9181541Srgrimes
9193148Sphk/*
9203148Sphk * relookup - lookup a path name component
921170035Srwatson *    Used by lookup to re-acquire things.
9223148Sphk */
9233148Sphkint
924161011Srwatsonrelookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
9253148Sphk{
92622521Sdyson	struct vnode *dp = 0;		/* the directory we are searching */
9273148Sphk	int wantparent;			/* 1 => wantparent or lockparent flag */
9283148Sphk	int rdonly;			/* lookup read-only flag bit */
9293148Sphk	int error = 0;
9301541Srgrimes
931144203Sjeff	KASSERT(cnp->cn_flags & ISLASTCN,
932144203Sjeff	    ("relookup: Not given last component."));
9333148Sphk	/*
9343148Sphk	 * Setup: break out flag bits into variables.
9353148Sphk	 */
9363148Sphk	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
937145004Sjeff	KASSERT(wantparent, ("relookup: parent not wanted."));
9383148Sphk	rdonly = cnp->cn_flags & RDONLY;
9393148Sphk	cnp->cn_flags &= ~ISSYMLINK;
9403148Sphk	dp = dvp;
941144286Sjeff	cnp->cn_lkflags = LK_EXCLUSIVE;
942175202Sattilio	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
9433148Sphk
9443148Sphk	/*
9453148Sphk	 * Search a new directory.
9463148Sphk	 *
9473148Sphk	 * The last component of the filename is left accessible via
9483148Sphk	 * cnp->cn_nameptr for callers that need the name. Callers needing
9493148Sphk	 * the name set the SAVENAME flag. When done, they assume
9503148Sphk	 * responsibility for freeing the pathname buffer.
9513148Sphk	 */
9523148Sphk#ifdef NAMEI_DIAGNOSTIC
9533148Sphk	printf("{%s}: ", cnp->cn_nameptr);
9543148Sphk#endif
9553148Sphk
9563148Sphk	/*
957205682Sjh	 * Check for "" which represents the root directory after slash
958205682Sjh	 * removal.
9593148Sphk	 */
9603148Sphk	if (cnp->cn_nameptr[0] == '\0') {
961205682Sjh		/*
962205682Sjh		 * Support only LOOKUP for "/" because lookup()
963205682Sjh		 * can't succeed for CREATE, DELETE and RENAME.
964205682Sjh		 */
965205682Sjh		KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
966205682Sjh		KASSERT(dp->v_type == VDIR, ("dp is not a directory"));
967205682Sjh
9683148Sphk		if (!(cnp->cn_flags & LOCKLEAF))
969175294Sattilio			VOP_UNLOCK(dp, 0);
9703148Sphk		*vpp = dp;
97154655Seivind		/* XXX This should probably move to the top of function. */
9723148Sphk		if (cnp->cn_flags & SAVESTART)
9733148Sphk			panic("lookup: SAVESTART");
9743148Sphk		return (0);
9753148Sphk	}
9763148Sphk
9773148Sphk	if (cnp->cn_flags & ISDOTDOT)
9783148Sphk		panic ("relookup: lookup on dot-dot");
9793148Sphk
9803148Sphk	/*
9813148Sphk	 * We now have a segment name to search for, and a directory to search.
9823148Sphk	 */
983138345Sphk#ifdef NAMEI_DIAGNOSTIC
984138345Sphk	vprint("search in:", dp);
985138345Sphk#endif
98643311Sdillon	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
98742408Seivind		KASSERT(*vpp == NULL, ("leaf should be empty"));
9883148Sphk		if (error != EJUSTRETURN)
9893148Sphk			goto bad;
9903148Sphk		/*
9913148Sphk		 * If creating and at end of pathname, then can consider
9923148Sphk		 * allowing file to be created.
9933148Sphk		 */
99411644Sdg		if (rdonly) {
9953148Sphk			error = EROFS;
9963148Sphk			goto bad;
9973148Sphk		}
9983148Sphk		/* ASSERT(dvp == ndp->ni_startdir) */
9993148Sphk		if (cnp->cn_flags & SAVESTART)
10003148Sphk			VREF(dvp);
1001144203Sjeff		if ((cnp->cn_flags & LOCKPARENT) == 0)
1002175294Sattilio			VOP_UNLOCK(dp, 0);
10033148Sphk		/*
10043148Sphk		 * We return with ni_vp NULL to indicate that the entry
10053148Sphk		 * doesn't currently exist, leaving a pointer to the
1006161010Srwatson		 * (possibly locked) directory vnode in ndp->ni_dvp.
10073148Sphk		 */
10083148Sphk		return (0);
10093148Sphk	}
1010162288Smohans
10113148Sphk	dp = *vpp;
10123148Sphk
10133148Sphk	/*
101496755Strhodes	 * Disallow directory write attempts on read-only filesystems.
10153148Sphk	 */
101611644Sdg	if (rdonly &&
101711644Sdg	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
1018145004Sjeff		if (dvp == dp)
1019145004Sjeff			vrele(dvp);
1020145004Sjeff		else
1021145004Sjeff			vput(dvp);
102211644Sdg		error = EROFS;
1023145004Sjeff		goto bad;
10243148Sphk	}
1025145004Sjeff	/*
1026145004Sjeff	 * Set the parent lock/ref state to the requested state.
1027145004Sjeff	 */
1028145004Sjeff	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
1029145004Sjeff		if (wantparent)
1030175294Sattilio			VOP_UNLOCK(dvp, 0);
1031145004Sjeff		else
1032145004Sjeff			vput(dvp);
1033145004Sjeff	} else if (!wantparent)
1034145004Sjeff		vrele(dvp);
1035145004Sjeff	/*
1036145004Sjeff	 * Check for symbolic link
1037145004Sjeff	 */
1038145004Sjeff	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
1039145004Sjeff	    ("relookup: symlink found.\n"));
1040145004Sjeff
10413148Sphk	/* ASSERT(dvp == ndp->ni_startdir) */
10423148Sphk	if (cnp->cn_flags & SAVESTART)
10433148Sphk		VREF(dvp);
104422521Sdyson
10453148Sphk	if ((cnp->cn_flags & LOCKLEAF) == 0)
1046175294Sattilio		VOP_UNLOCK(dp, 0);
10473148Sphk	return (0);
10483148Sphkbad:
10493148Sphk	vput(dp);
10503148Sphk	*vpp = NULL;
10513148Sphk	return (error);
10523148Sphk}
1053141471Sjhb
1054255219Spjdvoid
1055255219SpjdNDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg,
1056255219Spjd    const char *namep, int dirfd, struct vnode *startdir, cap_rights_t *rightsp,
1057255219Spjd    struct thread *td)
1058255219Spjd{
1059255219Spjd
1060255219Spjd	ndp->ni_cnd.cn_nameiop = op;
1061255219Spjd	ndp->ni_cnd.cn_flags = flags;
1062255219Spjd	ndp->ni_segflg = segflg;
1063255219Spjd	ndp->ni_dirp = namep;
1064255219Spjd	ndp->ni_dirfd = dirfd;
1065255219Spjd	ndp->ni_startdir = startdir;
1066255219Spjd	ndp->ni_strictrelative = 0;
1067255219Spjd	if (rightsp != NULL)
1068255219Spjd		ndp->ni_rightsneeded = *rightsp;
1069255219Spjd	else
1070255219Spjd		cap_rights_init(&ndp->ni_rightsneeded);
1071255219Spjd	filecaps_init(&ndp->ni_filecaps);
1072255219Spjd	ndp->ni_cnd.cn_thread = td;
1073255219Spjd}
1074255219Spjd
1075141471Sjhb/*
1076144661Sjeff * Free data allocated by namei(); see namei(9) for details.
1077144661Sjeff */
1078144661Sjeffvoid
1079161011SrwatsonNDFREE(struct nameidata *ndp, const u_int flags)
1080144661Sjeff{
1081144833Sjeff	int unlock_dvp;
1082144833Sjeff	int unlock_vp;
1083144661Sjeff
1084144833Sjeff	unlock_dvp = 0;
1085144833Sjeff	unlock_vp = 0;
1086144833Sjeff
1087144661Sjeff	if (!(flags & NDF_NO_FREE_PNBUF) &&
1088144661Sjeff	    (ndp->ni_cnd.cn_flags & HASBUF)) {
1089144661Sjeff		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
1090144661Sjeff		ndp->ni_cnd.cn_flags &= ~HASBUF;
1091144661Sjeff	}
1092144833Sjeff	if (!(flags & NDF_NO_VP_UNLOCK) &&
1093144833Sjeff	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
1094144833Sjeff		unlock_vp = 1;
1095144833Sjeff	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
1096144833Sjeff		if (unlock_vp) {
1097144833Sjeff			vput(ndp->ni_vp);
1098144833Sjeff			unlock_vp = 0;
1099144833Sjeff		} else
1100144833Sjeff			vrele(ndp->ni_vp);
1101144833Sjeff		ndp->ni_vp = NULL;
1102144833Sjeff	}
1103144833Sjeff	if (unlock_vp)
1104175294Sattilio		VOP_UNLOCK(ndp->ni_vp, 0);
1105144661Sjeff	if (!(flags & NDF_NO_DVP_UNLOCK) &&
1106144661Sjeff	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
1107144661Sjeff	    ndp->ni_dvp != ndp->ni_vp)
1108144833Sjeff		unlock_dvp = 1;
1109144661Sjeff	if (!(flags & NDF_NO_DVP_RELE) &&
1110144661Sjeff	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
1111144833Sjeff		if (unlock_dvp) {
1112144833Sjeff			vput(ndp->ni_dvp);
1113144833Sjeff			unlock_dvp = 0;
1114144833Sjeff		} else
1115144833Sjeff			vrele(ndp->ni_dvp);
1116144661Sjeff		ndp->ni_dvp = NULL;
1117144661Sjeff	}
1118144833Sjeff	if (unlock_dvp)
1119175294Sattilio		VOP_UNLOCK(ndp->ni_dvp, 0);
1120144661Sjeff	if (!(flags & NDF_NO_STARTDIR_RELE) &&
1121144661Sjeff	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
1122144661Sjeff		vrele(ndp->ni_startdir);
1123144661Sjeff		ndp->ni_startdir = NULL;
1124144661Sjeff	}
1125144661Sjeff}
1126144661Sjeff
1127144661Sjeff/*
1128141471Sjhb * Determine if there is a suitable alternate filename under the specified
1129141471Sjhb * prefix for the specified path.  If the create flag is set, then the
1130141471Sjhb * alternate prefix will be used so long as the parent directory exists.
1131141471Sjhb * This is used by the various compatiblity ABIs so that Linux binaries prefer
1132141471Sjhb * files under /compat/linux for example.  The chosen path (whether under
1133141471Sjhb * the prefix or under /) is returned in a kernel malloc'd buffer pointed
1134141471Sjhb * to by pathbuf.  The caller is responsible for free'ing the buffer from
1135141471Sjhb * the M_TEMP bucket if one is returned.
1136141471Sjhb */
1137141471Sjhbint
1138177997Skibkern_alternate_path(struct thread *td, const char *prefix, const char *path,
1139177997Skib    enum uio_seg pathseg, char **pathbuf, int create, int dirfd)
1140141471Sjhb{
1141141471Sjhb	struct nameidata nd, ndroot;
1142141471Sjhb	char *ptr, *buf, *cp;
1143141471Sjhb	size_t len, sz;
1144141471Sjhb	int error;
1145141471Sjhb
1146141471Sjhb	buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1147141471Sjhb	*pathbuf = buf;
1148141471Sjhb
1149141471Sjhb	/* Copy the prefix into the new pathname as a starting point. */
1150141471Sjhb	len = strlcpy(buf, prefix, MAXPATHLEN);
1151141471Sjhb	if (len >= MAXPATHLEN) {
1152141471Sjhb		*pathbuf = NULL;
1153141471Sjhb		free(buf, M_TEMP);
1154141471Sjhb		return (EINVAL);
1155141471Sjhb	}
1156141471Sjhb	sz = MAXPATHLEN - len;
1157141471Sjhb	ptr = buf + len;
1158141471Sjhb
1159141471Sjhb	/* Append the filename to the prefix. */
1160141471Sjhb	if (pathseg == UIO_SYSSPACE)
1161141471Sjhb		error = copystr(path, ptr, sz, &len);
1162141471Sjhb	else
1163141471Sjhb		error = copyinstr(path, ptr, sz, &len);
1164141471Sjhb
1165141471Sjhb	if (error) {
1166141471Sjhb		*pathbuf = NULL;
1167141471Sjhb		free(buf, M_TEMP);
1168141471Sjhb		return (error);
1169141471Sjhb	}
1170141471Sjhb
1171141471Sjhb	/* Only use a prefix with absolute pathnames. */
1172141471Sjhb	if (*ptr != '/') {
1173141471Sjhb		error = EINVAL;
1174141471Sjhb		goto keeporig;
1175141471Sjhb	}
1176141471Sjhb
1177177997Skib	if (dirfd != AT_FDCWD) {
1178177997Skib		/*
1179177997Skib		 * We want the original because the "prefix" is
1180177997Skib		 * included in the already opened dirfd.
1181177997Skib		 */
1182177997Skib		bcopy(ptr, buf, len);
1183177997Skib		return (0);
1184177997Skib	}
1185177997Skib
1186141471Sjhb	/*
1187141471Sjhb	 * We know that there is a / somewhere in this pathname.
1188141471Sjhb	 * Search backwards for it, to find the file's parent dir
1189141471Sjhb	 * to see if it exists in the alternate tree. If it does,
1190141471Sjhb	 * and we want to create a file (cflag is set). We don't
1191141471Sjhb	 * need to worry about the root comparison in this case.
1192141471Sjhb	 */
1193141471Sjhb
1194141471Sjhb	if (create) {
1195141471Sjhb		for (cp = &ptr[len] - 1; *cp != '/'; cp--);
1196141471Sjhb		*cp = '\0';
1197141471Sjhb
1198241896Skib		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td);
1199141471Sjhb		error = namei(&nd);
1200141471Sjhb		*cp = '/';
1201141471Sjhb		if (error != 0)
1202150431Sjhb			goto keeporig;
1203141471Sjhb	} else {
1204241896Skib		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td);
1205141471Sjhb
1206141471Sjhb		error = namei(&nd);
1207141471Sjhb		if (error != 0)
1208150431Sjhb			goto keeporig;
1209141471Sjhb
1210141471Sjhb		/*
1211141471Sjhb		 * We now compare the vnode of the prefix to the one
1212141471Sjhb		 * vnode asked. If they resolve to be the same, then we
1213141471Sjhb		 * ignore the match so that the real root gets used.
1214141471Sjhb		 * This avoids the problem of traversing "../.." to find the
1215141471Sjhb		 * root directory and never finding it, because "/" resolves
1216141471Sjhb		 * to the emulation root directory. This is expensive :-(
1217141471Sjhb		 */
1218241896Skib		NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix,
1219150431Sjhb		    td);
1220141471Sjhb
1221141471Sjhb		/* We shouldn't ever get an error from this namei(). */
1222141471Sjhb		error = namei(&ndroot);
1223141471Sjhb		if (error == 0) {
1224141471Sjhb			if (nd.ni_vp == ndroot.ni_vp)
1225141471Sjhb				error = ENOENT;
1226141471Sjhb
1227141471Sjhb			NDFREE(&ndroot, NDF_ONLY_PNBUF);
1228141471Sjhb			vrele(ndroot.ni_vp);
1229141471Sjhb		}
1230141471Sjhb	}
1231141471Sjhb
1232141471Sjhb	NDFREE(&nd, NDF_ONLY_PNBUF);
1233141471Sjhb	vrele(nd.ni_vp);
1234141471Sjhb
1235141471Sjhbkeeporig:
1236141471Sjhb	/* If there was an error, use the original path name. */
1237141471Sjhb	if (error)
1238141471Sjhb		bcopy(ptr, buf, len);
1239141471Sjhb	return (error);
1240141471Sjhb}
1241