vfs_syscalls.c revision 293474
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_syscalls.c 293474 2016-01-09 14:20:23Z dchagin $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_kdtrace.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/capsicum.h> 50#include <sys/disk.h> 51#include <sys/sysent.h> 52#include <sys/malloc.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/sysproto.h> 56#include <sys/namei.h> 57#include <sys/filedesc.h> 58#include <sys/kernel.h> 59#include <sys/fcntl.h> 60#include <sys/file.h> 61#include <sys/filio.h> 62#include <sys/limits.h> 63#include <sys/linker.h> 64#include <sys/rwlock.h> 65#include <sys/sdt.h> 66#include <sys/stat.h> 67#include <sys/sx.h> 68#include <sys/unistd.h> 69#include <sys/vnode.h> 70#include <sys/priv.h> 71#include <sys/proc.h> 72#include <sys/dirent.h> 73#include <sys/jail.h> 74#include <sys/syscallsubr.h> 75#include <sys/sysctl.h> 76#ifdef KTRACE 77#include <sys/ktrace.h> 78#endif 79 80#include <machine/stdarg.h> 81 82#include <security/audit/audit.h> 83#include <security/mac/mac_framework.h> 84 85#include <vm/vm.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/uma.h> 89 90#include <ufs/ufs/quota.h> 91 92MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94SDT_PROVIDER_DEFINE(vfs); 95SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98static int chroot_refuse_vdir_fds(struct filedesc *fdp); 99static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103static int setfflags(struct thread *td, struct vnode *, u_long); 104static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 105static int getutimens(const struct timespec *, enum uio_seg, 106 struct timespec *, int *); 107static int setutimes(struct thread *td, struct vnode *, 108 const struct timespec *, int, int); 109static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 110 struct thread *td); 111 112/* 113 * The module initialization routine for POSIX asynchronous I/O will 114 * set this to the version of AIO that it implements. (Zero means 115 * that it is not implemented.) This value is used here by pathconf() 116 * and in kern_descrip.c by fpathconf(). 117 */ 118int async_io_version; 119 120/* 121 * Sync each mounted filesystem. 122 */ 123#ifndef _SYS_SYSPROTO_H_ 124struct sync_args { 125 int dummy; 126}; 127#endif 128/* ARGSUSED */ 129int 130sys_sync(td, uap) 131 struct thread *td; 132 struct sync_args *uap; 133{ 134 struct mount *mp, *nmp; 135 int save; 136 137 mtx_lock(&mountlist_mtx); 138 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 139 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 continue; 142 } 143 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 144 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 145 save = curthread_pflags_set(TDP_SYNCIO); 146 vfs_msync(mp, MNT_NOWAIT); 147 VFS_SYNC(mp, MNT_NOWAIT); 148 curthread_pflags_restore(save); 149 vn_finished_write(mp); 150 } 151 mtx_lock(&mountlist_mtx); 152 nmp = TAILQ_NEXT(mp, mnt_list); 153 vfs_unbusy(mp); 154 } 155 mtx_unlock(&mountlist_mtx); 156 return (0); 157} 158 159/* 160 * Change filesystem quotas. 161 */ 162#ifndef _SYS_SYSPROTO_H_ 163struct quotactl_args { 164 char *path; 165 int cmd; 166 int uid; 167 caddr_t arg; 168}; 169#endif 170int 171sys_quotactl(td, uap) 172 struct thread *td; 173 register struct quotactl_args /* { 174 char *path; 175 int cmd; 176 int uid; 177 caddr_t arg; 178 } */ *uap; 179{ 180 struct mount *mp; 181 struct nameidata nd; 182 int error; 183 184 AUDIT_ARG_CMD(uap->cmd); 185 AUDIT_ARG_UID(uap->uid); 186 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 187 return (EPERM); 188 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 189 uap->path, td); 190 if ((error = namei(&nd)) != 0) 191 return (error); 192 NDFREE(&nd, NDF_ONLY_PNBUF); 193 mp = nd.ni_vp->v_mount; 194 vfs_ref(mp); 195 vput(nd.ni_vp); 196 error = vfs_busy(mp, 0); 197 vfs_rel(mp); 198 if (error != 0) 199 return (error); 200 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 201 202 /* 203 * Since quota on operation typically needs to open quota 204 * file, the Q_QUOTAON handler needs to unbusy the mount point 205 * before calling into namei. Otherwise, unmount might be 206 * started between two vfs_busy() invocations (first is our, 207 * second is from mount point cross-walk code in lookup()), 208 * causing deadlock. 209 * 210 * Require that Q_QUOTAON handles the vfs_busy() reference on 211 * its own, always returning with ubusied mount point. 212 */ 213 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 214 vfs_unbusy(mp); 215 return (error); 216} 217 218/* 219 * Used by statfs conversion routines to scale the block size up if 220 * necessary so that all of the block counts are <= 'max_size'. Note 221 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 222 * value of 'n'. 223 */ 224void 225statfs_scale_blocks(struct statfs *sf, long max_size) 226{ 227 uint64_t count; 228 int shift; 229 230 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 231 232 /* 233 * Attempt to scale the block counts to give a more accurate 234 * overview to userland of the ratio of free space to used 235 * space. To do this, find the largest block count and compute 236 * a divisor that lets it fit into a signed integer <= max_size. 237 */ 238 if (sf->f_bavail < 0) 239 count = -sf->f_bavail; 240 else 241 count = sf->f_bavail; 242 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 243 if (count <= max_size) 244 return; 245 246 count >>= flsl(max_size); 247 shift = 0; 248 while (count > 0) { 249 shift++; 250 count >>=1; 251 } 252 253 sf->f_bsize <<= shift; 254 sf->f_blocks >>= shift; 255 sf->f_bfree >>= shift; 256 sf->f_bavail >>= shift; 257} 258 259/* 260 * Get filesystem statistics. 261 */ 262#ifndef _SYS_SYSPROTO_H_ 263struct statfs_args { 264 char *path; 265 struct statfs *buf; 266}; 267#endif 268int 269sys_statfs(td, uap) 270 struct thread *td; 271 register struct statfs_args /* { 272 char *path; 273 struct statfs *buf; 274 } */ *uap; 275{ 276 struct statfs sf; 277 int error; 278 279 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 280 if (error == 0) 281 error = copyout(&sf, uap->buf, sizeof(sf)); 282 return (error); 283} 284 285int 286kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 287 struct statfs *buf) 288{ 289 struct mount *mp; 290 struct statfs *sp, sb; 291 struct nameidata nd; 292 int error; 293 294 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 295 pathseg, path, td); 296 error = namei(&nd); 297 if (error != 0) 298 return (error); 299 mp = nd.ni_vp->v_mount; 300 vfs_ref(mp); 301 NDFREE(&nd, NDF_ONLY_PNBUF); 302 vput(nd.ni_vp); 303 error = vfs_busy(mp, 0); 304 vfs_rel(mp); 305 if (error != 0) 306 return (error); 307#ifdef MAC 308 error = mac_mount_check_stat(td->td_ucred, mp); 309 if (error != 0) 310 goto out; 311#endif 312 /* 313 * Set these in case the underlying filesystem fails to do so. 314 */ 315 sp = &mp->mnt_stat; 316 sp->f_version = STATFS_VERSION; 317 sp->f_namemax = NAME_MAX; 318 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 319 error = VFS_STATFS(mp, sp); 320 if (error != 0) 321 goto out; 322 if (priv_check(td, PRIV_VFS_GENERATION)) { 323 bcopy(sp, &sb, sizeof(sb)); 324 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 325 prison_enforce_statfs(td->td_ucred, mp, &sb); 326 sp = &sb; 327 } 328 *buf = *sp; 329out: 330 vfs_unbusy(mp); 331 return (error); 332} 333 334/* 335 * Get filesystem statistics. 336 */ 337#ifndef _SYS_SYSPROTO_H_ 338struct fstatfs_args { 339 int fd; 340 struct statfs *buf; 341}; 342#endif 343int 344sys_fstatfs(td, uap) 345 struct thread *td; 346 register struct fstatfs_args /* { 347 int fd; 348 struct statfs *buf; 349 } */ *uap; 350{ 351 struct statfs sf; 352 int error; 353 354 error = kern_fstatfs(td, uap->fd, &sf); 355 if (error == 0) 356 error = copyout(&sf, uap->buf, sizeof(sf)); 357 return (error); 358} 359 360int 361kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 362{ 363 struct file *fp; 364 struct mount *mp; 365 struct statfs *sp, sb; 366 struct vnode *vp; 367 cap_rights_t rights; 368 int error; 369 370 AUDIT_ARG_FD(fd); 371 error = getvnode(td->td_proc->p_fd, fd, 372 cap_rights_init(&rights, CAP_FSTATFS), &fp); 373 if (error != 0) 374 return (error); 375 vp = fp->f_vnode; 376 vn_lock(vp, LK_SHARED | LK_RETRY); 377#ifdef AUDIT 378 AUDIT_ARG_VNODE1(vp); 379#endif 380 mp = vp->v_mount; 381 if (mp) 382 vfs_ref(mp); 383 VOP_UNLOCK(vp, 0); 384 fdrop(fp, td); 385 if (mp == NULL) { 386 error = EBADF; 387 goto out; 388 } 389 error = vfs_busy(mp, 0); 390 vfs_rel(mp); 391 if (error != 0) 392 return (error); 393#ifdef MAC 394 error = mac_mount_check_stat(td->td_ucred, mp); 395 if (error != 0) 396 goto out; 397#endif 398 /* 399 * Set these in case the underlying filesystem fails to do so. 400 */ 401 sp = &mp->mnt_stat; 402 sp->f_version = STATFS_VERSION; 403 sp->f_namemax = NAME_MAX; 404 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 405 error = VFS_STATFS(mp, sp); 406 if (error != 0) 407 goto out; 408 if (priv_check(td, PRIV_VFS_GENERATION)) { 409 bcopy(sp, &sb, sizeof(sb)); 410 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 411 prison_enforce_statfs(td->td_ucred, mp, &sb); 412 sp = &sb; 413 } 414 *buf = *sp; 415out: 416 if (mp) 417 vfs_unbusy(mp); 418 return (error); 419} 420 421/* 422 * Get statistics on all filesystems. 423 */ 424#ifndef _SYS_SYSPROTO_H_ 425struct getfsstat_args { 426 struct statfs *buf; 427 long bufsize; 428 int flags; 429}; 430#endif 431int 432sys_getfsstat(td, uap) 433 struct thread *td; 434 register struct getfsstat_args /* { 435 struct statfs *buf; 436 long bufsize; 437 int flags; 438 } */ *uap; 439{ 440 441 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 442 uap->flags)); 443} 444 445/* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 449 */ 450int 451kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 452 enum uio_seg bufseg, int flags) 453{ 454 struct mount *mp, *nmp; 455 struct statfs *sfsp, *sp, sb; 456 size_t count, maxcount; 457 int error; 458 459 maxcount = bufsize / sizeof(struct statfs); 460 if (bufsize == 0) 461 sfsp = NULL; 462 else if (bufseg == UIO_USERSPACE) 463 sfsp = *buf; 464 else /* if (bufseg == UIO_SYSSPACE) */ { 465 count = 0; 466 mtx_lock(&mountlist_mtx); 467 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 468 count++; 469 } 470 mtx_unlock(&mountlist_mtx); 471 if (maxcount > count) 472 maxcount = count; 473 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 474 M_WAITOK); 475 } 476 count = 0; 477 mtx_lock(&mountlist_mtx); 478 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 479 if (prison_canseemount(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483#ifdef MAC 484 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488#endif 489 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 if (sfsp && count < maxcount) { 494 sp = &mp->mnt_stat; 495 /* 496 * Set these in case the underlying filesystem 497 * fails to do so. 498 */ 499 sp->f_version = STATFS_VERSION; 500 sp->f_namemax = NAME_MAX; 501 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 502 /* 503 * If MNT_NOWAIT or MNT_LAZY is specified, do not 504 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 505 * overrides MNT_WAIT. 506 */ 507 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 508 (flags & MNT_WAIT)) && 509 (error = VFS_STATFS(mp, sp))) { 510 mtx_lock(&mountlist_mtx); 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 vfs_unbusy(mp); 513 continue; 514 } 515 if (priv_check(td, PRIV_VFS_GENERATION)) { 516 bcopy(sp, &sb, sizeof(sb)); 517 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 518 prison_enforce_statfs(td->td_ucred, mp, &sb); 519 sp = &sb; 520 } 521 if (bufseg == UIO_SYSSPACE) 522 bcopy(sp, sfsp, sizeof(*sp)); 523 else /* if (bufseg == UIO_USERSPACE) */ { 524 error = copyout(sp, sfsp, sizeof(*sp)); 525 if (error != 0) { 526 vfs_unbusy(mp); 527 return (error); 528 } 529 } 530 sfsp++; 531 } 532 count++; 533 mtx_lock(&mountlist_mtx); 534 nmp = TAILQ_NEXT(mp, mnt_list); 535 vfs_unbusy(mp); 536 } 537 mtx_unlock(&mountlist_mtx); 538 if (sfsp && count > maxcount) 539 td->td_retval[0] = maxcount; 540 else 541 td->td_retval[0] = count; 542 return (0); 543} 544 545#ifdef COMPAT_FREEBSD4 546/* 547 * Get old format filesystem statistics. 548 */ 549static void cvtstatfs(struct statfs *, struct ostatfs *); 550 551#ifndef _SYS_SYSPROTO_H_ 552struct freebsd4_statfs_args { 553 char *path; 554 struct ostatfs *buf; 555}; 556#endif 557int 558freebsd4_statfs(td, uap) 559 struct thread *td; 560 struct freebsd4_statfs_args /* { 561 char *path; 562 struct ostatfs *buf; 563 } */ *uap; 564{ 565 struct ostatfs osb; 566 struct statfs sf; 567 int error; 568 569 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 570 if (error != 0) 571 return (error); 572 cvtstatfs(&sf, &osb); 573 return (copyout(&osb, uap->buf, sizeof(osb))); 574} 575 576/* 577 * Get filesystem statistics. 578 */ 579#ifndef _SYS_SYSPROTO_H_ 580struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583}; 584#endif 585int 586freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592{ 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error != 0) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602} 603 604/* 605 * Get statistics on all filesystems. 606 */ 607#ifndef _SYS_SYSPROTO_H_ 608struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612}; 613#endif 614int 615freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622{ 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 631 if (size > 0) { 632 count = td->td_retval[0]; 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 return (error); 644} 645 646/* 647 * Implement fstatfs() for (NFS) file handles. 648 */ 649#ifndef _SYS_SYSPROTO_H_ 650struct freebsd4_fhstatfs_args { 651 struct fhandle *u_fhp; 652 struct ostatfs *buf; 653}; 654#endif 655int 656freebsd4_fhstatfs(td, uap) 657 struct thread *td; 658 struct freebsd4_fhstatfs_args /* { 659 struct fhandle *u_fhp; 660 struct ostatfs *buf; 661 } */ *uap; 662{ 663 struct ostatfs osb; 664 struct statfs sf; 665 fhandle_t fh; 666 int error; 667 668 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 669 if (error != 0) 670 return (error); 671 error = kern_fhstatfs(td, fh, &sf); 672 if (error != 0) 673 return (error); 674 cvtstatfs(&sf, &osb); 675 return (copyout(&osb, uap->buf, sizeof(osb))); 676} 677 678/* 679 * Convert a new format statfs structure to an old format statfs structure. 680 */ 681static void 682cvtstatfs(nsp, osp) 683 struct statfs *nsp; 684 struct ostatfs *osp; 685{ 686 687 statfs_scale_blocks(nsp, LONG_MAX); 688 bzero(osp, sizeof(*osp)); 689 osp->f_bsize = nsp->f_bsize; 690 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 691 osp->f_blocks = nsp->f_blocks; 692 osp->f_bfree = nsp->f_bfree; 693 osp->f_bavail = nsp->f_bavail; 694 osp->f_files = MIN(nsp->f_files, LONG_MAX); 695 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 696 osp->f_owner = nsp->f_owner; 697 osp->f_type = nsp->f_type; 698 osp->f_flags = nsp->f_flags; 699 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 700 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 701 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 702 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 703 strlcpy(osp->f_fstypename, nsp->f_fstypename, 704 MIN(MFSNAMELEN, OMFSNAMELEN)); 705 strlcpy(osp->f_mntonname, nsp->f_mntonname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 osp->f_fsid = nsp->f_fsid; 710} 711#endif /* COMPAT_FREEBSD4 */ 712 713/* 714 * Change current working directory to a given file descriptor. 715 */ 716#ifndef _SYS_SYSPROTO_H_ 717struct fchdir_args { 718 int fd; 719}; 720#endif 721int 722sys_fchdir(td, uap) 723 struct thread *td; 724 struct fchdir_args /* { 725 int fd; 726 } */ *uap; 727{ 728 register struct filedesc *fdp = td->td_proc->p_fd; 729 struct vnode *vp, *tdp, *vpold; 730 struct mount *mp; 731 struct file *fp; 732 cap_rights_t rights; 733 int error; 734 735 AUDIT_ARG_FD(uap->fd); 736 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 737 &fp); 738 if (error != 0) 739 return (error); 740 vp = fp->f_vnode; 741 VREF(vp); 742 fdrop(fp, td); 743 vn_lock(vp, LK_SHARED | LK_RETRY); 744 AUDIT_ARG_VNODE1(vp); 745 error = change_dir(vp, td); 746 while (!error && (mp = vp->v_mountedhere) != NULL) { 747 if (vfs_busy(mp, 0)) 748 continue; 749 error = VFS_ROOT(mp, LK_SHARED, &tdp); 750 vfs_unbusy(mp); 751 if (error != 0) 752 break; 753 vput(vp); 754 vp = tdp; 755 } 756 if (error != 0) { 757 vput(vp); 758 return (error); 759 } 760 VOP_UNLOCK(vp, 0); 761 FILEDESC_XLOCK(fdp); 762 vpold = fdp->fd_cdir; 763 fdp->fd_cdir = vp; 764 FILEDESC_XUNLOCK(fdp); 765 vrele(vpold); 766 return (0); 767} 768 769/* 770 * Change current working directory (``.''). 771 */ 772#ifndef _SYS_SYSPROTO_H_ 773struct chdir_args { 774 char *path; 775}; 776#endif 777int 778sys_chdir(td, uap) 779 struct thread *td; 780 struct chdir_args /* { 781 char *path; 782 } */ *uap; 783{ 784 785 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 786} 787 788int 789kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 790{ 791 register struct filedesc *fdp = td->td_proc->p_fd; 792 struct nameidata nd; 793 struct vnode *vp; 794 int error; 795 796 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 797 pathseg, path, td); 798 if ((error = namei(&nd)) != 0) 799 return (error); 800 if ((error = change_dir(nd.ni_vp, td)) != 0) { 801 vput(nd.ni_vp); 802 NDFREE(&nd, NDF_ONLY_PNBUF); 803 return (error); 804 } 805 VOP_UNLOCK(nd.ni_vp, 0); 806 NDFREE(&nd, NDF_ONLY_PNBUF); 807 FILEDESC_XLOCK(fdp); 808 vp = fdp->fd_cdir; 809 fdp->fd_cdir = nd.ni_vp; 810 FILEDESC_XUNLOCK(fdp); 811 vrele(vp); 812 return (0); 813} 814 815/* 816 * Helper function for raised chroot(2) security function: Refuse if 817 * any filedescriptors are open directories. 818 */ 819static int 820chroot_refuse_vdir_fds(fdp) 821 struct filedesc *fdp; 822{ 823 struct vnode *vp; 824 struct file *fp; 825 int fd; 826 827 FILEDESC_LOCK_ASSERT(fdp); 828 829 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 830 fp = fget_locked(fdp, fd); 831 if (fp == NULL) 832 continue; 833 if (fp->f_type == DTYPE_VNODE) { 834 vp = fp->f_vnode; 835 if (vp->v_type == VDIR) 836 return (EPERM); 837 } 838 } 839 return (0); 840} 841 842/* 843 * This sysctl determines if we will allow a process to chroot(2) if it 844 * has a directory open: 845 * 0: disallowed for all processes. 846 * 1: allowed for processes that were not already chroot(2)'ed. 847 * 2: allowed for all processes. 848 */ 849 850static int chroot_allow_open_directories = 1; 851 852SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 853 &chroot_allow_open_directories, 0, 854 "Allow a process to chroot(2) if it has a directory open"); 855 856/* 857 * Change notion of root (``/'') directory. 858 */ 859#ifndef _SYS_SYSPROTO_H_ 860struct chroot_args { 861 char *path; 862}; 863#endif 864int 865sys_chroot(td, uap) 866 struct thread *td; 867 struct chroot_args /* { 868 char *path; 869 } */ *uap; 870{ 871 struct nameidata nd; 872 int error; 873 874 error = priv_check(td, PRIV_VFS_CHROOT); 875 if (error != 0) 876 return (error); 877 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 878 UIO_USERSPACE, uap->path, td); 879 error = namei(&nd); 880 if (error != 0) 881 goto error; 882 error = change_dir(nd.ni_vp, td); 883 if (error != 0) 884 goto e_vunlock; 885#ifdef MAC 886 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 887 if (error != 0) 888 goto e_vunlock; 889#endif 890 VOP_UNLOCK(nd.ni_vp, 0); 891 error = change_root(nd.ni_vp, td); 892 vrele(nd.ni_vp); 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895e_vunlock: 896 vput(nd.ni_vp); 897error: 898 NDFREE(&nd, NDF_ONLY_PNBUF); 899 return (error); 900} 901 902/* 903 * Common routine for chroot and chdir. Callers must provide a locked vnode 904 * instance. 905 */ 906int 907change_dir(vp, td) 908 struct vnode *vp; 909 struct thread *td; 910{ 911#ifdef MAC 912 int error; 913#endif 914 915 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 916 if (vp->v_type != VDIR) 917 return (ENOTDIR); 918#ifdef MAC 919 error = mac_vnode_check_chdir(td->td_ucred, vp); 920 if (error != 0) 921 return (error); 922#endif 923 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 924} 925 926/* 927 * Common routine for kern_chroot() and jail_attach(). The caller is 928 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 929 * authorize this operation. 930 */ 931int 932change_root(vp, td) 933 struct vnode *vp; 934 struct thread *td; 935{ 936 struct filedesc *fdp; 937 struct vnode *oldvp; 938 int error; 939 940 fdp = td->td_proc->p_fd; 941 FILEDESC_XLOCK(fdp); 942 if (chroot_allow_open_directories == 0 || 943 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 944 error = chroot_refuse_vdir_fds(fdp); 945 if (error != 0) { 946 FILEDESC_XUNLOCK(fdp); 947 return (error); 948 } 949 } 950 oldvp = fdp->fd_rdir; 951 fdp->fd_rdir = vp; 952 VREF(fdp->fd_rdir); 953 if (!fdp->fd_jdir) { 954 fdp->fd_jdir = vp; 955 VREF(fdp->fd_jdir); 956 } 957 FILEDESC_XUNLOCK(fdp); 958 vrele(oldvp); 959 return (0); 960} 961 962static __inline void 963flags_to_rights(int flags, cap_rights_t *rightsp) 964{ 965 966 if (flags & O_EXEC) { 967 cap_rights_set(rightsp, CAP_FEXECVE); 968 } else { 969 switch ((flags & O_ACCMODE)) { 970 case O_RDONLY: 971 cap_rights_set(rightsp, CAP_READ); 972 break; 973 case O_RDWR: 974 cap_rights_set(rightsp, CAP_READ); 975 /* FALLTHROUGH */ 976 case O_WRONLY: 977 cap_rights_set(rightsp, CAP_WRITE); 978 if (!(flags & (O_APPEND | O_TRUNC))) 979 cap_rights_set(rightsp, CAP_SEEK); 980 break; 981 } 982 } 983 984 if (flags & O_CREAT) 985 cap_rights_set(rightsp, CAP_CREATE); 986 987 if (flags & O_TRUNC) 988 cap_rights_set(rightsp, CAP_FTRUNCATE); 989 990 if (flags & (O_SYNC | O_FSYNC)) 991 cap_rights_set(rightsp, CAP_FSYNC); 992 993 if (flags & (O_EXLOCK | O_SHLOCK)) 994 cap_rights_set(rightsp, CAP_FLOCK); 995} 996 997/* 998 * Check permissions, allocate an open file structure, and call the device 999 * open routine if any. 1000 */ 1001#ifndef _SYS_SYSPROTO_H_ 1002struct open_args { 1003 char *path; 1004 int flags; 1005 int mode; 1006}; 1007#endif 1008int 1009sys_open(td, uap) 1010 struct thread *td; 1011 register struct open_args /* { 1012 char *path; 1013 int flags; 1014 int mode; 1015 } */ *uap; 1016{ 1017 1018 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1019} 1020 1021#ifndef _SYS_SYSPROTO_H_ 1022struct openat_args { 1023 int fd; 1024 char *path; 1025 int flag; 1026 int mode; 1027}; 1028#endif 1029int 1030sys_openat(struct thread *td, struct openat_args *uap) 1031{ 1032 1033 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1034 uap->mode)); 1035} 1036 1037int 1038kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1039 int mode) 1040{ 1041 1042 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1043} 1044 1045int 1046kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1047 int flags, int mode) 1048{ 1049 struct proc *p = td->td_proc; 1050 struct filedesc *fdp = p->p_fd; 1051 struct file *fp; 1052 struct vnode *vp; 1053 struct nameidata nd; 1054 cap_rights_t rights; 1055 int cmode, error, indx; 1056 1057 indx = -1; 1058 1059 AUDIT_ARG_FFLAGS(flags); 1060 AUDIT_ARG_MODE(mode); 1061 /* XXX: audit dirfd */ 1062 cap_rights_init(&rights, CAP_LOOKUP); 1063 flags_to_rights(flags, &rights); 1064 /* 1065 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1066 * may be specified. 1067 */ 1068 if (flags & O_EXEC) { 1069 if (flags & O_ACCMODE) 1070 return (EINVAL); 1071 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1072 return (EINVAL); 1073 } else { 1074 flags = FFLAGS(flags); 1075 } 1076 1077 /* 1078 * Allocate the file descriptor, but don't install a descriptor yet. 1079 */ 1080 error = falloc_noinstall(td, &fp); 1081 if (error != 0) 1082 return (error); 1083 /* 1084 * An extra reference on `fp' has been held for us by 1085 * falloc_noinstall(). 1086 */ 1087 /* Set the flags early so the finit in devfs can pick them up. */ 1088 fp->f_flag = flags & FMASK; 1089 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1090 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1091 &rights, td); 1092 td->td_dupfd = -1; /* XXX check for fdopen */ 1093 error = vn_open(&nd, &flags, cmode, fp); 1094 if (error != 0) { 1095 /* 1096 * If the vn_open replaced the method vector, something 1097 * wonderous happened deep below and we just pass it up 1098 * pretending we know what we do. 1099 */ 1100 if (error == ENXIO && fp->f_ops != &badfileops) 1101 goto success; 1102 1103 /* 1104 * Handle special fdopen() case. bleh. 1105 * 1106 * Don't do this for relative (capability) lookups; we don't 1107 * understand exactly what would happen, and we don't think 1108 * that it ever should. 1109 */ 1110 if (nd.ni_strictrelative == 0 && 1111 (error == ENODEV || error == ENXIO) && 1112 td->td_dupfd >= 0) { 1113 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1114 &indx); 1115 if (error == 0) 1116 goto success; 1117 } 1118 1119 goto bad; 1120 } 1121 td->td_dupfd = 0; 1122 NDFREE(&nd, NDF_ONLY_PNBUF); 1123 vp = nd.ni_vp; 1124 1125 /* 1126 * Store the vnode, for any f_type. Typically, the vnode use 1127 * count is decremented by direct call to vn_closefile() for 1128 * files that switched type in the cdevsw fdopen() method. 1129 */ 1130 fp->f_vnode = vp; 1131 /* 1132 * If the file wasn't claimed by devfs bind it to the normal 1133 * vnode operations here. 1134 */ 1135 if (fp->f_ops == &badfileops) { 1136 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1137 fp->f_seqcount = 1; 1138 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1139 DTYPE_VNODE, vp, &vnops); 1140 } 1141 1142 VOP_UNLOCK(vp, 0); 1143 if (flags & O_TRUNC) { 1144 error = fo_truncate(fp, 0, td->td_ucred, td); 1145 if (error != 0) 1146 goto bad; 1147 } 1148success: 1149 /* 1150 * If we haven't already installed the FD (for dupfdopen), do so now. 1151 */ 1152 if (indx == -1) { 1153 struct filecaps *fcaps; 1154 1155#ifdef CAPABILITIES 1156 if (nd.ni_strictrelative == 1) 1157 fcaps = &nd.ni_filecaps; 1158 else 1159#endif 1160 fcaps = NULL; 1161 error = finstall(td, fp, &indx, flags, fcaps); 1162 /* On success finstall() consumes fcaps. */ 1163 if (error != 0) { 1164 filecaps_free(&nd.ni_filecaps); 1165 goto bad; 1166 } 1167 } else { 1168 filecaps_free(&nd.ni_filecaps); 1169 } 1170 1171 /* 1172 * Release our private reference, leaving the one associated with 1173 * the descriptor table intact. 1174 */ 1175 fdrop(fp, td); 1176 td->td_retval[0] = indx; 1177 return (0); 1178bad: 1179 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1180 fdrop(fp, td); 1181 return (error); 1182} 1183 1184#ifdef COMPAT_43 1185/* 1186 * Create a file. 1187 */ 1188#ifndef _SYS_SYSPROTO_H_ 1189struct ocreat_args { 1190 char *path; 1191 int mode; 1192}; 1193#endif 1194int 1195ocreat(td, uap) 1196 struct thread *td; 1197 register struct ocreat_args /* { 1198 char *path; 1199 int mode; 1200 } */ *uap; 1201{ 1202 1203 return (kern_open(td, uap->path, UIO_USERSPACE, 1204 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1205} 1206#endif /* COMPAT_43 */ 1207 1208/* 1209 * Create a special file. 1210 */ 1211#ifndef _SYS_SYSPROTO_H_ 1212struct mknod_args { 1213 char *path; 1214 int mode; 1215 int dev; 1216}; 1217#endif 1218int 1219sys_mknod(td, uap) 1220 struct thread *td; 1221 register struct mknod_args /* { 1222 char *path; 1223 int mode; 1224 int dev; 1225 } */ *uap; 1226{ 1227 1228 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1229} 1230 1231#ifndef _SYS_SYSPROTO_H_ 1232struct mknodat_args { 1233 int fd; 1234 char *path; 1235 mode_t mode; 1236 dev_t dev; 1237}; 1238#endif 1239int 1240sys_mknodat(struct thread *td, struct mknodat_args *uap) 1241{ 1242 1243 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1244 uap->dev)); 1245} 1246 1247int 1248kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1249 int dev) 1250{ 1251 1252 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1253} 1254 1255int 1256kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1257 int mode, int dev) 1258{ 1259 struct vnode *vp; 1260 struct mount *mp; 1261 struct vattr vattr; 1262 struct nameidata nd; 1263 cap_rights_t rights; 1264 int error, whiteout = 0; 1265 1266 AUDIT_ARG_MODE(mode); 1267 AUDIT_ARG_DEV(dev); 1268 switch (mode & S_IFMT) { 1269 case S_IFCHR: 1270 case S_IFBLK: 1271 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1272 break; 1273 case S_IFMT: 1274 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1275 break; 1276 case S_IFWHT: 1277 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1278 break; 1279 case S_IFIFO: 1280 if (dev == 0) 1281 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1282 /* FALLTHROUGH */ 1283 default: 1284 error = EINVAL; 1285 break; 1286 } 1287 if (error != 0) 1288 return (error); 1289restart: 1290 bwillwrite(); 1291 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1292 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1293 td); 1294 if ((error = namei(&nd)) != 0) 1295 return (error); 1296 vp = nd.ni_vp; 1297 if (vp != NULL) { 1298 NDFREE(&nd, NDF_ONLY_PNBUF); 1299 if (vp == nd.ni_dvp) 1300 vrele(nd.ni_dvp); 1301 else 1302 vput(nd.ni_dvp); 1303 vrele(vp); 1304 return (EEXIST); 1305 } else { 1306 VATTR_NULL(&vattr); 1307 vattr.va_mode = (mode & ALLPERMS) & 1308 ~td->td_proc->p_fd->fd_cmask; 1309 vattr.va_rdev = dev; 1310 whiteout = 0; 1311 1312 switch (mode & S_IFMT) { 1313 case S_IFMT: /* used by badsect to flag bad sectors */ 1314 vattr.va_type = VBAD; 1315 break; 1316 case S_IFCHR: 1317 vattr.va_type = VCHR; 1318 break; 1319 case S_IFBLK: 1320 vattr.va_type = VBLK; 1321 break; 1322 case S_IFWHT: 1323 whiteout = 1; 1324 break; 1325 default: 1326 panic("kern_mknod: invalid mode"); 1327 } 1328 } 1329 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1330 NDFREE(&nd, NDF_ONLY_PNBUF); 1331 vput(nd.ni_dvp); 1332 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1333 return (error); 1334 goto restart; 1335 } 1336#ifdef MAC 1337 if (error == 0 && !whiteout) 1338 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1339 &nd.ni_cnd, &vattr); 1340#endif 1341 if (error == 0) { 1342 if (whiteout) 1343 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1344 else { 1345 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1346 &nd.ni_cnd, &vattr); 1347 if (error == 0) 1348 vput(nd.ni_vp); 1349 } 1350 } 1351 NDFREE(&nd, NDF_ONLY_PNBUF); 1352 vput(nd.ni_dvp); 1353 vn_finished_write(mp); 1354 return (error); 1355} 1356 1357/* 1358 * Create a named pipe. 1359 */ 1360#ifndef _SYS_SYSPROTO_H_ 1361struct mkfifo_args { 1362 char *path; 1363 int mode; 1364}; 1365#endif 1366int 1367sys_mkfifo(td, uap) 1368 struct thread *td; 1369 register struct mkfifo_args /* { 1370 char *path; 1371 int mode; 1372 } */ *uap; 1373{ 1374 1375 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1376} 1377 1378#ifndef _SYS_SYSPROTO_H_ 1379struct mkfifoat_args { 1380 int fd; 1381 char *path; 1382 mode_t mode; 1383}; 1384#endif 1385int 1386sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1387{ 1388 1389 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1390 uap->mode)); 1391} 1392 1393int 1394kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1395{ 1396 1397 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1398} 1399 1400int 1401kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1402 int mode) 1403{ 1404 struct mount *mp; 1405 struct vattr vattr; 1406 struct nameidata nd; 1407 cap_rights_t rights; 1408 int error; 1409 1410 AUDIT_ARG_MODE(mode); 1411restart: 1412 bwillwrite(); 1413 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1414 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1415 td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437#ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442#endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446#ifdef MAC 1447out: 1448#endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453} 1454 1455/* 1456 * Make a hard file link. 1457 */ 1458#ifndef _SYS_SYSPROTO_H_ 1459struct link_args { 1460 char *path; 1461 char *link; 1462}; 1463#endif 1464int 1465sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471{ 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474} 1475 1476#ifndef _SYS_SYSPROTO_H_ 1477struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483}; 1484#endif 1485int 1486sys_linkat(struct thread *td, struct linkat_args *uap) 1487{ 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1496} 1497 1498int hardlink_check_uid = 0; 1499SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503static int hardlink_check_gid = 0; 1504SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509static int 1510can_hardlink(struct vnode *vp, struct ucred *cred) 1511{ 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535} 1536 1537int 1538kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539{ 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542} 1543 1544int 1545kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547{ 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554again: 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1567 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1568 td); 1569 if ((error = namei(&nd)) == 0) { 1570 if (nd.ni_vp != NULL) { 1571 NDFREE(&nd, NDF_ONLY_PNBUF); 1572 if (nd.ni_dvp == nd.ni_vp) 1573 vrele(nd.ni_dvp); 1574 else 1575 vput(nd.ni_dvp); 1576 vrele(nd.ni_vp); 1577 vrele(vp); 1578 return (EEXIST); 1579 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1580 /* 1581 * Cross-device link. No need to recheck 1582 * vp->v_type, since it cannot change, except 1583 * to VBAD. 1584 */ 1585 NDFREE(&nd, NDF_ONLY_PNBUF); 1586 vput(nd.ni_dvp); 1587 vrele(vp); 1588 return (EXDEV); 1589 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1590 error = can_hardlink(vp, td->td_ucred); 1591#ifdef MAC 1592 if (error == 0) 1593 error = mac_vnode_check_link(td->td_ucred, 1594 nd.ni_dvp, vp, &nd.ni_cnd); 1595#endif 1596 if (error != 0) { 1597 vput(vp); 1598 vput(nd.ni_dvp); 1599 NDFREE(&nd, NDF_ONLY_PNBUF); 1600 return (error); 1601 } 1602 error = vn_start_write(vp, &mp, V_NOWAIT); 1603 if (error != 0) { 1604 vput(vp); 1605 vput(nd.ni_dvp); 1606 NDFREE(&nd, NDF_ONLY_PNBUF); 1607 error = vn_start_write(NULL, &mp, 1608 V_XSLEEP | PCATCH); 1609 if (error != 0) 1610 return (error); 1611 goto again; 1612 } 1613 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1614 VOP_UNLOCK(vp, 0); 1615 vput(nd.ni_dvp); 1616 vn_finished_write(mp); 1617 NDFREE(&nd, NDF_ONLY_PNBUF); 1618 } else { 1619 vput(nd.ni_dvp); 1620 NDFREE(&nd, NDF_ONLY_PNBUF); 1621 vrele(vp); 1622 goto again; 1623 } 1624 } 1625 vrele(vp); 1626 return (error); 1627} 1628 1629/* 1630 * Make a symbolic link. 1631 */ 1632#ifndef _SYS_SYSPROTO_H_ 1633struct symlink_args { 1634 char *path; 1635 char *link; 1636}; 1637#endif 1638int 1639sys_symlink(td, uap) 1640 struct thread *td; 1641 register struct symlink_args /* { 1642 char *path; 1643 char *link; 1644 } */ *uap; 1645{ 1646 1647 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1648} 1649 1650#ifndef _SYS_SYSPROTO_H_ 1651struct symlinkat_args { 1652 char *path; 1653 int fd; 1654 char *path2; 1655}; 1656#endif 1657int 1658sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1659{ 1660 1661 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1662 UIO_USERSPACE)); 1663} 1664 1665int 1666kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1667{ 1668 1669 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1670} 1671 1672int 1673kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1674 enum uio_seg segflg) 1675{ 1676 struct mount *mp; 1677 struct vattr vattr; 1678 char *syspath; 1679 struct nameidata nd; 1680 int error; 1681 cap_rights_t rights; 1682 1683 if (segflg == UIO_SYSSPACE) { 1684 syspath = path1; 1685 } else { 1686 syspath = uma_zalloc(namei_zone, M_WAITOK); 1687 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1688 goto out; 1689 } 1690 AUDIT_ARG_TEXT(syspath); 1691restart: 1692 bwillwrite(); 1693 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1694 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1695 td); 1696 if ((error = namei(&nd)) != 0) 1697 goto out; 1698 if (nd.ni_vp) { 1699 NDFREE(&nd, NDF_ONLY_PNBUF); 1700 if (nd.ni_vp == nd.ni_dvp) 1701 vrele(nd.ni_dvp); 1702 else 1703 vput(nd.ni_dvp); 1704 vrele(nd.ni_vp); 1705 error = EEXIST; 1706 goto out; 1707 } 1708 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1709 NDFREE(&nd, NDF_ONLY_PNBUF); 1710 vput(nd.ni_dvp); 1711 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1712 goto out; 1713 goto restart; 1714 } 1715 VATTR_NULL(&vattr); 1716 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1717#ifdef MAC 1718 vattr.va_type = VLNK; 1719 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1720 &vattr); 1721 if (error != 0) 1722 goto out2; 1723#endif 1724 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1725 if (error == 0) 1726 vput(nd.ni_vp); 1727#ifdef MAC 1728out2: 1729#endif 1730 NDFREE(&nd, NDF_ONLY_PNBUF); 1731 vput(nd.ni_dvp); 1732 vn_finished_write(mp); 1733out: 1734 if (segflg != UIO_SYSSPACE) 1735 uma_zfree(namei_zone, syspath); 1736 return (error); 1737} 1738 1739/* 1740 * Delete a whiteout from the filesystem. 1741 */ 1742int 1743sys_undelete(td, uap) 1744 struct thread *td; 1745 register struct undelete_args /* { 1746 char *path; 1747 } */ *uap; 1748{ 1749 struct mount *mp; 1750 struct nameidata nd; 1751 int error; 1752 1753restart: 1754 bwillwrite(); 1755 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1756 UIO_USERSPACE, uap->path, td); 1757 error = namei(&nd); 1758 if (error != 0) 1759 return (error); 1760 1761 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1762 NDFREE(&nd, NDF_ONLY_PNBUF); 1763 if (nd.ni_vp == nd.ni_dvp) 1764 vrele(nd.ni_dvp); 1765 else 1766 vput(nd.ni_dvp); 1767 if (nd.ni_vp) 1768 vrele(nd.ni_vp); 1769 return (EEXIST); 1770 } 1771 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1772 NDFREE(&nd, NDF_ONLY_PNBUF); 1773 vput(nd.ni_dvp); 1774 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1775 return (error); 1776 goto restart; 1777 } 1778 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1779 NDFREE(&nd, NDF_ONLY_PNBUF); 1780 vput(nd.ni_dvp); 1781 vn_finished_write(mp); 1782 return (error); 1783} 1784 1785/* 1786 * Delete a name from the filesystem. 1787 */ 1788#ifndef _SYS_SYSPROTO_H_ 1789struct unlink_args { 1790 char *path; 1791}; 1792#endif 1793int 1794sys_unlink(td, uap) 1795 struct thread *td; 1796 struct unlink_args /* { 1797 char *path; 1798 } */ *uap; 1799{ 1800 1801 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1802} 1803 1804#ifndef _SYS_SYSPROTO_H_ 1805struct unlinkat_args { 1806 int fd; 1807 char *path; 1808 int flag; 1809}; 1810#endif 1811int 1812sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1813{ 1814 int flag = uap->flag; 1815 int fd = uap->fd; 1816 char *path = uap->path; 1817 1818 if (flag & ~AT_REMOVEDIR) 1819 return (EINVAL); 1820 1821 if (flag & AT_REMOVEDIR) 1822 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1823 else 1824 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1825} 1826 1827int 1828kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1829{ 1830 1831 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1832} 1833 1834int 1835kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1836 ino_t oldinum) 1837{ 1838 struct mount *mp; 1839 struct vnode *vp; 1840 struct nameidata nd; 1841 struct stat sb; 1842 cap_rights_t rights; 1843 int error; 1844 1845restart: 1846 bwillwrite(); 1847 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1848 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1849 if ((error = namei(&nd)) != 0) 1850 return (error == EINVAL ? EPERM : error); 1851 vp = nd.ni_vp; 1852 if (vp->v_type == VDIR && oldinum == 0) { 1853 error = EPERM; /* POSIX */ 1854 } else if (oldinum != 0 && 1855 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1856 sb.st_ino != oldinum) { 1857 error = EIDRM; /* Identifier removed */ 1858 } else { 1859 /* 1860 * The root of a mounted filesystem cannot be deleted. 1861 * 1862 * XXX: can this only be a VDIR case? 1863 */ 1864 if (vp->v_vflag & VV_ROOT) 1865 error = EBUSY; 1866 } 1867 if (error == 0) { 1868 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1869 NDFREE(&nd, NDF_ONLY_PNBUF); 1870 vput(nd.ni_dvp); 1871 if (vp == nd.ni_dvp) 1872 vrele(vp); 1873 else 1874 vput(vp); 1875 if ((error = vn_start_write(NULL, &mp, 1876 V_XSLEEP | PCATCH)) != 0) 1877 return (error); 1878 goto restart; 1879 } 1880#ifdef MAC 1881 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1882 &nd.ni_cnd); 1883 if (error != 0) 1884 goto out; 1885#endif 1886 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1887 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1888#ifdef MAC 1889out: 1890#endif 1891 vn_finished_write(mp); 1892 } 1893 NDFREE(&nd, NDF_ONLY_PNBUF); 1894 vput(nd.ni_dvp); 1895 if (vp == nd.ni_dvp) 1896 vrele(vp); 1897 else 1898 vput(vp); 1899 return (error); 1900} 1901 1902/* 1903 * Reposition read/write file offset. 1904 */ 1905#ifndef _SYS_SYSPROTO_H_ 1906struct lseek_args { 1907 int fd; 1908 int pad; 1909 off_t offset; 1910 int whence; 1911}; 1912#endif 1913int 1914sys_lseek(td, uap) 1915 struct thread *td; 1916 register struct lseek_args /* { 1917 int fd; 1918 int pad; 1919 off_t offset; 1920 int whence; 1921 } */ *uap; 1922{ 1923 struct file *fp; 1924 cap_rights_t rights; 1925 int error; 1926 1927 AUDIT_ARG_FD(uap->fd); 1928 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1929 if (error != 0) 1930 return (error); 1931 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1932 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1933 fdrop(fp, td); 1934 return (error); 1935} 1936 1937#if defined(COMPAT_43) 1938/* 1939 * Reposition read/write file offset. 1940 */ 1941#ifndef _SYS_SYSPROTO_H_ 1942struct olseek_args { 1943 int fd; 1944 long offset; 1945 int whence; 1946}; 1947#endif 1948int 1949olseek(td, uap) 1950 struct thread *td; 1951 register struct olseek_args /* { 1952 int fd; 1953 long offset; 1954 int whence; 1955 } */ *uap; 1956{ 1957 struct lseek_args /* { 1958 int fd; 1959 int pad; 1960 off_t offset; 1961 int whence; 1962 } */ nuap; 1963 1964 nuap.fd = uap->fd; 1965 nuap.offset = uap->offset; 1966 nuap.whence = uap->whence; 1967 return (sys_lseek(td, &nuap)); 1968} 1969#endif /* COMPAT_43 */ 1970 1971/* Version with the 'pad' argument */ 1972int 1973freebsd6_lseek(td, uap) 1974 struct thread *td; 1975 register struct freebsd6_lseek_args *uap; 1976{ 1977 struct lseek_args ouap; 1978 1979 ouap.fd = uap->fd; 1980 ouap.offset = uap->offset; 1981 ouap.whence = uap->whence; 1982 return (sys_lseek(td, &ouap)); 1983} 1984 1985/* 1986 * Check access permissions using passed credentials. 1987 */ 1988static int 1989vn_access(vp, user_flags, cred, td) 1990 struct vnode *vp; 1991 int user_flags; 1992 struct ucred *cred; 1993 struct thread *td; 1994{ 1995 accmode_t accmode; 1996 int error; 1997 1998 /* Flags == 0 means only check for existence. */ 1999 error = 0; 2000 if (user_flags) { 2001 accmode = 0; 2002 if (user_flags & R_OK) 2003 accmode |= VREAD; 2004 if (user_flags & W_OK) 2005 accmode |= VWRITE; 2006 if (user_flags & X_OK) 2007 accmode |= VEXEC; 2008#ifdef MAC 2009 error = mac_vnode_check_access(cred, vp, accmode); 2010 if (error != 0) 2011 return (error); 2012#endif 2013 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2014 error = VOP_ACCESS(vp, accmode, cred, td); 2015 } 2016 return (error); 2017} 2018 2019/* 2020 * Check access permissions using "real" credentials. 2021 */ 2022#ifndef _SYS_SYSPROTO_H_ 2023struct access_args { 2024 char *path; 2025 int amode; 2026}; 2027#endif 2028int 2029sys_access(td, uap) 2030 struct thread *td; 2031 register struct access_args /* { 2032 char *path; 2033 int amode; 2034 } */ *uap; 2035{ 2036 2037 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2038} 2039 2040#ifndef _SYS_SYSPROTO_H_ 2041struct faccessat_args { 2042 int dirfd; 2043 char *path; 2044 int amode; 2045 int flag; 2046} 2047#endif 2048int 2049sys_faccessat(struct thread *td, struct faccessat_args *uap) 2050{ 2051 2052 if (uap->flag & ~AT_EACCESS) 2053 return (EINVAL); 2054 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2055 uap->amode)); 2056} 2057 2058int 2059kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2060{ 2061 2062 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2063} 2064 2065int 2066kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2067 int flag, int amode) 2068{ 2069 struct ucred *cred, *tmpcred; 2070 struct vnode *vp; 2071 struct nameidata nd; 2072 cap_rights_t rights; 2073 int error; 2074 2075 /* 2076 * Create and modify a temporary credential instead of one that 2077 * is potentially shared. 2078 */ 2079 if (!(flag & AT_EACCESS)) { 2080 cred = td->td_ucred; 2081 tmpcred = crdup(cred); 2082 tmpcred->cr_uid = cred->cr_ruid; 2083 tmpcred->cr_groups[0] = cred->cr_rgid; 2084 td->td_ucred = tmpcred; 2085 } else 2086 cred = tmpcred = td->td_ucred; 2087 AUDIT_ARG_VALUE(amode); 2088 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2089 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2090 td); 2091 if ((error = namei(&nd)) != 0) 2092 goto out1; 2093 vp = nd.ni_vp; 2094 2095 error = vn_access(vp, amode, tmpcred, td); 2096 NDFREE(&nd, NDF_ONLY_PNBUF); 2097 vput(vp); 2098out1: 2099 if (!(flag & AT_EACCESS)) { 2100 td->td_ucred = cred; 2101 crfree(tmpcred); 2102 } 2103 return (error); 2104} 2105 2106/* 2107 * Check access permissions using "effective" credentials. 2108 */ 2109#ifndef _SYS_SYSPROTO_H_ 2110struct eaccess_args { 2111 char *path; 2112 int amode; 2113}; 2114#endif 2115int 2116sys_eaccess(td, uap) 2117 struct thread *td; 2118 register struct eaccess_args /* { 2119 char *path; 2120 int amode; 2121 } */ *uap; 2122{ 2123 2124 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2125} 2126 2127int 2128kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2129{ 2130 2131 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2132} 2133 2134#if defined(COMPAT_43) 2135/* 2136 * Get file status; this version follows links. 2137 */ 2138#ifndef _SYS_SYSPROTO_H_ 2139struct ostat_args { 2140 char *path; 2141 struct ostat *ub; 2142}; 2143#endif 2144int 2145ostat(td, uap) 2146 struct thread *td; 2147 register struct ostat_args /* { 2148 char *path; 2149 struct ostat *ub; 2150 } */ *uap; 2151{ 2152 struct stat sb; 2153 struct ostat osb; 2154 int error; 2155 2156 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2157 if (error != 0) 2158 return (error); 2159 cvtstat(&sb, &osb); 2160 return (copyout(&osb, uap->ub, sizeof (osb))); 2161} 2162 2163/* 2164 * Get file status; this version does not follow links. 2165 */ 2166#ifndef _SYS_SYSPROTO_H_ 2167struct olstat_args { 2168 char *path; 2169 struct ostat *ub; 2170}; 2171#endif 2172int 2173olstat(td, uap) 2174 struct thread *td; 2175 register struct olstat_args /* { 2176 char *path; 2177 struct ostat *ub; 2178 } */ *uap; 2179{ 2180 struct stat sb; 2181 struct ostat osb; 2182 int error; 2183 2184 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2185 if (error != 0) 2186 return (error); 2187 cvtstat(&sb, &osb); 2188 return (copyout(&osb, uap->ub, sizeof (osb))); 2189} 2190 2191/* 2192 * Convert from an old to a new stat structure. 2193 */ 2194void 2195cvtstat(st, ost) 2196 struct stat *st; 2197 struct ostat *ost; 2198{ 2199 2200 ost->st_dev = st->st_dev; 2201 ost->st_ino = st->st_ino; 2202 ost->st_mode = st->st_mode; 2203 ost->st_nlink = st->st_nlink; 2204 ost->st_uid = st->st_uid; 2205 ost->st_gid = st->st_gid; 2206 ost->st_rdev = st->st_rdev; 2207 if (st->st_size < (quad_t)1 << 32) 2208 ost->st_size = st->st_size; 2209 else 2210 ost->st_size = -2; 2211 ost->st_atim = st->st_atim; 2212 ost->st_mtim = st->st_mtim; 2213 ost->st_ctim = st->st_ctim; 2214 ost->st_blksize = st->st_blksize; 2215 ost->st_blocks = st->st_blocks; 2216 ost->st_flags = st->st_flags; 2217 ost->st_gen = st->st_gen; 2218} 2219#endif /* COMPAT_43 */ 2220 2221/* 2222 * Get file status; this version follows links. 2223 */ 2224#ifndef _SYS_SYSPROTO_H_ 2225struct stat_args { 2226 char *path; 2227 struct stat *ub; 2228}; 2229#endif 2230int 2231sys_stat(td, uap) 2232 struct thread *td; 2233 register struct stat_args /* { 2234 char *path; 2235 struct stat *ub; 2236 } */ *uap; 2237{ 2238 struct stat sb; 2239 int error; 2240 2241 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2242 if (error == 0) 2243 error = copyout(&sb, uap->ub, sizeof (sb)); 2244 return (error); 2245} 2246 2247#ifndef _SYS_SYSPROTO_H_ 2248struct fstatat_args { 2249 int fd; 2250 char *path; 2251 struct stat *buf; 2252 int flag; 2253} 2254#endif 2255int 2256sys_fstatat(struct thread *td, struct fstatat_args *uap) 2257{ 2258 struct stat sb; 2259 int error; 2260 2261 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2262 UIO_USERSPACE, &sb); 2263 if (error == 0) 2264 error = copyout(&sb, uap->buf, sizeof (sb)); 2265 return (error); 2266} 2267 2268int 2269kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2270{ 2271 2272 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2273} 2274 2275int 2276kern_statat(struct thread *td, int flag, int fd, char *path, 2277 enum uio_seg pathseg, struct stat *sbp) 2278{ 2279 2280 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2281} 2282 2283int 2284kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2285 enum uio_seg pathseg, struct stat *sbp, 2286 void (*hook)(struct vnode *vp, struct stat *sbp)) 2287{ 2288 struct nameidata nd; 2289 struct stat sb; 2290 cap_rights_t rights; 2291 int error; 2292 2293 if (flag & ~AT_SYMLINK_NOFOLLOW) 2294 return (EINVAL); 2295 2296 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2297 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2298 cap_rights_init(&rights, CAP_FSTAT), td); 2299 2300 if ((error = namei(&nd)) != 0) 2301 return (error); 2302 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2303 if (error == 0) { 2304 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2305 if (S_ISREG(sb.st_mode)) 2306 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2307 if (__predict_false(hook != NULL)) 2308 hook(nd.ni_vp, &sb); 2309 } 2310 NDFREE(&nd, NDF_ONLY_PNBUF); 2311 vput(nd.ni_vp); 2312 if (error != 0) 2313 return (error); 2314 *sbp = sb; 2315#ifdef KTRACE 2316 if (KTRPOINT(td, KTR_STRUCT)) 2317 ktrstat(&sb); 2318#endif 2319 return (0); 2320} 2321 2322/* 2323 * Get file status; this version does not follow links. 2324 */ 2325#ifndef _SYS_SYSPROTO_H_ 2326struct lstat_args { 2327 char *path; 2328 struct stat *ub; 2329}; 2330#endif 2331int 2332sys_lstat(td, uap) 2333 struct thread *td; 2334 register struct lstat_args /* { 2335 char *path; 2336 struct stat *ub; 2337 } */ *uap; 2338{ 2339 struct stat sb; 2340 int error; 2341 2342 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2343 if (error == 0) 2344 error = copyout(&sb, uap->ub, sizeof (sb)); 2345 return (error); 2346} 2347 2348int 2349kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2350{ 2351 2352 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2353 sbp)); 2354} 2355 2356/* 2357 * Implementation of the NetBSD [l]stat() functions. 2358 */ 2359void 2360cvtnstat(sb, nsb) 2361 struct stat *sb; 2362 struct nstat *nsb; 2363{ 2364 2365 bzero(nsb, sizeof *nsb); 2366 nsb->st_dev = sb->st_dev; 2367 nsb->st_ino = sb->st_ino; 2368 nsb->st_mode = sb->st_mode; 2369 nsb->st_nlink = sb->st_nlink; 2370 nsb->st_uid = sb->st_uid; 2371 nsb->st_gid = sb->st_gid; 2372 nsb->st_rdev = sb->st_rdev; 2373 nsb->st_atim = sb->st_atim; 2374 nsb->st_mtim = sb->st_mtim; 2375 nsb->st_ctim = sb->st_ctim; 2376 nsb->st_size = sb->st_size; 2377 nsb->st_blocks = sb->st_blocks; 2378 nsb->st_blksize = sb->st_blksize; 2379 nsb->st_flags = sb->st_flags; 2380 nsb->st_gen = sb->st_gen; 2381 nsb->st_birthtim = sb->st_birthtim; 2382} 2383 2384#ifndef _SYS_SYSPROTO_H_ 2385struct nstat_args { 2386 char *path; 2387 struct nstat *ub; 2388}; 2389#endif 2390int 2391sys_nstat(td, uap) 2392 struct thread *td; 2393 register struct nstat_args /* { 2394 char *path; 2395 struct nstat *ub; 2396 } */ *uap; 2397{ 2398 struct stat sb; 2399 struct nstat nsb; 2400 int error; 2401 2402 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2403 if (error != 0) 2404 return (error); 2405 cvtnstat(&sb, &nsb); 2406 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2407} 2408 2409/* 2410 * NetBSD lstat. Get file status; this version does not follow links. 2411 */ 2412#ifndef _SYS_SYSPROTO_H_ 2413struct lstat_args { 2414 char *path; 2415 struct stat *ub; 2416}; 2417#endif 2418int 2419sys_nlstat(td, uap) 2420 struct thread *td; 2421 register struct nlstat_args /* { 2422 char *path; 2423 struct nstat *ub; 2424 } */ *uap; 2425{ 2426 struct stat sb; 2427 struct nstat nsb; 2428 int error; 2429 2430 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2431 if (error != 0) 2432 return (error); 2433 cvtnstat(&sb, &nsb); 2434 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2435} 2436 2437/* 2438 * Get configurable pathname variables. 2439 */ 2440#ifndef _SYS_SYSPROTO_H_ 2441struct pathconf_args { 2442 char *path; 2443 int name; 2444}; 2445#endif 2446int 2447sys_pathconf(td, uap) 2448 struct thread *td; 2449 register struct pathconf_args /* { 2450 char *path; 2451 int name; 2452 } */ *uap; 2453{ 2454 2455 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2456} 2457 2458#ifndef _SYS_SYSPROTO_H_ 2459struct lpathconf_args { 2460 char *path; 2461 int name; 2462}; 2463#endif 2464int 2465sys_lpathconf(td, uap) 2466 struct thread *td; 2467 register struct lpathconf_args /* { 2468 char *path; 2469 int name; 2470 } */ *uap; 2471{ 2472 2473 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2474 NOFOLLOW)); 2475} 2476 2477int 2478kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2479 u_long flags) 2480{ 2481 struct nameidata nd; 2482 int error; 2483 2484 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2485 pathseg, path, td); 2486 if ((error = namei(&nd)) != 0) 2487 return (error); 2488 NDFREE(&nd, NDF_ONLY_PNBUF); 2489 2490 /* If asynchronous I/O is available, it works for all files. */ 2491 if (name == _PC_ASYNC_IO) 2492 td->td_retval[0] = async_io_version; 2493 else 2494 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2495 vput(nd.ni_vp); 2496 return (error); 2497} 2498 2499/* 2500 * Return target name of a symbolic link. 2501 */ 2502#ifndef _SYS_SYSPROTO_H_ 2503struct readlink_args { 2504 char *path; 2505 char *buf; 2506 size_t count; 2507}; 2508#endif 2509int 2510sys_readlink(td, uap) 2511 struct thread *td; 2512 register struct readlink_args /* { 2513 char *path; 2514 char *buf; 2515 size_t count; 2516 } */ *uap; 2517{ 2518 2519 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2520 UIO_USERSPACE, uap->count)); 2521} 2522#ifndef _SYS_SYSPROTO_H_ 2523struct readlinkat_args { 2524 int fd; 2525 char *path; 2526 char *buf; 2527 size_t bufsize; 2528}; 2529#endif 2530int 2531sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2532{ 2533 2534 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2535 uap->buf, UIO_USERSPACE, uap->bufsize)); 2536} 2537 2538int 2539kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2540 enum uio_seg bufseg, size_t count) 2541{ 2542 2543 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2544 count)); 2545} 2546 2547int 2548kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2549 char *buf, enum uio_seg bufseg, size_t count) 2550{ 2551 struct vnode *vp; 2552 struct iovec aiov; 2553 struct uio auio; 2554 struct nameidata nd; 2555 int error; 2556 2557 if (count > IOSIZE_MAX) 2558 return (EINVAL); 2559 2560 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2561 pathseg, path, fd, td); 2562 2563 if ((error = namei(&nd)) != 0) 2564 return (error); 2565 NDFREE(&nd, NDF_ONLY_PNBUF); 2566 vp = nd.ni_vp; 2567#ifdef MAC 2568 error = mac_vnode_check_readlink(td->td_ucred, vp); 2569 if (error != 0) { 2570 vput(vp); 2571 return (error); 2572 } 2573#endif 2574 if (vp->v_type != VLNK) 2575 error = EINVAL; 2576 else { 2577 aiov.iov_base = buf; 2578 aiov.iov_len = count; 2579 auio.uio_iov = &aiov; 2580 auio.uio_iovcnt = 1; 2581 auio.uio_offset = 0; 2582 auio.uio_rw = UIO_READ; 2583 auio.uio_segflg = bufseg; 2584 auio.uio_td = td; 2585 auio.uio_resid = count; 2586 error = VOP_READLINK(vp, &auio, td->td_ucred); 2587 td->td_retval[0] = count - auio.uio_resid; 2588 } 2589 vput(vp); 2590 return (error); 2591} 2592 2593/* 2594 * Common implementation code for chflags() and fchflags(). 2595 */ 2596static int 2597setfflags(td, vp, flags) 2598 struct thread *td; 2599 struct vnode *vp; 2600 u_long flags; 2601{ 2602 struct mount *mp; 2603 struct vattr vattr; 2604 int error; 2605 2606 /* We can't support the value matching VNOVAL. */ 2607 if (flags == VNOVAL) 2608 return (EOPNOTSUPP); 2609 2610 /* 2611 * Prevent non-root users from setting flags on devices. When 2612 * a device is reused, users can retain ownership of the device 2613 * if they are allowed to set flags and programs assume that 2614 * chown can't fail when done as root. 2615 */ 2616 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2617 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2618 if (error != 0) 2619 return (error); 2620 } 2621 2622 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2623 return (error); 2624 VATTR_NULL(&vattr); 2625 vattr.va_flags = flags; 2626 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2627#ifdef MAC 2628 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2629 if (error == 0) 2630#endif 2631 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2632 VOP_UNLOCK(vp, 0); 2633 vn_finished_write(mp); 2634 return (error); 2635} 2636 2637/* 2638 * Change flags of a file given a path name. 2639 */ 2640#ifndef _SYS_SYSPROTO_H_ 2641struct chflags_args { 2642 const char *path; 2643 u_long flags; 2644}; 2645#endif 2646int 2647sys_chflags(td, uap) 2648 struct thread *td; 2649 register struct chflags_args /* { 2650 const char *path; 2651 u_long flags; 2652 } */ *uap; 2653{ 2654 2655 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2656} 2657 2658#ifndef _SYS_SYSPROTO_H_ 2659struct chflagsat_args { 2660 int fd; 2661 const char *path; 2662 u_long flags; 2663 int atflag; 2664} 2665#endif 2666int 2667sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2668{ 2669 int fd = uap->fd; 2670 const char *path = uap->path; 2671 u_long flags = uap->flags; 2672 int atflag = uap->atflag; 2673 2674 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2675 return (EINVAL); 2676 2677 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2678} 2679 2680static int 2681kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2682 u_long flags) 2683{ 2684 2685 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2686} 2687 2688/* 2689 * Same as chflags() but doesn't follow symlinks. 2690 */ 2691int 2692sys_lchflags(td, uap) 2693 struct thread *td; 2694 register struct lchflags_args /* { 2695 const char *path; 2696 u_long flags; 2697 } */ *uap; 2698{ 2699 2700 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2701 uap->flags, AT_SYMLINK_NOFOLLOW)); 2702} 2703 2704static int 2705kern_chflagsat(struct thread *td, int fd, const char *path, 2706 enum uio_seg pathseg, u_long flags, int atflag) 2707{ 2708 struct nameidata nd; 2709 cap_rights_t rights; 2710 int error, follow; 2711 2712 AUDIT_ARG_FFLAGS(flags); 2713 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2714 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2715 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2716 if ((error = namei(&nd)) != 0) 2717 return (error); 2718 NDFREE(&nd, NDF_ONLY_PNBUF); 2719 error = setfflags(td, nd.ni_vp, flags); 2720 vrele(nd.ni_vp); 2721 return (error); 2722} 2723 2724/* 2725 * Change flags of a file given a file descriptor. 2726 */ 2727#ifndef _SYS_SYSPROTO_H_ 2728struct fchflags_args { 2729 int fd; 2730 u_long flags; 2731}; 2732#endif 2733int 2734sys_fchflags(td, uap) 2735 struct thread *td; 2736 register struct fchflags_args /* { 2737 int fd; 2738 u_long flags; 2739 } */ *uap; 2740{ 2741 struct file *fp; 2742 cap_rights_t rights; 2743 int error; 2744 2745 AUDIT_ARG_FD(uap->fd); 2746 AUDIT_ARG_FFLAGS(uap->flags); 2747 error = getvnode(td->td_proc->p_fd, uap->fd, 2748 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2749 if (error != 0) 2750 return (error); 2751#ifdef AUDIT 2752 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2753 AUDIT_ARG_VNODE1(fp->f_vnode); 2754 VOP_UNLOCK(fp->f_vnode, 0); 2755#endif 2756 error = setfflags(td, fp->f_vnode, uap->flags); 2757 fdrop(fp, td); 2758 return (error); 2759} 2760 2761/* 2762 * Common implementation code for chmod(), lchmod() and fchmod(). 2763 */ 2764int 2765setfmode(td, cred, vp, mode) 2766 struct thread *td; 2767 struct ucred *cred; 2768 struct vnode *vp; 2769 int mode; 2770{ 2771 struct mount *mp; 2772 struct vattr vattr; 2773 int error; 2774 2775 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2776 return (error); 2777 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2778 VATTR_NULL(&vattr); 2779 vattr.va_mode = mode & ALLPERMS; 2780#ifdef MAC 2781 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2782 if (error == 0) 2783#endif 2784 error = VOP_SETATTR(vp, &vattr, cred); 2785 VOP_UNLOCK(vp, 0); 2786 vn_finished_write(mp); 2787 return (error); 2788} 2789 2790/* 2791 * Change mode of a file given path name. 2792 */ 2793#ifndef _SYS_SYSPROTO_H_ 2794struct chmod_args { 2795 char *path; 2796 int mode; 2797}; 2798#endif 2799int 2800sys_chmod(td, uap) 2801 struct thread *td; 2802 register struct chmod_args /* { 2803 char *path; 2804 int mode; 2805 } */ *uap; 2806{ 2807 2808 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2809} 2810 2811#ifndef _SYS_SYSPROTO_H_ 2812struct fchmodat_args { 2813 int dirfd; 2814 char *path; 2815 mode_t mode; 2816 int flag; 2817} 2818#endif 2819int 2820sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2821{ 2822 int flag = uap->flag; 2823 int fd = uap->fd; 2824 char *path = uap->path; 2825 mode_t mode = uap->mode; 2826 2827 if (flag & ~AT_SYMLINK_NOFOLLOW) 2828 return (EINVAL); 2829 2830 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2831} 2832 2833int 2834kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2835{ 2836 2837 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2838} 2839 2840/* 2841 * Change mode of a file given path name (don't follow links.) 2842 */ 2843#ifndef _SYS_SYSPROTO_H_ 2844struct lchmod_args { 2845 char *path; 2846 int mode; 2847}; 2848#endif 2849int 2850sys_lchmod(td, uap) 2851 struct thread *td; 2852 register struct lchmod_args /* { 2853 char *path; 2854 int mode; 2855 } */ *uap; 2856{ 2857 2858 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2859 uap->mode, AT_SYMLINK_NOFOLLOW)); 2860} 2861 2862int 2863kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2864 mode_t mode, int flag) 2865{ 2866 struct nameidata nd; 2867 cap_rights_t rights; 2868 int error, follow; 2869 2870 AUDIT_ARG_MODE(mode); 2871 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2872 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2873 cap_rights_init(&rights, CAP_FCHMOD), td); 2874 if ((error = namei(&nd)) != 0) 2875 return (error); 2876 NDFREE(&nd, NDF_ONLY_PNBUF); 2877 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2878 vrele(nd.ni_vp); 2879 return (error); 2880} 2881 2882/* 2883 * Change mode of a file given a file descriptor. 2884 */ 2885#ifndef _SYS_SYSPROTO_H_ 2886struct fchmod_args { 2887 int fd; 2888 int mode; 2889}; 2890#endif 2891int 2892sys_fchmod(struct thread *td, struct fchmod_args *uap) 2893{ 2894 struct file *fp; 2895 cap_rights_t rights; 2896 int error; 2897 2898 AUDIT_ARG_FD(uap->fd); 2899 AUDIT_ARG_MODE(uap->mode); 2900 2901 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2902 if (error != 0) 2903 return (error); 2904 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2905 fdrop(fp, td); 2906 return (error); 2907} 2908 2909/* 2910 * Common implementation for chown(), lchown(), and fchown() 2911 */ 2912int 2913setfown(td, cred, vp, uid, gid) 2914 struct thread *td; 2915 struct ucred *cred; 2916 struct vnode *vp; 2917 uid_t uid; 2918 gid_t gid; 2919{ 2920 struct mount *mp; 2921 struct vattr vattr; 2922 int error; 2923 2924 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2925 return (error); 2926 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2927 VATTR_NULL(&vattr); 2928 vattr.va_uid = uid; 2929 vattr.va_gid = gid; 2930#ifdef MAC 2931 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2932 vattr.va_gid); 2933 if (error == 0) 2934#endif 2935 error = VOP_SETATTR(vp, &vattr, cred); 2936 VOP_UNLOCK(vp, 0); 2937 vn_finished_write(mp); 2938 return (error); 2939} 2940 2941/* 2942 * Set ownership given a path name. 2943 */ 2944#ifndef _SYS_SYSPROTO_H_ 2945struct chown_args { 2946 char *path; 2947 int uid; 2948 int gid; 2949}; 2950#endif 2951int 2952sys_chown(td, uap) 2953 struct thread *td; 2954 register struct chown_args /* { 2955 char *path; 2956 int uid; 2957 int gid; 2958 } */ *uap; 2959{ 2960 2961 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2962} 2963 2964#ifndef _SYS_SYSPROTO_H_ 2965struct fchownat_args { 2966 int fd; 2967 const char * path; 2968 uid_t uid; 2969 gid_t gid; 2970 int flag; 2971}; 2972#endif 2973int 2974sys_fchownat(struct thread *td, struct fchownat_args *uap) 2975{ 2976 int flag; 2977 2978 flag = uap->flag; 2979 if (flag & ~AT_SYMLINK_NOFOLLOW) 2980 return (EINVAL); 2981 2982 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2983 uap->gid, uap->flag)); 2984} 2985 2986int 2987kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2988 int gid) 2989{ 2990 2991 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2992} 2993 2994int 2995kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2996 int uid, int gid, int flag) 2997{ 2998 struct nameidata nd; 2999 cap_rights_t rights; 3000 int error, follow; 3001 3002 AUDIT_ARG_OWNER(uid, gid); 3003 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3004 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3005 cap_rights_init(&rights, CAP_FCHOWN), td); 3006 3007 if ((error = namei(&nd)) != 0) 3008 return (error); 3009 NDFREE(&nd, NDF_ONLY_PNBUF); 3010 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3011 vrele(nd.ni_vp); 3012 return (error); 3013} 3014 3015/* 3016 * Set ownership given a path name, do not cross symlinks. 3017 */ 3018#ifndef _SYS_SYSPROTO_H_ 3019struct lchown_args { 3020 char *path; 3021 int uid; 3022 int gid; 3023}; 3024#endif 3025int 3026sys_lchown(td, uap) 3027 struct thread *td; 3028 register struct lchown_args /* { 3029 char *path; 3030 int uid; 3031 int gid; 3032 } */ *uap; 3033{ 3034 3035 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3036} 3037 3038int 3039kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3040 int gid) 3041{ 3042 3043 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3044 AT_SYMLINK_NOFOLLOW)); 3045} 3046 3047/* 3048 * Set ownership given a file descriptor. 3049 */ 3050#ifndef _SYS_SYSPROTO_H_ 3051struct fchown_args { 3052 int fd; 3053 int uid; 3054 int gid; 3055}; 3056#endif 3057int 3058sys_fchown(td, uap) 3059 struct thread *td; 3060 register struct fchown_args /* { 3061 int fd; 3062 int uid; 3063 int gid; 3064 } */ *uap; 3065{ 3066 struct file *fp; 3067 cap_rights_t rights; 3068 int error; 3069 3070 AUDIT_ARG_FD(uap->fd); 3071 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3072 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3073 if (error != 0) 3074 return (error); 3075 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3076 fdrop(fp, td); 3077 return (error); 3078} 3079 3080/* 3081 * Common implementation code for utimes(), lutimes(), and futimes(). 3082 */ 3083static int 3084getutimes(usrtvp, tvpseg, tsp) 3085 const struct timeval *usrtvp; 3086 enum uio_seg tvpseg; 3087 struct timespec *tsp; 3088{ 3089 struct timeval tv[2]; 3090 const struct timeval *tvp; 3091 int error; 3092 3093 if (usrtvp == NULL) { 3094 vfs_timestamp(&tsp[0]); 3095 tsp[1] = tsp[0]; 3096 } else { 3097 if (tvpseg == UIO_SYSSPACE) { 3098 tvp = usrtvp; 3099 } else { 3100 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3101 return (error); 3102 tvp = tv; 3103 } 3104 3105 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3106 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3107 return (EINVAL); 3108 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3109 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3110 } 3111 return (0); 3112} 3113 3114/* 3115 * Common implementation code for futimens(), utimensat(). 3116 */ 3117#define UTIMENS_NULL 0x1 3118#define UTIMENS_EXIT 0x2 3119static int 3120getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3121 struct timespec *tsp, int *retflags) 3122{ 3123 struct timespec tsnow; 3124 int error; 3125 3126 vfs_timestamp(&tsnow); 3127 *retflags = 0; 3128 if (usrtsp == NULL) { 3129 tsp[0] = tsnow; 3130 tsp[1] = tsnow; 3131 *retflags |= UTIMENS_NULL; 3132 return (0); 3133 } 3134 if (tspseg == UIO_SYSSPACE) { 3135 tsp[0] = usrtsp[0]; 3136 tsp[1] = usrtsp[1]; 3137 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3138 return (error); 3139 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3140 *retflags |= UTIMENS_EXIT; 3141 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3142 *retflags |= UTIMENS_NULL; 3143 if (tsp[0].tv_nsec == UTIME_OMIT) 3144 tsp[0].tv_sec = VNOVAL; 3145 else if (tsp[0].tv_nsec == UTIME_NOW) 3146 tsp[0] = tsnow; 3147 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3148 return (EINVAL); 3149 if (tsp[1].tv_nsec == UTIME_OMIT) 3150 tsp[1].tv_sec = VNOVAL; 3151 else if (tsp[1].tv_nsec == UTIME_NOW) 3152 tsp[1] = tsnow; 3153 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3154 return (EINVAL); 3155 3156 return (0); 3157} 3158 3159/* 3160 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3161 * and utimensat(). 3162 */ 3163static int 3164setutimes(td, vp, ts, numtimes, nullflag) 3165 struct thread *td; 3166 struct vnode *vp; 3167 const struct timespec *ts; 3168 int numtimes; 3169 int nullflag; 3170{ 3171 struct mount *mp; 3172 struct vattr vattr; 3173 int error, setbirthtime; 3174 3175 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3176 return (error); 3177 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3178 setbirthtime = 0; 3179 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3180 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3181 setbirthtime = 1; 3182 VATTR_NULL(&vattr); 3183 vattr.va_atime = ts[0]; 3184 vattr.va_mtime = ts[1]; 3185 if (setbirthtime) 3186 vattr.va_birthtime = ts[1]; 3187 if (numtimes > 2) 3188 vattr.va_birthtime = ts[2]; 3189 if (nullflag) 3190 vattr.va_vaflags |= VA_UTIMES_NULL; 3191#ifdef MAC 3192 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3193 vattr.va_mtime); 3194#endif 3195 if (error == 0) 3196 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3197 VOP_UNLOCK(vp, 0); 3198 vn_finished_write(mp); 3199 return (error); 3200} 3201 3202/* 3203 * Set the access and modification times of a file. 3204 */ 3205#ifndef _SYS_SYSPROTO_H_ 3206struct utimes_args { 3207 char *path; 3208 struct timeval *tptr; 3209}; 3210#endif 3211int 3212sys_utimes(td, uap) 3213 struct thread *td; 3214 register struct utimes_args /* { 3215 char *path; 3216 struct timeval *tptr; 3217 } */ *uap; 3218{ 3219 3220 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3221 UIO_USERSPACE)); 3222} 3223 3224#ifndef _SYS_SYSPROTO_H_ 3225struct futimesat_args { 3226 int fd; 3227 const char * path; 3228 const struct timeval * times; 3229}; 3230#endif 3231int 3232sys_futimesat(struct thread *td, struct futimesat_args *uap) 3233{ 3234 3235 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3236 uap->times, UIO_USERSPACE)); 3237} 3238 3239int 3240kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3241 struct timeval *tptr, enum uio_seg tptrseg) 3242{ 3243 3244 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3245} 3246 3247int 3248kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3249 struct timeval *tptr, enum uio_seg tptrseg) 3250{ 3251 struct nameidata nd; 3252 struct timespec ts[2]; 3253 cap_rights_t rights; 3254 int error; 3255 3256 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3257 return (error); 3258 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3259 cap_rights_init(&rights, CAP_FUTIMES), td); 3260 3261 if ((error = namei(&nd)) != 0) 3262 return (error); 3263 NDFREE(&nd, NDF_ONLY_PNBUF); 3264 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3265 vrele(nd.ni_vp); 3266 return (error); 3267} 3268 3269/* 3270 * Set the access and modification times of a file. 3271 */ 3272#ifndef _SYS_SYSPROTO_H_ 3273struct lutimes_args { 3274 char *path; 3275 struct timeval *tptr; 3276}; 3277#endif 3278int 3279sys_lutimes(td, uap) 3280 struct thread *td; 3281 register struct lutimes_args /* { 3282 char *path; 3283 struct timeval *tptr; 3284 } */ *uap; 3285{ 3286 3287 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3288 UIO_USERSPACE)); 3289} 3290 3291int 3292kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3293 struct timeval *tptr, enum uio_seg tptrseg) 3294{ 3295 struct timespec ts[2]; 3296 struct nameidata nd; 3297 int error; 3298 3299 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3300 return (error); 3301 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3302 if ((error = namei(&nd)) != 0) 3303 return (error); 3304 NDFREE(&nd, NDF_ONLY_PNBUF); 3305 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3306 vrele(nd.ni_vp); 3307 return (error); 3308} 3309 3310/* 3311 * Set the access and modification times of a file. 3312 */ 3313#ifndef _SYS_SYSPROTO_H_ 3314struct futimes_args { 3315 int fd; 3316 struct timeval *tptr; 3317}; 3318#endif 3319int 3320sys_futimes(td, uap) 3321 struct thread *td; 3322 register struct futimes_args /* { 3323 int fd; 3324 struct timeval *tptr; 3325 } */ *uap; 3326{ 3327 3328 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3329} 3330 3331int 3332kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3333 enum uio_seg tptrseg) 3334{ 3335 struct timespec ts[2]; 3336 struct file *fp; 3337 cap_rights_t rights; 3338 int error; 3339 3340 AUDIT_ARG_FD(fd); 3341 error = getutimes(tptr, tptrseg, ts); 3342 if (error != 0) 3343 return (error); 3344 error = getvnode(td->td_proc->p_fd, fd, 3345 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3346 if (error != 0) 3347 return (error); 3348#ifdef AUDIT 3349 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3350 AUDIT_ARG_VNODE1(fp->f_vnode); 3351 VOP_UNLOCK(fp->f_vnode, 0); 3352#endif 3353 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3354 fdrop(fp, td); 3355 return (error); 3356} 3357 3358int 3359sys_futimens(struct thread *td, struct futimens_args *uap) 3360{ 3361 3362 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3363} 3364 3365int 3366kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3367 enum uio_seg tptrseg) 3368{ 3369 struct timespec ts[2]; 3370 struct file *fp; 3371 cap_rights_t rights; 3372 int error, flags; 3373 3374 AUDIT_ARG_FD(fd); 3375 error = getutimens(tptr, tptrseg, ts, &flags); 3376 if (error != 0) 3377 return (error); 3378 if (flags & UTIMENS_EXIT) 3379 return (0); 3380 error = getvnode(td->td_proc->p_fd, fd, 3381 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3382 if (error != 0) 3383 return (error); 3384#ifdef AUDIT 3385 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3386 AUDIT_ARG_VNODE1(fp->f_vnode); 3387 VOP_UNLOCK(fp->f_vnode, 0); 3388#endif 3389 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3390 fdrop(fp, td); 3391 return (error); 3392} 3393 3394int 3395sys_utimensat(struct thread *td, struct utimensat_args *uap) 3396{ 3397 3398 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3399 uap->times, UIO_USERSPACE, uap->flag)); 3400} 3401 3402int 3403kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3404 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3405{ 3406 struct nameidata nd; 3407 struct timespec ts[2]; 3408 int error, flags; 3409 3410 if (flag & ~AT_SYMLINK_NOFOLLOW) 3411 return (EINVAL); 3412 3413 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3414 return (error); 3415 NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3416 FOLLOW) | AUDITVNODE1, pathseg, path, fd, td); 3417 if ((error = namei(&nd)) != 0) 3418 return (error); 3419 /* 3420 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3421 * POSIX states: 3422 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3423 * "Search permission is denied by a component of the path prefix." 3424 */ 3425 NDFREE(&nd, NDF_ONLY_PNBUF); 3426 if ((flags & UTIMENS_EXIT) == 0) 3427 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3428 vrele(nd.ni_vp); 3429 return (error); 3430} 3431 3432/* 3433 * Truncate a file given its path name. 3434 */ 3435#ifndef _SYS_SYSPROTO_H_ 3436struct truncate_args { 3437 char *path; 3438 int pad; 3439 off_t length; 3440}; 3441#endif 3442int 3443sys_truncate(td, uap) 3444 struct thread *td; 3445 register struct truncate_args /* { 3446 char *path; 3447 int pad; 3448 off_t length; 3449 } */ *uap; 3450{ 3451 3452 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3453} 3454 3455int 3456kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3457{ 3458 struct mount *mp; 3459 struct vnode *vp; 3460 void *rl_cookie; 3461 struct vattr vattr; 3462 struct nameidata nd; 3463 int error; 3464 3465 if (length < 0) 3466 return(EINVAL); 3467 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3468 if ((error = namei(&nd)) != 0) 3469 return (error); 3470 vp = nd.ni_vp; 3471 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3472 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3473 vn_rangelock_unlock(vp, rl_cookie); 3474 vrele(vp); 3475 return (error); 3476 } 3477 NDFREE(&nd, NDF_ONLY_PNBUF); 3478 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3479 if (vp->v_type == VDIR) 3480 error = EISDIR; 3481#ifdef MAC 3482 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3483 } 3484#endif 3485 else if ((error = vn_writechk(vp)) == 0 && 3486 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3487 VATTR_NULL(&vattr); 3488 vattr.va_size = length; 3489 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3490 } 3491 VOP_UNLOCK(vp, 0); 3492 vn_finished_write(mp); 3493 vn_rangelock_unlock(vp, rl_cookie); 3494 vrele(vp); 3495 return (error); 3496} 3497 3498#if defined(COMPAT_43) 3499/* 3500 * Truncate a file given its path name. 3501 */ 3502#ifndef _SYS_SYSPROTO_H_ 3503struct otruncate_args { 3504 char *path; 3505 long length; 3506}; 3507#endif 3508int 3509otruncate(td, uap) 3510 struct thread *td; 3511 register struct otruncate_args /* { 3512 char *path; 3513 long length; 3514 } */ *uap; 3515{ 3516 struct truncate_args /* { 3517 char *path; 3518 int pad; 3519 off_t length; 3520 } */ nuap; 3521 3522 nuap.path = uap->path; 3523 nuap.length = uap->length; 3524 return (sys_truncate(td, &nuap)); 3525} 3526#endif /* COMPAT_43 */ 3527 3528/* Versions with the pad argument */ 3529int 3530freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3531{ 3532 struct truncate_args ouap; 3533 3534 ouap.path = uap->path; 3535 ouap.length = uap->length; 3536 return (sys_truncate(td, &ouap)); 3537} 3538 3539int 3540freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3541{ 3542 struct ftruncate_args ouap; 3543 3544 ouap.fd = uap->fd; 3545 ouap.length = uap->length; 3546 return (sys_ftruncate(td, &ouap)); 3547} 3548 3549/* 3550 * Sync an open file. 3551 */ 3552#ifndef _SYS_SYSPROTO_H_ 3553struct fsync_args { 3554 int fd; 3555}; 3556#endif 3557int 3558sys_fsync(td, uap) 3559 struct thread *td; 3560 struct fsync_args /* { 3561 int fd; 3562 } */ *uap; 3563{ 3564 struct vnode *vp; 3565 struct mount *mp; 3566 struct file *fp; 3567 cap_rights_t rights; 3568 int error, lock_flags; 3569 3570 AUDIT_ARG_FD(uap->fd); 3571 error = getvnode(td->td_proc->p_fd, uap->fd, 3572 cap_rights_init(&rights, CAP_FSYNC), &fp); 3573 if (error != 0) 3574 return (error); 3575 vp = fp->f_vnode; 3576 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3577 if (error != 0) 3578 goto drop; 3579 if (MNT_SHARED_WRITES(mp) || 3580 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3581 lock_flags = LK_SHARED; 3582 } else { 3583 lock_flags = LK_EXCLUSIVE; 3584 } 3585 vn_lock(vp, lock_flags | LK_RETRY); 3586 AUDIT_ARG_VNODE1(vp); 3587 if (vp->v_object != NULL) { 3588 VM_OBJECT_WLOCK(vp->v_object); 3589 vm_object_page_clean(vp->v_object, 0, 0, 0); 3590 VM_OBJECT_WUNLOCK(vp->v_object); 3591 } 3592 error = VOP_FSYNC(vp, MNT_WAIT, td); 3593 3594 VOP_UNLOCK(vp, 0); 3595 vn_finished_write(mp); 3596drop: 3597 fdrop(fp, td); 3598 return (error); 3599} 3600 3601/* 3602 * Rename files. Source and destination must either both be directories, or 3603 * both not be directories. If target is a directory, it must be empty. 3604 */ 3605#ifndef _SYS_SYSPROTO_H_ 3606struct rename_args { 3607 char *from; 3608 char *to; 3609}; 3610#endif 3611int 3612sys_rename(td, uap) 3613 struct thread *td; 3614 register struct rename_args /* { 3615 char *from; 3616 char *to; 3617 } */ *uap; 3618{ 3619 3620 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3621} 3622 3623#ifndef _SYS_SYSPROTO_H_ 3624struct renameat_args { 3625 int oldfd; 3626 char *old; 3627 int newfd; 3628 char *new; 3629}; 3630#endif 3631int 3632sys_renameat(struct thread *td, struct renameat_args *uap) 3633{ 3634 3635 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3636 UIO_USERSPACE)); 3637} 3638 3639int 3640kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3641{ 3642 3643 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3644} 3645 3646int 3647kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3648 enum uio_seg pathseg) 3649{ 3650 struct mount *mp = NULL; 3651 struct vnode *tvp, *fvp, *tdvp; 3652 struct nameidata fromnd, tond; 3653 cap_rights_t rights; 3654 int error; 3655 3656again: 3657 bwillwrite(); 3658#ifdef MAC 3659 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3660 AUDITVNODE1, pathseg, old, oldfd, 3661 cap_rights_init(&rights, CAP_RENAMEAT), td); 3662#else 3663 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3664 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3665#endif 3666 3667 if ((error = namei(&fromnd)) != 0) 3668 return (error); 3669#ifdef MAC 3670 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3671 fromnd.ni_vp, &fromnd.ni_cnd); 3672 VOP_UNLOCK(fromnd.ni_dvp, 0); 3673 if (fromnd.ni_dvp != fromnd.ni_vp) 3674 VOP_UNLOCK(fromnd.ni_vp, 0); 3675#endif 3676 fvp = fromnd.ni_vp; 3677 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3678 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3679 cap_rights_init(&rights, CAP_LINKAT), td); 3680 if (fromnd.ni_vp->v_type == VDIR) 3681 tond.ni_cnd.cn_flags |= WILLBEDIR; 3682 if ((error = namei(&tond)) != 0) { 3683 /* Translate error code for rename("dir1", "dir2/."). */ 3684 if (error == EISDIR && fvp->v_type == VDIR) 3685 error = EINVAL; 3686 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3687 vrele(fromnd.ni_dvp); 3688 vrele(fvp); 3689 goto out1; 3690 } 3691 tdvp = tond.ni_dvp; 3692 tvp = tond.ni_vp; 3693 error = vn_start_write(fvp, &mp, V_NOWAIT); 3694 if (error != 0) { 3695 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3696 NDFREE(&tond, NDF_ONLY_PNBUF); 3697 if (tvp != NULL) 3698 vput(tvp); 3699 if (tdvp == tvp) 3700 vrele(tdvp); 3701 else 3702 vput(tdvp); 3703 vrele(fromnd.ni_dvp); 3704 vrele(fvp); 3705 vrele(tond.ni_startdir); 3706 if (fromnd.ni_startdir != NULL) 3707 vrele(fromnd.ni_startdir); 3708 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3709 if (error != 0) 3710 return (error); 3711 goto again; 3712 } 3713 if (tvp != NULL) { 3714 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3715 error = ENOTDIR; 3716 goto out; 3717 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3718 error = EISDIR; 3719 goto out; 3720 } 3721#ifdef CAPABILITIES 3722 if (newfd != AT_FDCWD) { 3723 /* 3724 * If the target already exists we require CAP_UNLINKAT 3725 * from 'newfd'. 3726 */ 3727 error = cap_check(&tond.ni_filecaps.fc_rights, 3728 cap_rights_init(&rights, CAP_UNLINKAT)); 3729 if (error != 0) 3730 goto out; 3731 } 3732#endif 3733 } 3734 if (fvp == tdvp) { 3735 error = EINVAL; 3736 goto out; 3737 } 3738 /* 3739 * If the source is the same as the destination (that is, if they 3740 * are links to the same vnode), then there is nothing to do. 3741 */ 3742 if (fvp == tvp) 3743 error = -1; 3744#ifdef MAC 3745 else 3746 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3747 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3748#endif 3749out: 3750 if (error == 0) { 3751 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3752 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3753 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3754 NDFREE(&tond, NDF_ONLY_PNBUF); 3755 } else { 3756 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3757 NDFREE(&tond, NDF_ONLY_PNBUF); 3758 if (tvp != NULL) 3759 vput(tvp); 3760 if (tdvp == tvp) 3761 vrele(tdvp); 3762 else 3763 vput(tdvp); 3764 vrele(fromnd.ni_dvp); 3765 vrele(fvp); 3766 } 3767 vrele(tond.ni_startdir); 3768 vn_finished_write(mp); 3769out1: 3770 if (fromnd.ni_startdir) 3771 vrele(fromnd.ni_startdir); 3772 if (error == -1) 3773 return (0); 3774 return (error); 3775} 3776 3777/* 3778 * Make a directory file. 3779 */ 3780#ifndef _SYS_SYSPROTO_H_ 3781struct mkdir_args { 3782 char *path; 3783 int mode; 3784}; 3785#endif 3786int 3787sys_mkdir(td, uap) 3788 struct thread *td; 3789 register struct mkdir_args /* { 3790 char *path; 3791 int mode; 3792 } */ *uap; 3793{ 3794 3795 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3796} 3797 3798#ifndef _SYS_SYSPROTO_H_ 3799struct mkdirat_args { 3800 int fd; 3801 char *path; 3802 mode_t mode; 3803}; 3804#endif 3805int 3806sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3807{ 3808 3809 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3810} 3811 3812int 3813kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3814{ 3815 3816 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3817} 3818 3819int 3820kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3821 int mode) 3822{ 3823 struct mount *mp; 3824 struct vnode *vp; 3825 struct vattr vattr; 3826 struct nameidata nd; 3827 cap_rights_t rights; 3828 int error; 3829 3830 AUDIT_ARG_MODE(mode); 3831restart: 3832 bwillwrite(); 3833 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3834 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3835 td); 3836 nd.ni_cnd.cn_flags |= WILLBEDIR; 3837 if ((error = namei(&nd)) != 0) 3838 return (error); 3839 vp = nd.ni_vp; 3840 if (vp != NULL) { 3841 NDFREE(&nd, NDF_ONLY_PNBUF); 3842 /* 3843 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3844 * the strange behaviour of leaving the vnode unlocked 3845 * if the target is the same vnode as the parent. 3846 */ 3847 if (vp == nd.ni_dvp) 3848 vrele(nd.ni_dvp); 3849 else 3850 vput(nd.ni_dvp); 3851 vrele(vp); 3852 return (EEXIST); 3853 } 3854 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3855 NDFREE(&nd, NDF_ONLY_PNBUF); 3856 vput(nd.ni_dvp); 3857 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3858 return (error); 3859 goto restart; 3860 } 3861 VATTR_NULL(&vattr); 3862 vattr.va_type = VDIR; 3863 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3864#ifdef MAC 3865 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3866 &vattr); 3867 if (error != 0) 3868 goto out; 3869#endif 3870 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3871#ifdef MAC 3872out: 3873#endif 3874 NDFREE(&nd, NDF_ONLY_PNBUF); 3875 vput(nd.ni_dvp); 3876 if (error == 0) 3877 vput(nd.ni_vp); 3878 vn_finished_write(mp); 3879 return (error); 3880} 3881 3882/* 3883 * Remove a directory file. 3884 */ 3885#ifndef _SYS_SYSPROTO_H_ 3886struct rmdir_args { 3887 char *path; 3888}; 3889#endif 3890int 3891sys_rmdir(td, uap) 3892 struct thread *td; 3893 struct rmdir_args /* { 3894 char *path; 3895 } */ *uap; 3896{ 3897 3898 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3899} 3900 3901int 3902kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3903{ 3904 3905 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3906} 3907 3908int 3909kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3910{ 3911 struct mount *mp; 3912 struct vnode *vp; 3913 struct nameidata nd; 3914 cap_rights_t rights; 3915 int error; 3916 3917restart: 3918 bwillwrite(); 3919 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3920 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3921 if ((error = namei(&nd)) != 0) 3922 return (error); 3923 vp = nd.ni_vp; 3924 if (vp->v_type != VDIR) { 3925 error = ENOTDIR; 3926 goto out; 3927 } 3928 /* 3929 * No rmdir "." please. 3930 */ 3931 if (nd.ni_dvp == vp) { 3932 error = EINVAL; 3933 goto out; 3934 } 3935 /* 3936 * The root of a mounted filesystem cannot be deleted. 3937 */ 3938 if (vp->v_vflag & VV_ROOT) { 3939 error = EBUSY; 3940 goto out; 3941 } 3942#ifdef MAC 3943 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3944 &nd.ni_cnd); 3945 if (error != 0) 3946 goto out; 3947#endif 3948 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3949 NDFREE(&nd, NDF_ONLY_PNBUF); 3950 vput(vp); 3951 if (nd.ni_dvp == vp) 3952 vrele(nd.ni_dvp); 3953 else 3954 vput(nd.ni_dvp); 3955 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3956 return (error); 3957 goto restart; 3958 } 3959 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3960 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3961 vn_finished_write(mp); 3962out: 3963 NDFREE(&nd, NDF_ONLY_PNBUF); 3964 vput(vp); 3965 if (nd.ni_dvp == vp) 3966 vrele(nd.ni_dvp); 3967 else 3968 vput(nd.ni_dvp); 3969 return (error); 3970} 3971 3972#ifdef COMPAT_43 3973/* 3974 * Read a block of directory entries in a filesystem independent format. 3975 */ 3976#ifndef _SYS_SYSPROTO_H_ 3977struct ogetdirentries_args { 3978 int fd; 3979 char *buf; 3980 u_int count; 3981 long *basep; 3982}; 3983#endif 3984int 3985ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3986{ 3987 long loff; 3988 int error; 3989 3990 error = kern_ogetdirentries(td, uap, &loff); 3991 if (error == 0) 3992 error = copyout(&loff, uap->basep, sizeof(long)); 3993 return (error); 3994} 3995 3996int 3997kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3998 long *ploff) 3999{ 4000 struct vnode *vp; 4001 struct file *fp; 4002 struct uio auio, kuio; 4003 struct iovec aiov, kiov; 4004 struct dirent *dp, *edp; 4005 cap_rights_t rights; 4006 caddr_t dirbuf; 4007 int error, eofflag, readcnt; 4008 long loff; 4009 off_t foffset; 4010 4011 /* XXX arbitrary sanity limit on `count'. */ 4012 if (uap->count > 64 * 1024) 4013 return (EINVAL); 4014 error = getvnode(td->td_proc->p_fd, uap->fd, 4015 cap_rights_init(&rights, CAP_READ), &fp); 4016 if (error != 0) 4017 return (error); 4018 if ((fp->f_flag & FREAD) == 0) { 4019 fdrop(fp, td); 4020 return (EBADF); 4021 } 4022 vp = fp->f_vnode; 4023 foffset = foffset_lock(fp, 0); 4024unionread: 4025 if (vp->v_type != VDIR) { 4026 foffset_unlock(fp, foffset, 0); 4027 fdrop(fp, td); 4028 return (EINVAL); 4029 } 4030 aiov.iov_base = uap->buf; 4031 aiov.iov_len = uap->count; 4032 auio.uio_iov = &aiov; 4033 auio.uio_iovcnt = 1; 4034 auio.uio_rw = UIO_READ; 4035 auio.uio_segflg = UIO_USERSPACE; 4036 auio.uio_td = td; 4037 auio.uio_resid = uap->count; 4038 vn_lock(vp, LK_SHARED | LK_RETRY); 4039 loff = auio.uio_offset = foffset; 4040#ifdef MAC 4041 error = mac_vnode_check_readdir(td->td_ucred, vp); 4042 if (error != 0) { 4043 VOP_UNLOCK(vp, 0); 4044 foffset_unlock(fp, foffset, FOF_NOUPDATE); 4045 fdrop(fp, td); 4046 return (error); 4047 } 4048#endif 4049# if (BYTE_ORDER != LITTLE_ENDIAN) 4050 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 4051 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 4052 NULL, NULL); 4053 foffset = auio.uio_offset; 4054 } else 4055# endif 4056 { 4057 kuio = auio; 4058 kuio.uio_iov = &kiov; 4059 kuio.uio_segflg = UIO_SYSSPACE; 4060 kiov.iov_len = uap->count; 4061 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 4062 kiov.iov_base = dirbuf; 4063 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 4064 NULL, NULL); 4065 foffset = kuio.uio_offset; 4066 if (error == 0) { 4067 readcnt = uap->count - kuio.uio_resid; 4068 edp = (struct dirent *)&dirbuf[readcnt]; 4069 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 4070# if (BYTE_ORDER == LITTLE_ENDIAN) 4071 /* 4072 * The expected low byte of 4073 * dp->d_namlen is our dp->d_type. 4074 * The high MBZ byte of dp->d_namlen 4075 * is our dp->d_namlen. 4076 */ 4077 dp->d_type = dp->d_namlen; 4078 dp->d_namlen = 0; 4079# else 4080 /* 4081 * The dp->d_type is the high byte 4082 * of the expected dp->d_namlen, 4083 * so must be zero'ed. 4084 */ 4085 dp->d_type = 0; 4086# endif 4087 if (dp->d_reclen > 0) { 4088 dp = (struct dirent *) 4089 ((char *)dp + dp->d_reclen); 4090 } else { 4091 error = EIO; 4092 break; 4093 } 4094 } 4095 if (dp >= edp) 4096 error = uiomove(dirbuf, readcnt, &auio); 4097 } 4098 free(dirbuf, M_TEMP); 4099 } 4100 if (error != 0) { 4101 VOP_UNLOCK(vp, 0); 4102 foffset_unlock(fp, foffset, 0); 4103 fdrop(fp, td); 4104 return (error); 4105 } 4106 if (uap->count == auio.uio_resid && 4107 (vp->v_vflag & VV_ROOT) && 4108 (vp->v_mount->mnt_flag & MNT_UNION)) { 4109 struct vnode *tvp = vp; 4110 vp = vp->v_mount->mnt_vnodecovered; 4111 VREF(vp); 4112 fp->f_vnode = vp; 4113 fp->f_data = vp; 4114 foffset = 0; 4115 vput(tvp); 4116 goto unionread; 4117 } 4118 VOP_UNLOCK(vp, 0); 4119 foffset_unlock(fp, foffset, 0); 4120 fdrop(fp, td); 4121 td->td_retval[0] = uap->count - auio.uio_resid; 4122 if (error == 0) 4123 *ploff = loff; 4124 return (error); 4125} 4126#endif /* COMPAT_43 */ 4127 4128/* 4129 * Read a block of directory entries in a filesystem independent format. 4130 */ 4131#ifndef _SYS_SYSPROTO_H_ 4132struct getdirentries_args { 4133 int fd; 4134 char *buf; 4135 u_int count; 4136 long *basep; 4137}; 4138#endif 4139int 4140sys_getdirentries(td, uap) 4141 struct thread *td; 4142 register struct getdirentries_args /* { 4143 int fd; 4144 char *buf; 4145 u_int count; 4146 long *basep; 4147 } */ *uap; 4148{ 4149 long base; 4150 int error; 4151 4152 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4153 NULL, UIO_USERSPACE); 4154 if (error != 0) 4155 return (error); 4156 if (uap->basep != NULL) 4157 error = copyout(&base, uap->basep, sizeof(long)); 4158 return (error); 4159} 4160 4161int 4162kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4163 long *basep, ssize_t *residp, enum uio_seg bufseg) 4164{ 4165 struct vnode *vp; 4166 struct file *fp; 4167 struct uio auio; 4168 struct iovec aiov; 4169 cap_rights_t rights; 4170 long loff; 4171 int error, eofflag; 4172 off_t foffset; 4173 4174 AUDIT_ARG_FD(fd); 4175 if (count > IOSIZE_MAX) 4176 return (EINVAL); 4177 auio.uio_resid = count; 4178 error = getvnode(td->td_proc->p_fd, fd, 4179 cap_rights_init(&rights, CAP_READ), &fp); 4180 if (error != 0) 4181 return (error); 4182 if ((fp->f_flag & FREAD) == 0) { 4183 fdrop(fp, td); 4184 return (EBADF); 4185 } 4186 vp = fp->f_vnode; 4187 foffset = foffset_lock(fp, 0); 4188unionread: 4189 if (vp->v_type != VDIR) { 4190 error = EINVAL; 4191 goto fail; 4192 } 4193 aiov.iov_base = buf; 4194 aiov.iov_len = count; 4195 auio.uio_iov = &aiov; 4196 auio.uio_iovcnt = 1; 4197 auio.uio_rw = UIO_READ; 4198 auio.uio_segflg = bufseg; 4199 auio.uio_td = td; 4200 vn_lock(vp, LK_SHARED | LK_RETRY); 4201 AUDIT_ARG_VNODE1(vp); 4202 loff = auio.uio_offset = foffset; 4203#ifdef MAC 4204 error = mac_vnode_check_readdir(td->td_ucred, vp); 4205 if (error == 0) 4206#endif 4207 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4208 NULL); 4209 foffset = auio.uio_offset; 4210 if (error != 0) { 4211 VOP_UNLOCK(vp, 0); 4212 goto fail; 4213 } 4214 if (count == auio.uio_resid && 4215 (vp->v_vflag & VV_ROOT) && 4216 (vp->v_mount->mnt_flag & MNT_UNION)) { 4217 struct vnode *tvp = vp; 4218 4219 vp = vp->v_mount->mnt_vnodecovered; 4220 VREF(vp); 4221 fp->f_vnode = vp; 4222 fp->f_data = vp; 4223 foffset = 0; 4224 vput(tvp); 4225 goto unionread; 4226 } 4227 VOP_UNLOCK(vp, 0); 4228 *basep = loff; 4229 if (residp != NULL) 4230 *residp = auio.uio_resid; 4231 td->td_retval[0] = count - auio.uio_resid; 4232fail: 4233 foffset_unlock(fp, foffset, 0); 4234 fdrop(fp, td); 4235 return (error); 4236} 4237 4238#ifndef _SYS_SYSPROTO_H_ 4239struct getdents_args { 4240 int fd; 4241 char *buf; 4242 size_t count; 4243}; 4244#endif 4245int 4246sys_getdents(td, uap) 4247 struct thread *td; 4248 register struct getdents_args /* { 4249 int fd; 4250 char *buf; 4251 u_int count; 4252 } */ *uap; 4253{ 4254 struct getdirentries_args ap; 4255 4256 ap.fd = uap->fd; 4257 ap.buf = uap->buf; 4258 ap.count = uap->count; 4259 ap.basep = NULL; 4260 return (sys_getdirentries(td, &ap)); 4261} 4262 4263/* 4264 * Set the mode mask for creation of filesystem nodes. 4265 */ 4266#ifndef _SYS_SYSPROTO_H_ 4267struct umask_args { 4268 int newmask; 4269}; 4270#endif 4271int 4272sys_umask(td, uap) 4273 struct thread *td; 4274 struct umask_args /* { 4275 int newmask; 4276 } */ *uap; 4277{ 4278 register struct filedesc *fdp; 4279 4280 FILEDESC_XLOCK(td->td_proc->p_fd); 4281 fdp = td->td_proc->p_fd; 4282 td->td_retval[0] = fdp->fd_cmask; 4283 fdp->fd_cmask = uap->newmask & ALLPERMS; 4284 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4285 return (0); 4286} 4287 4288/* 4289 * Void all references to file by ripping underlying filesystem away from 4290 * vnode. 4291 */ 4292#ifndef _SYS_SYSPROTO_H_ 4293struct revoke_args { 4294 char *path; 4295}; 4296#endif 4297int 4298sys_revoke(td, uap) 4299 struct thread *td; 4300 register struct revoke_args /* { 4301 char *path; 4302 } */ *uap; 4303{ 4304 struct vnode *vp; 4305 struct vattr vattr; 4306 struct nameidata nd; 4307 int error; 4308 4309 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4310 uap->path, td); 4311 if ((error = namei(&nd)) != 0) 4312 return (error); 4313 vp = nd.ni_vp; 4314 NDFREE(&nd, NDF_ONLY_PNBUF); 4315 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4316 error = EINVAL; 4317 goto out; 4318 } 4319#ifdef MAC 4320 error = mac_vnode_check_revoke(td->td_ucred, vp); 4321 if (error != 0) 4322 goto out; 4323#endif 4324 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4325 if (error != 0) 4326 goto out; 4327 if (td->td_ucred->cr_uid != vattr.va_uid) { 4328 error = priv_check(td, PRIV_VFS_ADMIN); 4329 if (error != 0) 4330 goto out; 4331 } 4332 if (vcount(vp) > 1) 4333 VOP_REVOKE(vp, REVOKEALL); 4334out: 4335 vput(vp); 4336 return (error); 4337} 4338 4339/* 4340 * Convert a user file descriptor to a kernel file entry and check that, if it 4341 * is a capability, the correct rights are present. A reference on the file 4342 * entry is held upon returning. 4343 */ 4344int 4345getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4346{ 4347 struct file *fp; 4348 int error; 4349 4350 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4351 if (error != 0) 4352 return (error); 4353 4354 /* 4355 * The file could be not of the vnode type, or it may be not 4356 * yet fully initialized, in which case the f_vnode pointer 4357 * may be set, but f_ops is still badfileops. E.g., 4358 * devfs_open() transiently create such situation to 4359 * facilitate csw d_fdopen(). 4360 * 4361 * Dupfdopen() handling in kern_openat() installs the 4362 * half-baked file into the process descriptor table, allowing 4363 * other thread to dereference it. Guard against the race by 4364 * checking f_ops. 4365 */ 4366 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4367 fdrop(fp, curthread); 4368 return (EINVAL); 4369 } 4370 *fpp = fp; 4371 return (0); 4372} 4373 4374 4375/* 4376 * Get an (NFS) file handle. 4377 */ 4378#ifndef _SYS_SYSPROTO_H_ 4379struct lgetfh_args { 4380 char *fname; 4381 fhandle_t *fhp; 4382}; 4383#endif 4384int 4385sys_lgetfh(td, uap) 4386 struct thread *td; 4387 register struct lgetfh_args *uap; 4388{ 4389 struct nameidata nd; 4390 fhandle_t fh; 4391 register struct vnode *vp; 4392 int error; 4393 4394 error = priv_check(td, PRIV_VFS_GETFH); 4395 if (error != 0) 4396 return (error); 4397 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4398 uap->fname, td); 4399 error = namei(&nd); 4400 if (error != 0) 4401 return (error); 4402 NDFREE(&nd, NDF_ONLY_PNBUF); 4403 vp = nd.ni_vp; 4404 bzero(&fh, sizeof(fh)); 4405 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4406 error = VOP_VPTOFH(vp, &fh.fh_fid); 4407 vput(vp); 4408 if (error == 0) 4409 error = copyout(&fh, uap->fhp, sizeof (fh)); 4410 return (error); 4411} 4412 4413#ifndef _SYS_SYSPROTO_H_ 4414struct getfh_args { 4415 char *fname; 4416 fhandle_t *fhp; 4417}; 4418#endif 4419int 4420sys_getfh(td, uap) 4421 struct thread *td; 4422 register struct getfh_args *uap; 4423{ 4424 struct nameidata nd; 4425 fhandle_t fh; 4426 register struct vnode *vp; 4427 int error; 4428 4429 error = priv_check(td, PRIV_VFS_GETFH); 4430 if (error != 0) 4431 return (error); 4432 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4433 uap->fname, td); 4434 error = namei(&nd); 4435 if (error != 0) 4436 return (error); 4437 NDFREE(&nd, NDF_ONLY_PNBUF); 4438 vp = nd.ni_vp; 4439 bzero(&fh, sizeof(fh)); 4440 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4441 error = VOP_VPTOFH(vp, &fh.fh_fid); 4442 vput(vp); 4443 if (error == 0) 4444 error = copyout(&fh, uap->fhp, sizeof (fh)); 4445 return (error); 4446} 4447 4448/* 4449 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4450 * open descriptor. 4451 * 4452 * warning: do not remove the priv_check() call or this becomes one giant 4453 * security hole. 4454 */ 4455#ifndef _SYS_SYSPROTO_H_ 4456struct fhopen_args { 4457 const struct fhandle *u_fhp; 4458 int flags; 4459}; 4460#endif 4461int 4462sys_fhopen(td, uap) 4463 struct thread *td; 4464 struct fhopen_args /* { 4465 const struct fhandle *u_fhp; 4466 int flags; 4467 } */ *uap; 4468{ 4469 struct mount *mp; 4470 struct vnode *vp; 4471 struct fhandle fhp; 4472 struct file *fp; 4473 int fmode, error; 4474 int indx; 4475 4476 error = priv_check(td, PRIV_VFS_FHOPEN); 4477 if (error != 0) 4478 return (error); 4479 indx = -1; 4480 fmode = FFLAGS(uap->flags); 4481 /* why not allow a non-read/write open for our lockd? */ 4482 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4483 return (EINVAL); 4484 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4485 if (error != 0) 4486 return(error); 4487 /* find the mount point */ 4488 mp = vfs_busyfs(&fhp.fh_fsid); 4489 if (mp == NULL) 4490 return (ESTALE); 4491 /* now give me my vnode, it gets returned to me locked */ 4492 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4493 vfs_unbusy(mp); 4494 if (error != 0) 4495 return (error); 4496 4497 error = falloc_noinstall(td, &fp); 4498 if (error != 0) { 4499 vput(vp); 4500 return (error); 4501 } 4502 /* 4503 * An extra reference on `fp' has been held for us by 4504 * falloc_noinstall(). 4505 */ 4506 4507#ifdef INVARIANTS 4508 td->td_dupfd = -1; 4509#endif 4510 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4511 if (error != 0) { 4512 KASSERT(fp->f_ops == &badfileops, 4513 ("VOP_OPEN in fhopen() set f_ops")); 4514 KASSERT(td->td_dupfd < 0, 4515 ("fhopen() encountered fdopen()")); 4516 4517 vput(vp); 4518 goto bad; 4519 } 4520#ifdef INVARIANTS 4521 td->td_dupfd = 0; 4522#endif 4523 fp->f_vnode = vp; 4524 fp->f_seqcount = 1; 4525 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4526 &vnops); 4527 VOP_UNLOCK(vp, 0); 4528 if ((fmode & O_TRUNC) != 0) { 4529 error = fo_truncate(fp, 0, td->td_ucred, td); 4530 if (error != 0) 4531 goto bad; 4532 } 4533 4534 error = finstall(td, fp, &indx, fmode, NULL); 4535bad: 4536 fdrop(fp, td); 4537 td->td_retval[0] = indx; 4538 return (error); 4539} 4540 4541/* 4542 * Stat an (NFS) file handle. 4543 */ 4544#ifndef _SYS_SYSPROTO_H_ 4545struct fhstat_args { 4546 struct fhandle *u_fhp; 4547 struct stat *sb; 4548}; 4549#endif 4550int 4551sys_fhstat(td, uap) 4552 struct thread *td; 4553 register struct fhstat_args /* { 4554 struct fhandle *u_fhp; 4555 struct stat *sb; 4556 } */ *uap; 4557{ 4558 struct stat sb; 4559 struct fhandle fh; 4560 int error; 4561 4562 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4563 if (error != 0) 4564 return (error); 4565 error = kern_fhstat(td, fh, &sb); 4566 if (error == 0) 4567 error = copyout(&sb, uap->sb, sizeof(sb)); 4568 return (error); 4569} 4570 4571int 4572kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4573{ 4574 struct mount *mp; 4575 struct vnode *vp; 4576 int error; 4577 4578 error = priv_check(td, PRIV_VFS_FHSTAT); 4579 if (error != 0) 4580 return (error); 4581 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4582 return (ESTALE); 4583 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4584 vfs_unbusy(mp); 4585 if (error != 0) 4586 return (error); 4587 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4588 vput(vp); 4589 return (error); 4590} 4591 4592/* 4593 * Implement fstatfs() for (NFS) file handles. 4594 */ 4595#ifndef _SYS_SYSPROTO_H_ 4596struct fhstatfs_args { 4597 struct fhandle *u_fhp; 4598 struct statfs *buf; 4599}; 4600#endif 4601int 4602sys_fhstatfs(td, uap) 4603 struct thread *td; 4604 struct fhstatfs_args /* { 4605 struct fhandle *u_fhp; 4606 struct statfs *buf; 4607 } */ *uap; 4608{ 4609 struct statfs sf; 4610 fhandle_t fh; 4611 int error; 4612 4613 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4614 if (error != 0) 4615 return (error); 4616 error = kern_fhstatfs(td, fh, &sf); 4617 if (error != 0) 4618 return (error); 4619 return (copyout(&sf, uap->buf, sizeof(sf))); 4620} 4621 4622int 4623kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4624{ 4625 struct statfs *sp; 4626 struct mount *mp; 4627 struct vnode *vp; 4628 int error; 4629 4630 error = priv_check(td, PRIV_VFS_FHSTATFS); 4631 if (error != 0) 4632 return (error); 4633 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4634 return (ESTALE); 4635 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4636 if (error != 0) { 4637 vfs_unbusy(mp); 4638 return (error); 4639 } 4640 vput(vp); 4641 error = prison_canseemount(td->td_ucred, mp); 4642 if (error != 0) 4643 goto out; 4644#ifdef MAC 4645 error = mac_mount_check_stat(td->td_ucred, mp); 4646 if (error != 0) 4647 goto out; 4648#endif 4649 /* 4650 * Set these in case the underlying filesystem fails to do so. 4651 */ 4652 sp = &mp->mnt_stat; 4653 sp->f_version = STATFS_VERSION; 4654 sp->f_namemax = NAME_MAX; 4655 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4656 error = VFS_STATFS(mp, sp); 4657 if (error == 0) 4658 *buf = *sp; 4659out: 4660 vfs_unbusy(mp); 4661 return (error); 4662} 4663 4664int 4665kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4666{ 4667 struct file *fp; 4668 struct mount *mp; 4669 struct vnode *vp; 4670 cap_rights_t rights; 4671 off_t olen, ooffset; 4672 int error; 4673 4674 if (offset < 0 || len <= 0) 4675 return (EINVAL); 4676 /* Check for wrap. */ 4677 if (offset > OFF_MAX - len) 4678 return (EFBIG); 4679 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4680 if (error != 0) 4681 return (error); 4682 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4683 error = ESPIPE; 4684 goto out; 4685 } 4686 if ((fp->f_flag & FWRITE) == 0) { 4687 error = EBADF; 4688 goto out; 4689 } 4690 if (fp->f_type != DTYPE_VNODE) { 4691 error = ENODEV; 4692 goto out; 4693 } 4694 vp = fp->f_vnode; 4695 if (vp->v_type != VREG) { 4696 error = ENODEV; 4697 goto out; 4698 } 4699 4700 /* Allocating blocks may take a long time, so iterate. */ 4701 for (;;) { 4702 olen = len; 4703 ooffset = offset; 4704 4705 bwillwrite(); 4706 mp = NULL; 4707 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4708 if (error != 0) 4709 break; 4710 error = vn_lock(vp, LK_EXCLUSIVE); 4711 if (error != 0) { 4712 vn_finished_write(mp); 4713 break; 4714 } 4715#ifdef MAC 4716 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4717 if (error == 0) 4718#endif 4719 error = VOP_ALLOCATE(vp, &offset, &len); 4720 VOP_UNLOCK(vp, 0); 4721 vn_finished_write(mp); 4722 4723 if (olen + ooffset != offset + len) { 4724 panic("offset + len changed from %jx/%jx to %jx/%jx", 4725 ooffset, olen, offset, len); 4726 } 4727 if (error != 0 || len == 0) 4728 break; 4729 KASSERT(olen > len, ("Iteration did not make progress?")); 4730 maybe_yield(); 4731 } 4732 out: 4733 fdrop(fp, td); 4734 return (error); 4735} 4736 4737int 4738sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4739{ 4740 4741 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4742 uap->len); 4743 return (0); 4744} 4745 4746/* 4747 * Unlike madvise(2), we do not make a best effort to remember every 4748 * possible caching hint. Instead, we remember the last setting with 4749 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4750 * region of any current setting. 4751 */ 4752int 4753kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4754 int advice) 4755{ 4756 struct fadvise_info *fa, *new; 4757 struct file *fp; 4758 struct vnode *vp; 4759 cap_rights_t rights; 4760 off_t end; 4761 int error; 4762 4763 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4764 return (EINVAL); 4765 switch (advice) { 4766 case POSIX_FADV_SEQUENTIAL: 4767 case POSIX_FADV_RANDOM: 4768 case POSIX_FADV_NOREUSE: 4769 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4770 break; 4771 case POSIX_FADV_NORMAL: 4772 case POSIX_FADV_WILLNEED: 4773 case POSIX_FADV_DONTNEED: 4774 new = NULL; 4775 break; 4776 default: 4777 return (EINVAL); 4778 } 4779 /* XXX: CAP_POSIX_FADVISE? */ 4780 error = fget(td, fd, cap_rights_init(&rights), &fp); 4781 if (error != 0) 4782 goto out; 4783 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4784 error = ESPIPE; 4785 goto out; 4786 } 4787 if (fp->f_type != DTYPE_VNODE) { 4788 error = ENODEV; 4789 goto out; 4790 } 4791 vp = fp->f_vnode; 4792 if (vp->v_type != VREG) { 4793 error = ENODEV; 4794 goto out; 4795 } 4796 if (len == 0) 4797 end = OFF_MAX; 4798 else 4799 end = offset + len - 1; 4800 switch (advice) { 4801 case POSIX_FADV_SEQUENTIAL: 4802 case POSIX_FADV_RANDOM: 4803 case POSIX_FADV_NOREUSE: 4804 /* 4805 * Try to merge any existing non-standard region with 4806 * this new region if possible, otherwise create a new 4807 * non-standard region for this request. 4808 */ 4809 mtx_pool_lock(mtxpool_sleep, fp); 4810 fa = fp->f_advice; 4811 if (fa != NULL && fa->fa_advice == advice && 4812 ((fa->fa_start <= end && fa->fa_end >= offset) || 4813 (end != OFF_MAX && fa->fa_start == end + 1) || 4814 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4815 if (offset < fa->fa_start) 4816 fa->fa_start = offset; 4817 if (end > fa->fa_end) 4818 fa->fa_end = end; 4819 } else { 4820 new->fa_advice = advice; 4821 new->fa_start = offset; 4822 new->fa_end = end; 4823 new->fa_prevstart = 0; 4824 new->fa_prevend = 0; 4825 fp->f_advice = new; 4826 new = fa; 4827 } 4828 mtx_pool_unlock(mtxpool_sleep, fp); 4829 break; 4830 case POSIX_FADV_NORMAL: 4831 /* 4832 * If a the "normal" region overlaps with an existing 4833 * non-standard region, trim or remove the 4834 * non-standard region. 4835 */ 4836 mtx_pool_lock(mtxpool_sleep, fp); 4837 fa = fp->f_advice; 4838 if (fa != NULL) { 4839 if (offset <= fa->fa_start && end >= fa->fa_end) { 4840 new = fa; 4841 fp->f_advice = NULL; 4842 } else if (offset <= fa->fa_start && 4843 end >= fa->fa_start) 4844 fa->fa_start = end + 1; 4845 else if (offset <= fa->fa_end && end >= fa->fa_end) 4846 fa->fa_end = offset - 1; 4847 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4848 /* 4849 * If the "normal" region is a middle 4850 * portion of the existing 4851 * non-standard region, just remove 4852 * the whole thing rather than picking 4853 * one side or the other to 4854 * preserve. 4855 */ 4856 new = fa; 4857 fp->f_advice = NULL; 4858 } 4859 } 4860 mtx_pool_unlock(mtxpool_sleep, fp); 4861 break; 4862 case POSIX_FADV_WILLNEED: 4863 case POSIX_FADV_DONTNEED: 4864 error = VOP_ADVISE(vp, offset, end, advice); 4865 break; 4866 } 4867out: 4868 if (fp != NULL) 4869 fdrop(fp, td); 4870 free(new, M_FADVISE); 4871 return (error); 4872} 4873 4874int 4875sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4876{ 4877 4878 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4879 uap->len, uap->advice); 4880 return (0); 4881} 4882