vfs_syscalls.c revision 301054
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_syscalls.c 301054 2016-05-31 16:57:42Z glebius $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_kdtrace.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/capsicum.h> 50#include <sys/disk.h> 51#include <sys/sysent.h> 52#include <sys/malloc.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/sysproto.h> 56#include <sys/namei.h> 57#include <sys/filedesc.h> 58#include <sys/kernel.h> 59#include <sys/fcntl.h> 60#include <sys/file.h> 61#include <sys/filio.h> 62#include <sys/limits.h> 63#include <sys/linker.h> 64#include <sys/rwlock.h> 65#include <sys/sdt.h> 66#include <sys/stat.h> 67#include <sys/sx.h> 68#include <sys/unistd.h> 69#include <sys/vnode.h> 70#include <sys/priv.h> 71#include <sys/proc.h> 72#include <sys/dirent.h> 73#include <sys/jail.h> 74#include <sys/syscallsubr.h> 75#include <sys/sysctl.h> 76#ifdef KTRACE 77#include <sys/ktrace.h> 78#endif 79 80#include <machine/stdarg.h> 81 82#include <security/audit/audit.h> 83#include <security/mac/mac_framework.h> 84 85#include <vm/vm.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/uma.h> 89 90#include <ufs/ufs/quota.h> 91 92MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94SDT_PROVIDER_DEFINE(vfs); 95SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98static int chroot_refuse_vdir_fds(struct filedesc *fdp); 99static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103static int setfflags(struct thread *td, struct vnode *, u_long); 104static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 105static int getutimens(const struct timespec *, enum uio_seg, 106 struct timespec *, int *); 107static int setutimes(struct thread *td, struct vnode *, 108 const struct timespec *, int, int); 109static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 110 struct thread *td); 111 112/* 113 * The module initialization routine for POSIX asynchronous I/O will 114 * set this to the version of AIO that it implements. (Zero means 115 * that it is not implemented.) This value is used here by pathconf() 116 * and in kern_descrip.c by fpathconf(). 117 */ 118int async_io_version; 119 120/* 121 * Sync each mounted filesystem. 122 */ 123#ifndef _SYS_SYSPROTO_H_ 124struct sync_args { 125 int dummy; 126}; 127#endif 128/* ARGSUSED */ 129int 130sys_sync(td, uap) 131 struct thread *td; 132 struct sync_args *uap; 133{ 134 struct mount *mp, *nmp; 135 int save; 136 137 mtx_lock(&mountlist_mtx); 138 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 139 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 continue; 142 } 143 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 144 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 145 save = curthread_pflags_set(TDP_SYNCIO); 146 vfs_msync(mp, MNT_NOWAIT); 147 VFS_SYNC(mp, MNT_NOWAIT); 148 curthread_pflags_restore(save); 149 vn_finished_write(mp); 150 } 151 mtx_lock(&mountlist_mtx); 152 nmp = TAILQ_NEXT(mp, mnt_list); 153 vfs_unbusy(mp); 154 } 155 mtx_unlock(&mountlist_mtx); 156 return (0); 157} 158 159/* 160 * Change filesystem quotas. 161 */ 162#ifndef _SYS_SYSPROTO_H_ 163struct quotactl_args { 164 char *path; 165 int cmd; 166 int uid; 167 caddr_t arg; 168}; 169#endif 170int 171sys_quotactl(td, uap) 172 struct thread *td; 173 register struct quotactl_args /* { 174 char *path; 175 int cmd; 176 int uid; 177 caddr_t arg; 178 } */ *uap; 179{ 180 struct mount *mp; 181 struct nameidata nd; 182 int error; 183 184 AUDIT_ARG_CMD(uap->cmd); 185 AUDIT_ARG_UID(uap->uid); 186 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 187 return (EPERM); 188 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 189 uap->path, td); 190 if ((error = namei(&nd)) != 0) 191 return (error); 192 NDFREE(&nd, NDF_ONLY_PNBUF); 193 mp = nd.ni_vp->v_mount; 194 vfs_ref(mp); 195 vput(nd.ni_vp); 196 error = vfs_busy(mp, 0); 197 vfs_rel(mp); 198 if (error != 0) 199 return (error); 200 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 201 202 /* 203 * Since quota on operation typically needs to open quota 204 * file, the Q_QUOTAON handler needs to unbusy the mount point 205 * before calling into namei. Otherwise, unmount might be 206 * started between two vfs_busy() invocations (first is our, 207 * second is from mount point cross-walk code in lookup()), 208 * causing deadlock. 209 * 210 * Require that Q_QUOTAON handles the vfs_busy() reference on 211 * its own, always returning with ubusied mount point. 212 */ 213 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 214 vfs_unbusy(mp); 215 return (error); 216} 217 218/* 219 * Used by statfs conversion routines to scale the block size up if 220 * necessary so that all of the block counts are <= 'max_size'. Note 221 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 222 * value of 'n'. 223 */ 224void 225statfs_scale_blocks(struct statfs *sf, long max_size) 226{ 227 uint64_t count; 228 int shift; 229 230 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 231 232 /* 233 * Attempt to scale the block counts to give a more accurate 234 * overview to userland of the ratio of free space to used 235 * space. To do this, find the largest block count and compute 236 * a divisor that lets it fit into a signed integer <= max_size. 237 */ 238 if (sf->f_bavail < 0) 239 count = -sf->f_bavail; 240 else 241 count = sf->f_bavail; 242 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 243 if (count <= max_size) 244 return; 245 246 count >>= flsl(max_size); 247 shift = 0; 248 while (count > 0) { 249 shift++; 250 count >>=1; 251 } 252 253 sf->f_bsize <<= shift; 254 sf->f_blocks >>= shift; 255 sf->f_bfree >>= shift; 256 sf->f_bavail >>= shift; 257} 258 259/* 260 * Get filesystem statistics. 261 */ 262#ifndef _SYS_SYSPROTO_H_ 263struct statfs_args { 264 char *path; 265 struct statfs *buf; 266}; 267#endif 268int 269sys_statfs(td, uap) 270 struct thread *td; 271 register struct statfs_args /* { 272 char *path; 273 struct statfs *buf; 274 } */ *uap; 275{ 276 struct statfs sf; 277 int error; 278 279 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 280 if (error == 0) 281 error = copyout(&sf, uap->buf, sizeof(sf)); 282 return (error); 283} 284 285int 286kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 287 struct statfs *buf) 288{ 289 struct mount *mp; 290 struct statfs *sp, sb; 291 struct nameidata nd; 292 int error; 293 294 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 295 pathseg, path, td); 296 error = namei(&nd); 297 if (error != 0) 298 return (error); 299 mp = nd.ni_vp->v_mount; 300 vfs_ref(mp); 301 NDFREE(&nd, NDF_ONLY_PNBUF); 302 vput(nd.ni_vp); 303 error = vfs_busy(mp, 0); 304 vfs_rel(mp); 305 if (error != 0) 306 return (error); 307#ifdef MAC 308 error = mac_mount_check_stat(td->td_ucred, mp); 309 if (error != 0) 310 goto out; 311#endif 312 /* 313 * Set these in case the underlying filesystem fails to do so. 314 */ 315 sp = &mp->mnt_stat; 316 sp->f_version = STATFS_VERSION; 317 sp->f_namemax = NAME_MAX; 318 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 319 error = VFS_STATFS(mp, sp); 320 if (error != 0) 321 goto out; 322 if (priv_check(td, PRIV_VFS_GENERATION)) { 323 bcopy(sp, &sb, sizeof(sb)); 324 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 325 prison_enforce_statfs(td->td_ucred, mp, &sb); 326 sp = &sb; 327 } 328 *buf = *sp; 329out: 330 vfs_unbusy(mp); 331 return (error); 332} 333 334/* 335 * Get filesystem statistics. 336 */ 337#ifndef _SYS_SYSPROTO_H_ 338struct fstatfs_args { 339 int fd; 340 struct statfs *buf; 341}; 342#endif 343int 344sys_fstatfs(td, uap) 345 struct thread *td; 346 register struct fstatfs_args /* { 347 int fd; 348 struct statfs *buf; 349 } */ *uap; 350{ 351 struct statfs sf; 352 int error; 353 354 error = kern_fstatfs(td, uap->fd, &sf); 355 if (error == 0) 356 error = copyout(&sf, uap->buf, sizeof(sf)); 357 return (error); 358} 359 360int 361kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 362{ 363 struct file *fp; 364 struct mount *mp; 365 struct statfs *sp, sb; 366 struct vnode *vp; 367 cap_rights_t rights; 368 int error; 369 370 AUDIT_ARG_FD(fd); 371 error = getvnode(td->td_proc->p_fd, fd, 372 cap_rights_init(&rights, CAP_FSTATFS), &fp); 373 if (error != 0) 374 return (error); 375 vp = fp->f_vnode; 376 vn_lock(vp, LK_SHARED | LK_RETRY); 377#ifdef AUDIT 378 AUDIT_ARG_VNODE1(vp); 379#endif 380 mp = vp->v_mount; 381 if (mp) 382 vfs_ref(mp); 383 VOP_UNLOCK(vp, 0); 384 fdrop(fp, td); 385 if (mp == NULL) { 386 error = EBADF; 387 goto out; 388 } 389 error = vfs_busy(mp, 0); 390 vfs_rel(mp); 391 if (error != 0) 392 return (error); 393#ifdef MAC 394 error = mac_mount_check_stat(td->td_ucred, mp); 395 if (error != 0) 396 goto out; 397#endif 398 /* 399 * Set these in case the underlying filesystem fails to do so. 400 */ 401 sp = &mp->mnt_stat; 402 sp->f_version = STATFS_VERSION; 403 sp->f_namemax = NAME_MAX; 404 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 405 error = VFS_STATFS(mp, sp); 406 if (error != 0) 407 goto out; 408 if (priv_check(td, PRIV_VFS_GENERATION)) { 409 bcopy(sp, &sb, sizeof(sb)); 410 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 411 prison_enforce_statfs(td->td_ucred, mp, &sb); 412 sp = &sb; 413 } 414 *buf = *sp; 415out: 416 if (mp) 417 vfs_unbusy(mp); 418 return (error); 419} 420 421/* 422 * Get statistics on all filesystems. 423 */ 424#ifndef _SYS_SYSPROTO_H_ 425struct getfsstat_args { 426 struct statfs *buf; 427 long bufsize; 428 int flags; 429}; 430#endif 431int 432sys_getfsstat(td, uap) 433 struct thread *td; 434 register struct getfsstat_args /* { 435 struct statfs *buf; 436 long bufsize; 437 int flags; 438 } */ *uap; 439{ 440 441 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 442 uap->flags)); 443} 444 445/* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 449 */ 450int 451kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 452 enum uio_seg bufseg, int flags) 453{ 454 struct mount *mp, *nmp; 455 struct statfs *sfsp, *sp, sb; 456 size_t count, maxcount; 457 int error; 458 459 maxcount = bufsize / sizeof(struct statfs); 460 if (bufsize == 0) 461 sfsp = NULL; 462 else if (bufseg == UIO_USERSPACE) 463 sfsp = *buf; 464 else /* if (bufseg == UIO_SYSSPACE) */ { 465 count = 0; 466 mtx_lock(&mountlist_mtx); 467 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 468 count++; 469 } 470 mtx_unlock(&mountlist_mtx); 471 if (maxcount > count) 472 maxcount = count; 473 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 474 M_WAITOK); 475 } 476 count = 0; 477 mtx_lock(&mountlist_mtx); 478 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 479 if (prison_canseemount(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483#ifdef MAC 484 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488#endif 489 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 if (sfsp && count < maxcount) { 494 sp = &mp->mnt_stat; 495 /* 496 * Set these in case the underlying filesystem 497 * fails to do so. 498 */ 499 sp->f_version = STATFS_VERSION; 500 sp->f_namemax = NAME_MAX; 501 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 502 /* 503 * If MNT_NOWAIT or MNT_LAZY is specified, do not 504 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 505 * overrides MNT_WAIT. 506 */ 507 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 508 (flags & MNT_WAIT)) && 509 (error = VFS_STATFS(mp, sp))) { 510 mtx_lock(&mountlist_mtx); 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 vfs_unbusy(mp); 513 continue; 514 } 515 if (priv_check(td, PRIV_VFS_GENERATION)) { 516 bcopy(sp, &sb, sizeof(sb)); 517 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 518 prison_enforce_statfs(td->td_ucred, mp, &sb); 519 sp = &sb; 520 } 521 if (bufseg == UIO_SYSSPACE) 522 bcopy(sp, sfsp, sizeof(*sp)); 523 else /* if (bufseg == UIO_USERSPACE) */ { 524 error = copyout(sp, sfsp, sizeof(*sp)); 525 if (error != 0) { 526 vfs_unbusy(mp); 527 return (error); 528 } 529 } 530 sfsp++; 531 } 532 count++; 533 mtx_lock(&mountlist_mtx); 534 nmp = TAILQ_NEXT(mp, mnt_list); 535 vfs_unbusy(mp); 536 } 537 mtx_unlock(&mountlist_mtx); 538 if (sfsp && count > maxcount) 539 td->td_retval[0] = maxcount; 540 else 541 td->td_retval[0] = count; 542 return (0); 543} 544 545#ifdef COMPAT_FREEBSD4 546/* 547 * Get old format filesystem statistics. 548 */ 549static void cvtstatfs(struct statfs *, struct ostatfs *); 550 551#ifndef _SYS_SYSPROTO_H_ 552struct freebsd4_statfs_args { 553 char *path; 554 struct ostatfs *buf; 555}; 556#endif 557int 558freebsd4_statfs(td, uap) 559 struct thread *td; 560 struct freebsd4_statfs_args /* { 561 char *path; 562 struct ostatfs *buf; 563 } */ *uap; 564{ 565 struct ostatfs osb; 566 struct statfs sf; 567 int error; 568 569 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 570 if (error != 0) 571 return (error); 572 cvtstatfs(&sf, &osb); 573 return (copyout(&osb, uap->buf, sizeof(osb))); 574} 575 576/* 577 * Get filesystem statistics. 578 */ 579#ifndef _SYS_SYSPROTO_H_ 580struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583}; 584#endif 585int 586freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592{ 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error != 0) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602} 603 604/* 605 * Get statistics on all filesystems. 606 */ 607#ifndef _SYS_SYSPROTO_H_ 608struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612}; 613#endif 614int 615freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622{ 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 631 if (size > 0) { 632 count = td->td_retval[0]; 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 return (error); 644} 645 646/* 647 * Implement fstatfs() for (NFS) file handles. 648 */ 649#ifndef _SYS_SYSPROTO_H_ 650struct freebsd4_fhstatfs_args { 651 struct fhandle *u_fhp; 652 struct ostatfs *buf; 653}; 654#endif 655int 656freebsd4_fhstatfs(td, uap) 657 struct thread *td; 658 struct freebsd4_fhstatfs_args /* { 659 struct fhandle *u_fhp; 660 struct ostatfs *buf; 661 } */ *uap; 662{ 663 struct ostatfs osb; 664 struct statfs sf; 665 fhandle_t fh; 666 int error; 667 668 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 669 if (error != 0) 670 return (error); 671 error = kern_fhstatfs(td, fh, &sf); 672 if (error != 0) 673 return (error); 674 cvtstatfs(&sf, &osb); 675 return (copyout(&osb, uap->buf, sizeof(osb))); 676} 677 678/* 679 * Convert a new format statfs structure to an old format statfs structure. 680 */ 681static void 682cvtstatfs(nsp, osp) 683 struct statfs *nsp; 684 struct ostatfs *osp; 685{ 686 687 statfs_scale_blocks(nsp, LONG_MAX); 688 bzero(osp, sizeof(*osp)); 689 osp->f_bsize = nsp->f_bsize; 690 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 691 osp->f_blocks = nsp->f_blocks; 692 osp->f_bfree = nsp->f_bfree; 693 osp->f_bavail = nsp->f_bavail; 694 osp->f_files = MIN(nsp->f_files, LONG_MAX); 695 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 696 osp->f_owner = nsp->f_owner; 697 osp->f_type = nsp->f_type; 698 osp->f_flags = nsp->f_flags; 699 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 700 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 701 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 702 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 703 strlcpy(osp->f_fstypename, nsp->f_fstypename, 704 MIN(MFSNAMELEN, OMFSNAMELEN)); 705 strlcpy(osp->f_mntonname, nsp->f_mntonname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 osp->f_fsid = nsp->f_fsid; 710} 711#endif /* COMPAT_FREEBSD4 */ 712 713/* 714 * Change current working directory to a given file descriptor. 715 */ 716#ifndef _SYS_SYSPROTO_H_ 717struct fchdir_args { 718 int fd; 719}; 720#endif 721int 722sys_fchdir(td, uap) 723 struct thread *td; 724 struct fchdir_args /* { 725 int fd; 726 } */ *uap; 727{ 728 register struct filedesc *fdp = td->td_proc->p_fd; 729 struct vnode *vp, *tdp, *vpold; 730 struct mount *mp; 731 struct file *fp; 732 cap_rights_t rights; 733 int error; 734 735 AUDIT_ARG_FD(uap->fd); 736 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 737 &fp); 738 if (error != 0) 739 return (error); 740 vp = fp->f_vnode; 741 VREF(vp); 742 fdrop(fp, td); 743 vn_lock(vp, LK_SHARED | LK_RETRY); 744 AUDIT_ARG_VNODE1(vp); 745 error = change_dir(vp, td); 746 while (!error && (mp = vp->v_mountedhere) != NULL) { 747 if (vfs_busy(mp, 0)) 748 continue; 749 error = VFS_ROOT(mp, LK_SHARED, &tdp); 750 vfs_unbusy(mp); 751 if (error != 0) 752 break; 753 vput(vp); 754 vp = tdp; 755 } 756 if (error != 0) { 757 vput(vp); 758 return (error); 759 } 760 VOP_UNLOCK(vp, 0); 761 FILEDESC_XLOCK(fdp); 762 vpold = fdp->fd_cdir; 763 fdp->fd_cdir = vp; 764 FILEDESC_XUNLOCK(fdp); 765 vrele(vpold); 766 return (0); 767} 768 769/* 770 * Change current working directory (``.''). 771 */ 772#ifndef _SYS_SYSPROTO_H_ 773struct chdir_args { 774 char *path; 775}; 776#endif 777int 778sys_chdir(td, uap) 779 struct thread *td; 780 struct chdir_args /* { 781 char *path; 782 } */ *uap; 783{ 784 785 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 786} 787 788int 789kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 790{ 791 register struct filedesc *fdp = td->td_proc->p_fd; 792 struct nameidata nd; 793 struct vnode *vp; 794 int error; 795 796 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 797 pathseg, path, td); 798 if ((error = namei(&nd)) != 0) 799 return (error); 800 if ((error = change_dir(nd.ni_vp, td)) != 0) { 801 vput(nd.ni_vp); 802 NDFREE(&nd, NDF_ONLY_PNBUF); 803 return (error); 804 } 805 VOP_UNLOCK(nd.ni_vp, 0); 806 NDFREE(&nd, NDF_ONLY_PNBUF); 807 FILEDESC_XLOCK(fdp); 808 vp = fdp->fd_cdir; 809 fdp->fd_cdir = nd.ni_vp; 810 FILEDESC_XUNLOCK(fdp); 811 vrele(vp); 812 return (0); 813} 814 815/* 816 * Helper function for raised chroot(2) security function: Refuse if 817 * any filedescriptors are open directories. 818 */ 819static int 820chroot_refuse_vdir_fds(fdp) 821 struct filedesc *fdp; 822{ 823 struct vnode *vp; 824 struct file *fp; 825 int fd; 826 827 FILEDESC_LOCK_ASSERT(fdp); 828 829 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 830 fp = fget_locked(fdp, fd); 831 if (fp == NULL) 832 continue; 833 if (fp->f_type == DTYPE_VNODE) { 834 vp = fp->f_vnode; 835 if (vp->v_type == VDIR) 836 return (EPERM); 837 } 838 } 839 return (0); 840} 841 842/* 843 * This sysctl determines if we will allow a process to chroot(2) if it 844 * has a directory open: 845 * 0: disallowed for all processes. 846 * 1: allowed for processes that were not already chroot(2)'ed. 847 * 2: allowed for all processes. 848 */ 849 850static int chroot_allow_open_directories = 1; 851 852SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 853 &chroot_allow_open_directories, 0, 854 "Allow a process to chroot(2) if it has a directory open"); 855 856/* 857 * Change notion of root (``/'') directory. 858 */ 859#ifndef _SYS_SYSPROTO_H_ 860struct chroot_args { 861 char *path; 862}; 863#endif 864int 865sys_chroot(td, uap) 866 struct thread *td; 867 struct chroot_args /* { 868 char *path; 869 } */ *uap; 870{ 871 struct nameidata nd; 872 int error; 873 874 error = priv_check(td, PRIV_VFS_CHROOT); 875 if (error != 0) 876 return (error); 877 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 878 UIO_USERSPACE, uap->path, td); 879 error = namei(&nd); 880 if (error != 0) 881 goto error; 882 error = change_dir(nd.ni_vp, td); 883 if (error != 0) 884 goto e_vunlock; 885#ifdef MAC 886 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 887 if (error != 0) 888 goto e_vunlock; 889#endif 890 VOP_UNLOCK(nd.ni_vp, 0); 891 error = change_root(nd.ni_vp, td); 892 vrele(nd.ni_vp); 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895e_vunlock: 896 vput(nd.ni_vp); 897error: 898 NDFREE(&nd, NDF_ONLY_PNBUF); 899 return (error); 900} 901 902/* 903 * Common routine for chroot and chdir. Callers must provide a locked vnode 904 * instance. 905 */ 906int 907change_dir(vp, td) 908 struct vnode *vp; 909 struct thread *td; 910{ 911#ifdef MAC 912 int error; 913#endif 914 915 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 916 if (vp->v_type != VDIR) 917 return (ENOTDIR); 918#ifdef MAC 919 error = mac_vnode_check_chdir(td->td_ucred, vp); 920 if (error != 0) 921 return (error); 922#endif 923 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 924} 925 926/* 927 * Common routine for kern_chroot() and jail_attach(). The caller is 928 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 929 * authorize this operation. 930 */ 931int 932change_root(vp, td) 933 struct vnode *vp; 934 struct thread *td; 935{ 936 struct filedesc *fdp; 937 struct vnode *oldvp; 938 int error; 939 940 fdp = td->td_proc->p_fd; 941 FILEDESC_XLOCK(fdp); 942 if (chroot_allow_open_directories == 0 || 943 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 944 error = chroot_refuse_vdir_fds(fdp); 945 if (error != 0) { 946 FILEDESC_XUNLOCK(fdp); 947 return (error); 948 } 949 } 950 oldvp = fdp->fd_rdir; 951 fdp->fd_rdir = vp; 952 VREF(fdp->fd_rdir); 953 if (!fdp->fd_jdir) { 954 fdp->fd_jdir = vp; 955 VREF(fdp->fd_jdir); 956 } 957 FILEDESC_XUNLOCK(fdp); 958 vrele(oldvp); 959 return (0); 960} 961 962static __inline void 963flags_to_rights(int flags, cap_rights_t *rightsp) 964{ 965 966 if (flags & O_EXEC) { 967 cap_rights_set(rightsp, CAP_FEXECVE); 968 } else { 969 switch ((flags & O_ACCMODE)) { 970 case O_RDONLY: 971 cap_rights_set(rightsp, CAP_READ); 972 break; 973 case O_RDWR: 974 cap_rights_set(rightsp, CAP_READ); 975 /* FALLTHROUGH */ 976 case O_WRONLY: 977 cap_rights_set(rightsp, CAP_WRITE); 978 if (!(flags & (O_APPEND | O_TRUNC))) 979 cap_rights_set(rightsp, CAP_SEEK); 980 break; 981 } 982 } 983 984 if (flags & O_CREAT) 985 cap_rights_set(rightsp, CAP_CREATE); 986 987 if (flags & O_TRUNC) 988 cap_rights_set(rightsp, CAP_FTRUNCATE); 989 990 if (flags & (O_SYNC | O_FSYNC)) 991 cap_rights_set(rightsp, CAP_FSYNC); 992 993 if (flags & (O_EXLOCK | O_SHLOCK)) 994 cap_rights_set(rightsp, CAP_FLOCK); 995} 996 997/* 998 * Check permissions, allocate an open file structure, and call the device 999 * open routine if any. 1000 */ 1001#ifndef _SYS_SYSPROTO_H_ 1002struct open_args { 1003 char *path; 1004 int flags; 1005 int mode; 1006}; 1007#endif 1008int 1009sys_open(td, uap) 1010 struct thread *td; 1011 register struct open_args /* { 1012 char *path; 1013 int flags; 1014 int mode; 1015 } */ *uap; 1016{ 1017 1018 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1019} 1020 1021#ifndef _SYS_SYSPROTO_H_ 1022struct openat_args { 1023 int fd; 1024 char *path; 1025 int flag; 1026 int mode; 1027}; 1028#endif 1029int 1030sys_openat(struct thread *td, struct openat_args *uap) 1031{ 1032 1033 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1034 uap->mode)); 1035} 1036 1037int 1038kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1039 int mode) 1040{ 1041 1042 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1043} 1044 1045int 1046kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1047 int flags, int mode) 1048{ 1049 struct proc *p = td->td_proc; 1050 struct filedesc *fdp = p->p_fd; 1051 struct file *fp; 1052 struct vnode *vp; 1053 struct nameidata nd; 1054 cap_rights_t rights; 1055 int cmode, error, indx; 1056 1057 indx = -1; 1058 1059 AUDIT_ARG_FFLAGS(flags); 1060 AUDIT_ARG_MODE(mode); 1061 /* XXX: audit dirfd */ 1062 cap_rights_init(&rights, CAP_LOOKUP); 1063 flags_to_rights(flags, &rights); 1064 /* 1065 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1066 * may be specified. 1067 */ 1068 if (flags & O_EXEC) { 1069 if (flags & O_ACCMODE) 1070 return (EINVAL); 1071 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1072 return (EINVAL); 1073 } else { 1074 flags = FFLAGS(flags); 1075 } 1076 1077 /* 1078 * Allocate the file descriptor, but don't install a descriptor yet. 1079 */ 1080 error = falloc_noinstall(td, &fp); 1081 if (error != 0) 1082 return (error); 1083 /* 1084 * An extra reference on `fp' has been held for us by 1085 * falloc_noinstall(). 1086 */ 1087 /* Set the flags early so the finit in devfs can pick them up. */ 1088 fp->f_flag = flags & FMASK; 1089 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1090 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1091 &rights, td); 1092 td->td_dupfd = -1; /* XXX check for fdopen */ 1093 error = vn_open(&nd, &flags, cmode, fp); 1094 if (error != 0) { 1095 /* 1096 * If the vn_open replaced the method vector, something 1097 * wonderous happened deep below and we just pass it up 1098 * pretending we know what we do. 1099 */ 1100 if (error == ENXIO && fp->f_ops != &badfileops) 1101 goto success; 1102 1103 /* 1104 * Handle special fdopen() case. bleh. 1105 * 1106 * Don't do this for relative (capability) lookups; we don't 1107 * understand exactly what would happen, and we don't think 1108 * that it ever should. 1109 */ 1110 if (nd.ni_strictrelative == 0 && 1111 (error == ENODEV || error == ENXIO) && 1112 td->td_dupfd >= 0) { 1113 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1114 &indx); 1115 if (error == 0) 1116 goto success; 1117 } 1118 1119 goto bad; 1120 } 1121 td->td_dupfd = 0; 1122 NDFREE(&nd, NDF_ONLY_PNBUF); 1123 vp = nd.ni_vp; 1124 1125 /* 1126 * Store the vnode, for any f_type. Typically, the vnode use 1127 * count is decremented by direct call to vn_closefile() for 1128 * files that switched type in the cdevsw fdopen() method. 1129 */ 1130 fp->f_vnode = vp; 1131 /* 1132 * If the file wasn't claimed by devfs bind it to the normal 1133 * vnode operations here. 1134 */ 1135 if (fp->f_ops == &badfileops) { 1136 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1137 fp->f_seqcount = 1; 1138 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1139 DTYPE_VNODE, vp, &vnops); 1140 } 1141 1142 VOP_UNLOCK(vp, 0); 1143 if (flags & O_TRUNC) { 1144 error = fo_truncate(fp, 0, td->td_ucred, td); 1145 if (error != 0) 1146 goto bad; 1147 } 1148success: 1149 /* 1150 * If we haven't already installed the FD (for dupfdopen), do so now. 1151 */ 1152 if (indx == -1) { 1153 struct filecaps *fcaps; 1154 1155#ifdef CAPABILITIES 1156 if (nd.ni_strictrelative == 1) 1157 fcaps = &nd.ni_filecaps; 1158 else 1159#endif 1160 fcaps = NULL; 1161 error = finstall(td, fp, &indx, flags, fcaps); 1162 /* On success finstall() consumes fcaps. */ 1163 if (error != 0) { 1164 filecaps_free(&nd.ni_filecaps); 1165 goto bad; 1166 } 1167 } else { 1168 filecaps_free(&nd.ni_filecaps); 1169 } 1170 1171 /* 1172 * Release our private reference, leaving the one associated with 1173 * the descriptor table intact. 1174 */ 1175 fdrop(fp, td); 1176 td->td_retval[0] = indx; 1177 return (0); 1178bad: 1179 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1180 fdrop(fp, td); 1181 return (error); 1182} 1183 1184#ifdef COMPAT_43 1185/* 1186 * Create a file. 1187 */ 1188#ifndef _SYS_SYSPROTO_H_ 1189struct ocreat_args { 1190 char *path; 1191 int mode; 1192}; 1193#endif 1194int 1195ocreat(td, uap) 1196 struct thread *td; 1197 register struct ocreat_args /* { 1198 char *path; 1199 int mode; 1200 } */ *uap; 1201{ 1202 1203 return (kern_open(td, uap->path, UIO_USERSPACE, 1204 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1205} 1206#endif /* COMPAT_43 */ 1207 1208/* 1209 * Create a special file. 1210 */ 1211#ifndef _SYS_SYSPROTO_H_ 1212struct mknod_args { 1213 char *path; 1214 int mode; 1215 int dev; 1216}; 1217#endif 1218int 1219sys_mknod(td, uap) 1220 struct thread *td; 1221 register struct mknod_args /* { 1222 char *path; 1223 int mode; 1224 int dev; 1225 } */ *uap; 1226{ 1227 1228 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1229} 1230 1231#ifndef _SYS_SYSPROTO_H_ 1232struct mknodat_args { 1233 int fd; 1234 char *path; 1235 mode_t mode; 1236 dev_t dev; 1237}; 1238#endif 1239int 1240sys_mknodat(struct thread *td, struct mknodat_args *uap) 1241{ 1242 1243 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1244 uap->dev)); 1245} 1246 1247int 1248kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1249 int dev) 1250{ 1251 1252 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1253} 1254 1255int 1256kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1257 int mode, int dev) 1258{ 1259 struct vnode *vp; 1260 struct mount *mp; 1261 struct vattr vattr; 1262 struct nameidata nd; 1263 cap_rights_t rights; 1264 int error, whiteout = 0; 1265 1266 AUDIT_ARG_MODE(mode); 1267 AUDIT_ARG_DEV(dev); 1268 switch (mode & S_IFMT) { 1269 case S_IFCHR: 1270 case S_IFBLK: 1271 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1272 break; 1273 case S_IFMT: 1274 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1275 break; 1276 case S_IFWHT: 1277 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1278 break; 1279 case S_IFIFO: 1280 if (dev == 0) 1281 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1282 /* FALLTHROUGH */ 1283 default: 1284 error = EINVAL; 1285 break; 1286 } 1287 if (error != 0) 1288 return (error); 1289restart: 1290 bwillwrite(); 1291 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1292 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1293 td); 1294 if ((error = namei(&nd)) != 0) 1295 return (error); 1296 vp = nd.ni_vp; 1297 if (vp != NULL) { 1298 NDFREE(&nd, NDF_ONLY_PNBUF); 1299 if (vp == nd.ni_dvp) 1300 vrele(nd.ni_dvp); 1301 else 1302 vput(nd.ni_dvp); 1303 vrele(vp); 1304 return (EEXIST); 1305 } else { 1306 VATTR_NULL(&vattr); 1307 vattr.va_mode = (mode & ALLPERMS) & 1308 ~td->td_proc->p_fd->fd_cmask; 1309 vattr.va_rdev = dev; 1310 whiteout = 0; 1311 1312 switch (mode & S_IFMT) { 1313 case S_IFMT: /* used by badsect to flag bad sectors */ 1314 vattr.va_type = VBAD; 1315 break; 1316 case S_IFCHR: 1317 vattr.va_type = VCHR; 1318 break; 1319 case S_IFBLK: 1320 vattr.va_type = VBLK; 1321 break; 1322 case S_IFWHT: 1323 whiteout = 1; 1324 break; 1325 default: 1326 panic("kern_mknod: invalid mode"); 1327 } 1328 } 1329 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1330 NDFREE(&nd, NDF_ONLY_PNBUF); 1331 vput(nd.ni_dvp); 1332 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1333 return (error); 1334 goto restart; 1335 } 1336#ifdef MAC 1337 if (error == 0 && !whiteout) 1338 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1339 &nd.ni_cnd, &vattr); 1340#endif 1341 if (error == 0) { 1342 if (whiteout) 1343 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1344 else { 1345 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1346 &nd.ni_cnd, &vattr); 1347 if (error == 0) 1348 vput(nd.ni_vp); 1349 } 1350 } 1351 NDFREE(&nd, NDF_ONLY_PNBUF); 1352 vput(nd.ni_dvp); 1353 vn_finished_write(mp); 1354 return (error); 1355} 1356 1357/* 1358 * Create a named pipe. 1359 */ 1360#ifndef _SYS_SYSPROTO_H_ 1361struct mkfifo_args { 1362 char *path; 1363 int mode; 1364}; 1365#endif 1366int 1367sys_mkfifo(td, uap) 1368 struct thread *td; 1369 register struct mkfifo_args /* { 1370 char *path; 1371 int mode; 1372 } */ *uap; 1373{ 1374 1375 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1376} 1377 1378#ifndef _SYS_SYSPROTO_H_ 1379struct mkfifoat_args { 1380 int fd; 1381 char *path; 1382 mode_t mode; 1383}; 1384#endif 1385int 1386sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1387{ 1388 1389 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1390 uap->mode)); 1391} 1392 1393int 1394kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1395{ 1396 1397 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1398} 1399 1400int 1401kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1402 int mode) 1403{ 1404 struct mount *mp; 1405 struct vattr vattr; 1406 struct nameidata nd; 1407 cap_rights_t rights; 1408 int error; 1409 1410 AUDIT_ARG_MODE(mode); 1411restart: 1412 bwillwrite(); 1413 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1414 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1415 td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437#ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442#endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446#ifdef MAC 1447out: 1448#endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453} 1454 1455/* 1456 * Make a hard file link. 1457 */ 1458#ifndef _SYS_SYSPROTO_H_ 1459struct link_args { 1460 char *path; 1461 char *link; 1462}; 1463#endif 1464int 1465sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471{ 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474} 1475 1476#ifndef _SYS_SYSPROTO_H_ 1477struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483}; 1484#endif 1485int 1486sys_linkat(struct thread *td, struct linkat_args *uap) 1487{ 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1496} 1497 1498int hardlink_check_uid = 0; 1499SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503static int hardlink_check_gid = 0; 1504SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509static int 1510can_hardlink(struct vnode *vp, struct ucred *cred) 1511{ 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535} 1536 1537int 1538kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539{ 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542} 1543 1544int 1545kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547{ 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554again: 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1567 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1568 td); 1569 if ((error = namei(&nd)) == 0) { 1570 if (nd.ni_vp != NULL) { 1571 NDFREE(&nd, NDF_ONLY_PNBUF); 1572 if (nd.ni_dvp == nd.ni_vp) 1573 vrele(nd.ni_dvp); 1574 else 1575 vput(nd.ni_dvp); 1576 vrele(nd.ni_vp); 1577 vrele(vp); 1578 return (EEXIST); 1579 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1580 /* 1581 * Cross-device link. No need to recheck 1582 * vp->v_type, since it cannot change, except 1583 * to VBAD. 1584 */ 1585 NDFREE(&nd, NDF_ONLY_PNBUF); 1586 vput(nd.ni_dvp); 1587 vrele(vp); 1588 return (EXDEV); 1589 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1590 error = can_hardlink(vp, td->td_ucred); 1591#ifdef MAC 1592 if (error == 0) 1593 error = mac_vnode_check_link(td->td_ucred, 1594 nd.ni_dvp, vp, &nd.ni_cnd); 1595#endif 1596 if (error != 0) { 1597 vput(vp); 1598 vput(nd.ni_dvp); 1599 NDFREE(&nd, NDF_ONLY_PNBUF); 1600 return (error); 1601 } 1602 error = vn_start_write(vp, &mp, V_NOWAIT); 1603 if (error != 0) { 1604 vput(vp); 1605 vput(nd.ni_dvp); 1606 NDFREE(&nd, NDF_ONLY_PNBUF); 1607 error = vn_start_write(NULL, &mp, 1608 V_XSLEEP | PCATCH); 1609 if (error != 0) 1610 return (error); 1611 goto again; 1612 } 1613 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1614 VOP_UNLOCK(vp, 0); 1615 vput(nd.ni_dvp); 1616 vn_finished_write(mp); 1617 NDFREE(&nd, NDF_ONLY_PNBUF); 1618 } else { 1619 vput(nd.ni_dvp); 1620 NDFREE(&nd, NDF_ONLY_PNBUF); 1621 vrele(vp); 1622 goto again; 1623 } 1624 } 1625 vrele(vp); 1626 return (error); 1627} 1628 1629/* 1630 * Make a symbolic link. 1631 */ 1632#ifndef _SYS_SYSPROTO_H_ 1633struct symlink_args { 1634 char *path; 1635 char *link; 1636}; 1637#endif 1638int 1639sys_symlink(td, uap) 1640 struct thread *td; 1641 register struct symlink_args /* { 1642 char *path; 1643 char *link; 1644 } */ *uap; 1645{ 1646 1647 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1648} 1649 1650#ifndef _SYS_SYSPROTO_H_ 1651struct symlinkat_args { 1652 char *path; 1653 int fd; 1654 char *path2; 1655}; 1656#endif 1657int 1658sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1659{ 1660 1661 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1662 UIO_USERSPACE)); 1663} 1664 1665int 1666kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1667{ 1668 1669 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1670} 1671 1672int 1673kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1674 enum uio_seg segflg) 1675{ 1676 struct mount *mp; 1677 struct vattr vattr; 1678 char *syspath; 1679 struct nameidata nd; 1680 int error; 1681 cap_rights_t rights; 1682 1683 if (segflg == UIO_SYSSPACE) { 1684 syspath = path1; 1685 } else { 1686 syspath = uma_zalloc(namei_zone, M_WAITOK); 1687 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1688 goto out; 1689 } 1690 AUDIT_ARG_TEXT(syspath); 1691restart: 1692 bwillwrite(); 1693 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1694 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1695 td); 1696 if ((error = namei(&nd)) != 0) 1697 goto out; 1698 if (nd.ni_vp) { 1699 NDFREE(&nd, NDF_ONLY_PNBUF); 1700 if (nd.ni_vp == nd.ni_dvp) 1701 vrele(nd.ni_dvp); 1702 else 1703 vput(nd.ni_dvp); 1704 vrele(nd.ni_vp); 1705 error = EEXIST; 1706 goto out; 1707 } 1708 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1709 NDFREE(&nd, NDF_ONLY_PNBUF); 1710 vput(nd.ni_dvp); 1711 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1712 goto out; 1713 goto restart; 1714 } 1715 VATTR_NULL(&vattr); 1716 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1717#ifdef MAC 1718 vattr.va_type = VLNK; 1719 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1720 &vattr); 1721 if (error != 0) 1722 goto out2; 1723#endif 1724 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1725 if (error == 0) 1726 vput(nd.ni_vp); 1727#ifdef MAC 1728out2: 1729#endif 1730 NDFREE(&nd, NDF_ONLY_PNBUF); 1731 vput(nd.ni_dvp); 1732 vn_finished_write(mp); 1733out: 1734 if (segflg != UIO_SYSSPACE) 1735 uma_zfree(namei_zone, syspath); 1736 return (error); 1737} 1738 1739/* 1740 * Delete a whiteout from the filesystem. 1741 */ 1742int 1743sys_undelete(td, uap) 1744 struct thread *td; 1745 register struct undelete_args /* { 1746 char *path; 1747 } */ *uap; 1748{ 1749 struct mount *mp; 1750 struct nameidata nd; 1751 int error; 1752 1753restart: 1754 bwillwrite(); 1755 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1756 UIO_USERSPACE, uap->path, td); 1757 error = namei(&nd); 1758 if (error != 0) 1759 return (error); 1760 1761 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1762 NDFREE(&nd, NDF_ONLY_PNBUF); 1763 if (nd.ni_vp == nd.ni_dvp) 1764 vrele(nd.ni_dvp); 1765 else 1766 vput(nd.ni_dvp); 1767 if (nd.ni_vp) 1768 vrele(nd.ni_vp); 1769 return (EEXIST); 1770 } 1771 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1772 NDFREE(&nd, NDF_ONLY_PNBUF); 1773 vput(nd.ni_dvp); 1774 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1775 return (error); 1776 goto restart; 1777 } 1778 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1779 NDFREE(&nd, NDF_ONLY_PNBUF); 1780 vput(nd.ni_dvp); 1781 vn_finished_write(mp); 1782 return (error); 1783} 1784 1785/* 1786 * Delete a name from the filesystem. 1787 */ 1788#ifndef _SYS_SYSPROTO_H_ 1789struct unlink_args { 1790 char *path; 1791}; 1792#endif 1793int 1794sys_unlink(td, uap) 1795 struct thread *td; 1796 struct unlink_args /* { 1797 char *path; 1798 } */ *uap; 1799{ 1800 1801 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1802} 1803 1804#ifndef _SYS_SYSPROTO_H_ 1805struct unlinkat_args { 1806 int fd; 1807 char *path; 1808 int flag; 1809}; 1810#endif 1811int 1812sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1813{ 1814 int flag = uap->flag; 1815 int fd = uap->fd; 1816 char *path = uap->path; 1817 1818 if (flag & ~AT_REMOVEDIR) 1819 return (EINVAL); 1820 1821 if (flag & AT_REMOVEDIR) 1822 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1823 else 1824 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1825} 1826 1827int 1828kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1829{ 1830 1831 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1832} 1833 1834int 1835kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1836 ino_t oldinum) 1837{ 1838 struct mount *mp; 1839 struct vnode *vp; 1840 struct nameidata nd; 1841 struct stat sb; 1842 cap_rights_t rights; 1843 int error; 1844 1845restart: 1846 bwillwrite(); 1847 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1848 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1849 if ((error = namei(&nd)) != 0) 1850 return (error == EINVAL ? EPERM : error); 1851 vp = nd.ni_vp; 1852 if (vp->v_type == VDIR && oldinum == 0) { 1853 error = EPERM; /* POSIX */ 1854 } else if (oldinum != 0 && 1855 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1856 sb.st_ino != oldinum) { 1857 error = EIDRM; /* Identifier removed */ 1858 } else { 1859 /* 1860 * The root of a mounted filesystem cannot be deleted. 1861 * 1862 * XXX: can this only be a VDIR case? 1863 */ 1864 if (vp->v_vflag & VV_ROOT) 1865 error = EBUSY; 1866 } 1867 if (error == 0) { 1868 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1869 NDFREE(&nd, NDF_ONLY_PNBUF); 1870 vput(nd.ni_dvp); 1871 if (vp == nd.ni_dvp) 1872 vrele(vp); 1873 else 1874 vput(vp); 1875 if ((error = vn_start_write(NULL, &mp, 1876 V_XSLEEP | PCATCH)) != 0) 1877 return (error); 1878 goto restart; 1879 } 1880#ifdef MAC 1881 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1882 &nd.ni_cnd); 1883 if (error != 0) 1884 goto out; 1885#endif 1886 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1887 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1888#ifdef MAC 1889out: 1890#endif 1891 vn_finished_write(mp); 1892 } 1893 NDFREE(&nd, NDF_ONLY_PNBUF); 1894 vput(nd.ni_dvp); 1895 if (vp == nd.ni_dvp) 1896 vrele(vp); 1897 else 1898 vput(vp); 1899 return (error); 1900} 1901 1902/* 1903 * Reposition read/write file offset. 1904 */ 1905#ifndef _SYS_SYSPROTO_H_ 1906struct lseek_args { 1907 int fd; 1908 int pad; 1909 off_t offset; 1910 int whence; 1911}; 1912#endif 1913int 1914sys_lseek(td, uap) 1915 struct thread *td; 1916 register struct lseek_args /* { 1917 int fd; 1918 int pad; 1919 off_t offset; 1920 int whence; 1921 } */ *uap; 1922{ 1923 struct file *fp; 1924 cap_rights_t rights; 1925 int error; 1926 1927 AUDIT_ARG_FD(uap->fd); 1928 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1929 if (error != 0) 1930 return (error); 1931 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1932 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1933 fdrop(fp, td); 1934 return (error); 1935} 1936 1937#if defined(COMPAT_43) 1938/* 1939 * Reposition read/write file offset. 1940 */ 1941#ifndef _SYS_SYSPROTO_H_ 1942struct olseek_args { 1943 int fd; 1944 long offset; 1945 int whence; 1946}; 1947#endif 1948int 1949olseek(td, uap) 1950 struct thread *td; 1951 register struct olseek_args /* { 1952 int fd; 1953 long offset; 1954 int whence; 1955 } */ *uap; 1956{ 1957 struct lseek_args /* { 1958 int fd; 1959 int pad; 1960 off_t offset; 1961 int whence; 1962 } */ nuap; 1963 1964 nuap.fd = uap->fd; 1965 nuap.offset = uap->offset; 1966 nuap.whence = uap->whence; 1967 return (sys_lseek(td, &nuap)); 1968} 1969#endif /* COMPAT_43 */ 1970 1971/* Version with the 'pad' argument */ 1972int 1973freebsd6_lseek(td, uap) 1974 struct thread *td; 1975 register struct freebsd6_lseek_args *uap; 1976{ 1977 struct lseek_args ouap; 1978 1979 ouap.fd = uap->fd; 1980 ouap.offset = uap->offset; 1981 ouap.whence = uap->whence; 1982 return (sys_lseek(td, &ouap)); 1983} 1984 1985/* 1986 * Check access permissions using passed credentials. 1987 */ 1988static int 1989vn_access(vp, user_flags, cred, td) 1990 struct vnode *vp; 1991 int user_flags; 1992 struct ucred *cred; 1993 struct thread *td; 1994{ 1995 accmode_t accmode; 1996 int error; 1997 1998 /* Flags == 0 means only check for existence. */ 1999 error = 0; 2000 if (user_flags) { 2001 accmode = 0; 2002 if (user_flags & R_OK) 2003 accmode |= VREAD; 2004 if (user_flags & W_OK) 2005 accmode |= VWRITE; 2006 if (user_flags & X_OK) 2007 accmode |= VEXEC; 2008#ifdef MAC 2009 error = mac_vnode_check_access(cred, vp, accmode); 2010 if (error != 0) 2011 return (error); 2012#endif 2013 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2014 error = VOP_ACCESS(vp, accmode, cred, td); 2015 } 2016 return (error); 2017} 2018 2019/* 2020 * Check access permissions using "real" credentials. 2021 */ 2022#ifndef _SYS_SYSPROTO_H_ 2023struct access_args { 2024 char *path; 2025 int amode; 2026}; 2027#endif 2028int 2029sys_access(td, uap) 2030 struct thread *td; 2031 register struct access_args /* { 2032 char *path; 2033 int amode; 2034 } */ *uap; 2035{ 2036 2037 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2038} 2039 2040#ifndef _SYS_SYSPROTO_H_ 2041struct faccessat_args { 2042 int dirfd; 2043 char *path; 2044 int amode; 2045 int flag; 2046} 2047#endif 2048int 2049sys_faccessat(struct thread *td, struct faccessat_args *uap) 2050{ 2051 2052 if (uap->flag & ~AT_EACCESS) 2053 return (EINVAL); 2054 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2055 uap->amode)); 2056} 2057 2058int 2059kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2060{ 2061 2062 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2063} 2064 2065int 2066kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2067 int flag, int amode) 2068{ 2069 struct ucred *cred, *tmpcred; 2070 struct vnode *vp; 2071 struct nameidata nd; 2072 cap_rights_t rights; 2073 int error; 2074 2075 /* 2076 * Create and modify a temporary credential instead of one that 2077 * is potentially shared. 2078 */ 2079 if (!(flag & AT_EACCESS)) { 2080 cred = td->td_ucred; 2081 tmpcred = crdup(cred); 2082 tmpcred->cr_uid = cred->cr_ruid; 2083 tmpcred->cr_groups[0] = cred->cr_rgid; 2084 td->td_ucred = tmpcred; 2085 } else 2086 cred = tmpcred = td->td_ucred; 2087 AUDIT_ARG_VALUE(amode); 2088 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2089 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2090 td); 2091 if ((error = namei(&nd)) != 0) 2092 goto out1; 2093 vp = nd.ni_vp; 2094 2095 error = vn_access(vp, amode, tmpcred, td); 2096 NDFREE(&nd, NDF_ONLY_PNBUF); 2097 vput(vp); 2098out1: 2099 if (!(flag & AT_EACCESS)) { 2100 td->td_ucred = cred; 2101 crfree(tmpcred); 2102 } 2103 return (error); 2104} 2105 2106/* 2107 * Check access permissions using "effective" credentials. 2108 */ 2109#ifndef _SYS_SYSPROTO_H_ 2110struct eaccess_args { 2111 char *path; 2112 int amode; 2113}; 2114#endif 2115int 2116sys_eaccess(td, uap) 2117 struct thread *td; 2118 register struct eaccess_args /* { 2119 char *path; 2120 int amode; 2121 } */ *uap; 2122{ 2123 2124 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2125} 2126 2127int 2128kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2129{ 2130 2131 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2132} 2133 2134#if defined(COMPAT_43) 2135/* 2136 * Get file status; this version follows links. 2137 */ 2138#ifndef _SYS_SYSPROTO_H_ 2139struct ostat_args { 2140 char *path; 2141 struct ostat *ub; 2142}; 2143#endif 2144int 2145ostat(td, uap) 2146 struct thread *td; 2147 register struct ostat_args /* { 2148 char *path; 2149 struct ostat *ub; 2150 } */ *uap; 2151{ 2152 struct stat sb; 2153 struct ostat osb; 2154 int error; 2155 2156 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2157 if (error != 0) 2158 return (error); 2159 cvtstat(&sb, &osb); 2160 return (copyout(&osb, uap->ub, sizeof (osb))); 2161} 2162 2163/* 2164 * Get file status; this version does not follow links. 2165 */ 2166#ifndef _SYS_SYSPROTO_H_ 2167struct olstat_args { 2168 char *path; 2169 struct ostat *ub; 2170}; 2171#endif 2172int 2173olstat(td, uap) 2174 struct thread *td; 2175 register struct olstat_args /* { 2176 char *path; 2177 struct ostat *ub; 2178 } */ *uap; 2179{ 2180 struct stat sb; 2181 struct ostat osb; 2182 int error; 2183 2184 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2185 if (error != 0) 2186 return (error); 2187 cvtstat(&sb, &osb); 2188 return (copyout(&osb, uap->ub, sizeof (osb))); 2189} 2190 2191/* 2192 * Convert from an old to a new stat structure. 2193 */ 2194void 2195cvtstat(st, ost) 2196 struct stat *st; 2197 struct ostat *ost; 2198{ 2199 2200 bzero(ost, sizeof(*ost)); 2201 ost->st_dev = st->st_dev; 2202 ost->st_ino = st->st_ino; 2203 ost->st_mode = st->st_mode; 2204 ost->st_nlink = st->st_nlink; 2205 ost->st_uid = st->st_uid; 2206 ost->st_gid = st->st_gid; 2207 ost->st_rdev = st->st_rdev; 2208 if (st->st_size < (quad_t)1 << 32) 2209 ost->st_size = st->st_size; 2210 else 2211 ost->st_size = -2; 2212 ost->st_atim = st->st_atim; 2213 ost->st_mtim = st->st_mtim; 2214 ost->st_ctim = st->st_ctim; 2215 ost->st_blksize = st->st_blksize; 2216 ost->st_blocks = st->st_blocks; 2217 ost->st_flags = st->st_flags; 2218 ost->st_gen = st->st_gen; 2219} 2220#endif /* COMPAT_43 */ 2221 2222/* 2223 * Get file status; this version follows links. 2224 */ 2225#ifndef _SYS_SYSPROTO_H_ 2226struct stat_args { 2227 char *path; 2228 struct stat *ub; 2229}; 2230#endif 2231int 2232sys_stat(td, uap) 2233 struct thread *td; 2234 register struct stat_args /* { 2235 char *path; 2236 struct stat *ub; 2237 } */ *uap; 2238{ 2239 struct stat sb; 2240 int error; 2241 2242 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2243 if (error == 0) 2244 error = copyout(&sb, uap->ub, sizeof (sb)); 2245 return (error); 2246} 2247 2248#ifndef _SYS_SYSPROTO_H_ 2249struct fstatat_args { 2250 int fd; 2251 char *path; 2252 struct stat *buf; 2253 int flag; 2254} 2255#endif 2256int 2257sys_fstatat(struct thread *td, struct fstatat_args *uap) 2258{ 2259 struct stat sb; 2260 int error; 2261 2262 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2263 UIO_USERSPACE, &sb); 2264 if (error == 0) 2265 error = copyout(&sb, uap->buf, sizeof (sb)); 2266 return (error); 2267} 2268 2269int 2270kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2271{ 2272 2273 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2274} 2275 2276int 2277kern_statat(struct thread *td, int flag, int fd, char *path, 2278 enum uio_seg pathseg, struct stat *sbp) 2279{ 2280 2281 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2282} 2283 2284int 2285kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2286 enum uio_seg pathseg, struct stat *sbp, 2287 void (*hook)(struct vnode *vp, struct stat *sbp)) 2288{ 2289 struct nameidata nd; 2290 struct stat sb; 2291 cap_rights_t rights; 2292 int error; 2293 2294 if (flag & ~AT_SYMLINK_NOFOLLOW) 2295 return (EINVAL); 2296 2297 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2298 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2299 cap_rights_init(&rights, CAP_FSTAT), td); 2300 2301 if ((error = namei(&nd)) != 0) 2302 return (error); 2303 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2304 if (error == 0) { 2305 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2306 if (S_ISREG(sb.st_mode)) 2307 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2308 if (__predict_false(hook != NULL)) 2309 hook(nd.ni_vp, &sb); 2310 } 2311 NDFREE(&nd, NDF_ONLY_PNBUF); 2312 vput(nd.ni_vp); 2313 if (error != 0) 2314 return (error); 2315 *sbp = sb; 2316#ifdef KTRACE 2317 if (KTRPOINT(td, KTR_STRUCT)) 2318 ktrstat(&sb); 2319#endif 2320 return (0); 2321} 2322 2323/* 2324 * Get file status; this version does not follow links. 2325 */ 2326#ifndef _SYS_SYSPROTO_H_ 2327struct lstat_args { 2328 char *path; 2329 struct stat *ub; 2330}; 2331#endif 2332int 2333sys_lstat(td, uap) 2334 struct thread *td; 2335 register struct lstat_args /* { 2336 char *path; 2337 struct stat *ub; 2338 } */ *uap; 2339{ 2340 struct stat sb; 2341 int error; 2342 2343 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2344 if (error == 0) 2345 error = copyout(&sb, uap->ub, sizeof (sb)); 2346 return (error); 2347} 2348 2349int 2350kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2351{ 2352 2353 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2354 sbp)); 2355} 2356 2357/* 2358 * Implementation of the NetBSD [l]stat() functions. 2359 */ 2360void 2361cvtnstat(sb, nsb) 2362 struct stat *sb; 2363 struct nstat *nsb; 2364{ 2365 2366 bzero(nsb, sizeof *nsb); 2367 nsb->st_dev = sb->st_dev; 2368 nsb->st_ino = sb->st_ino; 2369 nsb->st_mode = sb->st_mode; 2370 nsb->st_nlink = sb->st_nlink; 2371 nsb->st_uid = sb->st_uid; 2372 nsb->st_gid = sb->st_gid; 2373 nsb->st_rdev = sb->st_rdev; 2374 nsb->st_atim = sb->st_atim; 2375 nsb->st_mtim = sb->st_mtim; 2376 nsb->st_ctim = sb->st_ctim; 2377 nsb->st_size = sb->st_size; 2378 nsb->st_blocks = sb->st_blocks; 2379 nsb->st_blksize = sb->st_blksize; 2380 nsb->st_flags = sb->st_flags; 2381 nsb->st_gen = sb->st_gen; 2382 nsb->st_birthtim = sb->st_birthtim; 2383} 2384 2385#ifndef _SYS_SYSPROTO_H_ 2386struct nstat_args { 2387 char *path; 2388 struct nstat *ub; 2389}; 2390#endif 2391int 2392sys_nstat(td, uap) 2393 struct thread *td; 2394 register struct nstat_args /* { 2395 char *path; 2396 struct nstat *ub; 2397 } */ *uap; 2398{ 2399 struct stat sb; 2400 struct nstat nsb; 2401 int error; 2402 2403 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2404 if (error != 0) 2405 return (error); 2406 cvtnstat(&sb, &nsb); 2407 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2408} 2409 2410/* 2411 * NetBSD lstat. Get file status; this version does not follow links. 2412 */ 2413#ifndef _SYS_SYSPROTO_H_ 2414struct lstat_args { 2415 char *path; 2416 struct stat *ub; 2417}; 2418#endif 2419int 2420sys_nlstat(td, uap) 2421 struct thread *td; 2422 register struct nlstat_args /* { 2423 char *path; 2424 struct nstat *ub; 2425 } */ *uap; 2426{ 2427 struct stat sb; 2428 struct nstat nsb; 2429 int error; 2430 2431 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2432 if (error != 0) 2433 return (error); 2434 cvtnstat(&sb, &nsb); 2435 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2436} 2437 2438/* 2439 * Get configurable pathname variables. 2440 */ 2441#ifndef _SYS_SYSPROTO_H_ 2442struct pathconf_args { 2443 char *path; 2444 int name; 2445}; 2446#endif 2447int 2448sys_pathconf(td, uap) 2449 struct thread *td; 2450 register struct pathconf_args /* { 2451 char *path; 2452 int name; 2453 } */ *uap; 2454{ 2455 2456 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2457} 2458 2459#ifndef _SYS_SYSPROTO_H_ 2460struct lpathconf_args { 2461 char *path; 2462 int name; 2463}; 2464#endif 2465int 2466sys_lpathconf(td, uap) 2467 struct thread *td; 2468 register struct lpathconf_args /* { 2469 char *path; 2470 int name; 2471 } */ *uap; 2472{ 2473 2474 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2475 NOFOLLOW)); 2476} 2477 2478int 2479kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2480 u_long flags) 2481{ 2482 struct nameidata nd; 2483 int error; 2484 2485 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2486 pathseg, path, td); 2487 if ((error = namei(&nd)) != 0) 2488 return (error); 2489 NDFREE(&nd, NDF_ONLY_PNBUF); 2490 2491 /* If asynchronous I/O is available, it works for all files. */ 2492 if (name == _PC_ASYNC_IO) 2493 td->td_retval[0] = async_io_version; 2494 else 2495 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2496 vput(nd.ni_vp); 2497 return (error); 2498} 2499 2500/* 2501 * Return target name of a symbolic link. 2502 */ 2503#ifndef _SYS_SYSPROTO_H_ 2504struct readlink_args { 2505 char *path; 2506 char *buf; 2507 size_t count; 2508}; 2509#endif 2510int 2511sys_readlink(td, uap) 2512 struct thread *td; 2513 register struct readlink_args /* { 2514 char *path; 2515 char *buf; 2516 size_t count; 2517 } */ *uap; 2518{ 2519 2520 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2521 UIO_USERSPACE, uap->count)); 2522} 2523#ifndef _SYS_SYSPROTO_H_ 2524struct readlinkat_args { 2525 int fd; 2526 char *path; 2527 char *buf; 2528 size_t bufsize; 2529}; 2530#endif 2531int 2532sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2533{ 2534 2535 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2536 uap->buf, UIO_USERSPACE, uap->bufsize)); 2537} 2538 2539int 2540kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2541 enum uio_seg bufseg, size_t count) 2542{ 2543 2544 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2545 count)); 2546} 2547 2548int 2549kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2550 char *buf, enum uio_seg bufseg, size_t count) 2551{ 2552 struct vnode *vp; 2553 struct iovec aiov; 2554 struct uio auio; 2555 struct nameidata nd; 2556 int error; 2557 2558 if (count > IOSIZE_MAX) 2559 return (EINVAL); 2560 2561 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2562 pathseg, path, fd, td); 2563 2564 if ((error = namei(&nd)) != 0) 2565 return (error); 2566 NDFREE(&nd, NDF_ONLY_PNBUF); 2567 vp = nd.ni_vp; 2568#ifdef MAC 2569 error = mac_vnode_check_readlink(td->td_ucred, vp); 2570 if (error != 0) { 2571 vput(vp); 2572 return (error); 2573 } 2574#endif 2575 if (vp->v_type != VLNK) 2576 error = EINVAL; 2577 else { 2578 aiov.iov_base = buf; 2579 aiov.iov_len = count; 2580 auio.uio_iov = &aiov; 2581 auio.uio_iovcnt = 1; 2582 auio.uio_offset = 0; 2583 auio.uio_rw = UIO_READ; 2584 auio.uio_segflg = bufseg; 2585 auio.uio_td = td; 2586 auio.uio_resid = count; 2587 error = VOP_READLINK(vp, &auio, td->td_ucred); 2588 td->td_retval[0] = count - auio.uio_resid; 2589 } 2590 vput(vp); 2591 return (error); 2592} 2593 2594/* 2595 * Common implementation code for chflags() and fchflags(). 2596 */ 2597static int 2598setfflags(td, vp, flags) 2599 struct thread *td; 2600 struct vnode *vp; 2601 u_long flags; 2602{ 2603 struct mount *mp; 2604 struct vattr vattr; 2605 int error; 2606 2607 /* We can't support the value matching VNOVAL. */ 2608 if (flags == VNOVAL) 2609 return (EOPNOTSUPP); 2610 2611 /* 2612 * Prevent non-root users from setting flags on devices. When 2613 * a device is reused, users can retain ownership of the device 2614 * if they are allowed to set flags and programs assume that 2615 * chown can't fail when done as root. 2616 */ 2617 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2618 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2619 if (error != 0) 2620 return (error); 2621 } 2622 2623 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2624 return (error); 2625 VATTR_NULL(&vattr); 2626 vattr.va_flags = flags; 2627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2628#ifdef MAC 2629 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2630 if (error == 0) 2631#endif 2632 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2633 VOP_UNLOCK(vp, 0); 2634 vn_finished_write(mp); 2635 return (error); 2636} 2637 2638/* 2639 * Change flags of a file given a path name. 2640 */ 2641#ifndef _SYS_SYSPROTO_H_ 2642struct chflags_args { 2643 const char *path; 2644 u_long flags; 2645}; 2646#endif 2647int 2648sys_chflags(td, uap) 2649 struct thread *td; 2650 register struct chflags_args /* { 2651 const char *path; 2652 u_long flags; 2653 } */ *uap; 2654{ 2655 2656 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2657} 2658 2659#ifndef _SYS_SYSPROTO_H_ 2660struct chflagsat_args { 2661 int fd; 2662 const char *path; 2663 u_long flags; 2664 int atflag; 2665} 2666#endif 2667int 2668sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2669{ 2670 int fd = uap->fd; 2671 const char *path = uap->path; 2672 u_long flags = uap->flags; 2673 int atflag = uap->atflag; 2674 2675 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2676 return (EINVAL); 2677 2678 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2679} 2680 2681static int 2682kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2683 u_long flags) 2684{ 2685 2686 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2687} 2688 2689/* 2690 * Same as chflags() but doesn't follow symlinks. 2691 */ 2692int 2693sys_lchflags(td, uap) 2694 struct thread *td; 2695 register struct lchflags_args /* { 2696 const char *path; 2697 u_long flags; 2698 } */ *uap; 2699{ 2700 2701 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2702 uap->flags, AT_SYMLINK_NOFOLLOW)); 2703} 2704 2705static int 2706kern_chflagsat(struct thread *td, int fd, const char *path, 2707 enum uio_seg pathseg, u_long flags, int atflag) 2708{ 2709 struct nameidata nd; 2710 cap_rights_t rights; 2711 int error, follow; 2712 2713 AUDIT_ARG_FFLAGS(flags); 2714 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2715 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2716 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2717 if ((error = namei(&nd)) != 0) 2718 return (error); 2719 NDFREE(&nd, NDF_ONLY_PNBUF); 2720 error = setfflags(td, nd.ni_vp, flags); 2721 vrele(nd.ni_vp); 2722 return (error); 2723} 2724 2725/* 2726 * Change flags of a file given a file descriptor. 2727 */ 2728#ifndef _SYS_SYSPROTO_H_ 2729struct fchflags_args { 2730 int fd; 2731 u_long flags; 2732}; 2733#endif 2734int 2735sys_fchflags(td, uap) 2736 struct thread *td; 2737 register struct fchflags_args /* { 2738 int fd; 2739 u_long flags; 2740 } */ *uap; 2741{ 2742 struct file *fp; 2743 cap_rights_t rights; 2744 int error; 2745 2746 AUDIT_ARG_FD(uap->fd); 2747 AUDIT_ARG_FFLAGS(uap->flags); 2748 error = getvnode(td->td_proc->p_fd, uap->fd, 2749 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2750 if (error != 0) 2751 return (error); 2752#ifdef AUDIT 2753 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2754 AUDIT_ARG_VNODE1(fp->f_vnode); 2755 VOP_UNLOCK(fp->f_vnode, 0); 2756#endif 2757 error = setfflags(td, fp->f_vnode, uap->flags); 2758 fdrop(fp, td); 2759 return (error); 2760} 2761 2762/* 2763 * Common implementation code for chmod(), lchmod() and fchmod(). 2764 */ 2765int 2766setfmode(td, cred, vp, mode) 2767 struct thread *td; 2768 struct ucred *cred; 2769 struct vnode *vp; 2770 int mode; 2771{ 2772 struct mount *mp; 2773 struct vattr vattr; 2774 int error; 2775 2776 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2777 return (error); 2778 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2779 VATTR_NULL(&vattr); 2780 vattr.va_mode = mode & ALLPERMS; 2781#ifdef MAC 2782 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2783 if (error == 0) 2784#endif 2785 error = VOP_SETATTR(vp, &vattr, cred); 2786 VOP_UNLOCK(vp, 0); 2787 vn_finished_write(mp); 2788 return (error); 2789} 2790 2791/* 2792 * Change mode of a file given path name. 2793 */ 2794#ifndef _SYS_SYSPROTO_H_ 2795struct chmod_args { 2796 char *path; 2797 int mode; 2798}; 2799#endif 2800int 2801sys_chmod(td, uap) 2802 struct thread *td; 2803 register struct chmod_args /* { 2804 char *path; 2805 int mode; 2806 } */ *uap; 2807{ 2808 2809 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2810} 2811 2812#ifndef _SYS_SYSPROTO_H_ 2813struct fchmodat_args { 2814 int dirfd; 2815 char *path; 2816 mode_t mode; 2817 int flag; 2818} 2819#endif 2820int 2821sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2822{ 2823 int flag = uap->flag; 2824 int fd = uap->fd; 2825 char *path = uap->path; 2826 mode_t mode = uap->mode; 2827 2828 if (flag & ~AT_SYMLINK_NOFOLLOW) 2829 return (EINVAL); 2830 2831 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2832} 2833 2834int 2835kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2836{ 2837 2838 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2839} 2840 2841/* 2842 * Change mode of a file given path name (don't follow links.) 2843 */ 2844#ifndef _SYS_SYSPROTO_H_ 2845struct lchmod_args { 2846 char *path; 2847 int mode; 2848}; 2849#endif 2850int 2851sys_lchmod(td, uap) 2852 struct thread *td; 2853 register struct lchmod_args /* { 2854 char *path; 2855 int mode; 2856 } */ *uap; 2857{ 2858 2859 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2860 uap->mode, AT_SYMLINK_NOFOLLOW)); 2861} 2862 2863int 2864kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2865 mode_t mode, int flag) 2866{ 2867 struct nameidata nd; 2868 cap_rights_t rights; 2869 int error, follow; 2870 2871 AUDIT_ARG_MODE(mode); 2872 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2873 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2874 cap_rights_init(&rights, CAP_FCHMOD), td); 2875 if ((error = namei(&nd)) != 0) 2876 return (error); 2877 NDFREE(&nd, NDF_ONLY_PNBUF); 2878 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2879 vrele(nd.ni_vp); 2880 return (error); 2881} 2882 2883/* 2884 * Change mode of a file given a file descriptor. 2885 */ 2886#ifndef _SYS_SYSPROTO_H_ 2887struct fchmod_args { 2888 int fd; 2889 int mode; 2890}; 2891#endif 2892int 2893sys_fchmod(struct thread *td, struct fchmod_args *uap) 2894{ 2895 struct file *fp; 2896 cap_rights_t rights; 2897 int error; 2898 2899 AUDIT_ARG_FD(uap->fd); 2900 AUDIT_ARG_MODE(uap->mode); 2901 2902 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2903 if (error != 0) 2904 return (error); 2905 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2906 fdrop(fp, td); 2907 return (error); 2908} 2909 2910/* 2911 * Common implementation for chown(), lchown(), and fchown() 2912 */ 2913int 2914setfown(td, cred, vp, uid, gid) 2915 struct thread *td; 2916 struct ucred *cred; 2917 struct vnode *vp; 2918 uid_t uid; 2919 gid_t gid; 2920{ 2921 struct mount *mp; 2922 struct vattr vattr; 2923 int error; 2924 2925 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2926 return (error); 2927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2928 VATTR_NULL(&vattr); 2929 vattr.va_uid = uid; 2930 vattr.va_gid = gid; 2931#ifdef MAC 2932 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2933 vattr.va_gid); 2934 if (error == 0) 2935#endif 2936 error = VOP_SETATTR(vp, &vattr, cred); 2937 VOP_UNLOCK(vp, 0); 2938 vn_finished_write(mp); 2939 return (error); 2940} 2941 2942/* 2943 * Set ownership given a path name. 2944 */ 2945#ifndef _SYS_SYSPROTO_H_ 2946struct chown_args { 2947 char *path; 2948 int uid; 2949 int gid; 2950}; 2951#endif 2952int 2953sys_chown(td, uap) 2954 struct thread *td; 2955 register struct chown_args /* { 2956 char *path; 2957 int uid; 2958 int gid; 2959 } */ *uap; 2960{ 2961 2962 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2963} 2964 2965#ifndef _SYS_SYSPROTO_H_ 2966struct fchownat_args { 2967 int fd; 2968 const char * path; 2969 uid_t uid; 2970 gid_t gid; 2971 int flag; 2972}; 2973#endif 2974int 2975sys_fchownat(struct thread *td, struct fchownat_args *uap) 2976{ 2977 int flag; 2978 2979 flag = uap->flag; 2980 if (flag & ~AT_SYMLINK_NOFOLLOW) 2981 return (EINVAL); 2982 2983 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2984 uap->gid, uap->flag)); 2985} 2986 2987int 2988kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2989 int gid) 2990{ 2991 2992 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2993} 2994 2995int 2996kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2997 int uid, int gid, int flag) 2998{ 2999 struct nameidata nd; 3000 cap_rights_t rights; 3001 int error, follow; 3002 3003 AUDIT_ARG_OWNER(uid, gid); 3004 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3005 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3006 cap_rights_init(&rights, CAP_FCHOWN), td); 3007 3008 if ((error = namei(&nd)) != 0) 3009 return (error); 3010 NDFREE(&nd, NDF_ONLY_PNBUF); 3011 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3012 vrele(nd.ni_vp); 3013 return (error); 3014} 3015 3016/* 3017 * Set ownership given a path name, do not cross symlinks. 3018 */ 3019#ifndef _SYS_SYSPROTO_H_ 3020struct lchown_args { 3021 char *path; 3022 int uid; 3023 int gid; 3024}; 3025#endif 3026int 3027sys_lchown(td, uap) 3028 struct thread *td; 3029 register struct lchown_args /* { 3030 char *path; 3031 int uid; 3032 int gid; 3033 } */ *uap; 3034{ 3035 3036 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3037} 3038 3039int 3040kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3041 int gid) 3042{ 3043 3044 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3045 AT_SYMLINK_NOFOLLOW)); 3046} 3047 3048/* 3049 * Set ownership given a file descriptor. 3050 */ 3051#ifndef _SYS_SYSPROTO_H_ 3052struct fchown_args { 3053 int fd; 3054 int uid; 3055 int gid; 3056}; 3057#endif 3058int 3059sys_fchown(td, uap) 3060 struct thread *td; 3061 register struct fchown_args /* { 3062 int fd; 3063 int uid; 3064 int gid; 3065 } */ *uap; 3066{ 3067 struct file *fp; 3068 cap_rights_t rights; 3069 int error; 3070 3071 AUDIT_ARG_FD(uap->fd); 3072 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3073 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3074 if (error != 0) 3075 return (error); 3076 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3077 fdrop(fp, td); 3078 return (error); 3079} 3080 3081/* 3082 * Common implementation code for utimes(), lutimes(), and futimes(). 3083 */ 3084static int 3085getutimes(usrtvp, tvpseg, tsp) 3086 const struct timeval *usrtvp; 3087 enum uio_seg tvpseg; 3088 struct timespec *tsp; 3089{ 3090 struct timeval tv[2]; 3091 const struct timeval *tvp; 3092 int error; 3093 3094 if (usrtvp == NULL) { 3095 vfs_timestamp(&tsp[0]); 3096 tsp[1] = tsp[0]; 3097 } else { 3098 if (tvpseg == UIO_SYSSPACE) { 3099 tvp = usrtvp; 3100 } else { 3101 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3102 return (error); 3103 tvp = tv; 3104 } 3105 3106 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3107 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3108 return (EINVAL); 3109 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3110 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3111 } 3112 return (0); 3113} 3114 3115/* 3116 * Common implementation code for futimens(), utimensat(). 3117 */ 3118#define UTIMENS_NULL 0x1 3119#define UTIMENS_EXIT 0x2 3120static int 3121getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3122 struct timespec *tsp, int *retflags) 3123{ 3124 struct timespec tsnow; 3125 int error; 3126 3127 vfs_timestamp(&tsnow); 3128 *retflags = 0; 3129 if (usrtsp == NULL) { 3130 tsp[0] = tsnow; 3131 tsp[1] = tsnow; 3132 *retflags |= UTIMENS_NULL; 3133 return (0); 3134 } 3135 if (tspseg == UIO_SYSSPACE) { 3136 tsp[0] = usrtsp[0]; 3137 tsp[1] = usrtsp[1]; 3138 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3139 return (error); 3140 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3141 *retflags |= UTIMENS_EXIT; 3142 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3143 *retflags |= UTIMENS_NULL; 3144 if (tsp[0].tv_nsec == UTIME_OMIT) 3145 tsp[0].tv_sec = VNOVAL; 3146 else if (tsp[0].tv_nsec == UTIME_NOW) 3147 tsp[0] = tsnow; 3148 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3149 return (EINVAL); 3150 if (tsp[1].tv_nsec == UTIME_OMIT) 3151 tsp[1].tv_sec = VNOVAL; 3152 else if (tsp[1].tv_nsec == UTIME_NOW) 3153 tsp[1] = tsnow; 3154 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3155 return (EINVAL); 3156 3157 return (0); 3158} 3159 3160/* 3161 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3162 * and utimensat(). 3163 */ 3164static int 3165setutimes(td, vp, ts, numtimes, nullflag) 3166 struct thread *td; 3167 struct vnode *vp; 3168 const struct timespec *ts; 3169 int numtimes; 3170 int nullflag; 3171{ 3172 struct mount *mp; 3173 struct vattr vattr; 3174 int error, setbirthtime; 3175 3176 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3177 return (error); 3178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3179 setbirthtime = 0; 3180 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3181 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3182 setbirthtime = 1; 3183 VATTR_NULL(&vattr); 3184 vattr.va_atime = ts[0]; 3185 vattr.va_mtime = ts[1]; 3186 if (setbirthtime) 3187 vattr.va_birthtime = ts[1]; 3188 if (numtimes > 2) 3189 vattr.va_birthtime = ts[2]; 3190 if (nullflag) 3191 vattr.va_vaflags |= VA_UTIMES_NULL; 3192#ifdef MAC 3193 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3194 vattr.va_mtime); 3195#endif 3196 if (error == 0) 3197 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3198 VOP_UNLOCK(vp, 0); 3199 vn_finished_write(mp); 3200 return (error); 3201} 3202 3203/* 3204 * Set the access and modification times of a file. 3205 */ 3206#ifndef _SYS_SYSPROTO_H_ 3207struct utimes_args { 3208 char *path; 3209 struct timeval *tptr; 3210}; 3211#endif 3212int 3213sys_utimes(td, uap) 3214 struct thread *td; 3215 register struct utimes_args /* { 3216 char *path; 3217 struct timeval *tptr; 3218 } */ *uap; 3219{ 3220 3221 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3222 UIO_USERSPACE)); 3223} 3224 3225#ifndef _SYS_SYSPROTO_H_ 3226struct futimesat_args { 3227 int fd; 3228 const char * path; 3229 const struct timeval * times; 3230}; 3231#endif 3232int 3233sys_futimesat(struct thread *td, struct futimesat_args *uap) 3234{ 3235 3236 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3237 uap->times, UIO_USERSPACE)); 3238} 3239 3240int 3241kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3242 struct timeval *tptr, enum uio_seg tptrseg) 3243{ 3244 3245 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3246} 3247 3248int 3249kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3250 struct timeval *tptr, enum uio_seg tptrseg) 3251{ 3252 struct nameidata nd; 3253 struct timespec ts[2]; 3254 cap_rights_t rights; 3255 int error; 3256 3257 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3258 return (error); 3259 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3260 cap_rights_init(&rights, CAP_FUTIMES), td); 3261 3262 if ((error = namei(&nd)) != 0) 3263 return (error); 3264 NDFREE(&nd, NDF_ONLY_PNBUF); 3265 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3266 vrele(nd.ni_vp); 3267 return (error); 3268} 3269 3270/* 3271 * Set the access and modification times of a file. 3272 */ 3273#ifndef _SYS_SYSPROTO_H_ 3274struct lutimes_args { 3275 char *path; 3276 struct timeval *tptr; 3277}; 3278#endif 3279int 3280sys_lutimes(td, uap) 3281 struct thread *td; 3282 register struct lutimes_args /* { 3283 char *path; 3284 struct timeval *tptr; 3285 } */ *uap; 3286{ 3287 3288 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3289 UIO_USERSPACE)); 3290} 3291 3292int 3293kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3294 struct timeval *tptr, enum uio_seg tptrseg) 3295{ 3296 struct timespec ts[2]; 3297 struct nameidata nd; 3298 int error; 3299 3300 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3301 return (error); 3302 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3303 if ((error = namei(&nd)) != 0) 3304 return (error); 3305 NDFREE(&nd, NDF_ONLY_PNBUF); 3306 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3307 vrele(nd.ni_vp); 3308 return (error); 3309} 3310 3311/* 3312 * Set the access and modification times of a file. 3313 */ 3314#ifndef _SYS_SYSPROTO_H_ 3315struct futimes_args { 3316 int fd; 3317 struct timeval *tptr; 3318}; 3319#endif 3320int 3321sys_futimes(td, uap) 3322 struct thread *td; 3323 register struct futimes_args /* { 3324 int fd; 3325 struct timeval *tptr; 3326 } */ *uap; 3327{ 3328 3329 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3330} 3331 3332int 3333kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3334 enum uio_seg tptrseg) 3335{ 3336 struct timespec ts[2]; 3337 struct file *fp; 3338 cap_rights_t rights; 3339 int error; 3340 3341 AUDIT_ARG_FD(fd); 3342 error = getutimes(tptr, tptrseg, ts); 3343 if (error != 0) 3344 return (error); 3345 error = getvnode(td->td_proc->p_fd, fd, 3346 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3347 if (error != 0) 3348 return (error); 3349#ifdef AUDIT 3350 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3351 AUDIT_ARG_VNODE1(fp->f_vnode); 3352 VOP_UNLOCK(fp->f_vnode, 0); 3353#endif 3354 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3355 fdrop(fp, td); 3356 return (error); 3357} 3358 3359int 3360sys_futimens(struct thread *td, struct futimens_args *uap) 3361{ 3362 3363 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3364} 3365 3366int 3367kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3368 enum uio_seg tptrseg) 3369{ 3370 struct timespec ts[2]; 3371 struct file *fp; 3372 cap_rights_t rights; 3373 int error, flags; 3374 3375 AUDIT_ARG_FD(fd); 3376 error = getutimens(tptr, tptrseg, ts, &flags); 3377 if (error != 0) 3378 return (error); 3379 if (flags & UTIMENS_EXIT) 3380 return (0); 3381 error = getvnode(td->td_proc->p_fd, fd, 3382 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3383 if (error != 0) 3384 return (error); 3385#ifdef AUDIT 3386 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3387 AUDIT_ARG_VNODE1(fp->f_vnode); 3388 VOP_UNLOCK(fp->f_vnode, 0); 3389#endif 3390 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3391 fdrop(fp, td); 3392 return (error); 3393} 3394 3395int 3396sys_utimensat(struct thread *td, struct utimensat_args *uap) 3397{ 3398 3399 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3400 uap->times, UIO_USERSPACE, uap->flag)); 3401} 3402 3403int 3404kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3405 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3406{ 3407 struct nameidata nd; 3408 struct timespec ts[2]; 3409 cap_rights_t rights; 3410 int error, flags; 3411 3412 if (flag & ~AT_SYMLINK_NOFOLLOW) 3413 return (EINVAL); 3414 3415 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3416 return (error); 3417 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3418 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3419 cap_rights_init(&rights, CAP_FUTIMES), td); 3420 if ((error = namei(&nd)) != 0) 3421 return (error); 3422 /* 3423 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3424 * POSIX states: 3425 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3426 * "Search permission is denied by a component of the path prefix." 3427 */ 3428 NDFREE(&nd, NDF_ONLY_PNBUF); 3429 if ((flags & UTIMENS_EXIT) == 0) 3430 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3431 vrele(nd.ni_vp); 3432 return (error); 3433} 3434 3435/* 3436 * Truncate a file given its path name. 3437 */ 3438#ifndef _SYS_SYSPROTO_H_ 3439struct truncate_args { 3440 char *path; 3441 int pad; 3442 off_t length; 3443}; 3444#endif 3445int 3446sys_truncate(td, uap) 3447 struct thread *td; 3448 register struct truncate_args /* { 3449 char *path; 3450 int pad; 3451 off_t length; 3452 } */ *uap; 3453{ 3454 3455 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3456} 3457 3458int 3459kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3460{ 3461 struct mount *mp; 3462 struct vnode *vp; 3463 void *rl_cookie; 3464 struct vattr vattr; 3465 struct nameidata nd; 3466 int error; 3467 3468 if (length < 0) 3469 return(EINVAL); 3470 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3471 if ((error = namei(&nd)) != 0) 3472 return (error); 3473 vp = nd.ni_vp; 3474 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3475 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3476 vn_rangelock_unlock(vp, rl_cookie); 3477 vrele(vp); 3478 return (error); 3479 } 3480 NDFREE(&nd, NDF_ONLY_PNBUF); 3481 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3482 if (vp->v_type == VDIR) 3483 error = EISDIR; 3484#ifdef MAC 3485 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3486 } 3487#endif 3488 else if ((error = vn_writechk(vp)) == 0 && 3489 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3490 VATTR_NULL(&vattr); 3491 vattr.va_size = length; 3492 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3493 } 3494 VOP_UNLOCK(vp, 0); 3495 vn_finished_write(mp); 3496 vn_rangelock_unlock(vp, rl_cookie); 3497 vrele(vp); 3498 return (error); 3499} 3500 3501#if defined(COMPAT_43) 3502/* 3503 * Truncate a file given its path name. 3504 */ 3505#ifndef _SYS_SYSPROTO_H_ 3506struct otruncate_args { 3507 char *path; 3508 long length; 3509}; 3510#endif 3511int 3512otruncate(td, uap) 3513 struct thread *td; 3514 register struct otruncate_args /* { 3515 char *path; 3516 long length; 3517 } */ *uap; 3518{ 3519 struct truncate_args /* { 3520 char *path; 3521 int pad; 3522 off_t length; 3523 } */ nuap; 3524 3525 nuap.path = uap->path; 3526 nuap.length = uap->length; 3527 return (sys_truncate(td, &nuap)); 3528} 3529#endif /* COMPAT_43 */ 3530 3531/* Versions with the pad argument */ 3532int 3533freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3534{ 3535 struct truncate_args ouap; 3536 3537 ouap.path = uap->path; 3538 ouap.length = uap->length; 3539 return (sys_truncate(td, &ouap)); 3540} 3541 3542int 3543freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3544{ 3545 struct ftruncate_args ouap; 3546 3547 ouap.fd = uap->fd; 3548 ouap.length = uap->length; 3549 return (sys_ftruncate(td, &ouap)); 3550} 3551 3552/* 3553 * Sync an open file. 3554 */ 3555#ifndef _SYS_SYSPROTO_H_ 3556struct fsync_args { 3557 int fd; 3558}; 3559#endif 3560int 3561sys_fsync(td, uap) 3562 struct thread *td; 3563 struct fsync_args /* { 3564 int fd; 3565 } */ *uap; 3566{ 3567 struct vnode *vp; 3568 struct mount *mp; 3569 struct file *fp; 3570 cap_rights_t rights; 3571 int error, lock_flags; 3572 3573 AUDIT_ARG_FD(uap->fd); 3574 error = getvnode(td->td_proc->p_fd, uap->fd, 3575 cap_rights_init(&rights, CAP_FSYNC), &fp); 3576 if (error != 0) 3577 return (error); 3578 vp = fp->f_vnode; 3579 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3580 if (error != 0) 3581 goto drop; 3582 if (MNT_SHARED_WRITES(mp) || 3583 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3584 lock_flags = LK_SHARED; 3585 } else { 3586 lock_flags = LK_EXCLUSIVE; 3587 } 3588 vn_lock(vp, lock_flags | LK_RETRY); 3589 AUDIT_ARG_VNODE1(vp); 3590 if (vp->v_object != NULL) { 3591 VM_OBJECT_WLOCK(vp->v_object); 3592 vm_object_page_clean(vp->v_object, 0, 0, 0); 3593 VM_OBJECT_WUNLOCK(vp->v_object); 3594 } 3595 error = VOP_FSYNC(vp, MNT_WAIT, td); 3596 3597 VOP_UNLOCK(vp, 0); 3598 vn_finished_write(mp); 3599drop: 3600 fdrop(fp, td); 3601 return (error); 3602} 3603 3604/* 3605 * Rename files. Source and destination must either both be directories, or 3606 * both not be directories. If target is a directory, it must be empty. 3607 */ 3608#ifndef _SYS_SYSPROTO_H_ 3609struct rename_args { 3610 char *from; 3611 char *to; 3612}; 3613#endif 3614int 3615sys_rename(td, uap) 3616 struct thread *td; 3617 register struct rename_args /* { 3618 char *from; 3619 char *to; 3620 } */ *uap; 3621{ 3622 3623 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3624} 3625 3626#ifndef _SYS_SYSPROTO_H_ 3627struct renameat_args { 3628 int oldfd; 3629 char *old; 3630 int newfd; 3631 char *new; 3632}; 3633#endif 3634int 3635sys_renameat(struct thread *td, struct renameat_args *uap) 3636{ 3637 3638 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3639 UIO_USERSPACE)); 3640} 3641 3642int 3643kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3644{ 3645 3646 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3647} 3648 3649int 3650kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3651 enum uio_seg pathseg) 3652{ 3653 struct mount *mp = NULL; 3654 struct vnode *tvp, *fvp, *tdvp; 3655 struct nameidata fromnd, tond; 3656 cap_rights_t rights; 3657 int error; 3658 3659again: 3660 bwillwrite(); 3661#ifdef MAC 3662 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3663 AUDITVNODE1, pathseg, old, oldfd, 3664 cap_rights_init(&rights, CAP_RENAMEAT), td); 3665#else 3666 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3667 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3668#endif 3669 3670 if ((error = namei(&fromnd)) != 0) 3671 return (error); 3672#ifdef MAC 3673 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3674 fromnd.ni_vp, &fromnd.ni_cnd); 3675 VOP_UNLOCK(fromnd.ni_dvp, 0); 3676 if (fromnd.ni_dvp != fromnd.ni_vp) 3677 VOP_UNLOCK(fromnd.ni_vp, 0); 3678#endif 3679 fvp = fromnd.ni_vp; 3680 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3681 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3682 cap_rights_init(&rights, CAP_LINKAT), td); 3683 if (fromnd.ni_vp->v_type == VDIR) 3684 tond.ni_cnd.cn_flags |= WILLBEDIR; 3685 if ((error = namei(&tond)) != 0) { 3686 /* Translate error code for rename("dir1", "dir2/."). */ 3687 if (error == EISDIR && fvp->v_type == VDIR) 3688 error = EINVAL; 3689 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3690 vrele(fromnd.ni_dvp); 3691 vrele(fvp); 3692 goto out1; 3693 } 3694 tdvp = tond.ni_dvp; 3695 tvp = tond.ni_vp; 3696 error = vn_start_write(fvp, &mp, V_NOWAIT); 3697 if (error != 0) { 3698 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3699 NDFREE(&tond, NDF_ONLY_PNBUF); 3700 if (tvp != NULL) 3701 vput(tvp); 3702 if (tdvp == tvp) 3703 vrele(tdvp); 3704 else 3705 vput(tdvp); 3706 vrele(fromnd.ni_dvp); 3707 vrele(fvp); 3708 vrele(tond.ni_startdir); 3709 if (fromnd.ni_startdir != NULL) 3710 vrele(fromnd.ni_startdir); 3711 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3712 if (error != 0) 3713 return (error); 3714 goto again; 3715 } 3716 if (tvp != NULL) { 3717 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3718 error = ENOTDIR; 3719 goto out; 3720 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3721 error = EISDIR; 3722 goto out; 3723 } 3724#ifdef CAPABILITIES 3725 if (newfd != AT_FDCWD) { 3726 /* 3727 * If the target already exists we require CAP_UNLINKAT 3728 * from 'newfd'. 3729 */ 3730 error = cap_check(&tond.ni_filecaps.fc_rights, 3731 cap_rights_init(&rights, CAP_UNLINKAT)); 3732 if (error != 0) 3733 goto out; 3734 } 3735#endif 3736 } 3737 if (fvp == tdvp) { 3738 error = EINVAL; 3739 goto out; 3740 } 3741 /* 3742 * If the source is the same as the destination (that is, if they 3743 * are links to the same vnode), then there is nothing to do. 3744 */ 3745 if (fvp == tvp) 3746 error = -1; 3747#ifdef MAC 3748 else 3749 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3750 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3751#endif 3752out: 3753 if (error == 0) { 3754 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3755 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3756 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3757 NDFREE(&tond, NDF_ONLY_PNBUF); 3758 } else { 3759 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3760 NDFREE(&tond, NDF_ONLY_PNBUF); 3761 if (tvp != NULL) 3762 vput(tvp); 3763 if (tdvp == tvp) 3764 vrele(tdvp); 3765 else 3766 vput(tdvp); 3767 vrele(fromnd.ni_dvp); 3768 vrele(fvp); 3769 } 3770 vrele(tond.ni_startdir); 3771 vn_finished_write(mp); 3772out1: 3773 if (fromnd.ni_startdir) 3774 vrele(fromnd.ni_startdir); 3775 if (error == -1) 3776 return (0); 3777 return (error); 3778} 3779 3780/* 3781 * Make a directory file. 3782 */ 3783#ifndef _SYS_SYSPROTO_H_ 3784struct mkdir_args { 3785 char *path; 3786 int mode; 3787}; 3788#endif 3789int 3790sys_mkdir(td, uap) 3791 struct thread *td; 3792 register struct mkdir_args /* { 3793 char *path; 3794 int mode; 3795 } */ *uap; 3796{ 3797 3798 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3799} 3800 3801#ifndef _SYS_SYSPROTO_H_ 3802struct mkdirat_args { 3803 int fd; 3804 char *path; 3805 mode_t mode; 3806}; 3807#endif 3808int 3809sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3810{ 3811 3812 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3813} 3814 3815int 3816kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3817{ 3818 3819 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3820} 3821 3822int 3823kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3824 int mode) 3825{ 3826 struct mount *mp; 3827 struct vnode *vp; 3828 struct vattr vattr; 3829 struct nameidata nd; 3830 cap_rights_t rights; 3831 int error; 3832 3833 AUDIT_ARG_MODE(mode); 3834restart: 3835 bwillwrite(); 3836 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3837 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3838 td); 3839 nd.ni_cnd.cn_flags |= WILLBEDIR; 3840 if ((error = namei(&nd)) != 0) 3841 return (error); 3842 vp = nd.ni_vp; 3843 if (vp != NULL) { 3844 NDFREE(&nd, NDF_ONLY_PNBUF); 3845 /* 3846 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3847 * the strange behaviour of leaving the vnode unlocked 3848 * if the target is the same vnode as the parent. 3849 */ 3850 if (vp == nd.ni_dvp) 3851 vrele(nd.ni_dvp); 3852 else 3853 vput(nd.ni_dvp); 3854 vrele(vp); 3855 return (EEXIST); 3856 } 3857 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3858 NDFREE(&nd, NDF_ONLY_PNBUF); 3859 vput(nd.ni_dvp); 3860 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3861 return (error); 3862 goto restart; 3863 } 3864 VATTR_NULL(&vattr); 3865 vattr.va_type = VDIR; 3866 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3867#ifdef MAC 3868 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3869 &vattr); 3870 if (error != 0) 3871 goto out; 3872#endif 3873 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3874#ifdef MAC 3875out: 3876#endif 3877 NDFREE(&nd, NDF_ONLY_PNBUF); 3878 vput(nd.ni_dvp); 3879 if (error == 0) 3880 vput(nd.ni_vp); 3881 vn_finished_write(mp); 3882 return (error); 3883} 3884 3885/* 3886 * Remove a directory file. 3887 */ 3888#ifndef _SYS_SYSPROTO_H_ 3889struct rmdir_args { 3890 char *path; 3891}; 3892#endif 3893int 3894sys_rmdir(td, uap) 3895 struct thread *td; 3896 struct rmdir_args /* { 3897 char *path; 3898 } */ *uap; 3899{ 3900 3901 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3902} 3903 3904int 3905kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3906{ 3907 3908 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3909} 3910 3911int 3912kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3913{ 3914 struct mount *mp; 3915 struct vnode *vp; 3916 struct nameidata nd; 3917 cap_rights_t rights; 3918 int error; 3919 3920restart: 3921 bwillwrite(); 3922 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3923 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3924 if ((error = namei(&nd)) != 0) 3925 return (error); 3926 vp = nd.ni_vp; 3927 if (vp->v_type != VDIR) { 3928 error = ENOTDIR; 3929 goto out; 3930 } 3931 /* 3932 * No rmdir "." please. 3933 */ 3934 if (nd.ni_dvp == vp) { 3935 error = EINVAL; 3936 goto out; 3937 } 3938 /* 3939 * The root of a mounted filesystem cannot be deleted. 3940 */ 3941 if (vp->v_vflag & VV_ROOT) { 3942 error = EBUSY; 3943 goto out; 3944 } 3945#ifdef MAC 3946 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3947 &nd.ni_cnd); 3948 if (error != 0) 3949 goto out; 3950#endif 3951 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3952 NDFREE(&nd, NDF_ONLY_PNBUF); 3953 vput(vp); 3954 if (nd.ni_dvp == vp) 3955 vrele(nd.ni_dvp); 3956 else 3957 vput(nd.ni_dvp); 3958 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3959 return (error); 3960 goto restart; 3961 } 3962 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3963 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3964 vn_finished_write(mp); 3965out: 3966 NDFREE(&nd, NDF_ONLY_PNBUF); 3967 vput(vp); 3968 if (nd.ni_dvp == vp) 3969 vrele(nd.ni_dvp); 3970 else 3971 vput(nd.ni_dvp); 3972 return (error); 3973} 3974 3975#ifdef COMPAT_43 3976/* 3977 * Read a block of directory entries in a filesystem independent format. 3978 */ 3979#ifndef _SYS_SYSPROTO_H_ 3980struct ogetdirentries_args { 3981 int fd; 3982 char *buf; 3983 u_int count; 3984 long *basep; 3985}; 3986#endif 3987int 3988ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3989{ 3990 long loff; 3991 int error; 3992 3993 error = kern_ogetdirentries(td, uap, &loff); 3994 if (error == 0) 3995 error = copyout(&loff, uap->basep, sizeof(long)); 3996 return (error); 3997} 3998 3999int 4000kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4001 long *ploff) 4002{ 4003 struct vnode *vp; 4004 struct file *fp; 4005 struct uio auio, kuio; 4006 struct iovec aiov, kiov; 4007 struct dirent *dp, *edp; 4008 cap_rights_t rights; 4009 caddr_t dirbuf; 4010 int error, eofflag, readcnt; 4011 long loff; 4012 off_t foffset; 4013 4014 /* XXX arbitrary sanity limit on `count'. */ 4015 if (uap->count > 64 * 1024) 4016 return (EINVAL); 4017 error = getvnode(td->td_proc->p_fd, uap->fd, 4018 cap_rights_init(&rights, CAP_READ), &fp); 4019 if (error != 0) 4020 return (error); 4021 if ((fp->f_flag & FREAD) == 0) { 4022 fdrop(fp, td); 4023 return (EBADF); 4024 } 4025 vp = fp->f_vnode; 4026 foffset = foffset_lock(fp, 0); 4027unionread: 4028 if (vp->v_type != VDIR) { 4029 foffset_unlock(fp, foffset, 0); 4030 fdrop(fp, td); 4031 return (EINVAL); 4032 } 4033 aiov.iov_base = uap->buf; 4034 aiov.iov_len = uap->count; 4035 auio.uio_iov = &aiov; 4036 auio.uio_iovcnt = 1; 4037 auio.uio_rw = UIO_READ; 4038 auio.uio_segflg = UIO_USERSPACE; 4039 auio.uio_td = td; 4040 auio.uio_resid = uap->count; 4041 vn_lock(vp, LK_SHARED | LK_RETRY); 4042 loff = auio.uio_offset = foffset; 4043#ifdef MAC 4044 error = mac_vnode_check_readdir(td->td_ucred, vp); 4045 if (error != 0) { 4046 VOP_UNLOCK(vp, 0); 4047 foffset_unlock(fp, foffset, FOF_NOUPDATE); 4048 fdrop(fp, td); 4049 return (error); 4050 } 4051#endif 4052# if (BYTE_ORDER != LITTLE_ENDIAN) 4053 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 4054 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 4055 NULL, NULL); 4056 foffset = auio.uio_offset; 4057 } else 4058# endif 4059 { 4060 kuio = auio; 4061 kuio.uio_iov = &kiov; 4062 kuio.uio_segflg = UIO_SYSSPACE; 4063 kiov.iov_len = uap->count; 4064 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 4065 kiov.iov_base = dirbuf; 4066 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 4067 NULL, NULL); 4068 foffset = kuio.uio_offset; 4069 if (error == 0) { 4070 readcnt = uap->count - kuio.uio_resid; 4071 edp = (struct dirent *)&dirbuf[readcnt]; 4072 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 4073# if (BYTE_ORDER == LITTLE_ENDIAN) 4074 /* 4075 * The expected low byte of 4076 * dp->d_namlen is our dp->d_type. 4077 * The high MBZ byte of dp->d_namlen 4078 * is our dp->d_namlen. 4079 */ 4080 dp->d_type = dp->d_namlen; 4081 dp->d_namlen = 0; 4082# else 4083 /* 4084 * The dp->d_type is the high byte 4085 * of the expected dp->d_namlen, 4086 * so must be zero'ed. 4087 */ 4088 dp->d_type = 0; 4089# endif 4090 if (dp->d_reclen > 0) { 4091 dp = (struct dirent *) 4092 ((char *)dp + dp->d_reclen); 4093 } else { 4094 error = EIO; 4095 break; 4096 } 4097 } 4098 if (dp >= edp) 4099 error = uiomove(dirbuf, readcnt, &auio); 4100 } 4101 free(dirbuf, M_TEMP); 4102 } 4103 if (error != 0) { 4104 VOP_UNLOCK(vp, 0); 4105 foffset_unlock(fp, foffset, 0); 4106 fdrop(fp, td); 4107 return (error); 4108 } 4109 if (uap->count == auio.uio_resid && 4110 (vp->v_vflag & VV_ROOT) && 4111 (vp->v_mount->mnt_flag & MNT_UNION)) { 4112 struct vnode *tvp = vp; 4113 vp = vp->v_mount->mnt_vnodecovered; 4114 VREF(vp); 4115 fp->f_vnode = vp; 4116 fp->f_data = vp; 4117 foffset = 0; 4118 vput(tvp); 4119 goto unionread; 4120 } 4121 VOP_UNLOCK(vp, 0); 4122 foffset_unlock(fp, foffset, 0); 4123 fdrop(fp, td); 4124 td->td_retval[0] = uap->count - auio.uio_resid; 4125 if (error == 0) 4126 *ploff = loff; 4127 return (error); 4128} 4129#endif /* COMPAT_43 */ 4130 4131/* 4132 * Read a block of directory entries in a filesystem independent format. 4133 */ 4134#ifndef _SYS_SYSPROTO_H_ 4135struct getdirentries_args { 4136 int fd; 4137 char *buf; 4138 u_int count; 4139 long *basep; 4140}; 4141#endif 4142int 4143sys_getdirentries(td, uap) 4144 struct thread *td; 4145 register struct getdirentries_args /* { 4146 int fd; 4147 char *buf; 4148 u_int count; 4149 long *basep; 4150 } */ *uap; 4151{ 4152 long base; 4153 int error; 4154 4155 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4156 NULL, UIO_USERSPACE); 4157 if (error != 0) 4158 return (error); 4159 if (uap->basep != NULL) 4160 error = copyout(&base, uap->basep, sizeof(long)); 4161 return (error); 4162} 4163 4164int 4165kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4166 long *basep, ssize_t *residp, enum uio_seg bufseg) 4167{ 4168 struct vnode *vp; 4169 struct file *fp; 4170 struct uio auio; 4171 struct iovec aiov; 4172 cap_rights_t rights; 4173 long loff; 4174 int error, eofflag; 4175 off_t foffset; 4176 4177 AUDIT_ARG_FD(fd); 4178 if (count > IOSIZE_MAX) 4179 return (EINVAL); 4180 auio.uio_resid = count; 4181 error = getvnode(td->td_proc->p_fd, fd, 4182 cap_rights_init(&rights, CAP_READ), &fp); 4183 if (error != 0) 4184 return (error); 4185 if ((fp->f_flag & FREAD) == 0) { 4186 fdrop(fp, td); 4187 return (EBADF); 4188 } 4189 vp = fp->f_vnode; 4190 foffset = foffset_lock(fp, 0); 4191unionread: 4192 if (vp->v_type != VDIR) { 4193 error = EINVAL; 4194 goto fail; 4195 } 4196 aiov.iov_base = buf; 4197 aiov.iov_len = count; 4198 auio.uio_iov = &aiov; 4199 auio.uio_iovcnt = 1; 4200 auio.uio_rw = UIO_READ; 4201 auio.uio_segflg = bufseg; 4202 auio.uio_td = td; 4203 vn_lock(vp, LK_SHARED | LK_RETRY); 4204 AUDIT_ARG_VNODE1(vp); 4205 loff = auio.uio_offset = foffset; 4206#ifdef MAC 4207 error = mac_vnode_check_readdir(td->td_ucred, vp); 4208 if (error == 0) 4209#endif 4210 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4211 NULL); 4212 foffset = auio.uio_offset; 4213 if (error != 0) { 4214 VOP_UNLOCK(vp, 0); 4215 goto fail; 4216 } 4217 if (count == auio.uio_resid && 4218 (vp->v_vflag & VV_ROOT) && 4219 (vp->v_mount->mnt_flag & MNT_UNION)) { 4220 struct vnode *tvp = vp; 4221 4222 vp = vp->v_mount->mnt_vnodecovered; 4223 VREF(vp); 4224 fp->f_vnode = vp; 4225 fp->f_data = vp; 4226 foffset = 0; 4227 vput(tvp); 4228 goto unionread; 4229 } 4230 VOP_UNLOCK(vp, 0); 4231 *basep = loff; 4232 if (residp != NULL) 4233 *residp = auio.uio_resid; 4234 td->td_retval[0] = count - auio.uio_resid; 4235fail: 4236 foffset_unlock(fp, foffset, 0); 4237 fdrop(fp, td); 4238 return (error); 4239} 4240 4241#ifndef _SYS_SYSPROTO_H_ 4242struct getdents_args { 4243 int fd; 4244 char *buf; 4245 size_t count; 4246}; 4247#endif 4248int 4249sys_getdents(td, uap) 4250 struct thread *td; 4251 register struct getdents_args /* { 4252 int fd; 4253 char *buf; 4254 u_int count; 4255 } */ *uap; 4256{ 4257 struct getdirentries_args ap; 4258 4259 ap.fd = uap->fd; 4260 ap.buf = uap->buf; 4261 ap.count = uap->count; 4262 ap.basep = NULL; 4263 return (sys_getdirentries(td, &ap)); 4264} 4265 4266/* 4267 * Set the mode mask for creation of filesystem nodes. 4268 */ 4269#ifndef _SYS_SYSPROTO_H_ 4270struct umask_args { 4271 int newmask; 4272}; 4273#endif 4274int 4275sys_umask(td, uap) 4276 struct thread *td; 4277 struct umask_args /* { 4278 int newmask; 4279 } */ *uap; 4280{ 4281 register struct filedesc *fdp; 4282 4283 FILEDESC_XLOCK(td->td_proc->p_fd); 4284 fdp = td->td_proc->p_fd; 4285 td->td_retval[0] = fdp->fd_cmask; 4286 fdp->fd_cmask = uap->newmask & ALLPERMS; 4287 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4288 return (0); 4289} 4290 4291/* 4292 * Void all references to file by ripping underlying filesystem away from 4293 * vnode. 4294 */ 4295#ifndef _SYS_SYSPROTO_H_ 4296struct revoke_args { 4297 char *path; 4298}; 4299#endif 4300int 4301sys_revoke(td, uap) 4302 struct thread *td; 4303 register struct revoke_args /* { 4304 char *path; 4305 } */ *uap; 4306{ 4307 struct vnode *vp; 4308 struct vattr vattr; 4309 struct nameidata nd; 4310 int error; 4311 4312 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4313 uap->path, td); 4314 if ((error = namei(&nd)) != 0) 4315 return (error); 4316 vp = nd.ni_vp; 4317 NDFREE(&nd, NDF_ONLY_PNBUF); 4318 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4319 error = EINVAL; 4320 goto out; 4321 } 4322#ifdef MAC 4323 error = mac_vnode_check_revoke(td->td_ucred, vp); 4324 if (error != 0) 4325 goto out; 4326#endif 4327 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4328 if (error != 0) 4329 goto out; 4330 if (td->td_ucred->cr_uid != vattr.va_uid) { 4331 error = priv_check(td, PRIV_VFS_ADMIN); 4332 if (error != 0) 4333 goto out; 4334 } 4335 if (vcount(vp) > 1) 4336 VOP_REVOKE(vp, REVOKEALL); 4337out: 4338 vput(vp); 4339 return (error); 4340} 4341 4342/* 4343 * Convert a user file descriptor to a kernel file entry and check that, if it 4344 * is a capability, the correct rights are present. A reference on the file 4345 * entry is held upon returning. 4346 */ 4347int 4348getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4349{ 4350 struct file *fp; 4351 int error; 4352 4353 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4354 if (error != 0) 4355 return (error); 4356 4357 /* 4358 * The file could be not of the vnode type, or it may be not 4359 * yet fully initialized, in which case the f_vnode pointer 4360 * may be set, but f_ops is still badfileops. E.g., 4361 * devfs_open() transiently create such situation to 4362 * facilitate csw d_fdopen(). 4363 * 4364 * Dupfdopen() handling in kern_openat() installs the 4365 * half-baked file into the process descriptor table, allowing 4366 * other thread to dereference it. Guard against the race by 4367 * checking f_ops. 4368 */ 4369 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4370 fdrop(fp, curthread); 4371 return (EINVAL); 4372 } 4373 *fpp = fp; 4374 return (0); 4375} 4376 4377 4378/* 4379 * Get an (NFS) file handle. 4380 */ 4381#ifndef _SYS_SYSPROTO_H_ 4382struct lgetfh_args { 4383 char *fname; 4384 fhandle_t *fhp; 4385}; 4386#endif 4387int 4388sys_lgetfh(td, uap) 4389 struct thread *td; 4390 register struct lgetfh_args *uap; 4391{ 4392 struct nameidata nd; 4393 fhandle_t fh; 4394 register struct vnode *vp; 4395 int error; 4396 4397 error = priv_check(td, PRIV_VFS_GETFH); 4398 if (error != 0) 4399 return (error); 4400 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4401 uap->fname, td); 4402 error = namei(&nd); 4403 if (error != 0) 4404 return (error); 4405 NDFREE(&nd, NDF_ONLY_PNBUF); 4406 vp = nd.ni_vp; 4407 bzero(&fh, sizeof(fh)); 4408 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4409 error = VOP_VPTOFH(vp, &fh.fh_fid); 4410 vput(vp); 4411 if (error == 0) 4412 error = copyout(&fh, uap->fhp, sizeof (fh)); 4413 return (error); 4414} 4415 4416#ifndef _SYS_SYSPROTO_H_ 4417struct getfh_args { 4418 char *fname; 4419 fhandle_t *fhp; 4420}; 4421#endif 4422int 4423sys_getfh(td, uap) 4424 struct thread *td; 4425 register struct getfh_args *uap; 4426{ 4427 struct nameidata nd; 4428 fhandle_t fh; 4429 register struct vnode *vp; 4430 int error; 4431 4432 error = priv_check(td, PRIV_VFS_GETFH); 4433 if (error != 0) 4434 return (error); 4435 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4436 uap->fname, td); 4437 error = namei(&nd); 4438 if (error != 0) 4439 return (error); 4440 NDFREE(&nd, NDF_ONLY_PNBUF); 4441 vp = nd.ni_vp; 4442 bzero(&fh, sizeof(fh)); 4443 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4444 error = VOP_VPTOFH(vp, &fh.fh_fid); 4445 vput(vp); 4446 if (error == 0) 4447 error = copyout(&fh, uap->fhp, sizeof (fh)); 4448 return (error); 4449} 4450 4451/* 4452 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4453 * open descriptor. 4454 * 4455 * warning: do not remove the priv_check() call or this becomes one giant 4456 * security hole. 4457 */ 4458#ifndef _SYS_SYSPROTO_H_ 4459struct fhopen_args { 4460 const struct fhandle *u_fhp; 4461 int flags; 4462}; 4463#endif 4464int 4465sys_fhopen(td, uap) 4466 struct thread *td; 4467 struct fhopen_args /* { 4468 const struct fhandle *u_fhp; 4469 int flags; 4470 } */ *uap; 4471{ 4472 struct mount *mp; 4473 struct vnode *vp; 4474 struct fhandle fhp; 4475 struct file *fp; 4476 int fmode, error; 4477 int indx; 4478 4479 error = priv_check(td, PRIV_VFS_FHOPEN); 4480 if (error != 0) 4481 return (error); 4482 indx = -1; 4483 fmode = FFLAGS(uap->flags); 4484 /* why not allow a non-read/write open for our lockd? */ 4485 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4486 return (EINVAL); 4487 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4488 if (error != 0) 4489 return(error); 4490 /* find the mount point */ 4491 mp = vfs_busyfs(&fhp.fh_fsid); 4492 if (mp == NULL) 4493 return (ESTALE); 4494 /* now give me my vnode, it gets returned to me locked */ 4495 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4496 vfs_unbusy(mp); 4497 if (error != 0) 4498 return (error); 4499 4500 error = falloc_noinstall(td, &fp); 4501 if (error != 0) { 4502 vput(vp); 4503 return (error); 4504 } 4505 /* 4506 * An extra reference on `fp' has been held for us by 4507 * falloc_noinstall(). 4508 */ 4509 4510#ifdef INVARIANTS 4511 td->td_dupfd = -1; 4512#endif 4513 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4514 if (error != 0) { 4515 KASSERT(fp->f_ops == &badfileops, 4516 ("VOP_OPEN in fhopen() set f_ops")); 4517 KASSERT(td->td_dupfd < 0, 4518 ("fhopen() encountered fdopen()")); 4519 4520 vput(vp); 4521 goto bad; 4522 } 4523#ifdef INVARIANTS 4524 td->td_dupfd = 0; 4525#endif 4526 fp->f_vnode = vp; 4527 fp->f_seqcount = 1; 4528 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4529 &vnops); 4530 VOP_UNLOCK(vp, 0); 4531 if ((fmode & O_TRUNC) != 0) { 4532 error = fo_truncate(fp, 0, td->td_ucred, td); 4533 if (error != 0) 4534 goto bad; 4535 } 4536 4537 error = finstall(td, fp, &indx, fmode, NULL); 4538bad: 4539 fdrop(fp, td); 4540 td->td_retval[0] = indx; 4541 return (error); 4542} 4543 4544/* 4545 * Stat an (NFS) file handle. 4546 */ 4547#ifndef _SYS_SYSPROTO_H_ 4548struct fhstat_args { 4549 struct fhandle *u_fhp; 4550 struct stat *sb; 4551}; 4552#endif 4553int 4554sys_fhstat(td, uap) 4555 struct thread *td; 4556 register struct fhstat_args /* { 4557 struct fhandle *u_fhp; 4558 struct stat *sb; 4559 } */ *uap; 4560{ 4561 struct stat sb; 4562 struct fhandle fh; 4563 int error; 4564 4565 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4566 if (error != 0) 4567 return (error); 4568 error = kern_fhstat(td, fh, &sb); 4569 if (error == 0) 4570 error = copyout(&sb, uap->sb, sizeof(sb)); 4571 return (error); 4572} 4573 4574int 4575kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4576{ 4577 struct mount *mp; 4578 struct vnode *vp; 4579 int error; 4580 4581 error = priv_check(td, PRIV_VFS_FHSTAT); 4582 if (error != 0) 4583 return (error); 4584 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4585 return (ESTALE); 4586 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4587 vfs_unbusy(mp); 4588 if (error != 0) 4589 return (error); 4590 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4591 vput(vp); 4592 return (error); 4593} 4594 4595/* 4596 * Implement fstatfs() for (NFS) file handles. 4597 */ 4598#ifndef _SYS_SYSPROTO_H_ 4599struct fhstatfs_args { 4600 struct fhandle *u_fhp; 4601 struct statfs *buf; 4602}; 4603#endif 4604int 4605sys_fhstatfs(td, uap) 4606 struct thread *td; 4607 struct fhstatfs_args /* { 4608 struct fhandle *u_fhp; 4609 struct statfs *buf; 4610 } */ *uap; 4611{ 4612 struct statfs sf; 4613 fhandle_t fh; 4614 int error; 4615 4616 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4617 if (error != 0) 4618 return (error); 4619 error = kern_fhstatfs(td, fh, &sf); 4620 if (error != 0) 4621 return (error); 4622 return (copyout(&sf, uap->buf, sizeof(sf))); 4623} 4624 4625int 4626kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4627{ 4628 struct statfs *sp; 4629 struct mount *mp; 4630 struct vnode *vp; 4631 int error; 4632 4633 error = priv_check(td, PRIV_VFS_FHSTATFS); 4634 if (error != 0) 4635 return (error); 4636 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4637 return (ESTALE); 4638 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4639 if (error != 0) { 4640 vfs_unbusy(mp); 4641 return (error); 4642 } 4643 vput(vp); 4644 error = prison_canseemount(td->td_ucred, mp); 4645 if (error != 0) 4646 goto out; 4647#ifdef MAC 4648 error = mac_mount_check_stat(td->td_ucred, mp); 4649 if (error != 0) 4650 goto out; 4651#endif 4652 /* 4653 * Set these in case the underlying filesystem fails to do so. 4654 */ 4655 sp = &mp->mnt_stat; 4656 sp->f_version = STATFS_VERSION; 4657 sp->f_namemax = NAME_MAX; 4658 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4659 error = VFS_STATFS(mp, sp); 4660 if (error == 0) 4661 *buf = *sp; 4662out: 4663 vfs_unbusy(mp); 4664 return (error); 4665} 4666 4667int 4668kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4669{ 4670 struct file *fp; 4671 struct mount *mp; 4672 struct vnode *vp; 4673 cap_rights_t rights; 4674 off_t olen, ooffset; 4675 int error; 4676 4677 if (offset < 0 || len <= 0) 4678 return (EINVAL); 4679 /* Check for wrap. */ 4680 if (offset > OFF_MAX - len) 4681 return (EFBIG); 4682 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4683 if (error != 0) 4684 return (error); 4685 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4686 error = ESPIPE; 4687 goto out; 4688 } 4689 if ((fp->f_flag & FWRITE) == 0) { 4690 error = EBADF; 4691 goto out; 4692 } 4693 if (fp->f_type != DTYPE_VNODE) { 4694 error = ENODEV; 4695 goto out; 4696 } 4697 vp = fp->f_vnode; 4698 if (vp->v_type != VREG) { 4699 error = ENODEV; 4700 goto out; 4701 } 4702 4703 /* Allocating blocks may take a long time, so iterate. */ 4704 for (;;) { 4705 olen = len; 4706 ooffset = offset; 4707 4708 bwillwrite(); 4709 mp = NULL; 4710 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4711 if (error != 0) 4712 break; 4713 error = vn_lock(vp, LK_EXCLUSIVE); 4714 if (error != 0) { 4715 vn_finished_write(mp); 4716 break; 4717 } 4718#ifdef MAC 4719 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4720 if (error == 0) 4721#endif 4722 error = VOP_ALLOCATE(vp, &offset, &len); 4723 VOP_UNLOCK(vp, 0); 4724 vn_finished_write(mp); 4725 4726 if (olen + ooffset != offset + len) { 4727 panic("offset + len changed from %jx/%jx to %jx/%jx", 4728 ooffset, olen, offset, len); 4729 } 4730 if (error != 0 || len == 0) 4731 break; 4732 KASSERT(olen > len, ("Iteration did not make progress?")); 4733 maybe_yield(); 4734 } 4735 out: 4736 fdrop(fp, td); 4737 return (error); 4738} 4739 4740int 4741sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4742{ 4743 4744 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4745 uap->len); 4746 return (0); 4747} 4748 4749/* 4750 * Unlike madvise(2), we do not make a best effort to remember every 4751 * possible caching hint. Instead, we remember the last setting with 4752 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4753 * region of any current setting. 4754 */ 4755int 4756kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4757 int advice) 4758{ 4759 struct fadvise_info *fa, *new; 4760 struct file *fp; 4761 struct vnode *vp; 4762 cap_rights_t rights; 4763 off_t end; 4764 int error; 4765 4766 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4767 return (EINVAL); 4768 switch (advice) { 4769 case POSIX_FADV_SEQUENTIAL: 4770 case POSIX_FADV_RANDOM: 4771 case POSIX_FADV_NOREUSE: 4772 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4773 break; 4774 case POSIX_FADV_NORMAL: 4775 case POSIX_FADV_WILLNEED: 4776 case POSIX_FADV_DONTNEED: 4777 new = NULL; 4778 break; 4779 default: 4780 return (EINVAL); 4781 } 4782 /* XXX: CAP_POSIX_FADVISE? */ 4783 error = fget(td, fd, cap_rights_init(&rights), &fp); 4784 if (error != 0) 4785 goto out; 4786 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4787 error = ESPIPE; 4788 goto out; 4789 } 4790 if (fp->f_type != DTYPE_VNODE) { 4791 error = ENODEV; 4792 goto out; 4793 } 4794 vp = fp->f_vnode; 4795 if (vp->v_type != VREG) { 4796 error = ENODEV; 4797 goto out; 4798 } 4799 if (len == 0) 4800 end = OFF_MAX; 4801 else 4802 end = offset + len - 1; 4803 switch (advice) { 4804 case POSIX_FADV_SEQUENTIAL: 4805 case POSIX_FADV_RANDOM: 4806 case POSIX_FADV_NOREUSE: 4807 /* 4808 * Try to merge any existing non-standard region with 4809 * this new region if possible, otherwise create a new 4810 * non-standard region for this request. 4811 */ 4812 mtx_pool_lock(mtxpool_sleep, fp); 4813 fa = fp->f_advice; 4814 if (fa != NULL && fa->fa_advice == advice && 4815 ((fa->fa_start <= end && fa->fa_end >= offset) || 4816 (end != OFF_MAX && fa->fa_start == end + 1) || 4817 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4818 if (offset < fa->fa_start) 4819 fa->fa_start = offset; 4820 if (end > fa->fa_end) 4821 fa->fa_end = end; 4822 } else { 4823 new->fa_advice = advice; 4824 new->fa_start = offset; 4825 new->fa_end = end; 4826 new->fa_prevstart = 0; 4827 new->fa_prevend = 0; 4828 fp->f_advice = new; 4829 new = fa; 4830 } 4831 mtx_pool_unlock(mtxpool_sleep, fp); 4832 break; 4833 case POSIX_FADV_NORMAL: 4834 /* 4835 * If a the "normal" region overlaps with an existing 4836 * non-standard region, trim or remove the 4837 * non-standard region. 4838 */ 4839 mtx_pool_lock(mtxpool_sleep, fp); 4840 fa = fp->f_advice; 4841 if (fa != NULL) { 4842 if (offset <= fa->fa_start && end >= fa->fa_end) { 4843 new = fa; 4844 fp->f_advice = NULL; 4845 } else if (offset <= fa->fa_start && 4846 end >= fa->fa_start) 4847 fa->fa_start = end + 1; 4848 else if (offset <= fa->fa_end && end >= fa->fa_end) 4849 fa->fa_end = offset - 1; 4850 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4851 /* 4852 * If the "normal" region is a middle 4853 * portion of the existing 4854 * non-standard region, just remove 4855 * the whole thing rather than picking 4856 * one side or the other to 4857 * preserve. 4858 */ 4859 new = fa; 4860 fp->f_advice = NULL; 4861 } 4862 } 4863 mtx_pool_unlock(mtxpool_sleep, fp); 4864 break; 4865 case POSIX_FADV_WILLNEED: 4866 case POSIX_FADV_DONTNEED: 4867 error = VOP_ADVISE(vp, offset, end, advice); 4868 break; 4869 } 4870out: 4871 if (fp != NULL) 4872 fdrop(fp, td); 4873 free(new, M_FADVISE); 4874 return (error); 4875} 4876 4877int 4878sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4879{ 4880 4881 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4882 uap->len, uap->advice); 4883 return (0); 4884} 4885