vfs_syscalls.c revision 289798
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_syscalls.c 289798 2015-10-23 07:40:43Z avg $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_kdtrace.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/capsicum.h> 50#include <sys/disk.h> 51#include <sys/sysent.h> 52#include <sys/malloc.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/sysproto.h> 56#include <sys/namei.h> 57#include <sys/filedesc.h> 58#include <sys/kernel.h> 59#include <sys/fcntl.h> 60#include <sys/file.h> 61#include <sys/filio.h> 62#include <sys/limits.h> 63#include <sys/linker.h> 64#include <sys/rwlock.h> 65#include <sys/sdt.h> 66#include <sys/stat.h> 67#include <sys/sx.h> 68#include <sys/unistd.h> 69#include <sys/vnode.h> 70#include <sys/priv.h> 71#include <sys/proc.h> 72#include <sys/dirent.h> 73#include <sys/jail.h> 74#include <sys/syscallsubr.h> 75#include <sys/sysctl.h> 76#ifdef KTRACE 77#include <sys/ktrace.h> 78#endif 79 80#include <machine/stdarg.h> 81 82#include <security/audit/audit.h> 83#include <security/mac/mac_framework.h> 84 85#include <vm/vm.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/uma.h> 89 90#include <ufs/ufs/quota.h> 91 92MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94SDT_PROVIDER_DEFINE(vfs); 95SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98static int chroot_refuse_vdir_fds(struct filedesc *fdp); 99static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100static int kern_chflags(struct thread *td, const char *path, 101 enum uio_seg pathseg, u_long flags); 102static int kern_chflagsat(struct thread *td, int fd, const char *path, 103 enum uio_seg pathseg, u_long flags, int atflag); 104static int setfflags(struct 
thread *td, struct vnode *, u_long); 105static int setutimes(struct thread *td, struct vnode *, 106 const struct timespec *, int, int); 107static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 108 struct thread *td); 109 110/* 111 * The module initialization routine for POSIX asynchronous I/O will 112 * set this to the version of AIO that it implements. (Zero means 113 * that it is not implemented.) This value is used here by pathconf() 114 * and in kern_descrip.c by fpathconf(). 115 */ 116int async_io_version; 117 118/* 119 * Sync each mounted filesystem. 120 */ 121#ifndef _SYS_SYSPROTO_H_ 122struct sync_args { 123 int dummy; 124}; 125#endif 126/* ARGSUSED */ 127int 128sys_sync(td, uap) 129 struct thread *td; 130 struct sync_args *uap; 131{ 132 struct mount *mp, *nmp; 133 int save; 134 135 mtx_lock(&mountlist_mtx); 136 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 137 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 138 nmp = TAILQ_NEXT(mp, mnt_list); 139 continue; 140 } 141 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 142 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 143 save = curthread_pflags_set(TDP_SYNCIO); 144 vfs_msync(mp, MNT_NOWAIT); 145 VFS_SYNC(mp, MNT_NOWAIT); 146 curthread_pflags_restore(save); 147 vn_finished_write(mp); 148 } 149 mtx_lock(&mountlist_mtx); 150 nmp = TAILQ_NEXT(mp, mnt_list); 151 vfs_unbusy(mp); 152 } 153 mtx_unlock(&mountlist_mtx); 154 return (0); 155} 156 157/* 158 * Change filesystem quotas. 
159 */ 160#ifndef _SYS_SYSPROTO_H_ 161struct quotactl_args { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166}; 167#endif 168int 169sys_quotactl(td, uap) 170 struct thread *td; 171 register struct quotactl_args /* { 172 char *path; 173 int cmd; 174 int uid; 175 caddr_t arg; 176 } */ *uap; 177{ 178 struct mount *mp; 179 struct nameidata nd; 180 int error; 181 182 AUDIT_ARG_CMD(uap->cmd); 183 AUDIT_ARG_UID(uap->uid); 184 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 185 return (EPERM); 186 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 187 uap->path, td); 188 if ((error = namei(&nd)) != 0) 189 return (error); 190 NDFREE(&nd, NDF_ONLY_PNBUF); 191 mp = nd.ni_vp->v_mount; 192 vfs_ref(mp); 193 vput(nd.ni_vp); 194 error = vfs_busy(mp, 0); 195 vfs_rel(mp); 196 if (error != 0) 197 return (error); 198 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 199 200 /* 201 * Since quota on operation typically needs to open quota 202 * file, the Q_QUOTAON handler needs to unbusy the mount point 203 * before calling into namei. Otherwise, unmount might be 204 * started between two vfs_busy() invocations (first is our, 205 * second is from mount point cross-walk code in lookup()), 206 * causing deadlock. 207 * 208 * Require that Q_QUOTAON handles the vfs_busy() reference on 209 * its own, always returning with ubusied mount point. 210 */ 211 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 212 vfs_unbusy(mp); 213 return (error); 214} 215 216/* 217 * Used by statfs conversion routines to scale the block size up if 218 * necessary so that all of the block counts are <= 'max_size'. Note 219 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 220 * value of 'n'. 
221 */ 222void 223statfs_scale_blocks(struct statfs *sf, long max_size) 224{ 225 uint64_t count; 226 int shift; 227 228 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 229 230 /* 231 * Attempt to scale the block counts to give a more accurate 232 * overview to userland of the ratio of free space to used 233 * space. To do this, find the largest block count and compute 234 * a divisor that lets it fit into a signed integer <= max_size. 235 */ 236 if (sf->f_bavail < 0) 237 count = -sf->f_bavail; 238 else 239 count = sf->f_bavail; 240 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 241 if (count <= max_size) 242 return; 243 244 count >>= flsl(max_size); 245 shift = 0; 246 while (count > 0) { 247 shift++; 248 count >>=1; 249 } 250 251 sf->f_bsize <<= shift; 252 sf->f_blocks >>= shift; 253 sf->f_bfree >>= shift; 254 sf->f_bavail >>= shift; 255} 256 257/* 258 * Get filesystem statistics. 259 */ 260#ifndef _SYS_SYSPROTO_H_ 261struct statfs_args { 262 char *path; 263 struct statfs *buf; 264}; 265#endif 266int 267sys_statfs(td, uap) 268 struct thread *td; 269 register struct statfs_args /* { 270 char *path; 271 struct statfs *buf; 272 } */ *uap; 273{ 274 struct statfs sf; 275 int error; 276 277 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 278 if (error == 0) 279 error = copyout(&sf, uap->buf, sizeof(sf)); 280 return (error); 281} 282 283int 284kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 285 struct statfs *buf) 286{ 287 struct mount *mp; 288 struct statfs *sp, sb; 289 struct nameidata nd; 290 int error; 291 292 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 293 pathseg, path, td); 294 error = namei(&nd); 295 if (error != 0) 296 return (error); 297 mp = nd.ni_vp->v_mount; 298 vfs_ref(mp); 299 NDFREE(&nd, NDF_ONLY_PNBUF); 300 vput(nd.ni_vp); 301 error = vfs_busy(mp, 0); 302 vfs_rel(mp); 303 if (error != 0) 304 return (error); 305#ifdef MAC 306 error = mac_mount_check_stat(td->td_ucred, mp); 307 if 
(error != 0) 308 goto out; 309#endif 310 /* 311 * Set these in case the underlying filesystem fails to do so. 312 */ 313 sp = &mp->mnt_stat; 314 sp->f_version = STATFS_VERSION; 315 sp->f_namemax = NAME_MAX; 316 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 317 error = VFS_STATFS(mp, sp); 318 if (error != 0) 319 goto out; 320 if (priv_check(td, PRIV_VFS_GENERATION)) { 321 bcopy(sp, &sb, sizeof(sb)); 322 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 323 prison_enforce_statfs(td->td_ucred, mp, &sb); 324 sp = &sb; 325 } 326 *buf = *sp; 327out: 328 vfs_unbusy(mp); 329 return (error); 330} 331 332/* 333 * Get filesystem statistics. 334 */ 335#ifndef _SYS_SYSPROTO_H_ 336struct fstatfs_args { 337 int fd; 338 struct statfs *buf; 339}; 340#endif 341int 342sys_fstatfs(td, uap) 343 struct thread *td; 344 register struct fstatfs_args /* { 345 int fd; 346 struct statfs *buf; 347 } */ *uap; 348{ 349 struct statfs sf; 350 int error; 351 352 error = kern_fstatfs(td, uap->fd, &sf); 353 if (error == 0) 354 error = copyout(&sf, uap->buf, sizeof(sf)); 355 return (error); 356} 357 358int 359kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 360{ 361 struct file *fp; 362 struct mount *mp; 363 struct statfs *sp, sb; 364 struct vnode *vp; 365 cap_rights_t rights; 366 int error; 367 368 AUDIT_ARG_FD(fd); 369 error = getvnode(td->td_proc->p_fd, fd, 370 cap_rights_init(&rights, CAP_FSTATFS), &fp); 371 if (error != 0) 372 return (error); 373 vp = fp->f_vnode; 374 vn_lock(vp, LK_SHARED | LK_RETRY); 375#ifdef AUDIT 376 AUDIT_ARG_VNODE1(vp); 377#endif 378 mp = vp->v_mount; 379 if (mp) 380 vfs_ref(mp); 381 VOP_UNLOCK(vp, 0); 382 fdrop(fp, td); 383 if (mp == NULL) { 384 error = EBADF; 385 goto out; 386 } 387 error = vfs_busy(mp, 0); 388 vfs_rel(mp); 389 if (error != 0) 390 return (error); 391#ifdef MAC 392 error = mac_mount_check_stat(td->td_ucred, mp); 393 if (error != 0) 394 goto out; 395#endif 396 /* 397 * Set these in case the underlying filesystem fails to do so. 
398 */ 399 sp = &mp->mnt_stat; 400 sp->f_version = STATFS_VERSION; 401 sp->f_namemax = NAME_MAX; 402 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 403 error = VFS_STATFS(mp, sp); 404 if (error != 0) 405 goto out; 406 if (priv_check(td, PRIV_VFS_GENERATION)) { 407 bcopy(sp, &sb, sizeof(sb)); 408 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 409 prison_enforce_statfs(td->td_ucred, mp, &sb); 410 sp = &sb; 411 } 412 *buf = *sp; 413out: 414 if (mp) 415 vfs_unbusy(mp); 416 return (error); 417} 418 419/* 420 * Get statistics on all filesystems. 421 */ 422#ifndef _SYS_SYSPROTO_H_ 423struct getfsstat_args { 424 struct statfs *buf; 425 long bufsize; 426 int flags; 427}; 428#endif 429int 430sys_getfsstat(td, uap) 431 struct thread *td; 432 register struct getfsstat_args /* { 433 struct statfs *buf; 434 long bufsize; 435 int flags; 436 } */ *uap; 437{ 438 439 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 440 uap->flags)); 441} 442 443/* 444 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 445 * The caller is responsible for freeing memory which will be allocated 446 * in '*buf'. 
447 */ 448int 449kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 450 enum uio_seg bufseg, int flags) 451{ 452 struct mount *mp, *nmp; 453 struct statfs *sfsp, *sp, sb; 454 size_t count, maxcount; 455 int error; 456 457 maxcount = bufsize / sizeof(struct statfs); 458 if (bufsize == 0) 459 sfsp = NULL; 460 else if (bufseg == UIO_USERSPACE) 461 sfsp = *buf; 462 else /* if (bufseg == UIO_SYSSPACE) */ { 463 count = 0; 464 mtx_lock(&mountlist_mtx); 465 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 466 count++; 467 } 468 mtx_unlock(&mountlist_mtx); 469 if (maxcount > count) 470 maxcount = count; 471 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 472 M_WAITOK); 473 } 474 count = 0; 475 mtx_lock(&mountlist_mtx); 476 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 477 if (prison_canseemount(td->td_ucred, mp) != 0) { 478 nmp = TAILQ_NEXT(mp, mnt_list); 479 continue; 480 } 481#ifdef MAC 482 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 continue; 485 } 486#endif 487 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 488 nmp = TAILQ_NEXT(mp, mnt_list); 489 continue; 490 } 491 if (sfsp && count < maxcount) { 492 sp = &mp->mnt_stat; 493 /* 494 * Set these in case the underlying filesystem 495 * fails to do so. 496 */ 497 sp->f_version = STATFS_VERSION; 498 sp->f_namemax = NAME_MAX; 499 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 500 /* 501 * If MNT_NOWAIT or MNT_LAZY is specified, do not 502 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 503 * overrides MNT_WAIT. 
504 */ 505 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 506 (flags & MNT_WAIT)) && 507 (error = VFS_STATFS(mp, sp))) { 508 mtx_lock(&mountlist_mtx); 509 nmp = TAILQ_NEXT(mp, mnt_list); 510 vfs_unbusy(mp); 511 continue; 512 } 513 if (priv_check(td, PRIV_VFS_GENERATION)) { 514 bcopy(sp, &sb, sizeof(sb)); 515 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 516 prison_enforce_statfs(td->td_ucred, mp, &sb); 517 sp = &sb; 518 } 519 if (bufseg == UIO_SYSSPACE) 520 bcopy(sp, sfsp, sizeof(*sp)); 521 else /* if (bufseg == UIO_USERSPACE) */ { 522 error = copyout(sp, sfsp, sizeof(*sp)); 523 if (error != 0) { 524 vfs_unbusy(mp); 525 return (error); 526 } 527 } 528 sfsp++; 529 } 530 count++; 531 mtx_lock(&mountlist_mtx); 532 nmp = TAILQ_NEXT(mp, mnt_list); 533 vfs_unbusy(mp); 534 } 535 mtx_unlock(&mountlist_mtx); 536 if (sfsp && count > maxcount) 537 td->td_retval[0] = maxcount; 538 else 539 td->td_retval[0] = count; 540 return (0); 541} 542 543#ifdef COMPAT_FREEBSD4 544/* 545 * Get old format filesystem statistics. 546 */ 547static void cvtstatfs(struct statfs *, struct ostatfs *); 548 549#ifndef _SYS_SYSPROTO_H_ 550struct freebsd4_statfs_args { 551 char *path; 552 struct ostatfs *buf; 553}; 554#endif 555int 556freebsd4_statfs(td, uap) 557 struct thread *td; 558 struct freebsd4_statfs_args /* { 559 char *path; 560 struct ostatfs *buf; 561 } */ *uap; 562{ 563 struct ostatfs osb; 564 struct statfs sf; 565 int error; 566 567 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 568 if (error != 0) 569 return (error); 570 cvtstatfs(&sf, &osb); 571 return (copyout(&osb, uap->buf, sizeof(osb))); 572} 573 574/* 575 * Get filesystem statistics. 
576 */ 577#ifndef _SYS_SYSPROTO_H_ 578struct freebsd4_fstatfs_args { 579 int fd; 580 struct ostatfs *buf; 581}; 582#endif 583int 584freebsd4_fstatfs(td, uap) 585 struct thread *td; 586 struct freebsd4_fstatfs_args /* { 587 int fd; 588 struct ostatfs *buf; 589 } */ *uap; 590{ 591 struct ostatfs osb; 592 struct statfs sf; 593 int error; 594 595 error = kern_fstatfs(td, uap->fd, &sf); 596 if (error != 0) 597 return (error); 598 cvtstatfs(&sf, &osb); 599 return (copyout(&osb, uap->buf, sizeof(osb))); 600} 601 602/* 603 * Get statistics on all filesystems. 604 */ 605#ifndef _SYS_SYSPROTO_H_ 606struct freebsd4_getfsstat_args { 607 struct ostatfs *buf; 608 long bufsize; 609 int flags; 610}; 611#endif 612int 613freebsd4_getfsstat(td, uap) 614 struct thread *td; 615 register struct freebsd4_getfsstat_args /* { 616 struct ostatfs *buf; 617 long bufsize; 618 int flags; 619 } */ *uap; 620{ 621 struct statfs *buf, *sp; 622 struct ostatfs osb; 623 size_t count, size; 624 int error; 625 626 count = uap->bufsize / sizeof(struct ostatfs); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 629 if (size > 0) { 630 count = td->td_retval[0]; 631 sp = buf; 632 while (count > 0 && error == 0) { 633 cvtstatfs(sp, &osb); 634 error = copyout(&osb, uap->buf, sizeof(osb)); 635 sp++; 636 uap->buf++; 637 count--; 638 } 639 free(buf, M_TEMP); 640 } 641 return (error); 642} 643 644/* 645 * Implement fstatfs() for (NFS) file handles. 
646 */ 647#ifndef _SYS_SYSPROTO_H_ 648struct freebsd4_fhstatfs_args { 649 struct fhandle *u_fhp; 650 struct ostatfs *buf; 651}; 652#endif 653int 654freebsd4_fhstatfs(td, uap) 655 struct thread *td; 656 struct freebsd4_fhstatfs_args /* { 657 struct fhandle *u_fhp; 658 struct ostatfs *buf; 659 } */ *uap; 660{ 661 struct ostatfs osb; 662 struct statfs sf; 663 fhandle_t fh; 664 int error; 665 666 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 667 if (error != 0) 668 return (error); 669 error = kern_fhstatfs(td, fh, &sf); 670 if (error != 0) 671 return (error); 672 cvtstatfs(&sf, &osb); 673 return (copyout(&osb, uap->buf, sizeof(osb))); 674} 675 676/* 677 * Convert a new format statfs structure to an old format statfs structure. 678 */ 679static void 680cvtstatfs(nsp, osp) 681 struct statfs *nsp; 682 struct ostatfs *osp; 683{ 684 685 statfs_scale_blocks(nsp, LONG_MAX); 686 bzero(osp, sizeof(*osp)); 687 osp->f_bsize = nsp->f_bsize; 688 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 689 osp->f_blocks = nsp->f_blocks; 690 osp->f_bfree = nsp->f_bfree; 691 osp->f_bavail = nsp->f_bavail; 692 osp->f_files = MIN(nsp->f_files, LONG_MAX); 693 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 694 osp->f_owner = nsp->f_owner; 695 osp->f_type = nsp->f_type; 696 osp->f_flags = nsp->f_flags; 697 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 698 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 699 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 700 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 701 strlcpy(osp->f_fstypename, nsp->f_fstypename, 702 MIN(MFSNAMELEN, OMFSNAMELEN)); 703 strlcpy(osp->f_mntonname, nsp->f_mntonname, 704 MIN(MNAMELEN, OMNAMELEN)); 705 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 osp->f_fsid = nsp->f_fsid; 708} 709#endif /* COMPAT_FREEBSD4 */ 710 711/* 712 * Change current working directory to a given file descriptor. 
713 */ 714#ifndef _SYS_SYSPROTO_H_ 715struct fchdir_args { 716 int fd; 717}; 718#endif 719int 720sys_fchdir(td, uap) 721 struct thread *td; 722 struct fchdir_args /* { 723 int fd; 724 } */ *uap; 725{ 726 register struct filedesc *fdp = td->td_proc->p_fd; 727 struct vnode *vp, *tdp, *vpold; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 FILEDESC_XLOCK(fdp); 760 vpold = fdp->fd_cdir; 761 fdp->fd_cdir = vp; 762 FILEDESC_XUNLOCK(fdp); 763 vrele(vpold); 764 return (0); 765} 766 767/* 768 * Change current working directory (``.''). 
769 */ 770#ifndef _SYS_SYSPROTO_H_ 771struct chdir_args { 772 char *path; 773}; 774#endif 775int 776sys_chdir(td, uap) 777 struct thread *td; 778 struct chdir_args /* { 779 char *path; 780 } */ *uap; 781{ 782 783 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 784} 785 786int 787kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 788{ 789 register struct filedesc *fdp = td->td_proc->p_fd; 790 struct nameidata nd; 791 struct vnode *vp; 792 int error; 793 794 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 795 pathseg, path, td); 796 if ((error = namei(&nd)) != 0) 797 return (error); 798 if ((error = change_dir(nd.ni_vp, td)) != 0) { 799 vput(nd.ni_vp); 800 NDFREE(&nd, NDF_ONLY_PNBUF); 801 return (error); 802 } 803 VOP_UNLOCK(nd.ni_vp, 0); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 FILEDESC_XLOCK(fdp); 806 vp = fdp->fd_cdir; 807 fdp->fd_cdir = nd.ni_vp; 808 FILEDESC_XUNLOCK(fdp); 809 vrele(vp); 810 return (0); 811} 812 813/* 814 * Helper function for raised chroot(2) security function: Refuse if 815 * any filedescriptors are open directories. 816 */ 817static int 818chroot_refuse_vdir_fds(fdp) 819 struct filedesc *fdp; 820{ 821 struct vnode *vp; 822 struct file *fp; 823 int fd; 824 825 FILEDESC_LOCK_ASSERT(fdp); 826 827 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 828 fp = fget_locked(fdp, fd); 829 if (fp == NULL) 830 continue; 831 if (fp->f_type == DTYPE_VNODE) { 832 vp = fp->f_vnode; 833 if (vp->v_type == VDIR) 834 return (EPERM); 835 } 836 } 837 return (0); 838} 839 840/* 841 * This sysctl determines if we will allow a process to chroot(2) if it 842 * has a directory open: 843 * 0: disallowed for all processes. 844 * 1: allowed for processes that were not already chroot(2)'ed. 845 * 2: allowed for all processes. 
846 */ 847 848static int chroot_allow_open_directories = 1; 849 850SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 851 &chroot_allow_open_directories, 0, 852 "Allow a process to chroot(2) if it has a directory open"); 853 854/* 855 * Change notion of root (``/'') directory. 856 */ 857#ifndef _SYS_SYSPROTO_H_ 858struct chroot_args { 859 char *path; 860}; 861#endif 862int 863sys_chroot(td, uap) 864 struct thread *td; 865 struct chroot_args /* { 866 char *path; 867 } */ *uap; 868{ 869 struct nameidata nd; 870 int error; 871 872 error = priv_check(td, PRIV_VFS_CHROOT); 873 if (error != 0) 874 return (error); 875 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 876 UIO_USERSPACE, uap->path, td); 877 error = namei(&nd); 878 if (error != 0) 879 goto error; 880 error = change_dir(nd.ni_vp, td); 881 if (error != 0) 882 goto e_vunlock; 883#ifdef MAC 884 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 885 if (error != 0) 886 goto e_vunlock; 887#endif 888 VOP_UNLOCK(nd.ni_vp, 0); 889 error = change_root(nd.ni_vp, td); 890 vrele(nd.ni_vp); 891 NDFREE(&nd, NDF_ONLY_PNBUF); 892 return (error); 893e_vunlock: 894 vput(nd.ni_vp); 895error: 896 NDFREE(&nd, NDF_ONLY_PNBUF); 897 return (error); 898} 899 900/* 901 * Common routine for chroot and chdir. Callers must provide a locked vnode 902 * instance. 903 */ 904int 905change_dir(vp, td) 906 struct vnode *vp; 907 struct thread *td; 908{ 909#ifdef MAC 910 int error; 911#endif 912 913 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 914 if (vp->v_type != VDIR) 915 return (ENOTDIR); 916#ifdef MAC 917 error = mac_vnode_check_chdir(td->td_ucred, vp); 918 if (error != 0) 919 return (error); 920#endif 921 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 922} 923 924/* 925 * Common routine for kern_chroot() and jail_attach(). The caller is 926 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 927 * authorize this operation. 
928 */ 929int 930change_root(vp, td) 931 struct vnode *vp; 932 struct thread *td; 933{ 934 struct filedesc *fdp; 935 struct vnode *oldvp; 936 int error; 937 938 fdp = td->td_proc->p_fd; 939 FILEDESC_XLOCK(fdp); 940 if (chroot_allow_open_directories == 0 || 941 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 942 error = chroot_refuse_vdir_fds(fdp); 943 if (error != 0) { 944 FILEDESC_XUNLOCK(fdp); 945 return (error); 946 } 947 } 948 oldvp = fdp->fd_rdir; 949 fdp->fd_rdir = vp; 950 VREF(fdp->fd_rdir); 951 if (!fdp->fd_jdir) { 952 fdp->fd_jdir = vp; 953 VREF(fdp->fd_jdir); 954 } 955 FILEDESC_XUNLOCK(fdp); 956 vrele(oldvp); 957 return (0); 958} 959 960static __inline void 961flags_to_rights(int flags, cap_rights_t *rightsp) 962{ 963 964 if (flags & O_EXEC) { 965 cap_rights_set(rightsp, CAP_FEXECVE); 966 } else { 967 switch ((flags & O_ACCMODE)) { 968 case O_RDONLY: 969 cap_rights_set(rightsp, CAP_READ); 970 break; 971 case O_RDWR: 972 cap_rights_set(rightsp, CAP_READ); 973 /* FALLTHROUGH */ 974 case O_WRONLY: 975 cap_rights_set(rightsp, CAP_WRITE); 976 if (!(flags & (O_APPEND | O_TRUNC))) 977 cap_rights_set(rightsp, CAP_SEEK); 978 break; 979 } 980 } 981 982 if (flags & O_CREAT) 983 cap_rights_set(rightsp, CAP_CREATE); 984 985 if (flags & O_TRUNC) 986 cap_rights_set(rightsp, CAP_FTRUNCATE); 987 988 if (flags & (O_SYNC | O_FSYNC)) 989 cap_rights_set(rightsp, CAP_FSYNC); 990 991 if (flags & (O_EXLOCK | O_SHLOCK)) 992 cap_rights_set(rightsp, CAP_FLOCK); 993} 994 995/* 996 * Check permissions, allocate an open file structure, and call the device 997 * open routine if any. 
998 */ 999#ifndef _SYS_SYSPROTO_H_ 1000struct open_args { 1001 char *path; 1002 int flags; 1003 int mode; 1004}; 1005#endif 1006int 1007sys_open(td, uap) 1008 struct thread *td; 1009 register struct open_args /* { 1010 char *path; 1011 int flags; 1012 int mode; 1013 } */ *uap; 1014{ 1015 1016 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1017} 1018 1019#ifndef _SYS_SYSPROTO_H_ 1020struct openat_args { 1021 int fd; 1022 char *path; 1023 int flag; 1024 int mode; 1025}; 1026#endif 1027int 1028sys_openat(struct thread *td, struct openat_args *uap) 1029{ 1030 1031 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1032 uap->mode)); 1033} 1034 1035int 1036kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1037 int mode) 1038{ 1039 1040 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1041} 1042 1043int 1044kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1045 int flags, int mode) 1046{ 1047 struct proc *p = td->td_proc; 1048 struct filedesc *fdp = p->p_fd; 1049 struct file *fp; 1050 struct vnode *vp; 1051 struct nameidata nd; 1052 cap_rights_t rights; 1053 int cmode, error, indx; 1054 1055 indx = -1; 1056 1057 AUDIT_ARG_FFLAGS(flags); 1058 AUDIT_ARG_MODE(mode); 1059 /* XXX: audit dirfd */ 1060 cap_rights_init(&rights, CAP_LOOKUP); 1061 flags_to_rights(flags, &rights); 1062 /* 1063 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1064 * may be specified. 1065 */ 1066 if (flags & O_EXEC) { 1067 if (flags & O_ACCMODE) 1068 return (EINVAL); 1069 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1070 return (EINVAL); 1071 } else { 1072 flags = FFLAGS(flags); 1073 } 1074 1075 /* 1076 * Allocate the file descriptor, but don't install a descriptor yet. 1077 */ 1078 error = falloc_noinstall(td, &fp); 1079 if (error != 0) 1080 return (error); 1081 /* 1082 * An extra reference on `fp' has been held for us by 1083 * falloc_noinstall(). 
1084 */ 1085 /* Set the flags early so the finit in devfs can pick them up. */ 1086 fp->f_flag = flags & FMASK; 1087 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1088 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1089 &rights, td); 1090 td->td_dupfd = -1; /* XXX check for fdopen */ 1091 error = vn_open(&nd, &flags, cmode, fp); 1092 if (error != 0) { 1093 /* 1094 * If the vn_open replaced the method vector, something 1095 * wonderous happened deep below and we just pass it up 1096 * pretending we know what we do. 1097 */ 1098 if (error == ENXIO && fp->f_ops != &badfileops) 1099 goto success; 1100 1101 /* 1102 * Handle special fdopen() case. bleh. 1103 * 1104 * Don't do this for relative (capability) lookups; we don't 1105 * understand exactly what would happen, and we don't think 1106 * that it ever should. 1107 */ 1108 if (nd.ni_strictrelative == 0 && 1109 (error == ENODEV || error == ENXIO) && 1110 td->td_dupfd >= 0) { 1111 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1112 &indx); 1113 if (error == 0) 1114 goto success; 1115 } 1116 1117 goto bad; 1118 } 1119 td->td_dupfd = 0; 1120 NDFREE(&nd, NDF_ONLY_PNBUF); 1121 vp = nd.ni_vp; 1122 1123 /* 1124 * Store the vnode, for any f_type. Typically, the vnode use 1125 * count is decremented by direct call to vn_closefile() for 1126 * files that switched type in the cdevsw fdopen() method. 1127 */ 1128 fp->f_vnode = vp; 1129 /* 1130 * If the file wasn't claimed by devfs bind it to the normal 1131 * vnode operations here. 
1132 */ 1133 if (fp->f_ops == &badfileops) { 1134 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1135 fp->f_seqcount = 1; 1136 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1137 DTYPE_VNODE, vp, &vnops); 1138 } 1139 1140 VOP_UNLOCK(vp, 0); 1141 if (flags & O_TRUNC) { 1142 error = fo_truncate(fp, 0, td->td_ucred, td); 1143 if (error != 0) 1144 goto bad; 1145 } 1146success: 1147 /* 1148 * If we haven't already installed the FD (for dupfdopen), do so now. 1149 */ 1150 if (indx == -1) { 1151 struct filecaps *fcaps; 1152 1153#ifdef CAPABILITIES 1154 if (nd.ni_strictrelative == 1) 1155 fcaps = &nd.ni_filecaps; 1156 else 1157#endif 1158 fcaps = NULL; 1159 error = finstall(td, fp, &indx, flags, fcaps); 1160 /* On success finstall() consumes fcaps. */ 1161 if (error != 0) { 1162 filecaps_free(&nd.ni_filecaps); 1163 goto bad; 1164 } 1165 } else { 1166 filecaps_free(&nd.ni_filecaps); 1167 } 1168 1169 /* 1170 * Release our private reference, leaving the one associated with 1171 * the descriptor table intact. 1172 */ 1173 fdrop(fp, td); 1174 td->td_retval[0] = indx; 1175 return (0); 1176bad: 1177 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1178 fdrop(fp, td); 1179 return (error); 1180} 1181 1182#ifdef COMPAT_43 1183/* 1184 * Create a file. 1185 */ 1186#ifndef _SYS_SYSPROTO_H_ 1187struct ocreat_args { 1188 char *path; 1189 int mode; 1190}; 1191#endif 1192int 1193ocreat(td, uap) 1194 struct thread *td; 1195 register struct ocreat_args /* { 1196 char *path; 1197 int mode; 1198 } */ *uap; 1199{ 1200 1201 return (kern_open(td, uap->path, UIO_USERSPACE, 1202 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1203} 1204#endif /* COMPAT_43 */ 1205 1206/* 1207 * Create a special file. 
1208 */ 1209#ifndef _SYS_SYSPROTO_H_ 1210struct mknod_args { 1211 char *path; 1212 int mode; 1213 int dev; 1214}; 1215#endif 1216int 1217sys_mknod(td, uap) 1218 struct thread *td; 1219 register struct mknod_args /* { 1220 char *path; 1221 int mode; 1222 int dev; 1223 } */ *uap; 1224{ 1225 1226 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1227} 1228 1229#ifndef _SYS_SYSPROTO_H_ 1230struct mknodat_args { 1231 int fd; 1232 char *path; 1233 mode_t mode; 1234 dev_t dev; 1235}; 1236#endif 1237int 1238sys_mknodat(struct thread *td, struct mknodat_args *uap) 1239{ 1240 1241 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1242 uap->dev)); 1243} 1244 1245int 1246kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1247 int dev) 1248{ 1249 1250 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1251} 1252 1253int 1254kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1255 int mode, int dev) 1256{ 1257 struct vnode *vp; 1258 struct mount *mp; 1259 struct vattr vattr; 1260 struct nameidata nd; 1261 cap_rights_t rights; 1262 int error, whiteout = 0; 1263 1264 AUDIT_ARG_MODE(mode); 1265 AUDIT_ARG_DEV(dev); 1266 switch (mode & S_IFMT) { 1267 case S_IFCHR: 1268 case S_IFBLK: 1269 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1270 break; 1271 case S_IFMT: 1272 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1273 break; 1274 case S_IFWHT: 1275 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1276 break; 1277 case S_IFIFO: 1278 if (dev == 0) 1279 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1280 /* FALLTHROUGH */ 1281 default: 1282 error = EINVAL; 1283 break; 1284 } 1285 if (error != 0) 1286 return (error); 1287restart: 1288 bwillwrite(); 1289 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1290 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1291 td); 1292 if ((error = namei(&nd)) != 0) 1293 return (error); 1294 vp = nd.ni_vp; 1295 if (vp != 
NULL) { 1296 NDFREE(&nd, NDF_ONLY_PNBUF); 1297 if (vp == nd.ni_dvp) 1298 vrele(nd.ni_dvp); 1299 else 1300 vput(nd.ni_dvp); 1301 vrele(vp); 1302 return (EEXIST); 1303 } else { 1304 VATTR_NULL(&vattr); 1305 vattr.va_mode = (mode & ALLPERMS) & 1306 ~td->td_proc->p_fd->fd_cmask; 1307 vattr.va_rdev = dev; 1308 whiteout = 0; 1309 1310 switch (mode & S_IFMT) { 1311 case S_IFMT: /* used by badsect to flag bad sectors */ 1312 vattr.va_type = VBAD; 1313 break; 1314 case S_IFCHR: 1315 vattr.va_type = VCHR; 1316 break; 1317 case S_IFBLK: 1318 vattr.va_type = VBLK; 1319 break; 1320 case S_IFWHT: 1321 whiteout = 1; 1322 break; 1323 default: 1324 panic("kern_mknod: invalid mode"); 1325 } 1326 } 1327 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1328 NDFREE(&nd, NDF_ONLY_PNBUF); 1329 vput(nd.ni_dvp); 1330 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1331 return (error); 1332 goto restart; 1333 } 1334#ifdef MAC 1335 if (error == 0 && !whiteout) 1336 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1337 &nd.ni_cnd, &vattr); 1338#endif 1339 if (error == 0) { 1340 if (whiteout) 1341 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1342 else { 1343 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1344 &nd.ni_cnd, &vattr); 1345 if (error == 0) 1346 vput(nd.ni_vp); 1347 } 1348 } 1349 NDFREE(&nd, NDF_ONLY_PNBUF); 1350 vput(nd.ni_dvp); 1351 vn_finished_write(mp); 1352 return (error); 1353} 1354 1355/* 1356 * Create a named pipe. 
1357 */ 1358#ifndef _SYS_SYSPROTO_H_ 1359struct mkfifo_args { 1360 char *path; 1361 int mode; 1362}; 1363#endif 1364int 1365sys_mkfifo(td, uap) 1366 struct thread *td; 1367 register struct mkfifo_args /* { 1368 char *path; 1369 int mode; 1370 } */ *uap; 1371{ 1372 1373 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1374} 1375 1376#ifndef _SYS_SYSPROTO_H_ 1377struct mkfifoat_args { 1378 int fd; 1379 char *path; 1380 mode_t mode; 1381}; 1382#endif 1383int 1384sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1385{ 1386 1387 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1388 uap->mode)); 1389} 1390 1391int 1392kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1393{ 1394 1395 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1396} 1397 1398int 1399kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1400 int mode) 1401{ 1402 struct mount *mp; 1403 struct vattr vattr; 1404 struct nameidata nd; 1405 cap_rights_t rights; 1406 int error; 1407 1408 AUDIT_ARG_MODE(mode); 1409restart: 1410 bwillwrite(); 1411 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1412 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1413 td); 1414 if ((error = namei(&nd)) != 0) 1415 return (error); 1416 if (nd.ni_vp != NULL) { 1417 NDFREE(&nd, NDF_ONLY_PNBUF); 1418 if (nd.ni_vp == nd.ni_dvp) 1419 vrele(nd.ni_dvp); 1420 else 1421 vput(nd.ni_dvp); 1422 vrele(nd.ni_vp); 1423 return (EEXIST); 1424 } 1425 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1426 NDFREE(&nd, NDF_ONLY_PNBUF); 1427 vput(nd.ni_dvp); 1428 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1429 return (error); 1430 goto restart; 1431 } 1432 VATTR_NULL(&vattr); 1433 vattr.va_type = VFIFO; 1434 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1435#ifdef MAC 1436 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1437 &vattr); 1438 if (error != 0) 1439 
goto out; 1440#endif 1441 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1442 if (error == 0) 1443 vput(nd.ni_vp); 1444#ifdef MAC 1445out: 1446#endif 1447 vput(nd.ni_dvp); 1448 vn_finished_write(mp); 1449 NDFREE(&nd, NDF_ONLY_PNBUF); 1450 return (error); 1451} 1452 1453/* 1454 * Make a hard file link. 1455 */ 1456#ifndef _SYS_SYSPROTO_H_ 1457struct link_args { 1458 char *path; 1459 char *link; 1460}; 1461#endif 1462int 1463sys_link(td, uap) 1464 struct thread *td; 1465 register struct link_args /* { 1466 char *path; 1467 char *link; 1468 } */ *uap; 1469{ 1470 1471 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1472} 1473 1474#ifndef _SYS_SYSPROTO_H_ 1475struct linkat_args { 1476 int fd1; 1477 char *path1; 1478 int fd2; 1479 char *path2; 1480 int flag; 1481}; 1482#endif 1483int 1484sys_linkat(struct thread *td, struct linkat_args *uap) 1485{ 1486 int flag; 1487 1488 flag = uap->flag; 1489 if (flag & ~AT_SYMLINK_FOLLOW) 1490 return (EINVAL); 1491 1492 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1493 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1494} 1495 1496int hardlink_check_uid = 0; 1497SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1498 &hardlink_check_uid, 0, 1499 "Unprivileged processes cannot create hard links to files owned by other " 1500 "users"); 1501static int hardlink_check_gid = 0; 1502SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1503 &hardlink_check_gid, 0, 1504 "Unprivileged processes cannot create hard links to files owned by other " 1505 "groups"); 1506 1507static int 1508can_hardlink(struct vnode *vp, struct ucred *cred) 1509{ 1510 struct vattr va; 1511 int error; 1512 1513 if (!hardlink_check_uid && !hardlink_check_gid) 1514 return (0); 1515 1516 error = VOP_GETATTR(vp, &va, cred); 1517 if (error != 0) 1518 return (error); 1519 1520 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1521 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1522 if (error != 0) 1523 return (error); 1524 } 1525 1526 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1527 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1528 if (error != 0) 1529 return (error); 1530 } 1531 1532 return (0); 1533} 1534 1535int 1536kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1537{ 1538 1539 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1540} 1541 1542int 1543kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1544 enum uio_seg segflg, int follow) 1545{ 1546 struct vnode *vp; 1547 struct mount *mp; 1548 struct nameidata nd; 1549 cap_rights_t rights; 1550 int error; 1551 1552again: 1553 bwillwrite(); 1554 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1555 1556 if ((error = namei(&nd)) != 0) 1557 return (error); 1558 NDFREE(&nd, NDF_ONLY_PNBUF); 1559 vp = nd.ni_vp; 1560 if (vp->v_type == VDIR) { 1561 vrele(vp); 1562 return (EPERM); /* POSIX */ 1563 } 1564 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1565 NOCACHE, segflg, path2, 
fd2, cap_rights_init(&rights, CAP_LINKAT), 1566 td); 1567 if ((error = namei(&nd)) == 0) { 1568 if (nd.ni_vp != NULL) { 1569 NDFREE(&nd, NDF_ONLY_PNBUF); 1570 if (nd.ni_dvp == nd.ni_vp) 1571 vrele(nd.ni_dvp); 1572 else 1573 vput(nd.ni_dvp); 1574 vrele(nd.ni_vp); 1575 vrele(vp); 1576 return (EEXIST); 1577 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1578 /* 1579 * Cross-device link. No need to recheck 1580 * vp->v_type, since it cannot change, except 1581 * to VBAD. 1582 */ 1583 NDFREE(&nd, NDF_ONLY_PNBUF); 1584 vput(nd.ni_dvp); 1585 vrele(vp); 1586 return (EXDEV); 1587 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1588 error = can_hardlink(vp, td->td_ucred); 1589#ifdef MAC 1590 if (error == 0) 1591 error = mac_vnode_check_link(td->td_ucred, 1592 nd.ni_dvp, vp, &nd.ni_cnd); 1593#endif 1594 if (error != 0) { 1595 vput(vp); 1596 vput(nd.ni_dvp); 1597 NDFREE(&nd, NDF_ONLY_PNBUF); 1598 return (error); 1599 } 1600 error = vn_start_write(vp, &mp, V_NOWAIT); 1601 if (error != 0) { 1602 vput(vp); 1603 vput(nd.ni_dvp); 1604 NDFREE(&nd, NDF_ONLY_PNBUF); 1605 error = vn_start_write(NULL, &mp, 1606 V_XSLEEP | PCATCH); 1607 if (error != 0) 1608 return (error); 1609 goto again; 1610 } 1611 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1612 VOP_UNLOCK(vp, 0); 1613 vput(nd.ni_dvp); 1614 vn_finished_write(mp); 1615 NDFREE(&nd, NDF_ONLY_PNBUF); 1616 } else { 1617 vput(nd.ni_dvp); 1618 NDFREE(&nd, NDF_ONLY_PNBUF); 1619 vrele(vp); 1620 goto again; 1621 } 1622 } 1623 vrele(vp); 1624 return (error); 1625} 1626 1627/* 1628 * Make a symbolic link. 
1629 */ 1630#ifndef _SYS_SYSPROTO_H_ 1631struct symlink_args { 1632 char *path; 1633 char *link; 1634}; 1635#endif 1636int 1637sys_symlink(td, uap) 1638 struct thread *td; 1639 register struct symlink_args /* { 1640 char *path; 1641 char *link; 1642 } */ *uap; 1643{ 1644 1645 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1646} 1647 1648#ifndef _SYS_SYSPROTO_H_ 1649struct symlinkat_args { 1650 char *path; 1651 int fd; 1652 char *path2; 1653}; 1654#endif 1655int 1656sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1657{ 1658 1659 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1660 UIO_USERSPACE)); 1661} 1662 1663int 1664kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1665{ 1666 1667 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1668} 1669 1670int 1671kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1672 enum uio_seg segflg) 1673{ 1674 struct mount *mp; 1675 struct vattr vattr; 1676 char *syspath; 1677 struct nameidata nd; 1678 int error; 1679 cap_rights_t rights; 1680 1681 if (segflg == UIO_SYSSPACE) { 1682 syspath = path1; 1683 } else { 1684 syspath = uma_zalloc(namei_zone, M_WAITOK); 1685 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1686 goto out; 1687 } 1688 AUDIT_ARG_TEXT(syspath); 1689restart: 1690 bwillwrite(); 1691 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1692 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1693 td); 1694 if ((error = namei(&nd)) != 0) 1695 goto out; 1696 if (nd.ni_vp) { 1697 NDFREE(&nd, NDF_ONLY_PNBUF); 1698 if (nd.ni_vp == nd.ni_dvp) 1699 vrele(nd.ni_dvp); 1700 else 1701 vput(nd.ni_dvp); 1702 vrele(nd.ni_vp); 1703 error = EEXIST; 1704 goto out; 1705 } 1706 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1707 NDFREE(&nd, NDF_ONLY_PNBUF); 1708 vput(nd.ni_dvp); 1709 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1710 goto out; 1711 goto restart; 1712 } 
1713 VATTR_NULL(&vattr); 1714 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1715#ifdef MAC 1716 vattr.va_type = VLNK; 1717 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1718 &vattr); 1719 if (error != 0) 1720 goto out2; 1721#endif 1722 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1723 if (error == 0) 1724 vput(nd.ni_vp); 1725#ifdef MAC 1726out2: 1727#endif 1728 NDFREE(&nd, NDF_ONLY_PNBUF); 1729 vput(nd.ni_dvp); 1730 vn_finished_write(mp); 1731out: 1732 if (segflg != UIO_SYSSPACE) 1733 uma_zfree(namei_zone, syspath); 1734 return (error); 1735} 1736 1737/* 1738 * Delete a whiteout from the filesystem. 1739 */ 1740int 1741sys_undelete(td, uap) 1742 struct thread *td; 1743 register struct undelete_args /* { 1744 char *path; 1745 } */ *uap; 1746{ 1747 struct mount *mp; 1748 struct nameidata nd; 1749 int error; 1750 1751restart: 1752 bwillwrite(); 1753 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1754 UIO_USERSPACE, uap->path, td); 1755 error = namei(&nd); 1756 if (error != 0) 1757 return (error); 1758 1759 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1760 NDFREE(&nd, NDF_ONLY_PNBUF); 1761 if (nd.ni_vp == nd.ni_dvp) 1762 vrele(nd.ni_dvp); 1763 else 1764 vput(nd.ni_dvp); 1765 if (nd.ni_vp) 1766 vrele(nd.ni_vp); 1767 return (EEXIST); 1768 } 1769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1773 return (error); 1774 goto restart; 1775 } 1776 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 vput(nd.ni_dvp); 1779 vn_finished_write(mp); 1780 return (error); 1781} 1782 1783/* 1784 * Delete a name from the filesystem. 
1785 */ 1786#ifndef _SYS_SYSPROTO_H_ 1787struct unlink_args { 1788 char *path; 1789}; 1790#endif 1791int 1792sys_unlink(td, uap) 1793 struct thread *td; 1794 struct unlink_args /* { 1795 char *path; 1796 } */ *uap; 1797{ 1798 1799 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1800} 1801 1802#ifndef _SYS_SYSPROTO_H_ 1803struct unlinkat_args { 1804 int fd; 1805 char *path; 1806 int flag; 1807}; 1808#endif 1809int 1810sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1811{ 1812 int flag = uap->flag; 1813 int fd = uap->fd; 1814 char *path = uap->path; 1815 1816 if (flag & ~AT_REMOVEDIR) 1817 return (EINVAL); 1818 1819 if (flag & AT_REMOVEDIR) 1820 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1821 else 1822 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1823} 1824 1825int 1826kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1827{ 1828 1829 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1830} 1831 1832int 1833kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1834 ino_t oldinum) 1835{ 1836 struct mount *mp; 1837 struct vnode *vp; 1838 struct nameidata nd; 1839 struct stat sb; 1840 cap_rights_t rights; 1841 int error; 1842 1843restart: 1844 bwillwrite(); 1845 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1846 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1847 if ((error = namei(&nd)) != 0) 1848 return (error == EINVAL ? EPERM : error); 1849 vp = nd.ni_vp; 1850 if (vp->v_type == VDIR && oldinum == 0) { 1851 error = EPERM; /* POSIX */ 1852 } else if (oldinum != 0 && 1853 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1854 sb.st_ino != oldinum) { 1855 error = EIDRM; /* Identifier removed */ 1856 } else { 1857 /* 1858 * The root of a mounted filesystem cannot be deleted. 1859 * 1860 * XXX: can this only be a VDIR case? 
1861 */ 1862 if (vp->v_vflag & VV_ROOT) 1863 error = EBUSY; 1864 } 1865 if (error == 0) { 1866 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1867 NDFREE(&nd, NDF_ONLY_PNBUF); 1868 vput(nd.ni_dvp); 1869 if (vp == nd.ni_dvp) 1870 vrele(vp); 1871 else 1872 vput(vp); 1873 if ((error = vn_start_write(NULL, &mp, 1874 V_XSLEEP | PCATCH)) != 0) 1875 return (error); 1876 goto restart; 1877 } 1878#ifdef MAC 1879 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1880 &nd.ni_cnd); 1881 if (error != 0) 1882 goto out; 1883#endif 1884 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1885 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1886#ifdef MAC 1887out: 1888#endif 1889 vn_finished_write(mp); 1890 } 1891 NDFREE(&nd, NDF_ONLY_PNBUF); 1892 vput(nd.ni_dvp); 1893 if (vp == nd.ni_dvp) 1894 vrele(vp); 1895 else 1896 vput(vp); 1897 return (error); 1898} 1899 1900/* 1901 * Reposition read/write file offset. 1902 */ 1903#ifndef _SYS_SYSPROTO_H_ 1904struct lseek_args { 1905 int fd; 1906 int pad; 1907 off_t offset; 1908 int whence; 1909}; 1910#endif 1911int 1912sys_lseek(td, uap) 1913 struct thread *td; 1914 register struct lseek_args /* { 1915 int fd; 1916 int pad; 1917 off_t offset; 1918 int whence; 1919 } */ *uap; 1920{ 1921 struct file *fp; 1922 cap_rights_t rights; 1923 int error; 1924 1925 AUDIT_ARG_FD(uap->fd); 1926 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1927 if (error != 0) 1928 return (error); 1929 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1930 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1931 fdrop(fp, td); 1932 return (error); 1933} 1934 1935#if defined(COMPAT_43) 1936/* 1937 * Reposition read/write file offset. 
1938 */ 1939#ifndef _SYS_SYSPROTO_H_ 1940struct olseek_args { 1941 int fd; 1942 long offset; 1943 int whence; 1944}; 1945#endif 1946int 1947olseek(td, uap) 1948 struct thread *td; 1949 register struct olseek_args /* { 1950 int fd; 1951 long offset; 1952 int whence; 1953 } */ *uap; 1954{ 1955 struct lseek_args /* { 1956 int fd; 1957 int pad; 1958 off_t offset; 1959 int whence; 1960 } */ nuap; 1961 1962 nuap.fd = uap->fd; 1963 nuap.offset = uap->offset; 1964 nuap.whence = uap->whence; 1965 return (sys_lseek(td, &nuap)); 1966} 1967#endif /* COMPAT_43 */ 1968 1969/* Version with the 'pad' argument */ 1970int 1971freebsd6_lseek(td, uap) 1972 struct thread *td; 1973 register struct freebsd6_lseek_args *uap; 1974{ 1975 struct lseek_args ouap; 1976 1977 ouap.fd = uap->fd; 1978 ouap.offset = uap->offset; 1979 ouap.whence = uap->whence; 1980 return (sys_lseek(td, &ouap)); 1981} 1982 1983/* 1984 * Check access permissions using passed credentials. 1985 */ 1986static int 1987vn_access(vp, user_flags, cred, td) 1988 struct vnode *vp; 1989 int user_flags; 1990 struct ucred *cred; 1991 struct thread *td; 1992{ 1993 accmode_t accmode; 1994 int error; 1995 1996 /* Flags == 0 means only check for existence. */ 1997 error = 0; 1998 if (user_flags) { 1999 accmode = 0; 2000 if (user_flags & R_OK) 2001 accmode |= VREAD; 2002 if (user_flags & W_OK) 2003 accmode |= VWRITE; 2004 if (user_flags & X_OK) 2005 accmode |= VEXEC; 2006#ifdef MAC 2007 error = mac_vnode_check_access(cred, vp, accmode); 2008 if (error != 0) 2009 return (error); 2010#endif 2011 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2012 error = VOP_ACCESS(vp, accmode, cred, td); 2013 } 2014 return (error); 2015} 2016 2017/* 2018 * Check access permissions using "real" credentials. 
2019 */ 2020#ifndef _SYS_SYSPROTO_H_ 2021struct access_args { 2022 char *path; 2023 int amode; 2024}; 2025#endif 2026int 2027sys_access(td, uap) 2028 struct thread *td; 2029 register struct access_args /* { 2030 char *path; 2031 int amode; 2032 } */ *uap; 2033{ 2034 2035 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2036} 2037 2038#ifndef _SYS_SYSPROTO_H_ 2039struct faccessat_args { 2040 int dirfd; 2041 char *path; 2042 int amode; 2043 int flag; 2044} 2045#endif 2046int 2047sys_faccessat(struct thread *td, struct faccessat_args *uap) 2048{ 2049 2050 if (uap->flag & ~AT_EACCESS) 2051 return (EINVAL); 2052 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2053 uap->amode)); 2054} 2055 2056int 2057kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2058{ 2059 2060 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2061} 2062 2063int 2064kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2065 int flag, int amode) 2066{ 2067 struct ucred *cred, *tmpcred; 2068 struct vnode *vp; 2069 struct nameidata nd; 2070 cap_rights_t rights; 2071 int error; 2072 2073 /* 2074 * Create and modify a temporary credential instead of one that 2075 * is potentially shared. 
2076 */ 2077 if (!(flag & AT_EACCESS)) { 2078 cred = td->td_ucred; 2079 tmpcred = crdup(cred); 2080 tmpcred->cr_uid = cred->cr_ruid; 2081 tmpcred->cr_groups[0] = cred->cr_rgid; 2082 td->td_ucred = tmpcred; 2083 } else 2084 cred = tmpcred = td->td_ucred; 2085 AUDIT_ARG_VALUE(amode); 2086 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2087 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2088 td); 2089 if ((error = namei(&nd)) != 0) 2090 goto out1; 2091 vp = nd.ni_vp; 2092 2093 error = vn_access(vp, amode, tmpcred, td); 2094 NDFREE(&nd, NDF_ONLY_PNBUF); 2095 vput(vp); 2096out1: 2097 if (!(flag & AT_EACCESS)) { 2098 td->td_ucred = cred; 2099 crfree(tmpcred); 2100 } 2101 return (error); 2102} 2103 2104/* 2105 * Check access permissions using "effective" credentials. 2106 */ 2107#ifndef _SYS_SYSPROTO_H_ 2108struct eaccess_args { 2109 char *path; 2110 int amode; 2111}; 2112#endif 2113int 2114sys_eaccess(td, uap) 2115 struct thread *td; 2116 register struct eaccess_args /* { 2117 char *path; 2118 int amode; 2119 } */ *uap; 2120{ 2121 2122 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2123} 2124 2125int 2126kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2127{ 2128 2129 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2130} 2131 2132#if defined(COMPAT_43) 2133/* 2134 * Get file status; this version follows links. 
2135 */ 2136#ifndef _SYS_SYSPROTO_H_ 2137struct ostat_args { 2138 char *path; 2139 struct ostat *ub; 2140}; 2141#endif 2142int 2143ostat(td, uap) 2144 struct thread *td; 2145 register struct ostat_args /* { 2146 char *path; 2147 struct ostat *ub; 2148 } */ *uap; 2149{ 2150 struct stat sb; 2151 struct ostat osb; 2152 int error; 2153 2154 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2155 if (error != 0) 2156 return (error); 2157 cvtstat(&sb, &osb); 2158 return (copyout(&osb, uap->ub, sizeof (osb))); 2159} 2160 2161/* 2162 * Get file status; this version does not follow links. 2163 */ 2164#ifndef _SYS_SYSPROTO_H_ 2165struct olstat_args { 2166 char *path; 2167 struct ostat *ub; 2168}; 2169#endif 2170int 2171olstat(td, uap) 2172 struct thread *td; 2173 register struct olstat_args /* { 2174 char *path; 2175 struct ostat *ub; 2176 } */ *uap; 2177{ 2178 struct stat sb; 2179 struct ostat osb; 2180 int error; 2181 2182 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2183 if (error != 0) 2184 return (error); 2185 cvtstat(&sb, &osb); 2186 return (copyout(&osb, uap->ub, sizeof (osb))); 2187} 2188 2189/* 2190 * Convert from an old to a new stat structure. 2191 */ 2192void 2193cvtstat(st, ost) 2194 struct stat *st; 2195 struct ostat *ost; 2196{ 2197 2198 ost->st_dev = st->st_dev; 2199 ost->st_ino = st->st_ino; 2200 ost->st_mode = st->st_mode; 2201 ost->st_nlink = st->st_nlink; 2202 ost->st_uid = st->st_uid; 2203 ost->st_gid = st->st_gid; 2204 ost->st_rdev = st->st_rdev; 2205 if (st->st_size < (quad_t)1 << 32) 2206 ost->st_size = st->st_size; 2207 else 2208 ost->st_size = -2; 2209 ost->st_atim = st->st_atim; 2210 ost->st_mtim = st->st_mtim; 2211 ost->st_ctim = st->st_ctim; 2212 ost->st_blksize = st->st_blksize; 2213 ost->st_blocks = st->st_blocks; 2214 ost->st_flags = st->st_flags; 2215 ost->st_gen = st->st_gen; 2216} 2217#endif /* COMPAT_43 */ 2218 2219/* 2220 * Get file status; this version follows links. 
2221 */ 2222#ifndef _SYS_SYSPROTO_H_ 2223struct stat_args { 2224 char *path; 2225 struct stat *ub; 2226}; 2227#endif 2228int 2229sys_stat(td, uap) 2230 struct thread *td; 2231 register struct stat_args /* { 2232 char *path; 2233 struct stat *ub; 2234 } */ *uap; 2235{ 2236 struct stat sb; 2237 int error; 2238 2239 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2240 if (error == 0) 2241 error = copyout(&sb, uap->ub, sizeof (sb)); 2242 return (error); 2243} 2244 2245#ifndef _SYS_SYSPROTO_H_ 2246struct fstatat_args { 2247 int fd; 2248 char *path; 2249 struct stat *buf; 2250 int flag; 2251} 2252#endif 2253int 2254sys_fstatat(struct thread *td, struct fstatat_args *uap) 2255{ 2256 struct stat sb; 2257 int error; 2258 2259 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2260 UIO_USERSPACE, &sb); 2261 if (error == 0) 2262 error = copyout(&sb, uap->buf, sizeof (sb)); 2263 return (error); 2264} 2265 2266int 2267kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2268{ 2269 2270 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2271} 2272 2273int 2274kern_statat(struct thread *td, int flag, int fd, char *path, 2275 enum uio_seg pathseg, struct stat *sbp) 2276{ 2277 2278 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2279} 2280 2281int 2282kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2283 enum uio_seg pathseg, struct stat *sbp, 2284 void (*hook)(struct vnode *vp, struct stat *sbp)) 2285{ 2286 struct nameidata nd; 2287 struct stat sb; 2288 cap_rights_t rights; 2289 int error; 2290 2291 if (flag & ~AT_SYMLINK_NOFOLLOW) 2292 return (EINVAL); 2293 2294 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2295 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2296 cap_rights_init(&rights, CAP_FSTAT), td); 2297 2298 if ((error = namei(&nd)) != 0) 2299 return (error); 2300 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2301 if (error == 0) { 2302 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2303 if (S_ISREG(sb.st_mode)) 2304 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2305 if (__predict_false(hook != NULL)) 2306 hook(nd.ni_vp, &sb); 2307 } 2308 NDFREE(&nd, NDF_ONLY_PNBUF); 2309 vput(nd.ni_vp); 2310 if (error != 0) 2311 return (error); 2312 *sbp = sb; 2313#ifdef KTRACE 2314 if (KTRPOINT(td, KTR_STRUCT)) 2315 ktrstat(&sb); 2316#endif 2317 return (0); 2318} 2319 2320/* 2321 * Get file status; this version does not follow links. 2322 */ 2323#ifndef _SYS_SYSPROTO_H_ 2324struct lstat_args { 2325 char *path; 2326 struct stat *ub; 2327}; 2328#endif 2329int 2330sys_lstat(td, uap) 2331 struct thread *td; 2332 register struct lstat_args /* { 2333 char *path; 2334 struct stat *ub; 2335 } */ *uap; 2336{ 2337 struct stat sb; 2338 int error; 2339 2340 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2341 if (error == 0) 2342 error = copyout(&sb, uap->ub, sizeof (sb)); 2343 return (error); 2344} 2345 2346int 2347kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2348{ 2349 2350 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2351 sbp)); 2352} 2353 2354/* 2355 * Implementation of the NetBSD [l]stat() functions. 
2356 */ 2357void 2358cvtnstat(sb, nsb) 2359 struct stat *sb; 2360 struct nstat *nsb; 2361{ 2362 2363 bzero(nsb, sizeof *nsb); 2364 nsb->st_dev = sb->st_dev; 2365 nsb->st_ino = sb->st_ino; 2366 nsb->st_mode = sb->st_mode; 2367 nsb->st_nlink = sb->st_nlink; 2368 nsb->st_uid = sb->st_uid; 2369 nsb->st_gid = sb->st_gid; 2370 nsb->st_rdev = sb->st_rdev; 2371 nsb->st_atim = sb->st_atim; 2372 nsb->st_mtim = sb->st_mtim; 2373 nsb->st_ctim = sb->st_ctim; 2374 nsb->st_size = sb->st_size; 2375 nsb->st_blocks = sb->st_blocks; 2376 nsb->st_blksize = sb->st_blksize; 2377 nsb->st_flags = sb->st_flags; 2378 nsb->st_gen = sb->st_gen; 2379 nsb->st_birthtim = sb->st_birthtim; 2380} 2381 2382#ifndef _SYS_SYSPROTO_H_ 2383struct nstat_args { 2384 char *path; 2385 struct nstat *ub; 2386}; 2387#endif 2388int 2389sys_nstat(td, uap) 2390 struct thread *td; 2391 register struct nstat_args /* { 2392 char *path; 2393 struct nstat *ub; 2394 } */ *uap; 2395{ 2396 struct stat sb; 2397 struct nstat nsb; 2398 int error; 2399 2400 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2401 if (error != 0) 2402 return (error); 2403 cvtnstat(&sb, &nsb); 2404 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2405} 2406 2407/* 2408 * NetBSD lstat. Get file status; this version does not follow links. 2409 */ 2410#ifndef _SYS_SYSPROTO_H_ 2411struct lstat_args { 2412 char *path; 2413 struct stat *ub; 2414}; 2415#endif 2416int 2417sys_nlstat(td, uap) 2418 struct thread *td; 2419 register struct nlstat_args /* { 2420 char *path; 2421 struct nstat *ub; 2422 } */ *uap; 2423{ 2424 struct stat sb; 2425 struct nstat nsb; 2426 int error; 2427 2428 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2429 if (error != 0) 2430 return (error); 2431 cvtnstat(&sb, &nsb); 2432 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2433} 2434 2435/* 2436 * Get configurable pathname variables. 
2437 */ 2438#ifndef _SYS_SYSPROTO_H_ 2439struct pathconf_args { 2440 char *path; 2441 int name; 2442}; 2443#endif 2444int 2445sys_pathconf(td, uap) 2446 struct thread *td; 2447 register struct pathconf_args /* { 2448 char *path; 2449 int name; 2450 } */ *uap; 2451{ 2452 2453 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2454} 2455 2456#ifndef _SYS_SYSPROTO_H_ 2457struct lpathconf_args { 2458 char *path; 2459 int name; 2460}; 2461#endif 2462int 2463sys_lpathconf(td, uap) 2464 struct thread *td; 2465 register struct lpathconf_args /* { 2466 char *path; 2467 int name; 2468 } */ *uap; 2469{ 2470 2471 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2472 NOFOLLOW)); 2473} 2474 2475int 2476kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2477 u_long flags) 2478{ 2479 struct nameidata nd; 2480 int error; 2481 2482 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2483 pathseg, path, td); 2484 if ((error = namei(&nd)) != 0) 2485 return (error); 2486 NDFREE(&nd, NDF_ONLY_PNBUF); 2487 2488 /* If asynchronous I/O is available, it works for all files. */ 2489 if (name == _PC_ASYNC_IO) 2490 td->td_retval[0] = async_io_version; 2491 else 2492 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2493 vput(nd.ni_vp); 2494 return (error); 2495} 2496 2497/* 2498 * Return target name of a symbolic link. 
2499 */ 2500#ifndef _SYS_SYSPROTO_H_ 2501struct readlink_args { 2502 char *path; 2503 char *buf; 2504 size_t count; 2505}; 2506#endif 2507int 2508sys_readlink(td, uap) 2509 struct thread *td; 2510 register struct readlink_args /* { 2511 char *path; 2512 char *buf; 2513 size_t count; 2514 } */ *uap; 2515{ 2516 2517 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2518 UIO_USERSPACE, uap->count)); 2519} 2520#ifndef _SYS_SYSPROTO_H_ 2521struct readlinkat_args { 2522 int fd; 2523 char *path; 2524 char *buf; 2525 size_t bufsize; 2526}; 2527#endif 2528int 2529sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2530{ 2531 2532 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2533 uap->buf, UIO_USERSPACE, uap->bufsize)); 2534} 2535 2536int 2537kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2538 enum uio_seg bufseg, size_t count) 2539{ 2540 2541 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2542 count)); 2543} 2544 2545int 2546kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2547 char *buf, enum uio_seg bufseg, size_t count) 2548{ 2549 struct vnode *vp; 2550 struct iovec aiov; 2551 struct uio auio; 2552 struct nameidata nd; 2553 int error; 2554 2555 if (count > IOSIZE_MAX) 2556 return (EINVAL); 2557 2558 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2559 pathseg, path, fd, td); 2560 2561 if ((error = namei(&nd)) != 0) 2562 return (error); 2563 NDFREE(&nd, NDF_ONLY_PNBUF); 2564 vp = nd.ni_vp; 2565#ifdef MAC 2566 error = mac_vnode_check_readlink(td->td_ucred, vp); 2567 if (error != 0) { 2568 vput(vp); 2569 return (error); 2570 } 2571#endif 2572 if (vp->v_type != VLNK) 2573 error = EINVAL; 2574 else { 2575 aiov.iov_base = buf; 2576 aiov.iov_len = count; 2577 auio.uio_iov = &aiov; 2578 auio.uio_iovcnt = 1; 2579 auio.uio_offset = 0; 2580 auio.uio_rw = UIO_READ; 2581 auio.uio_segflg = bufseg; 2582 auio.uio_td = td; 2583 auio.uio_resid = 
count; 2584 error = VOP_READLINK(vp, &auio, td->td_ucred); 2585 td->td_retval[0] = count - auio.uio_resid; 2586 } 2587 vput(vp); 2588 return (error); 2589} 2590 2591/* 2592 * Common implementation code for chflags() and fchflags(). 2593 */ 2594static int 2595setfflags(td, vp, flags) 2596 struct thread *td; 2597 struct vnode *vp; 2598 u_long flags; 2599{ 2600 struct mount *mp; 2601 struct vattr vattr; 2602 int error; 2603 2604 /* We can't support the value matching VNOVAL. */ 2605 if (flags == VNOVAL) 2606 return (EOPNOTSUPP); 2607 2608 /* 2609 * Prevent non-root users from setting flags on devices. When 2610 * a device is reused, users can retain ownership of the device 2611 * if they are allowed to set flags and programs assume that 2612 * chown can't fail when done as root. 2613 */ 2614 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2615 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2616 if (error != 0) 2617 return (error); 2618 } 2619 2620 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2621 return (error); 2622 VATTR_NULL(&vattr); 2623 vattr.va_flags = flags; 2624 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2625#ifdef MAC 2626 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2627 if (error == 0) 2628#endif 2629 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2630 VOP_UNLOCK(vp, 0); 2631 vn_finished_write(mp); 2632 return (error); 2633} 2634 2635/* 2636 * Change flags of a file given a path name. 
2637 */ 2638#ifndef _SYS_SYSPROTO_H_ 2639struct chflags_args { 2640 const char *path; 2641 u_long flags; 2642}; 2643#endif 2644int 2645sys_chflags(td, uap) 2646 struct thread *td; 2647 register struct chflags_args /* { 2648 const char *path; 2649 u_long flags; 2650 } */ *uap; 2651{ 2652 2653 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2654} 2655 2656#ifndef _SYS_SYSPROTO_H_ 2657struct chflagsat_args { 2658 int fd; 2659 const char *path; 2660 u_long flags; 2661 int atflag; 2662} 2663#endif 2664int 2665sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2666{ 2667 int fd = uap->fd; 2668 const char *path = uap->path; 2669 u_long flags = uap->flags; 2670 int atflag = uap->atflag; 2671 2672 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2673 return (EINVAL); 2674 2675 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2676} 2677 2678static int 2679kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2680 u_long flags) 2681{ 2682 2683 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2684} 2685 2686/* 2687 * Same as chflags() but doesn't follow symlinks. 2688 */ 2689int 2690sys_lchflags(td, uap) 2691 struct thread *td; 2692 register struct lchflags_args /* { 2693 const char *path; 2694 u_long flags; 2695 } */ *uap; 2696{ 2697 2698 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2699 uap->flags, AT_SYMLINK_NOFOLLOW)); 2700} 2701 2702static int 2703kern_chflagsat(struct thread *td, int fd, const char *path, 2704 enum uio_seg pathseg, u_long flags, int atflag) 2705{ 2706 struct nameidata nd; 2707 cap_rights_t rights; 2708 int error, follow; 2709 2710 AUDIT_ARG_FFLAGS(flags); 2711 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2712 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2713 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2714 if ((error = namei(&nd)) != 0) 2715 return (error); 2716 NDFREE(&nd, NDF_ONLY_PNBUF); 2717 error = setfflags(td, nd.ni_vp, flags); 2718 vrele(nd.ni_vp); 2719 return (error); 2720} 2721 2722/* 2723 * Change flags of a file given a file descriptor. 2724 */ 2725#ifndef _SYS_SYSPROTO_H_ 2726struct fchflags_args { 2727 int fd; 2728 u_long flags; 2729}; 2730#endif 2731int 2732sys_fchflags(td, uap) 2733 struct thread *td; 2734 register struct fchflags_args /* { 2735 int fd; 2736 u_long flags; 2737 } */ *uap; 2738{ 2739 struct file *fp; 2740 cap_rights_t rights; 2741 int error; 2742 2743 AUDIT_ARG_FD(uap->fd); 2744 AUDIT_ARG_FFLAGS(uap->flags); 2745 error = getvnode(td->td_proc->p_fd, uap->fd, 2746 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2747 if (error != 0) 2748 return (error); 2749#ifdef AUDIT 2750 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2751 AUDIT_ARG_VNODE1(fp->f_vnode); 2752 VOP_UNLOCK(fp->f_vnode, 0); 2753#endif 2754 error = setfflags(td, fp->f_vnode, uap->flags); 2755 fdrop(fp, td); 2756 return (error); 2757} 2758 2759/* 2760 * Common implementation code for chmod(), lchmod() and fchmod(). 2761 */ 2762int 2763setfmode(td, cred, vp, mode) 2764 struct thread *td; 2765 struct ucred *cred; 2766 struct vnode *vp; 2767 int mode; 2768{ 2769 struct mount *mp; 2770 struct vattr vattr; 2771 int error; 2772 2773 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2774 return (error); 2775 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2776 VATTR_NULL(&vattr); 2777 vattr.va_mode = mode & ALLPERMS; 2778#ifdef MAC 2779 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2780 if (error == 0) 2781#endif 2782 error = VOP_SETATTR(vp, &vattr, cred); 2783 VOP_UNLOCK(vp, 0); 2784 vn_finished_write(mp); 2785 return (error); 2786} 2787 2788/* 2789 * Change mode of a file given path name. 
2790 */ 2791#ifndef _SYS_SYSPROTO_H_ 2792struct chmod_args { 2793 char *path; 2794 int mode; 2795}; 2796#endif 2797int 2798sys_chmod(td, uap) 2799 struct thread *td; 2800 register struct chmod_args /* { 2801 char *path; 2802 int mode; 2803 } */ *uap; 2804{ 2805 2806 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2807} 2808 2809#ifndef _SYS_SYSPROTO_H_ 2810struct fchmodat_args { 2811 int dirfd; 2812 char *path; 2813 mode_t mode; 2814 int flag; 2815} 2816#endif 2817int 2818sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2819{ 2820 int flag = uap->flag; 2821 int fd = uap->fd; 2822 char *path = uap->path; 2823 mode_t mode = uap->mode; 2824 2825 if (flag & ~AT_SYMLINK_NOFOLLOW) 2826 return (EINVAL); 2827 2828 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2829} 2830 2831int 2832kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2833{ 2834 2835 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2836} 2837 2838/* 2839 * Change mode of a file given path name (don't follow links.) 2840 */ 2841#ifndef _SYS_SYSPROTO_H_ 2842struct lchmod_args { 2843 char *path; 2844 int mode; 2845}; 2846#endif 2847int 2848sys_lchmod(td, uap) 2849 struct thread *td; 2850 register struct lchmod_args /* { 2851 char *path; 2852 int mode; 2853 } */ *uap; 2854{ 2855 2856 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2857 uap->mode, AT_SYMLINK_NOFOLLOW)); 2858} 2859 2860int 2861kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2862 mode_t mode, int flag) 2863{ 2864 struct nameidata nd; 2865 cap_rights_t rights; 2866 int error, follow; 2867 2868 AUDIT_ARG_MODE(mode); 2869 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2870 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2871 cap_rights_init(&rights, CAP_FCHMOD), td); 2872 if ((error = namei(&nd)) != 0) 2873 return (error); 2874 NDFREE(&nd, NDF_ONLY_PNBUF); 2875 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2876 vrele(nd.ni_vp); 2877 return (error); 2878} 2879 2880/* 2881 * Change mode of a file given a file descriptor. 2882 */ 2883#ifndef _SYS_SYSPROTO_H_ 2884struct fchmod_args { 2885 int fd; 2886 int mode; 2887}; 2888#endif 2889int 2890sys_fchmod(struct thread *td, struct fchmod_args *uap) 2891{ 2892 struct file *fp; 2893 cap_rights_t rights; 2894 int error; 2895 2896 AUDIT_ARG_FD(uap->fd); 2897 AUDIT_ARG_MODE(uap->mode); 2898 2899 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2900 if (error != 0) 2901 return (error); 2902 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2903 fdrop(fp, td); 2904 return (error); 2905} 2906 2907/* 2908 * Common implementation for chown(), lchown(), and fchown() 2909 */ 2910int 2911setfown(td, cred, vp, uid, gid) 2912 struct thread *td; 2913 struct ucred *cred; 2914 struct vnode *vp; 2915 uid_t uid; 2916 gid_t gid; 2917{ 2918 struct mount *mp; 2919 struct vattr vattr; 2920 int error; 2921 2922 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2923 return (error); 2924 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2925 VATTR_NULL(&vattr); 2926 vattr.va_uid = uid; 2927 vattr.va_gid = gid; 2928#ifdef MAC 2929 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2930 vattr.va_gid); 2931 if (error == 0) 2932#endif 2933 error = VOP_SETATTR(vp, &vattr, cred); 2934 VOP_UNLOCK(vp, 0); 2935 vn_finished_write(mp); 2936 return (error); 2937} 2938 2939/* 2940 * Set ownership given a path name. 
2941 */ 2942#ifndef _SYS_SYSPROTO_H_ 2943struct chown_args { 2944 char *path; 2945 int uid; 2946 int gid; 2947}; 2948#endif 2949int 2950sys_chown(td, uap) 2951 struct thread *td; 2952 register struct chown_args /* { 2953 char *path; 2954 int uid; 2955 int gid; 2956 } */ *uap; 2957{ 2958 2959 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2960} 2961 2962#ifndef _SYS_SYSPROTO_H_ 2963struct fchownat_args { 2964 int fd; 2965 const char * path; 2966 uid_t uid; 2967 gid_t gid; 2968 int flag; 2969}; 2970#endif 2971int 2972sys_fchownat(struct thread *td, struct fchownat_args *uap) 2973{ 2974 int flag; 2975 2976 flag = uap->flag; 2977 if (flag & ~AT_SYMLINK_NOFOLLOW) 2978 return (EINVAL); 2979 2980 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2981 uap->gid, uap->flag)); 2982} 2983 2984int 2985kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2986 int gid) 2987{ 2988 2989 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2990} 2991 2992int 2993kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2994 int uid, int gid, int flag) 2995{ 2996 struct nameidata nd; 2997 cap_rights_t rights; 2998 int error, follow; 2999 3000 AUDIT_ARG_OWNER(uid, gid); 3001 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3002 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3003 cap_rights_init(&rights, CAP_FCHOWN), td); 3004 3005 if ((error = namei(&nd)) != 0) 3006 return (error); 3007 NDFREE(&nd, NDF_ONLY_PNBUF); 3008 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3009 vrele(nd.ni_vp); 3010 return (error); 3011} 3012 3013/* 3014 * Set ownership given a path name, do not cross symlinks. 
3015 */ 3016#ifndef _SYS_SYSPROTO_H_ 3017struct lchown_args { 3018 char *path; 3019 int uid; 3020 int gid; 3021}; 3022#endif 3023int 3024sys_lchown(td, uap) 3025 struct thread *td; 3026 register struct lchown_args /* { 3027 char *path; 3028 int uid; 3029 int gid; 3030 } */ *uap; 3031{ 3032 3033 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3034} 3035 3036int 3037kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3038 int gid) 3039{ 3040 3041 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3042 AT_SYMLINK_NOFOLLOW)); 3043} 3044 3045/* 3046 * Set ownership given a file descriptor. 3047 */ 3048#ifndef _SYS_SYSPROTO_H_ 3049struct fchown_args { 3050 int fd; 3051 int uid; 3052 int gid; 3053}; 3054#endif 3055int 3056sys_fchown(td, uap) 3057 struct thread *td; 3058 register struct fchown_args /* { 3059 int fd; 3060 int uid; 3061 int gid; 3062 } */ *uap; 3063{ 3064 struct file *fp; 3065 cap_rights_t rights; 3066 int error; 3067 3068 AUDIT_ARG_FD(uap->fd); 3069 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3070 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3071 if (error != 0) 3072 return (error); 3073 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3074 fdrop(fp, td); 3075 return (error); 3076} 3077 3078/* 3079 * Common implementation code for utimes(), lutimes(), and futimes(). 
3080 */ 3081static int 3082getutimes(usrtvp, tvpseg, tsp) 3083 const struct timeval *usrtvp; 3084 enum uio_seg tvpseg; 3085 struct timespec *tsp; 3086{ 3087 struct timeval tv[2]; 3088 const struct timeval *tvp; 3089 int error; 3090 3091 if (usrtvp == NULL) { 3092 vfs_timestamp(&tsp[0]); 3093 tsp[1] = tsp[0]; 3094 } else { 3095 if (tvpseg == UIO_SYSSPACE) { 3096 tvp = usrtvp; 3097 } else { 3098 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3099 return (error); 3100 tvp = tv; 3101 } 3102 3103 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3104 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3105 return (EINVAL); 3106 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3107 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3108 } 3109 return (0); 3110} 3111 3112/* 3113 * Common implementation code for utimes(), lutimes(), and futimes(). 3114 */ 3115static int 3116setutimes(td, vp, ts, numtimes, nullflag) 3117 struct thread *td; 3118 struct vnode *vp; 3119 const struct timespec *ts; 3120 int numtimes; 3121 int nullflag; 3122{ 3123 struct mount *mp; 3124 struct vattr vattr; 3125 int error, setbirthtime; 3126 3127 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3128 return (error); 3129 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3130 setbirthtime = 0; 3131 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3132 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3133 setbirthtime = 1; 3134 VATTR_NULL(&vattr); 3135 vattr.va_atime = ts[0]; 3136 vattr.va_mtime = ts[1]; 3137 if (setbirthtime) 3138 vattr.va_birthtime = ts[1]; 3139 if (numtimes > 2) 3140 vattr.va_birthtime = ts[2]; 3141 if (nullflag) 3142 vattr.va_vaflags |= VA_UTIMES_NULL; 3143#ifdef MAC 3144 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3145 vattr.va_mtime); 3146#endif 3147 if (error == 0) 3148 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3149 VOP_UNLOCK(vp, 0); 3150 vn_finished_write(mp); 3151 return (error); 3152} 3153 3154/* 3155 * Set the access and modification times of a 
file. 3156 */ 3157#ifndef _SYS_SYSPROTO_H_ 3158struct utimes_args { 3159 char *path; 3160 struct timeval *tptr; 3161}; 3162#endif 3163int 3164sys_utimes(td, uap) 3165 struct thread *td; 3166 register struct utimes_args /* { 3167 char *path; 3168 struct timeval *tptr; 3169 } */ *uap; 3170{ 3171 3172 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3173 UIO_USERSPACE)); 3174} 3175 3176#ifndef _SYS_SYSPROTO_H_ 3177struct futimesat_args { 3178 int fd; 3179 const char * path; 3180 const struct timeval * times; 3181}; 3182#endif 3183int 3184sys_futimesat(struct thread *td, struct futimesat_args *uap) 3185{ 3186 3187 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3188 uap->times, UIO_USERSPACE)); 3189} 3190 3191int 3192kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3193 struct timeval *tptr, enum uio_seg tptrseg) 3194{ 3195 3196 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3197} 3198 3199int 3200kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3201 struct timeval *tptr, enum uio_seg tptrseg) 3202{ 3203 struct nameidata nd; 3204 struct timespec ts[2]; 3205 cap_rights_t rights; 3206 int error; 3207 3208 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3209 return (error); 3210 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3211 cap_rights_init(&rights, CAP_FUTIMES), td); 3212 3213 if ((error = namei(&nd)) != 0) 3214 return (error); 3215 NDFREE(&nd, NDF_ONLY_PNBUF); 3216 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3217 vrele(nd.ni_vp); 3218 return (error); 3219} 3220 3221/* 3222 * Set the access and modification times of a file. 
3223 */ 3224#ifndef _SYS_SYSPROTO_H_ 3225struct lutimes_args { 3226 char *path; 3227 struct timeval *tptr; 3228}; 3229#endif 3230int 3231sys_lutimes(td, uap) 3232 struct thread *td; 3233 register struct lutimes_args /* { 3234 char *path; 3235 struct timeval *tptr; 3236 } */ *uap; 3237{ 3238 3239 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3240 UIO_USERSPACE)); 3241} 3242 3243int 3244kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3245 struct timeval *tptr, enum uio_seg tptrseg) 3246{ 3247 struct timespec ts[2]; 3248 struct nameidata nd; 3249 int error; 3250 3251 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3252 return (error); 3253 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3254 if ((error = namei(&nd)) != 0) 3255 return (error); 3256 NDFREE(&nd, NDF_ONLY_PNBUF); 3257 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3258 vrele(nd.ni_vp); 3259 return (error); 3260} 3261 3262/* 3263 * Set the access and modification times of a file. 
3264 */ 3265#ifndef _SYS_SYSPROTO_H_ 3266struct futimes_args { 3267 int fd; 3268 struct timeval *tptr; 3269}; 3270#endif 3271int 3272sys_futimes(td, uap) 3273 struct thread *td; 3274 register struct futimes_args /* { 3275 int fd; 3276 struct timeval *tptr; 3277 } */ *uap; 3278{ 3279 3280 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3281} 3282 3283int 3284kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3285 enum uio_seg tptrseg) 3286{ 3287 struct timespec ts[2]; 3288 struct file *fp; 3289 cap_rights_t rights; 3290 int error; 3291 3292 AUDIT_ARG_FD(fd); 3293 error = getutimes(tptr, tptrseg, ts); 3294 if (error != 0) 3295 return (error); 3296 error = getvnode(td->td_proc->p_fd, fd, 3297 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3298 if (error != 0) 3299 return (error); 3300#ifdef AUDIT 3301 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3302 AUDIT_ARG_VNODE1(fp->f_vnode); 3303 VOP_UNLOCK(fp->f_vnode, 0); 3304#endif 3305 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3306 fdrop(fp, td); 3307 return (error); 3308} 3309 3310/* 3311 * Truncate a file given its path name. 
3312 */ 3313#ifndef _SYS_SYSPROTO_H_ 3314struct truncate_args { 3315 char *path; 3316 int pad; 3317 off_t length; 3318}; 3319#endif 3320int 3321sys_truncate(td, uap) 3322 struct thread *td; 3323 register struct truncate_args /* { 3324 char *path; 3325 int pad; 3326 off_t length; 3327 } */ *uap; 3328{ 3329 3330 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3331} 3332 3333int 3334kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3335{ 3336 struct mount *mp; 3337 struct vnode *vp; 3338 void *rl_cookie; 3339 struct vattr vattr; 3340 struct nameidata nd; 3341 int error; 3342 3343 if (length < 0) 3344 return(EINVAL); 3345 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3346 if ((error = namei(&nd)) != 0) 3347 return (error); 3348 vp = nd.ni_vp; 3349 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3350 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3351 vn_rangelock_unlock(vp, rl_cookie); 3352 vrele(vp); 3353 return (error); 3354 } 3355 NDFREE(&nd, NDF_ONLY_PNBUF); 3356 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3357 if (vp->v_type == VDIR) 3358 error = EISDIR; 3359#ifdef MAC 3360 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3361 } 3362#endif 3363 else if ((error = vn_writechk(vp)) == 0 && 3364 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3365 VATTR_NULL(&vattr); 3366 vattr.va_size = length; 3367 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3368 } 3369 VOP_UNLOCK(vp, 0); 3370 vn_finished_write(mp); 3371 vn_rangelock_unlock(vp, rl_cookie); 3372 vrele(vp); 3373 return (error); 3374} 3375 3376#if defined(COMPAT_43) 3377/* 3378 * Truncate a file given its path name. 
3379 */ 3380#ifndef _SYS_SYSPROTO_H_ 3381struct otruncate_args { 3382 char *path; 3383 long length; 3384}; 3385#endif 3386int 3387otruncate(td, uap) 3388 struct thread *td; 3389 register struct otruncate_args /* { 3390 char *path; 3391 long length; 3392 } */ *uap; 3393{ 3394 struct truncate_args /* { 3395 char *path; 3396 int pad; 3397 off_t length; 3398 } */ nuap; 3399 3400 nuap.path = uap->path; 3401 nuap.length = uap->length; 3402 return (sys_truncate(td, &nuap)); 3403} 3404#endif /* COMPAT_43 */ 3405 3406/* Versions with the pad argument */ 3407int 3408freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3409{ 3410 struct truncate_args ouap; 3411 3412 ouap.path = uap->path; 3413 ouap.length = uap->length; 3414 return (sys_truncate(td, &ouap)); 3415} 3416 3417int 3418freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3419{ 3420 struct ftruncate_args ouap; 3421 3422 ouap.fd = uap->fd; 3423 ouap.length = uap->length; 3424 return (sys_ftruncate(td, &ouap)); 3425} 3426 3427/* 3428 * Sync an open file. 
3429 */ 3430#ifndef _SYS_SYSPROTO_H_ 3431struct fsync_args { 3432 int fd; 3433}; 3434#endif 3435int 3436sys_fsync(td, uap) 3437 struct thread *td; 3438 struct fsync_args /* { 3439 int fd; 3440 } */ *uap; 3441{ 3442 struct vnode *vp; 3443 struct mount *mp; 3444 struct file *fp; 3445 cap_rights_t rights; 3446 int error, lock_flags; 3447 3448 AUDIT_ARG_FD(uap->fd); 3449 error = getvnode(td->td_proc->p_fd, uap->fd, 3450 cap_rights_init(&rights, CAP_FSYNC), &fp); 3451 if (error != 0) 3452 return (error); 3453 vp = fp->f_vnode; 3454 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3455 if (error != 0) 3456 goto drop; 3457 if (MNT_SHARED_WRITES(mp) || 3458 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3459 lock_flags = LK_SHARED; 3460 } else { 3461 lock_flags = LK_EXCLUSIVE; 3462 } 3463 vn_lock(vp, lock_flags | LK_RETRY); 3464 AUDIT_ARG_VNODE1(vp); 3465 if (vp->v_object != NULL) { 3466 VM_OBJECT_WLOCK(vp->v_object); 3467 vm_object_page_clean(vp->v_object, 0, 0, 0); 3468 VM_OBJECT_WUNLOCK(vp->v_object); 3469 } 3470 error = VOP_FSYNC(vp, MNT_WAIT, td); 3471 3472 VOP_UNLOCK(vp, 0); 3473 vn_finished_write(mp); 3474drop: 3475 fdrop(fp, td); 3476 return (error); 3477} 3478 3479/* 3480 * Rename files. Source and destination must either both be directories, or 3481 * both not be directories. If target is a directory, it must be empty. 
3482 */ 3483#ifndef _SYS_SYSPROTO_H_ 3484struct rename_args { 3485 char *from; 3486 char *to; 3487}; 3488#endif 3489int 3490sys_rename(td, uap) 3491 struct thread *td; 3492 register struct rename_args /* { 3493 char *from; 3494 char *to; 3495 } */ *uap; 3496{ 3497 3498 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3499} 3500 3501#ifndef _SYS_SYSPROTO_H_ 3502struct renameat_args { 3503 int oldfd; 3504 char *old; 3505 int newfd; 3506 char *new; 3507}; 3508#endif 3509int 3510sys_renameat(struct thread *td, struct renameat_args *uap) 3511{ 3512 3513 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3514 UIO_USERSPACE)); 3515} 3516 3517int 3518kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3519{ 3520 3521 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3522} 3523 3524int 3525kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3526 enum uio_seg pathseg) 3527{ 3528 struct mount *mp = NULL; 3529 struct vnode *tvp, *fvp, *tdvp; 3530 struct nameidata fromnd, tond; 3531 cap_rights_t rights; 3532 int error; 3533 3534again: 3535 bwillwrite(); 3536#ifdef MAC 3537 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3538 AUDITVNODE1, pathseg, old, oldfd, 3539 cap_rights_init(&rights, CAP_RENAMEAT), td); 3540#else 3541 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3542 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3543#endif 3544 3545 if ((error = namei(&fromnd)) != 0) 3546 return (error); 3547#ifdef MAC 3548 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3549 fromnd.ni_vp, &fromnd.ni_cnd); 3550 VOP_UNLOCK(fromnd.ni_dvp, 0); 3551 if (fromnd.ni_dvp != fromnd.ni_vp) 3552 VOP_UNLOCK(fromnd.ni_vp, 0); 3553#endif 3554 fvp = fromnd.ni_vp; 3555 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3556 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3557 cap_rights_init(&rights, CAP_LINKAT), td); 
3558 if (fromnd.ni_vp->v_type == VDIR) 3559 tond.ni_cnd.cn_flags |= WILLBEDIR; 3560 if ((error = namei(&tond)) != 0) { 3561 /* Translate error code for rename("dir1", "dir2/."). */ 3562 if (error == EISDIR && fvp->v_type == VDIR) 3563 error = EINVAL; 3564 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3565 vrele(fromnd.ni_dvp); 3566 vrele(fvp); 3567 goto out1; 3568 } 3569 tdvp = tond.ni_dvp; 3570 tvp = tond.ni_vp; 3571 error = vn_start_write(fvp, &mp, V_NOWAIT); 3572 if (error != 0) { 3573 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3574 NDFREE(&tond, NDF_ONLY_PNBUF); 3575 if (tvp != NULL) 3576 vput(tvp); 3577 if (tdvp == tvp) 3578 vrele(tdvp); 3579 else 3580 vput(tdvp); 3581 vrele(fromnd.ni_dvp); 3582 vrele(fvp); 3583 vrele(tond.ni_startdir); 3584 if (fromnd.ni_startdir != NULL) 3585 vrele(fromnd.ni_startdir); 3586 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3587 if (error != 0) 3588 return (error); 3589 goto again; 3590 } 3591 if (tvp != NULL) { 3592 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3593 error = ENOTDIR; 3594 goto out; 3595 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3596 error = EISDIR; 3597 goto out; 3598 } 3599#ifdef CAPABILITIES 3600 if (newfd != AT_FDCWD) { 3601 /* 3602 * If the target already exists we require CAP_UNLINKAT 3603 * from 'newfd'. 3604 */ 3605 error = cap_check(&tond.ni_filecaps.fc_rights, 3606 cap_rights_init(&rights, CAP_UNLINKAT)); 3607 if (error != 0) 3608 goto out; 3609 } 3610#endif 3611 } 3612 if (fvp == tdvp) { 3613 error = EINVAL; 3614 goto out; 3615 } 3616 /* 3617 * If the source is the same as the destination (that is, if they 3618 * are links to the same vnode), then there is nothing to do. 
3619 */ 3620 if (fvp == tvp) 3621 error = -1; 3622#ifdef MAC 3623 else 3624 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3625 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3626#endif 3627out: 3628 if (error == 0) { 3629 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3630 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3631 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3632 NDFREE(&tond, NDF_ONLY_PNBUF); 3633 } else { 3634 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3635 NDFREE(&tond, NDF_ONLY_PNBUF); 3636 if (tvp != NULL) 3637 vput(tvp); 3638 if (tdvp == tvp) 3639 vrele(tdvp); 3640 else 3641 vput(tdvp); 3642 vrele(fromnd.ni_dvp); 3643 vrele(fvp); 3644 } 3645 vrele(tond.ni_startdir); 3646 vn_finished_write(mp); 3647out1: 3648 if (fromnd.ni_startdir) 3649 vrele(fromnd.ni_startdir); 3650 if (error == -1) 3651 return (0); 3652 return (error); 3653} 3654 3655/* 3656 * Make a directory file. 3657 */ 3658#ifndef _SYS_SYSPROTO_H_ 3659struct mkdir_args { 3660 char *path; 3661 int mode; 3662}; 3663#endif 3664int 3665sys_mkdir(td, uap) 3666 struct thread *td; 3667 register struct mkdir_args /* { 3668 char *path; 3669 int mode; 3670 } */ *uap; 3671{ 3672 3673 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3674} 3675 3676#ifndef _SYS_SYSPROTO_H_ 3677struct mkdirat_args { 3678 int fd; 3679 char *path; 3680 mode_t mode; 3681}; 3682#endif 3683int 3684sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3685{ 3686 3687 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3688} 3689 3690int 3691kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3692{ 3693 3694 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3695} 3696 3697int 3698kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3699 int mode) 3700{ 3701 struct mount *mp; 3702 struct vnode *vp; 3703 struct vattr vattr; 3704 struct nameidata nd; 3705 cap_rights_t rights; 3706 int error; 3707 3708 AUDIT_ARG_MODE(mode); 3709restart: 3710 
bwillwrite(); 3711 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3712 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3713 td); 3714 nd.ni_cnd.cn_flags |= WILLBEDIR; 3715 if ((error = namei(&nd)) != 0) 3716 return (error); 3717 vp = nd.ni_vp; 3718 if (vp != NULL) { 3719 NDFREE(&nd, NDF_ONLY_PNBUF); 3720 /* 3721 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3722 * the strange behaviour of leaving the vnode unlocked 3723 * if the target is the same vnode as the parent. 3724 */ 3725 if (vp == nd.ni_dvp) 3726 vrele(nd.ni_dvp); 3727 else 3728 vput(nd.ni_dvp); 3729 vrele(vp); 3730 return (EEXIST); 3731 } 3732 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3733 NDFREE(&nd, NDF_ONLY_PNBUF); 3734 vput(nd.ni_dvp); 3735 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3736 return (error); 3737 goto restart; 3738 } 3739 VATTR_NULL(&vattr); 3740 vattr.va_type = VDIR; 3741 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3742#ifdef MAC 3743 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3744 &vattr); 3745 if (error != 0) 3746 goto out; 3747#endif 3748 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3749#ifdef MAC 3750out: 3751#endif 3752 NDFREE(&nd, NDF_ONLY_PNBUF); 3753 vput(nd.ni_dvp); 3754 if (error == 0) 3755 vput(nd.ni_vp); 3756 vn_finished_write(mp); 3757 return (error); 3758} 3759 3760/* 3761 * Remove a directory file. 
 */
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
	char	*path;
};
#endif
int
sys_rmdir(td, uap)
	struct thread *td;
	struct rmdir_args /* {
		char *path;
	} */ *uap;
{

	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
}

/* rmdir relative to the current directory. */
int
kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
{

	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
}

/*
 * Remove the directory "path", resolved relative to "fd" (CAP_UNLINKAT
 * required).
 *
 * Returns ENOTDIR when the target is not a directory, EINVAL when it is
 * the same vnode as its parent (rmdir "."), EBUSY when it is the root of
 * a mounted filesystem, otherwise 0 or the error from the lookup, the MAC
 * check or VOP_RMDIR().  When vn_start_write(V_NOWAIT) fails, the lookup
 * results are released, the function sleeps until writes are allowed and
 * restarts.
 */
int
kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error != 0)
		goto out;
#endif
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Same release sequence as at "out", then wait and retry. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	/* The starting offset is copied out only when the read succeeded. */
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Worker for ogetdirentries(): read up to uap->count bytes of directory
 * entries from descriptor uap->fd into the user buffer uap->buf.
 *
 * On success td->td_retval[0] holds the number of bytes transferred and
 * *ploff the file offset at which the read started.  Returns EINVAL when
 * count exceeds 64 KiB or the vnode is not a directory, EBADF when the
 * descriptor is not open for reading, EIO when an entry with a zero
 * d_reclen is encountered, otherwise 0 or the error from the MAC check,
 * VOP_READDIR() or uiomove().
 *
 * Except on big-endian hosts with mnt_maxsymlinklen <= 0, entries are
 * read into a kernel bounce buffer, their d_type/d_namlen bytes are
 * rewritten in place, and the result is copied out with uiomove().
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd,
	    cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
unionread:
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer; rebuilt on every unionread pass. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* Read straight into the user buffer, no entry rewriting. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
#	endif
	{
		/* Read into a kernel buffer of the same size as the request. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would loop forever. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was read from the root of a union mount: switch the file
	 * over to the covered vnode and read that directory from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
4008 */ 4009#ifndef _SYS_SYSPROTO_H_ 4010struct getdirentries_args { 4011 int fd; 4012 char *buf; 4013 u_int count; 4014 long *basep; 4015}; 4016#endif 4017int 4018sys_getdirentries(td, uap) 4019 struct thread *td; 4020 register struct getdirentries_args /* { 4021 int fd; 4022 char *buf; 4023 u_int count; 4024 long *basep; 4025 } */ *uap; 4026{ 4027 long base; 4028 int error; 4029 4030 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4031 NULL, UIO_USERSPACE); 4032 if (error != 0) 4033 return (error); 4034 if (uap->basep != NULL) 4035 error = copyout(&base, uap->basep, sizeof(long)); 4036 return (error); 4037} 4038 4039int 4040kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4041 long *basep, ssize_t *residp, enum uio_seg bufseg) 4042{ 4043 struct vnode *vp; 4044 struct file *fp; 4045 struct uio auio; 4046 struct iovec aiov; 4047 cap_rights_t rights; 4048 long loff; 4049 int error, eofflag; 4050 off_t foffset; 4051 4052 AUDIT_ARG_FD(fd); 4053 if (count > IOSIZE_MAX) 4054 return (EINVAL); 4055 auio.uio_resid = count; 4056 error = getvnode(td->td_proc->p_fd, fd, 4057 cap_rights_init(&rights, CAP_READ), &fp); 4058 if (error != 0) 4059 return (error); 4060 if ((fp->f_flag & FREAD) == 0) { 4061 fdrop(fp, td); 4062 return (EBADF); 4063 } 4064 vp = fp->f_vnode; 4065 foffset = foffset_lock(fp, 0); 4066unionread: 4067 if (vp->v_type != VDIR) { 4068 error = EINVAL; 4069 goto fail; 4070 } 4071 aiov.iov_base = buf; 4072 aiov.iov_len = count; 4073 auio.uio_iov = &aiov; 4074 auio.uio_iovcnt = 1; 4075 auio.uio_rw = UIO_READ; 4076 auio.uio_segflg = bufseg; 4077 auio.uio_td = td; 4078 vn_lock(vp, LK_SHARED | LK_RETRY); 4079 AUDIT_ARG_VNODE1(vp); 4080 loff = auio.uio_offset = foffset; 4081#ifdef MAC 4082 error = mac_vnode_check_readdir(td->td_ucred, vp); 4083 if (error == 0) 4084#endif 4085 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4086 NULL); 4087 foffset = auio.uio_offset; 4088 if (error != 0) { 4089 VOP_UNLOCK(vp, 0); 4090 
goto fail; 4091 } 4092 if (count == auio.uio_resid && 4093 (vp->v_vflag & VV_ROOT) && 4094 (vp->v_mount->mnt_flag & MNT_UNION)) { 4095 struct vnode *tvp = vp; 4096 4097 vp = vp->v_mount->mnt_vnodecovered; 4098 VREF(vp); 4099 fp->f_vnode = vp; 4100 fp->f_data = vp; 4101 foffset = 0; 4102 vput(tvp); 4103 goto unionread; 4104 } 4105 VOP_UNLOCK(vp, 0); 4106 *basep = loff; 4107 if (residp != NULL) 4108 *residp = auio.uio_resid; 4109 td->td_retval[0] = count - auio.uio_resid; 4110fail: 4111 foffset_unlock(fp, foffset, 0); 4112 fdrop(fp, td); 4113 return (error); 4114} 4115 4116#ifndef _SYS_SYSPROTO_H_ 4117struct getdents_args { 4118 int fd; 4119 char *buf; 4120 size_t count; 4121}; 4122#endif 4123int 4124sys_getdents(td, uap) 4125 struct thread *td; 4126 register struct getdents_args /* { 4127 int fd; 4128 char *buf; 4129 u_int count; 4130 } */ *uap; 4131{ 4132 struct getdirentries_args ap; 4133 4134 ap.fd = uap->fd; 4135 ap.buf = uap->buf; 4136 ap.count = uap->count; 4137 ap.basep = NULL; 4138 return (sys_getdirentries(td, &ap)); 4139} 4140 4141/* 4142 * Set the mode mask for creation of filesystem nodes. 4143 */ 4144#ifndef _SYS_SYSPROTO_H_ 4145struct umask_args { 4146 int newmask; 4147}; 4148#endif 4149int 4150sys_umask(td, uap) 4151 struct thread *td; 4152 struct umask_args /* { 4153 int newmask; 4154 } */ *uap; 4155{ 4156 register struct filedesc *fdp; 4157 4158 FILEDESC_XLOCK(td->td_proc->p_fd); 4159 fdp = td->td_proc->p_fd; 4160 td->td_retval[0] = fdp->fd_cmask; 4161 fdp->fd_cmask = uap->newmask & ALLPERMS; 4162 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4163 return (0); 4164} 4165 4166/* 4167 * Void all references to file by ripping underlying filesystem away from 4168 * vnode. 
4169 */ 4170#ifndef _SYS_SYSPROTO_H_ 4171struct revoke_args { 4172 char *path; 4173}; 4174#endif 4175int 4176sys_revoke(td, uap) 4177 struct thread *td; 4178 register struct revoke_args /* { 4179 char *path; 4180 } */ *uap; 4181{ 4182 struct vnode *vp; 4183 struct vattr vattr; 4184 struct nameidata nd; 4185 int error; 4186 4187 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4188 uap->path, td); 4189 if ((error = namei(&nd)) != 0) 4190 return (error); 4191 vp = nd.ni_vp; 4192 NDFREE(&nd, NDF_ONLY_PNBUF); 4193 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4194 error = EINVAL; 4195 goto out; 4196 } 4197#ifdef MAC 4198 error = mac_vnode_check_revoke(td->td_ucred, vp); 4199 if (error != 0) 4200 goto out; 4201#endif 4202 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4203 if (error != 0) 4204 goto out; 4205 if (td->td_ucred->cr_uid != vattr.va_uid) { 4206 error = priv_check(td, PRIV_VFS_ADMIN); 4207 if (error != 0) 4208 goto out; 4209 } 4210 if (vcount(vp) > 1) 4211 VOP_REVOKE(vp, REVOKEALL); 4212out: 4213 vput(vp); 4214 return (error); 4215} 4216 4217/* 4218 * Convert a user file descriptor to a kernel file entry and check that, if it 4219 * is a capability, the correct rights are present. A reference on the file 4220 * entry is held upon returning. 4221 */ 4222int 4223getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4224{ 4225 struct file *fp; 4226 int error; 4227 4228 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4229 if (error != 0) 4230 return (error); 4231 4232 /* 4233 * The file could be not of the vnode type, or it may be not 4234 * yet fully initialized, in which case the f_vnode pointer 4235 * may be set, but f_ops is still badfileops. E.g., 4236 * devfs_open() transiently create such situation to 4237 * facilitate csw d_fdopen(). 4238 * 4239 * Dupfdopen() handling in kern_openat() installs the 4240 * half-baked file into the process descriptor table, allowing 4241 * other thread to dereference it. 
Guard against the race by 4242 * checking f_ops. 4243 */ 4244 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4245 fdrop(fp, curthread); 4246 return (EINVAL); 4247 } 4248 *fpp = fp; 4249 return (0); 4250} 4251 4252 4253/* 4254 * Get an (NFS) file handle. 4255 */ 4256#ifndef _SYS_SYSPROTO_H_ 4257struct lgetfh_args { 4258 char *fname; 4259 fhandle_t *fhp; 4260}; 4261#endif 4262int 4263sys_lgetfh(td, uap) 4264 struct thread *td; 4265 register struct lgetfh_args *uap; 4266{ 4267 struct nameidata nd; 4268 fhandle_t fh; 4269 register struct vnode *vp; 4270 int error; 4271 4272 error = priv_check(td, PRIV_VFS_GETFH); 4273 if (error != 0) 4274 return (error); 4275 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4276 uap->fname, td); 4277 error = namei(&nd); 4278 if (error != 0) 4279 return (error); 4280 NDFREE(&nd, NDF_ONLY_PNBUF); 4281 vp = nd.ni_vp; 4282 bzero(&fh, sizeof(fh)); 4283 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4284 error = VOP_VPTOFH(vp, &fh.fh_fid); 4285 vput(vp); 4286 if (error == 0) 4287 error = copyout(&fh, uap->fhp, sizeof (fh)); 4288 return (error); 4289} 4290 4291#ifndef _SYS_SYSPROTO_H_ 4292struct getfh_args { 4293 char *fname; 4294 fhandle_t *fhp; 4295}; 4296#endif 4297int 4298sys_getfh(td, uap) 4299 struct thread *td; 4300 register struct getfh_args *uap; 4301{ 4302 struct nameidata nd; 4303 fhandle_t fh; 4304 register struct vnode *vp; 4305 int error; 4306 4307 error = priv_check(td, PRIV_VFS_GETFH); 4308 if (error != 0) 4309 return (error); 4310 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4311 uap->fname, td); 4312 error = namei(&nd); 4313 if (error != 0) 4314 return (error); 4315 NDFREE(&nd, NDF_ONLY_PNBUF); 4316 vp = nd.ni_vp; 4317 bzero(&fh, sizeof(fh)); 4318 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4319 error = VOP_VPTOFH(vp, &fh.fh_fid); 4320 vput(vp); 4321 if (error == 0) 4322 error = copyout(&fh, uap->fhp, sizeof (fh)); 4323 return (error); 4324} 4325 4326/* 4327 * syscall for the 
rpc.lockd to use to translate a NFS file handle into an 4328 * open descriptor. 4329 * 4330 * warning: do not remove the priv_check() call or this becomes one giant 4331 * security hole. 4332 */ 4333#ifndef _SYS_SYSPROTO_H_ 4334struct fhopen_args { 4335 const struct fhandle *u_fhp; 4336 int flags; 4337}; 4338#endif 4339int 4340sys_fhopen(td, uap) 4341 struct thread *td; 4342 struct fhopen_args /* { 4343 const struct fhandle *u_fhp; 4344 int flags; 4345 } */ *uap; 4346{ 4347 struct mount *mp; 4348 struct vnode *vp; 4349 struct fhandle fhp; 4350 struct file *fp; 4351 int fmode, error; 4352 int indx; 4353 4354 error = priv_check(td, PRIV_VFS_FHOPEN); 4355 if (error != 0) 4356 return (error); 4357 indx = -1; 4358 fmode = FFLAGS(uap->flags); 4359 /* why not allow a non-read/write open for our lockd? */ 4360 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4361 return (EINVAL); 4362 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4363 if (error != 0) 4364 return(error); 4365 /* find the mount point */ 4366 mp = vfs_busyfs(&fhp.fh_fsid); 4367 if (mp == NULL) 4368 return (ESTALE); 4369 /* now give me my vnode, it gets returned to me locked */ 4370 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4371 vfs_unbusy(mp); 4372 if (error != 0) 4373 return (error); 4374 4375 error = falloc_noinstall(td, &fp); 4376 if (error != 0) { 4377 vput(vp); 4378 return (error); 4379 } 4380 /* 4381 * An extra reference on `fp' has been held for us by 4382 * falloc_noinstall(). 
4383 */ 4384 4385#ifdef INVARIANTS 4386 td->td_dupfd = -1; 4387#endif 4388 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4389 if (error != 0) { 4390 KASSERT(fp->f_ops == &badfileops, 4391 ("VOP_OPEN in fhopen() set f_ops")); 4392 KASSERT(td->td_dupfd < 0, 4393 ("fhopen() encountered fdopen()")); 4394 4395 vput(vp); 4396 goto bad; 4397 } 4398#ifdef INVARIANTS 4399 td->td_dupfd = 0; 4400#endif 4401 fp->f_vnode = vp; 4402 fp->f_seqcount = 1; 4403 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4404 &vnops); 4405 VOP_UNLOCK(vp, 0); 4406 if ((fmode & O_TRUNC) != 0) { 4407 error = fo_truncate(fp, 0, td->td_ucred, td); 4408 if (error != 0) 4409 goto bad; 4410 } 4411 4412 error = finstall(td, fp, &indx, fmode, NULL); 4413bad: 4414 fdrop(fp, td); 4415 td->td_retval[0] = indx; 4416 return (error); 4417} 4418 4419/* 4420 * Stat an (NFS) file handle. 4421 */ 4422#ifndef _SYS_SYSPROTO_H_ 4423struct fhstat_args { 4424 struct fhandle *u_fhp; 4425 struct stat *sb; 4426}; 4427#endif 4428int 4429sys_fhstat(td, uap) 4430 struct thread *td; 4431 register struct fhstat_args /* { 4432 struct fhandle *u_fhp; 4433 struct stat *sb; 4434 } */ *uap; 4435{ 4436 struct stat sb; 4437 struct fhandle fh; 4438 int error; 4439 4440 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4441 if (error != 0) 4442 return (error); 4443 error = kern_fhstat(td, fh, &sb); 4444 if (error == 0) 4445 error = copyout(&sb, uap->sb, sizeof(sb)); 4446 return (error); 4447} 4448 4449int 4450kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4451{ 4452 struct mount *mp; 4453 struct vnode *vp; 4454 int error; 4455 4456 error = priv_check(td, PRIV_VFS_FHSTAT); 4457 if (error != 0) 4458 return (error); 4459 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4460 return (ESTALE); 4461 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4462 vfs_unbusy(mp); 4463 if (error != 0) 4464 return (error); 4465 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4466 vput(vp); 4467 return 
(error); 4468} 4469 4470/* 4471 * Implement fstatfs() for (NFS) file handles. 4472 */ 4473#ifndef _SYS_SYSPROTO_H_ 4474struct fhstatfs_args { 4475 struct fhandle *u_fhp; 4476 struct statfs *buf; 4477}; 4478#endif 4479int 4480sys_fhstatfs(td, uap) 4481 struct thread *td; 4482 struct fhstatfs_args /* { 4483 struct fhandle *u_fhp; 4484 struct statfs *buf; 4485 } */ *uap; 4486{ 4487 struct statfs sf; 4488 fhandle_t fh; 4489 int error; 4490 4491 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4492 if (error != 0) 4493 return (error); 4494 error = kern_fhstatfs(td, fh, &sf); 4495 if (error != 0) 4496 return (error); 4497 return (copyout(&sf, uap->buf, sizeof(sf))); 4498} 4499 4500int 4501kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4502{ 4503 struct statfs *sp; 4504 struct mount *mp; 4505 struct vnode *vp; 4506 int error; 4507 4508 error = priv_check(td, PRIV_VFS_FHSTATFS); 4509 if (error != 0) 4510 return (error); 4511 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4512 return (ESTALE); 4513 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4514 if (error != 0) { 4515 vfs_unbusy(mp); 4516 return (error); 4517 } 4518 vput(vp); 4519 error = prison_canseemount(td->td_ucred, mp); 4520 if (error != 0) 4521 goto out; 4522#ifdef MAC 4523 error = mac_mount_check_stat(td->td_ucred, mp); 4524 if (error != 0) 4525 goto out; 4526#endif 4527 /* 4528 * Set these in case the underlying filesystem fails to do so. 
4529 */ 4530 sp = &mp->mnt_stat; 4531 sp->f_version = STATFS_VERSION; 4532 sp->f_namemax = NAME_MAX; 4533 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4534 error = VFS_STATFS(mp, sp); 4535 if (error == 0) 4536 *buf = *sp; 4537out: 4538 vfs_unbusy(mp); 4539 return (error); 4540} 4541 4542int 4543kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4544{ 4545 struct file *fp; 4546 struct mount *mp; 4547 struct vnode *vp; 4548 cap_rights_t rights; 4549 off_t olen, ooffset; 4550 int error; 4551 4552 if (offset < 0 || len <= 0) 4553 return (EINVAL); 4554 /* Check for wrap. */ 4555 if (offset > OFF_MAX - len) 4556 return (EFBIG); 4557 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4558 if (error != 0) 4559 return (error); 4560 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4561 error = ESPIPE; 4562 goto out; 4563 } 4564 if ((fp->f_flag & FWRITE) == 0) { 4565 error = EBADF; 4566 goto out; 4567 } 4568 if (fp->f_type != DTYPE_VNODE) { 4569 error = ENODEV; 4570 goto out; 4571 } 4572 vp = fp->f_vnode; 4573 if (vp->v_type != VREG) { 4574 error = ENODEV; 4575 goto out; 4576 } 4577 4578 /* Allocating blocks may take a long time, so iterate. 
*/ 4579 for (;;) { 4580 olen = len; 4581 ooffset = offset; 4582 4583 bwillwrite(); 4584 mp = NULL; 4585 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4586 if (error != 0) 4587 break; 4588 error = vn_lock(vp, LK_EXCLUSIVE); 4589 if (error != 0) { 4590 vn_finished_write(mp); 4591 break; 4592 } 4593#ifdef MAC 4594 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4595 if (error == 0) 4596#endif 4597 error = VOP_ALLOCATE(vp, &offset, &len); 4598 VOP_UNLOCK(vp, 0); 4599 vn_finished_write(mp); 4600 4601 if (olen + ooffset != offset + len) { 4602 panic("offset + len changed from %jx/%jx to %jx/%jx", 4603 ooffset, olen, offset, len); 4604 } 4605 if (error != 0 || len == 0) 4606 break; 4607 KASSERT(olen > len, ("Iteration did not make progress?")); 4608 maybe_yield(); 4609 } 4610 out: 4611 fdrop(fp, td); 4612 return (error); 4613} 4614 4615int 4616sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4617{ 4618 4619 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4620 uap->len); 4621 return (0); 4622} 4623 4624/* 4625 * Unlike madvise(2), we do not make a best effort to remember every 4626 * possible caching hint. Instead, we remember the last setting with 4627 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4628 * region of any current setting. 
4629 */ 4630int 4631kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4632 int advice) 4633{ 4634 struct fadvise_info *fa, *new; 4635 struct file *fp; 4636 struct vnode *vp; 4637 cap_rights_t rights; 4638 off_t end; 4639 int error; 4640 4641 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4642 return (EINVAL); 4643 switch (advice) { 4644 case POSIX_FADV_SEQUENTIAL: 4645 case POSIX_FADV_RANDOM: 4646 case POSIX_FADV_NOREUSE: 4647 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4648 break; 4649 case POSIX_FADV_NORMAL: 4650 case POSIX_FADV_WILLNEED: 4651 case POSIX_FADV_DONTNEED: 4652 new = NULL; 4653 break; 4654 default: 4655 return (EINVAL); 4656 } 4657 /* XXX: CAP_POSIX_FADVISE? */ 4658 error = fget(td, fd, cap_rights_init(&rights), &fp); 4659 if (error != 0) 4660 goto out; 4661 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4662 error = ESPIPE; 4663 goto out; 4664 } 4665 if (fp->f_type != DTYPE_VNODE) { 4666 error = ENODEV; 4667 goto out; 4668 } 4669 vp = fp->f_vnode; 4670 if (vp->v_type != VREG) { 4671 error = ENODEV; 4672 goto out; 4673 } 4674 if (len == 0) 4675 end = OFF_MAX; 4676 else 4677 end = offset + len - 1; 4678 switch (advice) { 4679 case POSIX_FADV_SEQUENTIAL: 4680 case POSIX_FADV_RANDOM: 4681 case POSIX_FADV_NOREUSE: 4682 /* 4683 * Try to merge any existing non-standard region with 4684 * this new region if possible, otherwise create a new 4685 * non-standard region for this request. 
4686 */ 4687 mtx_pool_lock(mtxpool_sleep, fp); 4688 fa = fp->f_advice; 4689 if (fa != NULL && fa->fa_advice == advice && 4690 ((fa->fa_start <= end && fa->fa_end >= offset) || 4691 (end != OFF_MAX && fa->fa_start == end + 1) || 4692 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4693 if (offset < fa->fa_start) 4694 fa->fa_start = offset; 4695 if (end > fa->fa_end) 4696 fa->fa_end = end; 4697 } else { 4698 new->fa_advice = advice; 4699 new->fa_start = offset; 4700 new->fa_end = end; 4701 new->fa_prevstart = 0; 4702 new->fa_prevend = 0; 4703 fp->f_advice = new; 4704 new = fa; 4705 } 4706 mtx_pool_unlock(mtxpool_sleep, fp); 4707 break; 4708 case POSIX_FADV_NORMAL: 4709 /* 4710 * If a the "normal" region overlaps with an existing 4711 * non-standard region, trim or remove the 4712 * non-standard region. 4713 */ 4714 mtx_pool_lock(mtxpool_sleep, fp); 4715 fa = fp->f_advice; 4716 if (fa != NULL) { 4717 if (offset <= fa->fa_start && end >= fa->fa_end) { 4718 new = fa; 4719 fp->f_advice = NULL; 4720 } else if (offset <= fa->fa_start && 4721 end >= fa->fa_start) 4722 fa->fa_start = end + 1; 4723 else if (offset <= fa->fa_end && end >= fa->fa_end) 4724 fa->fa_end = offset - 1; 4725 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4726 /* 4727 * If the "normal" region is a middle 4728 * portion of the existing 4729 * non-standard region, just remove 4730 * the whole thing rather than picking 4731 * one side or the other to 4732 * preserve. 
4733 */ 4734 new = fa; 4735 fp->f_advice = NULL; 4736 } 4737 } 4738 mtx_pool_unlock(mtxpool_sleep, fp); 4739 break; 4740 case POSIX_FADV_WILLNEED: 4741 case POSIX_FADV_DONTNEED: 4742 error = VOP_ADVISE(vp, offset, end, advice); 4743 break; 4744 } 4745out: 4746 if (fp != NULL) 4747 fdrop(fp, td); 4748 free(new, M_FADVISE); 4749 return (error); 4750} 4751 4752int 4753sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4754{ 4755 4756 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4757 uap->len, uap->advice); 4758 return (0); 4759} 4760