vfs_syscalls.c revision 301051
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: releng/10.2/sys/kern/vfs_syscalls.c 301051 2016-05-31 16:55:45Z glebius $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_kdtrace.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/capsicum.h> 50#include <sys/disk.h> 51#include <sys/sysent.h> 52#include <sys/malloc.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/sysproto.h> 56#include <sys/namei.h> 57#include <sys/filedesc.h> 58#include <sys/kernel.h> 59#include <sys/fcntl.h> 60#include <sys/file.h> 61#include <sys/filio.h> 62#include <sys/limits.h> 63#include <sys/linker.h> 64#include <sys/rwlock.h> 65#include <sys/sdt.h> 66#include <sys/stat.h> 67#include <sys/sx.h> 68#include <sys/unistd.h> 69#include <sys/vnode.h> 70#include <sys/priv.h> 71#include <sys/proc.h> 72#include <sys/dirent.h> 73#include <sys/jail.h> 74#include <sys/syscallsubr.h> 75#include <sys/sysctl.h> 76#ifdef KTRACE 77#include <sys/ktrace.h> 78#endif 79 80#include <machine/stdarg.h> 81 82#include <security/audit/audit.h> 83#include <security/mac/mac_framework.h> 84 85#include <vm/vm.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/uma.h> 89 90#include <ufs/ufs/quota.h> 91 92MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94SDT_PROVIDER_DEFINE(vfs); 95SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98static int chroot_refuse_vdir_fds(struct filedesc *fdp); 99static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100static int kern_chflags(struct thread *td, const char *path, 101 enum uio_seg pathseg, u_long flags); 102static int kern_chflagsat(struct thread *td, int fd, const char *path, 103 enum uio_seg pathseg, u_long flags, int atflag); 104static int setfflags(struct thread *td, struct vnode *, u_long); 105static int setutimes(struct thread *td, struct vnode *, 106 const struct timespec *, int, int); 107static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 108 struct thread *td); 109 110/* 111 * The module initialization routine for POSIX asynchronous I/O will 112 * set this to the version of AIO that it implements. (Zero means 113 * that it is not implemented.) This value is used here by pathconf() 114 * and in kern_descrip.c by fpathconf(). 115 */ 116int async_io_version; 117 118/* 119 * Sync each mounted filesystem. 120 */ 121#ifndef _SYS_SYSPROTO_H_ 122struct sync_args { 123 int dummy; 124}; 125#endif 126/* ARGSUSED */ 127int 128sys_sync(td, uap) 129 struct thread *td; 130 struct sync_args *uap; 131{ 132 struct mount *mp, *nmp; 133 int save; 134 135 mtx_lock(&mountlist_mtx); 136 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 137 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 138 nmp = TAILQ_NEXT(mp, mnt_list); 139 continue; 140 } 141 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 142 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 143 save = curthread_pflags_set(TDP_SYNCIO); 144 vfs_msync(mp, MNT_NOWAIT); 145 VFS_SYNC(mp, MNT_NOWAIT); 146 curthread_pflags_restore(save); 147 vn_finished_write(mp); 148 } 149 mtx_lock(&mountlist_mtx); 150 nmp = TAILQ_NEXT(mp, mnt_list); 151 vfs_unbusy(mp); 152 } 153 mtx_unlock(&mountlist_mtx); 154 return (0); 155} 156 157/* 158 * Change filesystem quotas. 159 */ 160#ifndef _SYS_SYSPROTO_H_ 161struct quotactl_args { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166}; 167#endif 168int 169sys_quotactl(td, uap) 170 struct thread *td; 171 register struct quotactl_args /* { 172 char *path; 173 int cmd; 174 int uid; 175 caddr_t arg; 176 } */ *uap; 177{ 178 struct mount *mp; 179 struct nameidata nd; 180 int error; 181 182 AUDIT_ARG_CMD(uap->cmd); 183 AUDIT_ARG_UID(uap->uid); 184 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 185 return (EPERM); 186 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 187 uap->path, td); 188 if ((error = namei(&nd)) != 0) 189 return (error); 190 NDFREE(&nd, NDF_ONLY_PNBUF); 191 mp = nd.ni_vp->v_mount; 192 vfs_ref(mp); 193 vput(nd.ni_vp); 194 error = vfs_busy(mp, 0); 195 vfs_rel(mp); 196 if (error != 0) 197 return (error); 198 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 199 200 /* 201 * Since quota on operation typically needs to open quota 202 * file, the Q_QUOTAON handler needs to unbusy the mount point 203 * before calling into namei. Otherwise, unmount might be 204 * started between two vfs_busy() invocations (first is our, 205 * second is from mount point cross-walk code in lookup()), 206 * causing deadlock. 207 * 208 * Require that Q_QUOTAON handles the vfs_busy() reference on 209 * its own, always returning with ubusied mount point. 210 */ 211 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 212 vfs_unbusy(mp); 213 return (error); 214} 215 216/* 217 * Used by statfs conversion routines to scale the block size up if 218 * necessary so that all of the block counts are <= 'max_size'. Note 219 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 220 * value of 'n'. 221 */ 222void 223statfs_scale_blocks(struct statfs *sf, long max_size) 224{ 225 uint64_t count; 226 int shift; 227 228 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 229 230 /* 231 * Attempt to scale the block counts to give a more accurate 232 * overview to userland of the ratio of free space to used 233 * space. To do this, find the largest block count and compute 234 * a divisor that lets it fit into a signed integer <= max_size. 235 */ 236 if (sf->f_bavail < 0) 237 count = -sf->f_bavail; 238 else 239 count = sf->f_bavail; 240 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 241 if (count <= max_size) 242 return; 243 244 count >>= flsl(max_size); 245 shift = 0; 246 while (count > 0) { 247 shift++; 248 count >>=1; 249 } 250 251 sf->f_bsize <<= shift; 252 sf->f_blocks >>= shift; 253 sf->f_bfree >>= shift; 254 sf->f_bavail >>= shift; 255} 256 257/* 258 * Get filesystem statistics. 259 */ 260#ifndef _SYS_SYSPROTO_H_ 261struct statfs_args { 262 char *path; 263 struct statfs *buf; 264}; 265#endif 266int 267sys_statfs(td, uap) 268 struct thread *td; 269 register struct statfs_args /* { 270 char *path; 271 struct statfs *buf; 272 } */ *uap; 273{ 274 struct statfs sf; 275 int error; 276 277 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 278 if (error == 0) 279 error = copyout(&sf, uap->buf, sizeof(sf)); 280 return (error); 281} 282 283int 284kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 285 struct statfs *buf) 286{ 287 struct mount *mp; 288 struct statfs *sp, sb; 289 struct nameidata nd; 290 int error; 291 292 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 293 pathseg, path, td); 294 error = namei(&nd); 295 if (error != 0) 296 return (error); 297 mp = nd.ni_vp->v_mount; 298 vfs_ref(mp); 299 NDFREE(&nd, NDF_ONLY_PNBUF); 300 vput(nd.ni_vp); 301 error = vfs_busy(mp, 0); 302 vfs_rel(mp); 303 if (error != 0) 304 return (error); 305#ifdef MAC 306 error = mac_mount_check_stat(td->td_ucred, mp); 307 if (error != 0) 308 goto out; 309#endif 310 /* 311 * Set these in case the underlying filesystem fails to do so. 312 */ 313 sp = &mp->mnt_stat; 314 sp->f_version = STATFS_VERSION; 315 sp->f_namemax = NAME_MAX; 316 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 317 error = VFS_STATFS(mp, sp); 318 if (error != 0) 319 goto out; 320 if (priv_check(td, PRIV_VFS_GENERATION)) { 321 bcopy(sp, &sb, sizeof(sb)); 322 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 323 prison_enforce_statfs(td->td_ucred, mp, &sb); 324 sp = &sb; 325 } 326 *buf = *sp; 327out: 328 vfs_unbusy(mp); 329 return (error); 330} 331 332/* 333 * Get filesystem statistics. 334 */ 335#ifndef _SYS_SYSPROTO_H_ 336struct fstatfs_args { 337 int fd; 338 struct statfs *buf; 339}; 340#endif 341int 342sys_fstatfs(td, uap) 343 struct thread *td; 344 register struct fstatfs_args /* { 345 int fd; 346 struct statfs *buf; 347 } */ *uap; 348{ 349 struct statfs sf; 350 int error; 351 352 error = kern_fstatfs(td, uap->fd, &sf); 353 if (error == 0) 354 error = copyout(&sf, uap->buf, sizeof(sf)); 355 return (error); 356} 357 358int 359kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 360{ 361 struct file *fp; 362 struct mount *mp; 363 struct statfs *sp, sb; 364 struct vnode *vp; 365 cap_rights_t rights; 366 int error; 367 368 AUDIT_ARG_FD(fd); 369 error = getvnode(td->td_proc->p_fd, fd, 370 cap_rights_init(&rights, CAP_FSTATFS), &fp); 371 if (error != 0) 372 return (error); 373 vp = fp->f_vnode; 374 vn_lock(vp, LK_SHARED | LK_RETRY); 375#ifdef AUDIT 376 AUDIT_ARG_VNODE1(vp); 377#endif 378 mp = vp->v_mount; 379 if (mp) 380 vfs_ref(mp); 381 VOP_UNLOCK(vp, 0); 382 fdrop(fp, td); 383 if (mp == NULL) { 384 error = EBADF; 385 goto out; 386 } 387 error = vfs_busy(mp, 0); 388 vfs_rel(mp); 389 if (error != 0) 390 return (error); 391#ifdef MAC 392 error = mac_mount_check_stat(td->td_ucred, mp); 393 if (error != 0) 394 goto out; 395#endif 396 /* 397 * Set these in case the underlying filesystem fails to do so. 398 */ 399 sp = &mp->mnt_stat; 400 sp->f_version = STATFS_VERSION; 401 sp->f_namemax = NAME_MAX; 402 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 403 error = VFS_STATFS(mp, sp); 404 if (error != 0) 405 goto out; 406 if (priv_check(td, PRIV_VFS_GENERATION)) { 407 bcopy(sp, &sb, sizeof(sb)); 408 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 409 prison_enforce_statfs(td->td_ucred, mp, &sb); 410 sp = &sb; 411 } 412 *buf = *sp; 413out: 414 if (mp) 415 vfs_unbusy(mp); 416 return (error); 417} 418 419/* 420 * Get statistics on all filesystems. 421 */ 422#ifndef _SYS_SYSPROTO_H_ 423struct getfsstat_args { 424 struct statfs *buf; 425 long bufsize; 426 int flags; 427}; 428#endif 429int 430sys_getfsstat(td, uap) 431 struct thread *td; 432 register struct getfsstat_args /* { 433 struct statfs *buf; 434 long bufsize; 435 int flags; 436 } */ *uap; 437{ 438 439 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 440 uap->flags)); 441} 442 443/* 444 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 445 * The caller is responsible for freeing memory which will be allocated 446 * in '*buf'. 447 */ 448int 449kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 450 enum uio_seg bufseg, int flags) 451{ 452 struct mount *mp, *nmp; 453 struct statfs *sfsp, *sp, sb; 454 size_t count, maxcount; 455 int error; 456 457 maxcount = bufsize / sizeof(struct statfs); 458 if (bufsize == 0) 459 sfsp = NULL; 460 else if (bufseg == UIO_USERSPACE) 461 sfsp = *buf; 462 else /* if (bufseg == UIO_SYSSPACE) */ { 463 count = 0; 464 mtx_lock(&mountlist_mtx); 465 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 466 count++; 467 } 468 mtx_unlock(&mountlist_mtx); 469 if (maxcount > count) 470 maxcount = count; 471 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 472 M_WAITOK); 473 } 474 count = 0; 475 mtx_lock(&mountlist_mtx); 476 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 477 if (prison_canseemount(td->td_ucred, mp) != 0) { 478 nmp = TAILQ_NEXT(mp, mnt_list); 479 continue; 480 } 481#ifdef MAC 482 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 continue; 485 } 486#endif 487 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 488 nmp = TAILQ_NEXT(mp, mnt_list); 489 continue; 490 } 491 if (sfsp && count < maxcount) { 492 sp = &mp->mnt_stat; 493 /* 494 * Set these in case the underlying filesystem 495 * fails to do so. 496 */ 497 sp->f_version = STATFS_VERSION; 498 sp->f_namemax = NAME_MAX; 499 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 500 /* 501 * If MNT_NOWAIT or MNT_LAZY is specified, do not 502 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 503 * overrides MNT_WAIT. 504 */ 505 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 506 (flags & MNT_WAIT)) && 507 (error = VFS_STATFS(mp, sp))) { 508 mtx_lock(&mountlist_mtx); 509 nmp = TAILQ_NEXT(mp, mnt_list); 510 vfs_unbusy(mp); 511 continue; 512 } 513 if (priv_check(td, PRIV_VFS_GENERATION)) { 514 bcopy(sp, &sb, sizeof(sb)); 515 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 516 prison_enforce_statfs(td->td_ucred, mp, &sb); 517 sp = &sb; 518 } 519 if (bufseg == UIO_SYSSPACE) 520 bcopy(sp, sfsp, sizeof(*sp)); 521 else /* if (bufseg == UIO_USERSPACE) */ { 522 error = copyout(sp, sfsp, sizeof(*sp)); 523 if (error != 0) { 524 vfs_unbusy(mp); 525 return (error); 526 } 527 } 528 sfsp++; 529 } 530 count++; 531 mtx_lock(&mountlist_mtx); 532 nmp = TAILQ_NEXT(mp, mnt_list); 533 vfs_unbusy(mp); 534 } 535 mtx_unlock(&mountlist_mtx); 536 if (sfsp && count > maxcount) 537 td->td_retval[0] = maxcount; 538 else 539 td->td_retval[0] = count; 540 return (0); 541} 542 543#ifdef COMPAT_FREEBSD4 544/* 545 * Get old format filesystem statistics. 546 */ 547static void cvtstatfs(struct statfs *, struct ostatfs *); 548 549#ifndef _SYS_SYSPROTO_H_ 550struct freebsd4_statfs_args { 551 char *path; 552 struct ostatfs *buf; 553}; 554#endif 555int 556freebsd4_statfs(td, uap) 557 struct thread *td; 558 struct freebsd4_statfs_args /* { 559 char *path; 560 struct ostatfs *buf; 561 } */ *uap; 562{ 563 struct ostatfs osb; 564 struct statfs sf; 565 int error; 566 567 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 568 if (error != 0) 569 return (error); 570 cvtstatfs(&sf, &osb); 571 return (copyout(&osb, uap->buf, sizeof(osb))); 572} 573 574/* 575 * Get filesystem statistics. 576 */ 577#ifndef _SYS_SYSPROTO_H_ 578struct freebsd4_fstatfs_args { 579 int fd; 580 struct ostatfs *buf; 581}; 582#endif 583int 584freebsd4_fstatfs(td, uap) 585 struct thread *td; 586 struct freebsd4_fstatfs_args /* { 587 int fd; 588 struct ostatfs *buf; 589 } */ *uap; 590{ 591 struct ostatfs osb; 592 struct statfs sf; 593 int error; 594 595 error = kern_fstatfs(td, uap->fd, &sf); 596 if (error != 0) 597 return (error); 598 cvtstatfs(&sf, &osb); 599 return (copyout(&osb, uap->buf, sizeof(osb))); 600} 601 602/* 603 * Get statistics on all filesystems. 604 */ 605#ifndef _SYS_SYSPROTO_H_ 606struct freebsd4_getfsstat_args { 607 struct ostatfs *buf; 608 long bufsize; 609 int flags; 610}; 611#endif 612int 613freebsd4_getfsstat(td, uap) 614 struct thread *td; 615 register struct freebsd4_getfsstat_args /* { 616 struct ostatfs *buf; 617 long bufsize; 618 int flags; 619 } */ *uap; 620{ 621 struct statfs *buf, *sp; 622 struct ostatfs osb; 623 size_t count, size; 624 int error; 625 626 count = uap->bufsize / sizeof(struct ostatfs); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 629 if (size > 0) { 630 count = td->td_retval[0]; 631 sp = buf; 632 while (count > 0 && error == 0) { 633 cvtstatfs(sp, &osb); 634 error = copyout(&osb, uap->buf, sizeof(osb)); 635 sp++; 636 uap->buf++; 637 count--; 638 } 639 free(buf, M_TEMP); 640 } 641 return (error); 642} 643 644/* 645 * Implement fstatfs() for (NFS) file handles. 646 */ 647#ifndef _SYS_SYSPROTO_H_ 648struct freebsd4_fhstatfs_args { 649 struct fhandle *u_fhp; 650 struct ostatfs *buf; 651}; 652#endif 653int 654freebsd4_fhstatfs(td, uap) 655 struct thread *td; 656 struct freebsd4_fhstatfs_args /* { 657 struct fhandle *u_fhp; 658 struct ostatfs *buf; 659 } */ *uap; 660{ 661 struct ostatfs osb; 662 struct statfs sf; 663 fhandle_t fh; 664 int error; 665 666 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 667 if (error != 0) 668 return (error); 669 error = kern_fhstatfs(td, fh, &sf); 670 if (error != 0) 671 return (error); 672 cvtstatfs(&sf, &osb); 673 return (copyout(&osb, uap->buf, sizeof(osb))); 674} 675 676/* 677 * Convert a new format statfs structure to an old format statfs structure. 678 */ 679static void 680cvtstatfs(nsp, osp) 681 struct statfs *nsp; 682 struct ostatfs *osp; 683{ 684 685 statfs_scale_blocks(nsp, LONG_MAX); 686 bzero(osp, sizeof(*osp)); 687 osp->f_bsize = nsp->f_bsize; 688 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 689 osp->f_blocks = nsp->f_blocks; 690 osp->f_bfree = nsp->f_bfree; 691 osp->f_bavail = nsp->f_bavail; 692 osp->f_files = MIN(nsp->f_files, LONG_MAX); 693 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 694 osp->f_owner = nsp->f_owner; 695 osp->f_type = nsp->f_type; 696 osp->f_flags = nsp->f_flags; 697 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 698 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 699 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 700 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 701 strlcpy(osp->f_fstypename, nsp->f_fstypename, 702 MIN(MFSNAMELEN, OMFSNAMELEN)); 703 strlcpy(osp->f_mntonname, nsp->f_mntonname, 704 MIN(MNAMELEN, OMNAMELEN)); 705 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 osp->f_fsid = nsp->f_fsid; 708} 709#endif /* COMPAT_FREEBSD4 */ 710 711/* 712 * Change current working directory to a given file descriptor. 713 */ 714#ifndef _SYS_SYSPROTO_H_ 715struct fchdir_args { 716 int fd; 717}; 718#endif 719int 720sys_fchdir(td, uap) 721 struct thread *td; 722 struct fchdir_args /* { 723 int fd; 724 } */ *uap; 725{ 726 register struct filedesc *fdp = td->td_proc->p_fd; 727 struct vnode *vp, *tdp, *vpold; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 FILEDESC_XLOCK(fdp); 760 vpold = fdp->fd_cdir; 761 fdp->fd_cdir = vp; 762 FILEDESC_XUNLOCK(fdp); 763 vrele(vpold); 764 return (0); 765} 766 767/* 768 * Change current working directory (``.''). 769 */ 770#ifndef _SYS_SYSPROTO_H_ 771struct chdir_args { 772 char *path; 773}; 774#endif 775int 776sys_chdir(td, uap) 777 struct thread *td; 778 struct chdir_args /* { 779 char *path; 780 } */ *uap; 781{ 782 783 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 784} 785 786int 787kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 788{ 789 register struct filedesc *fdp = td->td_proc->p_fd; 790 struct nameidata nd; 791 struct vnode *vp; 792 int error; 793 794 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 795 pathseg, path, td); 796 if ((error = namei(&nd)) != 0) 797 return (error); 798 if ((error = change_dir(nd.ni_vp, td)) != 0) { 799 vput(nd.ni_vp); 800 NDFREE(&nd, NDF_ONLY_PNBUF); 801 return (error); 802 } 803 VOP_UNLOCK(nd.ni_vp, 0); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 FILEDESC_XLOCK(fdp); 806 vp = fdp->fd_cdir; 807 fdp->fd_cdir = nd.ni_vp; 808 FILEDESC_XUNLOCK(fdp); 809 vrele(vp); 810 return (0); 811} 812 813/* 814 * Helper function for raised chroot(2) security function: Refuse if 815 * any filedescriptors are open directories. 816 */ 817static int 818chroot_refuse_vdir_fds(fdp) 819 struct filedesc *fdp; 820{ 821 struct vnode *vp; 822 struct file *fp; 823 int fd; 824 825 FILEDESC_LOCK_ASSERT(fdp); 826 827 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 828 fp = fget_locked(fdp, fd); 829 if (fp == NULL) 830 continue; 831 if (fp->f_type == DTYPE_VNODE) { 832 vp = fp->f_vnode; 833 if (vp->v_type == VDIR) 834 return (EPERM); 835 } 836 } 837 return (0); 838} 839 840/* 841 * This sysctl determines if we will allow a process to chroot(2) if it 842 * has a directory open: 843 * 0: disallowed for all processes. 844 * 1: allowed for processes that were not already chroot(2)'ed. 845 * 2: allowed for all processes. 846 */ 847 848static int chroot_allow_open_directories = 1; 849 850SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 851 &chroot_allow_open_directories, 0, 852 "Allow a process to chroot(2) if it has a directory open"); 853 854/* 855 * Change notion of root (``/'') directory. 856 */ 857#ifndef _SYS_SYSPROTO_H_ 858struct chroot_args { 859 char *path; 860}; 861#endif 862int 863sys_chroot(td, uap) 864 struct thread *td; 865 struct chroot_args /* { 866 char *path; 867 } */ *uap; 868{ 869 struct nameidata nd; 870 int error; 871 872 error = priv_check(td, PRIV_VFS_CHROOT); 873 if (error != 0) 874 return (error); 875 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 876 UIO_USERSPACE, uap->path, td); 877 error = namei(&nd); 878 if (error != 0) 879 goto error; 880 error = change_dir(nd.ni_vp, td); 881 if (error != 0) 882 goto e_vunlock; 883#ifdef MAC 884 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 885 if (error != 0) 886 goto e_vunlock; 887#endif 888 VOP_UNLOCK(nd.ni_vp, 0); 889 error = change_root(nd.ni_vp, td); 890 vrele(nd.ni_vp); 891 NDFREE(&nd, NDF_ONLY_PNBUF); 892 return (error); 893e_vunlock: 894 vput(nd.ni_vp); 895error: 896 NDFREE(&nd, NDF_ONLY_PNBUF); 897 return (error); 898} 899 900/* 901 * Common routine for chroot and chdir. Callers must provide a locked vnode 902 * instance. 903 */ 904int 905change_dir(vp, td) 906 struct vnode *vp; 907 struct thread *td; 908{ 909#ifdef MAC 910 int error; 911#endif 912 913 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 914 if (vp->v_type != VDIR) 915 return (ENOTDIR); 916#ifdef MAC 917 error = mac_vnode_check_chdir(td->td_ucred, vp); 918 if (error != 0) 919 return (error); 920#endif 921 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 922} 923 924/* 925 * Common routine for kern_chroot() and jail_attach(). The caller is 926 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 927 * authorize this operation. 928 */ 929int 930change_root(vp, td) 931 struct vnode *vp; 932 struct thread *td; 933{ 934 struct filedesc *fdp; 935 struct vnode *oldvp; 936 int error; 937 938 fdp = td->td_proc->p_fd; 939 FILEDESC_XLOCK(fdp); 940 if (chroot_allow_open_directories == 0 || 941 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 942 error = chroot_refuse_vdir_fds(fdp); 943 if (error != 0) { 944 FILEDESC_XUNLOCK(fdp); 945 return (error); 946 } 947 } 948 oldvp = fdp->fd_rdir; 949 fdp->fd_rdir = vp; 950 VREF(fdp->fd_rdir); 951 if (!fdp->fd_jdir) { 952 fdp->fd_jdir = vp; 953 VREF(fdp->fd_jdir); 954 } 955 FILEDESC_XUNLOCK(fdp); 956 vrele(oldvp); 957 return (0); 958} 959 960static __inline void 961flags_to_rights(int flags, cap_rights_t *rightsp) 962{ 963 964 if (flags & O_EXEC) { 965 cap_rights_set(rightsp, CAP_FEXECVE); 966 } else { 967 switch ((flags & O_ACCMODE)) { 968 case O_RDONLY: 969 cap_rights_set(rightsp, CAP_READ); 970 break; 971 case O_RDWR: 972 cap_rights_set(rightsp, CAP_READ); 973 /* FALLTHROUGH */ 974 case O_WRONLY: 975 cap_rights_set(rightsp, CAP_WRITE); 976 if (!(flags & (O_APPEND | O_TRUNC))) 977 cap_rights_set(rightsp, CAP_SEEK); 978 break; 979 } 980 } 981 982 if (flags & O_CREAT) 983 cap_rights_set(rightsp, CAP_CREATE); 984 985 if (flags & O_TRUNC) 986 cap_rights_set(rightsp, CAP_FTRUNCATE); 987 988 if (flags & (O_SYNC | O_FSYNC)) 989 cap_rights_set(rightsp, CAP_FSYNC); 990 991 if (flags & (O_EXLOCK | O_SHLOCK)) 992 cap_rights_set(rightsp, CAP_FLOCK); 993} 994 995/* 996 * Check permissions, allocate an open file structure, and call the device 997 * open routine if any. 998 */ 999#ifndef _SYS_SYSPROTO_H_ 1000struct open_args { 1001 char *path; 1002 int flags; 1003 int mode; 1004}; 1005#endif 1006int 1007sys_open(td, uap) 1008 struct thread *td; 1009 register struct open_args /* { 1010 char *path; 1011 int flags; 1012 int mode; 1013 } */ *uap; 1014{ 1015 1016 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1017} 1018 1019#ifndef _SYS_SYSPROTO_H_ 1020struct openat_args { 1021 int fd; 1022 char *path; 1023 int flag; 1024 int mode; 1025}; 1026#endif 1027int 1028sys_openat(struct thread *td, struct openat_args *uap) 1029{ 1030 1031 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1032 uap->mode)); 1033} 1034 1035int 1036kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1037 int mode) 1038{ 1039 1040 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1041} 1042 1043int 1044kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1045 int flags, int mode) 1046{ 1047 struct proc *p = td->td_proc; 1048 struct filedesc *fdp = p->p_fd; 1049 struct file *fp; 1050 struct vnode *vp; 1051 struct nameidata nd; 1052 cap_rights_t rights; 1053 int cmode, error, indx; 1054 1055 indx = -1; 1056 1057 AUDIT_ARG_FFLAGS(flags); 1058 AUDIT_ARG_MODE(mode); 1059 /* XXX: audit dirfd */ 1060 cap_rights_init(&rights, CAP_LOOKUP); 1061 flags_to_rights(flags, &rights); 1062 /* 1063 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1064 * may be specified. 1065 */ 1066 if (flags & O_EXEC) { 1067 if (flags & O_ACCMODE) 1068 return (EINVAL); 1069 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1070 return (EINVAL); 1071 } else { 1072 flags = FFLAGS(flags); 1073 } 1074 1075 /* 1076 * Allocate the file descriptor, but don't install a descriptor yet. 1077 */ 1078 error = falloc_noinstall(td, &fp); 1079 if (error != 0) 1080 return (error); 1081 /* 1082 * An extra reference on `fp' has been held for us by 1083 * falloc_noinstall(). 1084 */ 1085 /* Set the flags early so the finit in devfs can pick them up. */ 1086 fp->f_flag = flags & FMASK; 1087 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1088 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1089 &rights, td); 1090 td->td_dupfd = -1; /* XXX check for fdopen */ 1091 error = vn_open(&nd, &flags, cmode, fp); 1092 if (error != 0) { 1093 /* 1094 * If the vn_open replaced the method vector, something 1095 * wonderous happened deep below and we just pass it up 1096 * pretending we know what we do. 1097 */ 1098 if (error == ENXIO && fp->f_ops != &badfileops) 1099 goto success; 1100 1101 /* 1102 * Handle special fdopen() case. bleh. 1103 * 1104 * Don't do this for relative (capability) lookups; we don't 1105 * understand exactly what would happen, and we don't think 1106 * that it ever should. 1107 */ 1108 if (nd.ni_strictrelative == 0 && 1109 (error == ENODEV || error == ENXIO) && 1110 td->td_dupfd >= 0) { 1111 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1112 &indx); 1113 if (error == 0) 1114 goto success; 1115 } 1116 1117 goto bad; 1118 } 1119 td->td_dupfd = 0; 1120 NDFREE(&nd, NDF_ONLY_PNBUF); 1121 vp = nd.ni_vp; 1122 1123 /* 1124 * Store the vnode, for any f_type. Typically, the vnode use 1125 * count is decremented by direct call to vn_closefile() for 1126 * files that switched type in the cdevsw fdopen() method. 1127 */ 1128 fp->f_vnode = vp; 1129 /* 1130 * If the file wasn't claimed by devfs bind it to the normal 1131 * vnode operations here. 1132 */ 1133 if (fp->f_ops == &badfileops) { 1134 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1135 fp->f_seqcount = 1; 1136 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1137 DTYPE_VNODE, vp, &vnops); 1138 } 1139 1140 VOP_UNLOCK(vp, 0); 1141 if (flags & O_TRUNC) { 1142 error = fo_truncate(fp, 0, td->td_ucred, td); 1143 if (error != 0) 1144 goto bad; 1145 } 1146success: 1147 /* 1148 * If we haven't already installed the FD (for dupfdopen), do so now. 1149 */ 1150 if (indx == -1) { 1151 struct filecaps *fcaps; 1152 1153#ifdef CAPABILITIES 1154 if (nd.ni_strictrelative == 1) 1155 fcaps = &nd.ni_filecaps; 1156 else 1157#endif 1158 fcaps = NULL; 1159 error = finstall(td, fp, &indx, flags, fcaps); 1160 /* On success finstall() consumes fcaps. */ 1161 if (error != 0) { 1162 filecaps_free(&nd.ni_filecaps); 1163 goto bad; 1164 } 1165 } else { 1166 filecaps_free(&nd.ni_filecaps); 1167 } 1168 1169 /* 1170 * Release our private reference, leaving the one associated with 1171 * the descriptor table intact. 1172 */ 1173 fdrop(fp, td); 1174 td->td_retval[0] = indx; 1175 return (0); 1176bad: 1177 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1178 fdrop(fp, td); 1179 return (error); 1180} 1181 1182#ifdef COMPAT_43 1183/* 1184 * Create a file. 1185 */ 1186#ifndef _SYS_SYSPROTO_H_ 1187struct ocreat_args { 1188 char *path; 1189 int mode; 1190}; 1191#endif 1192int 1193ocreat(td, uap) 1194 struct thread *td; 1195 register struct ocreat_args /* { 1196 char *path; 1197 int mode; 1198 } */ *uap; 1199{ 1200 1201 return (kern_open(td, uap->path, UIO_USERSPACE, 1202 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1203} 1204#endif /* COMPAT_43 */ 1205 1206/* 1207 * Create a special file. 1208 */ 1209#ifndef _SYS_SYSPROTO_H_ 1210struct mknod_args { 1211 char *path; 1212 int mode; 1213 int dev; 1214}; 1215#endif 1216int 1217sys_mknod(td, uap) 1218 struct thread *td; 1219 register struct mknod_args /* { 1220 char *path; 1221 int mode; 1222 int dev; 1223 } */ *uap; 1224{ 1225 1226 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1227} 1228 1229#ifndef _SYS_SYSPROTO_H_ 1230struct mknodat_args { 1231 int fd; 1232 char *path; 1233 mode_t mode; 1234 dev_t dev; 1235}; 1236#endif 1237int 1238sys_mknodat(struct thread *td, struct mknodat_args *uap) 1239{ 1240 1241 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1242 uap->dev)); 1243} 1244 1245int 1246kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1247 int dev) 1248{ 1249 1250 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1251} 1252 1253int 1254kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1255 int mode, int dev) 1256{ 1257 struct vnode *vp; 1258 struct mount *mp; 1259 struct vattr vattr; 1260 struct nameidata nd; 1261 cap_rights_t rights; 1262 int error, whiteout = 0; 1263 1264 AUDIT_ARG_MODE(mode); 1265 AUDIT_ARG_DEV(dev); 1266 switch (mode & S_IFMT) { 1267 case S_IFCHR: 1268 case S_IFBLK: 1269 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1270 break; 1271 case S_IFMT: 1272 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1273 break; 1274 case S_IFWHT: 1275 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1276 break; 1277 case S_IFIFO: 1278 if (dev == 0) 1279 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1280 /* FALLTHROUGH */ 1281 default: 1282 error = EINVAL; 1283 break; 1284 } 1285 if (error != 0) 1286 return (error); 1287restart: 1288 bwillwrite(); 1289 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1290 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1291 td); 1292 if ((error = namei(&nd)) != 0) 1293 return (error); 1294 vp = nd.ni_vp; 1295 if (vp != NULL) { 1296 NDFREE(&nd, NDF_ONLY_PNBUF); 1297 if (vp == nd.ni_dvp) 1298 vrele(nd.ni_dvp); 1299 else 1300 vput(nd.ni_dvp); 1301 vrele(vp); 1302 return (EEXIST); 1303 } else { 1304 VATTR_NULL(&vattr); 1305 vattr.va_mode = (mode & ALLPERMS) & 1306 ~td->td_proc->p_fd->fd_cmask; 1307 vattr.va_rdev = dev; 1308 whiteout = 0; 1309 1310 switch (mode & S_IFMT) { 1311 case S_IFMT: /* used by badsect to flag bad sectors */ 1312 vattr.va_type = VBAD; 1313 break; 1314 case S_IFCHR: 1315 vattr.va_type = VCHR; 1316 break; 1317 case S_IFBLK: 1318 vattr.va_type = VBLK; 1319 break; 1320 case S_IFWHT: 1321 whiteout = 1; 1322 break; 1323 default: 1324 panic("kern_mknod: invalid mode"); 1325 } 1326 } 1327 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1328 NDFREE(&nd, NDF_ONLY_PNBUF); 1329 vput(nd.ni_dvp); 1330 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1331 return (error); 1332 goto restart; 1333 } 1334#ifdef MAC 1335 if (error == 0 && !whiteout) 1336 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1337 &nd.ni_cnd, &vattr); 1338#endif 1339 if (error == 0) { 1340 if (whiteout) 1341 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1342 else { 1343 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1344 &nd.ni_cnd, &vattr); 1345 if (error == 0) 1346 vput(nd.ni_vp); 1347 } 1348 } 1349 NDFREE(&nd, NDF_ONLY_PNBUF); 1350 vput(nd.ni_dvp); 1351 vn_finished_write(mp); 1352 return (error); 1353} 1354 1355/* 1356 * Create a named pipe. 1357 */ 1358#ifndef _SYS_SYSPROTO_H_ 1359struct mkfifo_args { 1360 char *path; 1361 int mode; 1362}; 1363#endif 1364int 1365sys_mkfifo(td, uap) 1366 struct thread *td; 1367 register struct mkfifo_args /* { 1368 char *path; 1369 int mode; 1370 } */ *uap; 1371{ 1372 1373 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1374} 1375 1376#ifndef _SYS_SYSPROTO_H_ 1377struct mkfifoat_args { 1378 int fd; 1379 char *path; 1380 mode_t mode; 1381}; 1382#endif 1383int 1384sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1385{ 1386 1387 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1388 uap->mode)); 1389} 1390 1391int 1392kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1393{ 1394 1395 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1396} 1397 1398int 1399kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1400 int mode) 1401{ 1402 struct mount *mp; 1403 struct vattr vattr; 1404 struct nameidata nd; 1405 cap_rights_t rights; 1406 int error; 1407 1408 AUDIT_ARG_MODE(mode); 1409restart: 1410 bwillwrite(); 1411 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1412 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1413 td); 1414 if ((error = namei(&nd)) != 0) 1415 return (error); 1416 if (nd.ni_vp != NULL) { 1417 NDFREE(&nd, NDF_ONLY_PNBUF); 1418 if (nd.ni_vp == nd.ni_dvp) 1419 vrele(nd.ni_dvp); 1420 else 1421 vput(nd.ni_dvp); 1422 vrele(nd.ni_vp); 1423 return (EEXIST); 1424 } 1425 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1426 NDFREE(&nd, NDF_ONLY_PNBUF); 1427 vput(nd.ni_dvp); 1428 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1429 return (error); 1430 goto restart; 1431 } 1432 VATTR_NULL(&vattr); 1433 vattr.va_type = VFIFO; 1434 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1435#ifdef MAC 1436 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1437 &vattr); 1438 if (error != 0) 1439 goto out; 1440#endif 1441 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1442 if (error == 0) 1443 vput(nd.ni_vp); 1444#ifdef MAC 1445out: 1446#endif 1447 vput(nd.ni_dvp); 1448 vn_finished_write(mp); 1449 NDFREE(&nd, NDF_ONLY_PNBUF); 1450 return (error); 1451} 1452 1453/* 1454 * Make a hard file link. 1455 */ 1456#ifndef _SYS_SYSPROTO_H_ 1457struct link_args { 1458 char *path; 1459 char *link; 1460}; 1461#endif 1462int 1463sys_link(td, uap) 1464 struct thread *td; 1465 register struct link_args /* { 1466 char *path; 1467 char *link; 1468 } */ *uap; 1469{ 1470 1471 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1472} 1473 1474#ifndef _SYS_SYSPROTO_H_ 1475struct linkat_args { 1476 int fd1; 1477 char *path1; 1478 int fd2; 1479 char *path2; 1480 int flag; 1481}; 1482#endif 1483int 1484sys_linkat(struct thread *td, struct linkat_args *uap) 1485{ 1486 int flag; 1487 1488 flag = uap->flag; 1489 if (flag & ~AT_SYMLINK_FOLLOW) 1490 return (EINVAL); 1491 1492 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1493 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1494} 1495 1496int hardlink_check_uid = 0; 1497SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1498 &hardlink_check_uid, 0, 1499 "Unprivileged processes cannot create hard links to files owned by other " 1500 "users"); 1501static int hardlink_check_gid = 0; 1502SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1503 &hardlink_check_gid, 0, 1504 "Unprivileged processes cannot create hard links to files owned by other " 1505 "groups"); 1506 1507static int 1508can_hardlink(struct vnode *vp, struct ucred *cred) 1509{ 1510 struct vattr va; 1511 int error; 1512 1513 if (!hardlink_check_uid && !hardlink_check_gid) 1514 return (0); 1515 1516 error = VOP_GETATTR(vp, &va, cred); 1517 if (error != 0) 1518 return (error); 1519 1520 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1521 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1522 if (error != 0) 1523 return (error); 1524 } 1525 1526 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1527 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1528 if (error != 0) 1529 return (error); 1530 } 1531 1532 return (0); 1533} 1534 1535int 1536kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1537{ 1538 1539 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1540} 1541 1542int 1543kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1544 enum uio_seg segflg, int follow) 1545{ 1546 struct vnode *vp; 1547 struct mount *mp; 1548 struct nameidata nd; 1549 cap_rights_t rights; 1550 int error; 1551 1552again: 1553 bwillwrite(); 1554 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1555 1556 if ((error = namei(&nd)) != 0) 1557 return (error); 1558 NDFREE(&nd, NDF_ONLY_PNBUF); 1559 vp = nd.ni_vp; 1560 if (vp->v_type == VDIR) { 1561 vrele(vp); 1562 return (EPERM); /* POSIX */ 1563 } 1564 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1565 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1566 td); 1567 if ((error = namei(&nd)) == 0) { 1568 if (nd.ni_vp != NULL) { 1569 NDFREE(&nd, NDF_ONLY_PNBUF); 1570 if (nd.ni_dvp == nd.ni_vp) 1571 vrele(nd.ni_dvp); 1572 else 1573 vput(nd.ni_dvp); 1574 vrele(nd.ni_vp); 1575 vrele(vp); 1576 return (EEXIST); 1577 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1578 /* 1579 * Cross-device link. No need to recheck 1580 * vp->v_type, since it cannot change, except 1581 * to VBAD. 1582 */ 1583 NDFREE(&nd, NDF_ONLY_PNBUF); 1584 vput(nd.ni_dvp); 1585 vrele(vp); 1586 return (EXDEV); 1587 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1588 error = can_hardlink(vp, td->td_ucred); 1589#ifdef MAC 1590 if (error == 0) 1591 error = mac_vnode_check_link(td->td_ucred, 1592 nd.ni_dvp, vp, &nd.ni_cnd); 1593#endif 1594 if (error != 0) { 1595 vput(vp); 1596 vput(nd.ni_dvp); 1597 NDFREE(&nd, NDF_ONLY_PNBUF); 1598 return (error); 1599 } 1600 error = vn_start_write(vp, &mp, V_NOWAIT); 1601 if (error != 0) { 1602 vput(vp); 1603 vput(nd.ni_dvp); 1604 NDFREE(&nd, NDF_ONLY_PNBUF); 1605 error = vn_start_write(NULL, &mp, 1606 V_XSLEEP | PCATCH); 1607 if (error != 0) 1608 return (error); 1609 goto again; 1610 } 1611 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1612 VOP_UNLOCK(vp, 0); 1613 vput(nd.ni_dvp); 1614 vn_finished_write(mp); 1615 NDFREE(&nd, NDF_ONLY_PNBUF); 1616 } else { 1617 vput(nd.ni_dvp); 1618 NDFREE(&nd, NDF_ONLY_PNBUF); 1619 vrele(vp); 1620 goto again; 1621 } 1622 } 1623 vrele(vp); 1624 return (error); 1625} 1626 1627/* 1628 * Make a symbolic link. 1629 */ 1630#ifndef _SYS_SYSPROTO_H_ 1631struct symlink_args { 1632 char *path; 1633 char *link; 1634}; 1635#endif 1636int 1637sys_symlink(td, uap) 1638 struct thread *td; 1639 register struct symlink_args /* { 1640 char *path; 1641 char *link; 1642 } */ *uap; 1643{ 1644 1645 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1646} 1647 1648#ifndef _SYS_SYSPROTO_H_ 1649struct symlinkat_args { 1650 char *path; 1651 int fd; 1652 char *path2; 1653}; 1654#endif 1655int 1656sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1657{ 1658 1659 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1660 UIO_USERSPACE)); 1661} 1662 1663int 1664kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1665{ 1666 1667 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1668} 1669 1670int 1671kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1672 enum uio_seg segflg) 1673{ 1674 struct mount *mp; 1675 struct vattr vattr; 1676 char *syspath; 1677 struct nameidata nd; 1678 int error; 1679 cap_rights_t rights; 1680 1681 if (segflg == UIO_SYSSPACE) { 1682 syspath = path1; 1683 } else { 1684 syspath = uma_zalloc(namei_zone, M_WAITOK); 1685 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1686 goto out; 1687 } 1688 AUDIT_ARG_TEXT(syspath); 1689restart: 1690 bwillwrite(); 1691 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1692 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1693 td); 1694 if ((error = namei(&nd)) != 0) 1695 goto out; 1696 if (nd.ni_vp) { 1697 NDFREE(&nd, NDF_ONLY_PNBUF); 1698 if (nd.ni_vp == nd.ni_dvp) 1699 vrele(nd.ni_dvp); 1700 else 1701 vput(nd.ni_dvp); 1702 vrele(nd.ni_vp); 1703 error = EEXIST; 1704 goto out; 1705 } 1706 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1707 NDFREE(&nd, NDF_ONLY_PNBUF); 1708 vput(nd.ni_dvp); 1709 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1710 goto out; 1711 goto restart; 1712 } 1713 VATTR_NULL(&vattr); 1714 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1715#ifdef MAC 1716 vattr.va_type = VLNK; 1717 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1718 &vattr); 1719 if (error != 0) 1720 goto out2; 1721#endif 1722 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1723 if (error == 0) 1724 vput(nd.ni_vp); 1725#ifdef MAC 1726out2: 1727#endif 1728 NDFREE(&nd, NDF_ONLY_PNBUF); 1729 vput(nd.ni_dvp); 1730 vn_finished_write(mp); 1731out: 1732 if (segflg != UIO_SYSSPACE) 1733 uma_zfree(namei_zone, syspath); 1734 return (error); 1735} 1736 1737/* 1738 * Delete a whiteout from the filesystem. 1739 */ 1740int 1741sys_undelete(td, uap) 1742 struct thread *td; 1743 register struct undelete_args /* { 1744 char *path; 1745 } */ *uap; 1746{ 1747 struct mount *mp; 1748 struct nameidata nd; 1749 int error; 1750 1751restart: 1752 bwillwrite(); 1753 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1754 UIO_USERSPACE, uap->path, td); 1755 error = namei(&nd); 1756 if (error != 0) 1757 return (error); 1758 1759 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1760 NDFREE(&nd, NDF_ONLY_PNBUF); 1761 if (nd.ni_vp == nd.ni_dvp) 1762 vrele(nd.ni_dvp); 1763 else 1764 vput(nd.ni_dvp); 1765 if (nd.ni_vp) 1766 vrele(nd.ni_vp); 1767 return (EEXIST); 1768 } 1769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1773 return (error); 1774 goto restart; 1775 } 1776 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 vput(nd.ni_dvp); 1779 vn_finished_write(mp); 1780 return (error); 1781} 1782 1783/* 1784 * Delete a name from the filesystem. 1785 */ 1786#ifndef _SYS_SYSPROTO_H_ 1787struct unlink_args { 1788 char *path; 1789}; 1790#endif 1791int 1792sys_unlink(td, uap) 1793 struct thread *td; 1794 struct unlink_args /* { 1795 char *path; 1796 } */ *uap; 1797{ 1798 1799 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1800} 1801 1802#ifndef _SYS_SYSPROTO_H_ 1803struct unlinkat_args { 1804 int fd; 1805 char *path; 1806 int flag; 1807}; 1808#endif 1809int 1810sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1811{ 1812 int flag = uap->flag; 1813 int fd = uap->fd; 1814 char *path = uap->path; 1815 1816 if (flag & ~AT_REMOVEDIR) 1817 return (EINVAL); 1818 1819 if (flag & AT_REMOVEDIR) 1820 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1821 else 1822 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1823} 1824 1825int 1826kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1827{ 1828 1829 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1830} 1831 1832int 1833kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1834 ino_t oldinum) 1835{ 1836 struct mount *mp; 1837 struct vnode *vp; 1838 struct nameidata nd; 1839 struct stat sb; 1840 cap_rights_t rights; 1841 int error; 1842 1843restart: 1844 bwillwrite(); 1845 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1846 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1847 if ((error = namei(&nd)) != 0) 1848 return (error == EINVAL ? EPERM : error); 1849 vp = nd.ni_vp; 1850 if (vp->v_type == VDIR && oldinum == 0) { 1851 error = EPERM; /* POSIX */ 1852 } else if (oldinum != 0 && 1853 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1854 sb.st_ino != oldinum) { 1855 error = EIDRM; /* Identifier removed */ 1856 } else { 1857 /* 1858 * The root of a mounted filesystem cannot be deleted. 1859 * 1860 * XXX: can this only be a VDIR case? 1861 */ 1862 if (vp->v_vflag & VV_ROOT) 1863 error = EBUSY; 1864 } 1865 if (error == 0) { 1866 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1867 NDFREE(&nd, NDF_ONLY_PNBUF); 1868 vput(nd.ni_dvp); 1869 if (vp == nd.ni_dvp) 1870 vrele(vp); 1871 else 1872 vput(vp); 1873 if ((error = vn_start_write(NULL, &mp, 1874 V_XSLEEP | PCATCH)) != 0) 1875 return (error); 1876 goto restart; 1877 } 1878#ifdef MAC 1879 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1880 &nd.ni_cnd); 1881 if (error != 0) 1882 goto out; 1883#endif 1884 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1885 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1886#ifdef MAC 1887out: 1888#endif 1889 vn_finished_write(mp); 1890 } 1891 NDFREE(&nd, NDF_ONLY_PNBUF); 1892 vput(nd.ni_dvp); 1893 if (vp == nd.ni_dvp) 1894 vrele(vp); 1895 else 1896 vput(vp); 1897 return (error); 1898} 1899 1900/* 1901 * Reposition read/write file offset. 1902 */ 1903#ifndef _SYS_SYSPROTO_H_ 1904struct lseek_args { 1905 int fd; 1906 int pad; 1907 off_t offset; 1908 int whence; 1909}; 1910#endif 1911int 1912sys_lseek(td, uap) 1913 struct thread *td; 1914 register struct lseek_args /* { 1915 int fd; 1916 int pad; 1917 off_t offset; 1918 int whence; 1919 } */ *uap; 1920{ 1921 struct file *fp; 1922 cap_rights_t rights; 1923 int error; 1924 1925 AUDIT_ARG_FD(uap->fd); 1926 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1927 if (error != 0) 1928 return (error); 1929 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1930 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1931 fdrop(fp, td); 1932 return (error); 1933} 1934 1935#if defined(COMPAT_43) 1936/* 1937 * Reposition read/write file offset. 1938 */ 1939#ifndef _SYS_SYSPROTO_H_ 1940struct olseek_args { 1941 int fd; 1942 long offset; 1943 int whence; 1944}; 1945#endif 1946int 1947olseek(td, uap) 1948 struct thread *td; 1949 register struct olseek_args /* { 1950 int fd; 1951 long offset; 1952 int whence; 1953 } */ *uap; 1954{ 1955 struct lseek_args /* { 1956 int fd; 1957 int pad; 1958 off_t offset; 1959 int whence; 1960 } */ nuap; 1961 1962 nuap.fd = uap->fd; 1963 nuap.offset = uap->offset; 1964 nuap.whence = uap->whence; 1965 return (sys_lseek(td, &nuap)); 1966} 1967#endif /* COMPAT_43 */ 1968 1969/* Version with the 'pad' argument */ 1970int 1971freebsd6_lseek(td, uap) 1972 struct thread *td; 1973 register struct freebsd6_lseek_args *uap; 1974{ 1975 struct lseek_args ouap; 1976 1977 ouap.fd = uap->fd; 1978 ouap.offset = uap->offset; 1979 ouap.whence = uap->whence; 1980 return (sys_lseek(td, &ouap)); 1981} 1982 1983/* 1984 * Check access permissions using passed credentials. 1985 */ 1986static int 1987vn_access(vp, user_flags, cred, td) 1988 struct vnode *vp; 1989 int user_flags; 1990 struct ucred *cred; 1991 struct thread *td; 1992{ 1993 accmode_t accmode; 1994 int error; 1995 1996 /* Flags == 0 means only check for existence. */ 1997 error = 0; 1998 if (user_flags) { 1999 accmode = 0; 2000 if (user_flags & R_OK) 2001 accmode |= VREAD; 2002 if (user_flags & W_OK) 2003 accmode |= VWRITE; 2004 if (user_flags & X_OK) 2005 accmode |= VEXEC; 2006#ifdef MAC 2007 error = mac_vnode_check_access(cred, vp, accmode); 2008 if (error != 0) 2009 return (error); 2010#endif 2011 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2012 error = VOP_ACCESS(vp, accmode, cred, td); 2013 } 2014 return (error); 2015} 2016 2017/* 2018 * Check access permissions using "real" credentials. 2019 */ 2020#ifndef _SYS_SYSPROTO_H_ 2021struct access_args { 2022 char *path; 2023 int amode; 2024}; 2025#endif 2026int 2027sys_access(td, uap) 2028 struct thread *td; 2029 register struct access_args /* { 2030 char *path; 2031 int amode; 2032 } */ *uap; 2033{ 2034 2035 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2036} 2037 2038#ifndef _SYS_SYSPROTO_H_ 2039struct faccessat_args { 2040 int dirfd; 2041 char *path; 2042 int amode; 2043 int flag; 2044} 2045#endif 2046int 2047sys_faccessat(struct thread *td, struct faccessat_args *uap) 2048{ 2049 2050 if (uap->flag & ~AT_EACCESS) 2051 return (EINVAL); 2052 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2053 uap->amode)); 2054} 2055 2056int 2057kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2058{ 2059 2060 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2061} 2062 2063int 2064kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2065 int flag, int amode) 2066{ 2067 struct ucred *cred, *tmpcred; 2068 struct vnode *vp; 2069 struct nameidata nd; 2070 cap_rights_t rights; 2071 int error; 2072 2073 /* 2074 * Create and modify a temporary credential instead of one that 2075 * is potentially shared. 2076 */ 2077 if (!(flag & AT_EACCESS)) { 2078 cred = td->td_ucred; 2079 tmpcred = crdup(cred); 2080 tmpcred->cr_uid = cred->cr_ruid; 2081 tmpcred->cr_groups[0] = cred->cr_rgid; 2082 td->td_ucred = tmpcred; 2083 } else 2084 cred = tmpcred = td->td_ucred; 2085 AUDIT_ARG_VALUE(amode); 2086 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2087 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2088 td); 2089 if ((error = namei(&nd)) != 0) 2090 goto out1; 2091 vp = nd.ni_vp; 2092 2093 error = vn_access(vp, amode, tmpcred, td); 2094 NDFREE(&nd, NDF_ONLY_PNBUF); 2095 vput(vp); 2096out1: 2097 if (!(flag & AT_EACCESS)) { 2098 td->td_ucred = cred; 2099 crfree(tmpcred); 2100 } 2101 return (error); 2102} 2103 2104/* 2105 * Check access permissions using "effective" credentials. 2106 */ 2107#ifndef _SYS_SYSPROTO_H_ 2108struct eaccess_args { 2109 char *path; 2110 int amode; 2111}; 2112#endif 2113int 2114sys_eaccess(td, uap) 2115 struct thread *td; 2116 register struct eaccess_args /* { 2117 char *path; 2118 int amode; 2119 } */ *uap; 2120{ 2121 2122 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2123} 2124 2125int 2126kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2127{ 2128 2129 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2130} 2131 2132#if defined(COMPAT_43) 2133/* 2134 * Get file status; this version follows links. 2135 */ 2136#ifndef _SYS_SYSPROTO_H_ 2137struct ostat_args { 2138 char *path; 2139 struct ostat *ub; 2140}; 2141#endif 2142int 2143ostat(td, uap) 2144 struct thread *td; 2145 register struct ostat_args /* { 2146 char *path; 2147 struct ostat *ub; 2148 } */ *uap; 2149{ 2150 struct stat sb; 2151 struct ostat osb; 2152 int error; 2153 2154 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2155 if (error != 0) 2156 return (error); 2157 cvtstat(&sb, &osb); 2158 return (copyout(&osb, uap->ub, sizeof (osb))); 2159} 2160 2161/* 2162 * Get file status; this version does not follow links. 2163 */ 2164#ifndef _SYS_SYSPROTO_H_ 2165struct olstat_args { 2166 char *path; 2167 struct ostat *ub; 2168}; 2169#endif 2170int 2171olstat(td, uap) 2172 struct thread *td; 2173 register struct olstat_args /* { 2174 char *path; 2175 struct ostat *ub; 2176 } */ *uap; 2177{ 2178 struct stat sb; 2179 struct ostat osb; 2180 int error; 2181 2182 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2183 if (error != 0) 2184 return (error); 2185 cvtstat(&sb, &osb); 2186 return (copyout(&osb, uap->ub, sizeof (osb))); 2187} 2188 2189/* 2190 * Convert from an old to a new stat structure. 2191 */ 2192void 2193cvtstat(st, ost) 2194 struct stat *st; 2195 struct ostat *ost; 2196{ 2197 2198 bzero(ost, sizeof(*ost)); 2199 ost->st_dev = st->st_dev; 2200 ost->st_ino = st->st_ino; 2201 ost->st_mode = st->st_mode; 2202 ost->st_nlink = st->st_nlink; 2203 ost->st_uid = st->st_uid; 2204 ost->st_gid = st->st_gid; 2205 ost->st_rdev = st->st_rdev; 2206 if (st->st_size < (quad_t)1 << 32) 2207 ost->st_size = st->st_size; 2208 else 2209 ost->st_size = -2; 2210 ost->st_atim = st->st_atim; 2211 ost->st_mtim = st->st_mtim; 2212 ost->st_ctim = st->st_ctim; 2213 ost->st_blksize = st->st_blksize; 2214 ost->st_blocks = st->st_blocks; 2215 ost->st_flags = st->st_flags; 2216 ost->st_gen = st->st_gen; 2217} 2218#endif /* COMPAT_43 */ 2219 2220/* 2221 * Get file status; this version follows links. 2222 */ 2223#ifndef _SYS_SYSPROTO_H_ 2224struct stat_args { 2225 char *path; 2226 struct stat *ub; 2227}; 2228#endif 2229int 2230sys_stat(td, uap) 2231 struct thread *td; 2232 register struct stat_args /* { 2233 char *path; 2234 struct stat *ub; 2235 } */ *uap; 2236{ 2237 struct stat sb; 2238 int error; 2239 2240 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2241 if (error == 0) 2242 error = copyout(&sb, uap->ub, sizeof (sb)); 2243 return (error); 2244} 2245 2246#ifndef _SYS_SYSPROTO_H_ 2247struct fstatat_args { 2248 int fd; 2249 char *path; 2250 struct stat *buf; 2251 int flag; 2252} 2253#endif 2254int 2255sys_fstatat(struct thread *td, struct fstatat_args *uap) 2256{ 2257 struct stat sb; 2258 int error; 2259 2260 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2261 UIO_USERSPACE, &sb); 2262 if (error == 0) 2263 error = copyout(&sb, uap->buf, sizeof (sb)); 2264 return (error); 2265} 2266 2267int 2268kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2269{ 2270 2271 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2272} 2273 2274int 2275kern_statat(struct thread *td, int flag, int fd, char *path, 2276 enum uio_seg pathseg, struct stat *sbp) 2277{ 2278 2279 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2280} 2281 2282int 2283kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2284 enum uio_seg pathseg, struct stat *sbp, 2285 void (*hook)(struct vnode *vp, struct stat *sbp)) 2286{ 2287 struct nameidata nd; 2288 struct stat sb; 2289 cap_rights_t rights; 2290 int error; 2291 2292 if (flag & ~AT_SYMLINK_NOFOLLOW) 2293 return (EINVAL); 2294 2295 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2296 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2297 cap_rights_init(&rights, CAP_FSTAT), td); 2298 2299 if ((error = namei(&nd)) != 0) 2300 return (error); 2301 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2302 if (error == 0) { 2303 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2304 if (S_ISREG(sb.st_mode)) 2305 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2306 if (__predict_false(hook != NULL)) 2307 hook(nd.ni_vp, &sb); 2308 } 2309 NDFREE(&nd, NDF_ONLY_PNBUF); 2310 vput(nd.ni_vp); 2311 if (error != 0) 2312 return (error); 2313 *sbp = sb; 2314#ifdef KTRACE 2315 if (KTRPOINT(td, KTR_STRUCT)) 2316 ktrstat(&sb); 2317#endif 2318 return (0); 2319} 2320 2321/* 2322 * Get file status; this version does not follow links. 2323 */ 2324#ifndef _SYS_SYSPROTO_H_ 2325struct lstat_args { 2326 char *path; 2327 struct stat *ub; 2328}; 2329#endif 2330int 2331sys_lstat(td, uap) 2332 struct thread *td; 2333 register struct lstat_args /* { 2334 char *path; 2335 struct stat *ub; 2336 } */ *uap; 2337{ 2338 struct stat sb; 2339 int error; 2340 2341 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2342 if (error == 0) 2343 error = copyout(&sb, uap->ub, sizeof (sb)); 2344 return (error); 2345} 2346 2347int 2348kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2349{ 2350 2351 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2352 sbp)); 2353} 2354 2355/* 2356 * Implementation of the NetBSD [l]stat() functions. 2357 */ 2358void 2359cvtnstat(sb, nsb) 2360 struct stat *sb; 2361 struct nstat *nsb; 2362{ 2363 2364 bzero(nsb, sizeof *nsb); 2365 nsb->st_dev = sb->st_dev; 2366 nsb->st_ino = sb->st_ino; 2367 nsb->st_mode = sb->st_mode; 2368 nsb->st_nlink = sb->st_nlink; 2369 nsb->st_uid = sb->st_uid; 2370 nsb->st_gid = sb->st_gid; 2371 nsb->st_rdev = sb->st_rdev; 2372 nsb->st_atim = sb->st_atim; 2373 nsb->st_mtim = sb->st_mtim; 2374 nsb->st_ctim = sb->st_ctim; 2375 nsb->st_size = sb->st_size; 2376 nsb->st_blocks = sb->st_blocks; 2377 nsb->st_blksize = sb->st_blksize; 2378 nsb->st_flags = sb->st_flags; 2379 nsb->st_gen = sb->st_gen; 2380 nsb->st_birthtim = sb->st_birthtim; 2381} 2382 2383#ifndef _SYS_SYSPROTO_H_ 2384struct nstat_args { 2385 char *path; 2386 struct nstat *ub; 2387}; 2388#endif 2389int 2390sys_nstat(td, uap) 2391 struct thread *td; 2392 register struct nstat_args /* { 2393 char *path; 2394 struct nstat *ub; 2395 } */ *uap; 2396{ 2397 struct stat sb; 2398 struct nstat nsb; 2399 int error; 2400 2401 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2402 if (error != 0) 2403 return (error); 2404 cvtnstat(&sb, &nsb); 2405 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2406} 2407 2408/* 2409 * NetBSD lstat. Get file status; this version does not follow links. 2410 */ 2411#ifndef _SYS_SYSPROTO_H_ 2412struct lstat_args { 2413 char *path; 2414 struct stat *ub; 2415}; 2416#endif 2417int 2418sys_nlstat(td, uap) 2419 struct thread *td; 2420 register struct nlstat_args /* { 2421 char *path; 2422 struct nstat *ub; 2423 } */ *uap; 2424{ 2425 struct stat sb; 2426 struct nstat nsb; 2427 int error; 2428 2429 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2430 if (error != 0) 2431 return (error); 2432 cvtnstat(&sb, &nsb); 2433 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2434} 2435 2436/* 2437 * Get configurable pathname variables. 2438 */ 2439#ifndef _SYS_SYSPROTO_H_ 2440struct pathconf_args { 2441 char *path; 2442 int name; 2443}; 2444#endif 2445int 2446sys_pathconf(td, uap) 2447 struct thread *td; 2448 register struct pathconf_args /* { 2449 char *path; 2450 int name; 2451 } */ *uap; 2452{ 2453 2454 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2455} 2456 2457#ifndef _SYS_SYSPROTO_H_ 2458struct lpathconf_args { 2459 char *path; 2460 int name; 2461}; 2462#endif 2463int 2464sys_lpathconf(td, uap) 2465 struct thread *td; 2466 register struct lpathconf_args /* { 2467 char *path; 2468 int name; 2469 } */ *uap; 2470{ 2471 2472 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2473 NOFOLLOW)); 2474} 2475 2476int 2477kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2478 u_long flags) 2479{ 2480 struct nameidata nd; 2481 int error; 2482 2483 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2484 pathseg, path, td); 2485 if ((error = namei(&nd)) != 0) 2486 return (error); 2487 NDFREE(&nd, NDF_ONLY_PNBUF); 2488 2489 /* If asynchronous I/O is available, it works for all files. */ 2490 if (name == _PC_ASYNC_IO) 2491 td->td_retval[0] = async_io_version; 2492 else 2493 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2494 vput(nd.ni_vp); 2495 return (error); 2496} 2497 2498/* 2499 * Return target name of a symbolic link. 2500 */ 2501#ifndef _SYS_SYSPROTO_H_ 2502struct readlink_args { 2503 char *path; 2504 char *buf; 2505 size_t count; 2506}; 2507#endif 2508int 2509sys_readlink(td, uap) 2510 struct thread *td; 2511 register struct readlink_args /* { 2512 char *path; 2513 char *buf; 2514 size_t count; 2515 } */ *uap; 2516{ 2517 2518 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2519 UIO_USERSPACE, uap->count)); 2520} 2521#ifndef _SYS_SYSPROTO_H_ 2522struct readlinkat_args { 2523 int fd; 2524 char *path; 2525 char *buf; 2526 size_t bufsize; 2527}; 2528#endif 2529int 2530sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2531{ 2532 2533 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2534 uap->buf, UIO_USERSPACE, uap->bufsize)); 2535} 2536 2537int 2538kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2539 enum uio_seg bufseg, size_t count) 2540{ 2541 2542 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2543 count)); 2544} 2545 2546int 2547kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2548 char *buf, enum uio_seg bufseg, size_t count) 2549{ 2550 struct vnode *vp; 2551 struct iovec aiov; 2552 struct uio auio; 2553 struct nameidata nd; 2554 int error; 2555 2556 if (count > IOSIZE_MAX) 2557 return (EINVAL); 2558 2559 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2560 pathseg, path, fd, td); 2561 2562 if ((error = namei(&nd)) != 0) 2563 return (error); 2564 NDFREE(&nd, NDF_ONLY_PNBUF); 2565 vp = nd.ni_vp; 2566#ifdef MAC 2567 error = mac_vnode_check_readlink(td->td_ucred, vp); 2568 if (error != 0) { 2569 vput(vp); 2570 return (error); 2571 } 2572#endif 2573 if (vp->v_type != VLNK) 2574 error = EINVAL; 2575 else { 2576 aiov.iov_base = buf; 2577 aiov.iov_len = count; 2578 auio.uio_iov = &aiov; 2579 auio.uio_iovcnt = 1; 2580 auio.uio_offset = 0; 2581 auio.uio_rw = UIO_READ; 2582 auio.uio_segflg = bufseg; 2583 auio.uio_td = td; 2584 auio.uio_resid = count; 2585 error = VOP_READLINK(vp, &auio, td->td_ucred); 2586 td->td_retval[0] = count - auio.uio_resid; 2587 } 2588 vput(vp); 2589 return (error); 2590} 2591 2592/* 2593 * Common implementation code for chflags() and fchflags(). 2594 */ 2595static int 2596setfflags(td, vp, flags) 2597 struct thread *td; 2598 struct vnode *vp; 2599 u_long flags; 2600{ 2601 struct mount *mp; 2602 struct vattr vattr; 2603 int error; 2604 2605 /* We can't support the value matching VNOVAL. */ 2606 if (flags == VNOVAL) 2607 return (EOPNOTSUPP); 2608 2609 /* 2610 * Prevent non-root users from setting flags on devices. When 2611 * a device is reused, users can retain ownership of the device 2612 * if they are allowed to set flags and programs assume that 2613 * chown can't fail when done as root. 2614 */ 2615 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2616 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2617 if (error != 0) 2618 return (error); 2619 } 2620 2621 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2622 return (error); 2623 VATTR_NULL(&vattr); 2624 vattr.va_flags = flags; 2625 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2626#ifdef MAC 2627 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2628 if (error == 0) 2629#endif 2630 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2631 VOP_UNLOCK(vp, 0); 2632 vn_finished_write(mp); 2633 return (error); 2634} 2635 2636/* 2637 * Change flags of a file given a path name. 2638 */ 2639#ifndef _SYS_SYSPROTO_H_ 2640struct chflags_args { 2641 const char *path; 2642 u_long flags; 2643}; 2644#endif 2645int 2646sys_chflags(td, uap) 2647 struct thread *td; 2648 register struct chflags_args /* { 2649 const char *path; 2650 u_long flags; 2651 } */ *uap; 2652{ 2653 2654 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2655} 2656 2657#ifndef _SYS_SYSPROTO_H_ 2658struct chflagsat_args { 2659 int fd; 2660 const char *path; 2661 u_long flags; 2662 int atflag; 2663} 2664#endif 2665int 2666sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2667{ 2668 int fd = uap->fd; 2669 const char *path = uap->path; 2670 u_long flags = uap->flags; 2671 int atflag = uap->atflag; 2672 2673 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2674 return (EINVAL); 2675 2676 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2677} 2678 2679static int 2680kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2681 u_long flags) 2682{ 2683 2684 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2685} 2686 2687/* 2688 * Same as chflags() but doesn't follow symlinks. 2689 */ 2690int 2691sys_lchflags(td, uap) 2692 struct thread *td; 2693 register struct lchflags_args /* { 2694 const char *path; 2695 u_long flags; 2696 } */ *uap; 2697{ 2698 2699 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2700 uap->flags, AT_SYMLINK_NOFOLLOW)); 2701} 2702 2703static int 2704kern_chflagsat(struct thread *td, int fd, const char *path, 2705 enum uio_seg pathseg, u_long flags, int atflag) 2706{ 2707 struct nameidata nd; 2708 cap_rights_t rights; 2709 int error, follow; 2710 2711 AUDIT_ARG_FFLAGS(flags); 2712 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2713 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2714 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2715 if ((error = namei(&nd)) != 0) 2716 return (error); 2717 NDFREE(&nd, NDF_ONLY_PNBUF); 2718 error = setfflags(td, nd.ni_vp, flags); 2719 vrele(nd.ni_vp); 2720 return (error); 2721} 2722 2723/* 2724 * Change flags of a file given a file descriptor. 2725 */ 2726#ifndef _SYS_SYSPROTO_H_ 2727struct fchflags_args { 2728 int fd; 2729 u_long flags; 2730}; 2731#endif 2732int 2733sys_fchflags(td, uap) 2734 struct thread *td; 2735 register struct fchflags_args /* { 2736 int fd; 2737 u_long flags; 2738 } */ *uap; 2739{ 2740 struct file *fp; 2741 cap_rights_t rights; 2742 int error; 2743 2744 AUDIT_ARG_FD(uap->fd); 2745 AUDIT_ARG_FFLAGS(uap->flags); 2746 error = getvnode(td->td_proc->p_fd, uap->fd, 2747 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2748 if (error != 0) 2749 return (error); 2750#ifdef AUDIT 2751 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2752 AUDIT_ARG_VNODE1(fp->f_vnode); 2753 VOP_UNLOCK(fp->f_vnode, 0); 2754#endif 2755 error = setfflags(td, fp->f_vnode, uap->flags); 2756 fdrop(fp, td); 2757 return (error); 2758} 2759 2760/* 2761 * Common implementation code for chmod(), lchmod() and fchmod(). 2762 */ 2763int 2764setfmode(td, cred, vp, mode) 2765 struct thread *td; 2766 struct ucred *cred; 2767 struct vnode *vp; 2768 int mode; 2769{ 2770 struct mount *mp; 2771 struct vattr vattr; 2772 int error; 2773 2774 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2775 return (error); 2776 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2777 VATTR_NULL(&vattr); 2778 vattr.va_mode = mode & ALLPERMS; 2779#ifdef MAC 2780 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2781 if (error == 0) 2782#endif 2783 error = VOP_SETATTR(vp, &vattr, cred); 2784 VOP_UNLOCK(vp, 0); 2785 vn_finished_write(mp); 2786 return (error); 2787} 2788 2789/* 2790 * Change mode of a file given path name. 2791 */ 2792#ifndef _SYS_SYSPROTO_H_ 2793struct chmod_args { 2794 char *path; 2795 int mode; 2796}; 2797#endif 2798int 2799sys_chmod(td, uap) 2800 struct thread *td; 2801 register struct chmod_args /* { 2802 char *path; 2803 int mode; 2804 } */ *uap; 2805{ 2806 2807 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2808} 2809 2810#ifndef _SYS_SYSPROTO_H_ 2811struct fchmodat_args { 2812 int dirfd; 2813 char *path; 2814 mode_t mode; 2815 int flag; 2816} 2817#endif 2818int 2819sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2820{ 2821 int flag = uap->flag; 2822 int fd = uap->fd; 2823 char *path = uap->path; 2824 mode_t mode = uap->mode; 2825 2826 if (flag & ~AT_SYMLINK_NOFOLLOW) 2827 return (EINVAL); 2828 2829 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2830} 2831 2832int 2833kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2834{ 2835 2836 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2837} 2838 2839/* 2840 * Change mode of a file given path name (don't follow links.) 2841 */ 2842#ifndef _SYS_SYSPROTO_H_ 2843struct lchmod_args { 2844 char *path; 2845 int mode; 2846}; 2847#endif 2848int 2849sys_lchmod(td, uap) 2850 struct thread *td; 2851 register struct lchmod_args /* { 2852 char *path; 2853 int mode; 2854 } */ *uap; 2855{ 2856 2857 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2858 uap->mode, AT_SYMLINK_NOFOLLOW)); 2859} 2860 2861int 2862kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2863 mode_t mode, int flag) 2864{ 2865 struct nameidata nd; 2866 cap_rights_t rights; 2867 int error, follow; 2868 2869 AUDIT_ARG_MODE(mode); 2870 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2871 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2872 cap_rights_init(&rights, CAP_FCHMOD), td); 2873 if ((error = namei(&nd)) != 0) 2874 return (error); 2875 NDFREE(&nd, NDF_ONLY_PNBUF); 2876 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2877 vrele(nd.ni_vp); 2878 return (error); 2879} 2880 2881/* 2882 * Change mode of a file given a file descriptor. 2883 */ 2884#ifndef _SYS_SYSPROTO_H_ 2885struct fchmod_args { 2886 int fd; 2887 int mode; 2888}; 2889#endif 2890int 2891sys_fchmod(struct thread *td, struct fchmod_args *uap) 2892{ 2893 struct file *fp; 2894 cap_rights_t rights; 2895 int error; 2896 2897 AUDIT_ARG_FD(uap->fd); 2898 AUDIT_ARG_MODE(uap->mode); 2899 2900 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2901 if (error != 0) 2902 return (error); 2903 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2904 fdrop(fp, td); 2905 return (error); 2906} 2907 2908/* 2909 * Common implementation for chown(), lchown(), and fchown() 2910 */ 2911int 2912setfown(td, cred, vp, uid, gid) 2913 struct thread *td; 2914 struct ucred *cred; 2915 struct vnode *vp; 2916 uid_t uid; 2917 gid_t gid; 2918{ 2919 struct mount *mp; 2920 struct vattr vattr; 2921 int error; 2922 2923 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2924 return (error); 2925 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2926 VATTR_NULL(&vattr); 2927 vattr.va_uid = uid; 2928 vattr.va_gid = gid; 2929#ifdef MAC 2930 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2931 vattr.va_gid); 2932 if (error == 0) 2933#endif 2934 error = VOP_SETATTR(vp, &vattr, cred); 2935 VOP_UNLOCK(vp, 0); 2936 vn_finished_write(mp); 2937 return (error); 2938} 2939 2940/* 2941 * Set ownership given a path name. 2942 */ 2943#ifndef _SYS_SYSPROTO_H_ 2944struct chown_args { 2945 char *path; 2946 int uid; 2947 int gid; 2948}; 2949#endif 2950int 2951sys_chown(td, uap) 2952 struct thread *td; 2953 register struct chown_args /* { 2954 char *path; 2955 int uid; 2956 int gid; 2957 } */ *uap; 2958{ 2959 2960 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2961} 2962 2963#ifndef _SYS_SYSPROTO_H_ 2964struct fchownat_args { 2965 int fd; 2966 const char * path; 2967 uid_t uid; 2968 gid_t gid; 2969 int flag; 2970}; 2971#endif 2972int 2973sys_fchownat(struct thread *td, struct fchownat_args *uap) 2974{ 2975 int flag; 2976 2977 flag = uap->flag; 2978 if (flag & ~AT_SYMLINK_NOFOLLOW) 2979 return (EINVAL); 2980 2981 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2982 uap->gid, uap->flag)); 2983} 2984 2985int 2986kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2987 int gid) 2988{ 2989 2990 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2991} 2992 2993int 2994kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2995 int uid, int gid, int flag) 2996{ 2997 struct nameidata nd; 2998 cap_rights_t rights; 2999 int error, follow; 3000 3001 AUDIT_ARG_OWNER(uid, gid); 3002 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3003 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3004 cap_rights_init(&rights, CAP_FCHOWN), td); 3005 3006 if ((error = namei(&nd)) != 0) 3007 return (error); 3008 NDFREE(&nd, NDF_ONLY_PNBUF); 3009 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3010 vrele(nd.ni_vp); 3011 return (error); 3012} 3013 3014/* 3015 * Set ownership given a path name, do not cross symlinks. 3016 */ 3017#ifndef _SYS_SYSPROTO_H_ 3018struct lchown_args { 3019 char *path; 3020 int uid; 3021 int gid; 3022}; 3023#endif 3024int 3025sys_lchown(td, uap) 3026 struct thread *td; 3027 register struct lchown_args /* { 3028 char *path; 3029 int uid; 3030 int gid; 3031 } */ *uap; 3032{ 3033 3034 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3035} 3036 3037int 3038kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3039 int gid) 3040{ 3041 3042 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3043 AT_SYMLINK_NOFOLLOW)); 3044} 3045 3046/* 3047 * Set ownership given a file descriptor. 3048 */ 3049#ifndef _SYS_SYSPROTO_H_ 3050struct fchown_args { 3051 int fd; 3052 int uid; 3053 int gid; 3054}; 3055#endif 3056int 3057sys_fchown(td, uap) 3058 struct thread *td; 3059 register struct fchown_args /* { 3060 int fd; 3061 int uid; 3062 int gid; 3063 } */ *uap; 3064{ 3065 struct file *fp; 3066 cap_rights_t rights; 3067 int error; 3068 3069 AUDIT_ARG_FD(uap->fd); 3070 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3071 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3072 if (error != 0) 3073 return (error); 3074 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3075 fdrop(fp, td); 3076 return (error); 3077} 3078 3079/* 3080 * Common implementation code for utimes(), lutimes(), and futimes(). 3081 */ 3082static int 3083getutimes(usrtvp, tvpseg, tsp) 3084 const struct timeval *usrtvp; 3085 enum uio_seg tvpseg; 3086 struct timespec *tsp; 3087{ 3088 struct timeval tv[2]; 3089 const struct timeval *tvp; 3090 int error; 3091 3092 if (usrtvp == NULL) { 3093 vfs_timestamp(&tsp[0]); 3094 tsp[1] = tsp[0]; 3095 } else { 3096 if (tvpseg == UIO_SYSSPACE) { 3097 tvp = usrtvp; 3098 } else { 3099 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3100 return (error); 3101 tvp = tv; 3102 } 3103 3104 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3105 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3106 return (EINVAL); 3107 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3108 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3109 } 3110 return (0); 3111} 3112 3113/* 3114 * Common implementation code for utimes(), lutimes(), and futimes(). 3115 */ 3116static int 3117setutimes(td, vp, ts, numtimes, nullflag) 3118 struct thread *td; 3119 struct vnode *vp; 3120 const struct timespec *ts; 3121 int numtimes; 3122 int nullflag; 3123{ 3124 struct mount *mp; 3125 struct vattr vattr; 3126 int error, setbirthtime; 3127 3128 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3129 return (error); 3130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3131 setbirthtime = 0; 3132 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3133 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3134 setbirthtime = 1; 3135 VATTR_NULL(&vattr); 3136 vattr.va_atime = ts[0]; 3137 vattr.va_mtime = ts[1]; 3138 if (setbirthtime) 3139 vattr.va_birthtime = ts[1]; 3140 if (numtimes > 2) 3141 vattr.va_birthtime = ts[2]; 3142 if (nullflag) 3143 vattr.va_vaflags |= VA_UTIMES_NULL; 3144#ifdef MAC 3145 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3146 vattr.va_mtime); 3147#endif 3148 if (error == 0) 3149 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3150 VOP_UNLOCK(vp, 0); 3151 vn_finished_write(mp); 3152 return (error); 3153} 3154 3155/* 3156 * Set the access and modification times of a file. 3157 */ 3158#ifndef _SYS_SYSPROTO_H_ 3159struct utimes_args { 3160 char *path; 3161 struct timeval *tptr; 3162}; 3163#endif 3164int 3165sys_utimes(td, uap) 3166 struct thread *td; 3167 register struct utimes_args /* { 3168 char *path; 3169 struct timeval *tptr; 3170 } */ *uap; 3171{ 3172 3173 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3174 UIO_USERSPACE)); 3175} 3176 3177#ifndef _SYS_SYSPROTO_H_ 3178struct futimesat_args { 3179 int fd; 3180 const char * path; 3181 const struct timeval * times; 3182}; 3183#endif 3184int 3185sys_futimesat(struct thread *td, struct futimesat_args *uap) 3186{ 3187 3188 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3189 uap->times, UIO_USERSPACE)); 3190} 3191 3192int 3193kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3194 struct timeval *tptr, enum uio_seg tptrseg) 3195{ 3196 3197 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3198} 3199 3200int 3201kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3202 struct timeval *tptr, enum uio_seg tptrseg) 3203{ 3204 struct nameidata nd; 3205 struct timespec ts[2]; 3206 cap_rights_t rights; 3207 int error; 3208 3209 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3210 return (error); 3211 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3212 cap_rights_init(&rights, CAP_FUTIMES), td); 3213 3214 if ((error = namei(&nd)) != 0) 3215 return (error); 3216 NDFREE(&nd, NDF_ONLY_PNBUF); 3217 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3218 vrele(nd.ni_vp); 3219 return (error); 3220} 3221 3222/* 3223 * Set the access and modification times of a file. 3224 */ 3225#ifndef _SYS_SYSPROTO_H_ 3226struct lutimes_args { 3227 char *path; 3228 struct timeval *tptr; 3229}; 3230#endif 3231int 3232sys_lutimes(td, uap) 3233 struct thread *td; 3234 register struct lutimes_args /* { 3235 char *path; 3236 struct timeval *tptr; 3237 } */ *uap; 3238{ 3239 3240 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3241 UIO_USERSPACE)); 3242} 3243 3244int 3245kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3246 struct timeval *tptr, enum uio_seg tptrseg) 3247{ 3248 struct timespec ts[2]; 3249 struct nameidata nd; 3250 int error; 3251 3252 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3253 return (error); 3254 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3255 if ((error = namei(&nd)) != 0) 3256 return (error); 3257 NDFREE(&nd, NDF_ONLY_PNBUF); 3258 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3259 vrele(nd.ni_vp); 3260 return (error); 3261} 3262 3263/* 3264 * Set the access and modification times of a file. 3265 */ 3266#ifndef _SYS_SYSPROTO_H_ 3267struct futimes_args { 3268 int fd; 3269 struct timeval *tptr; 3270}; 3271#endif 3272int 3273sys_futimes(td, uap) 3274 struct thread *td; 3275 register struct futimes_args /* { 3276 int fd; 3277 struct timeval *tptr; 3278 } */ *uap; 3279{ 3280 3281 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3282} 3283 3284int 3285kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3286 enum uio_seg tptrseg) 3287{ 3288 struct timespec ts[2]; 3289 struct file *fp; 3290 cap_rights_t rights; 3291 int error; 3292 3293 AUDIT_ARG_FD(fd); 3294 error = getutimes(tptr, tptrseg, ts); 3295 if (error != 0) 3296 return (error); 3297 error = getvnode(td->td_proc->p_fd, fd, 3298 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3299 if (error != 0) 3300 return (error); 3301#ifdef AUDIT 3302 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3303 AUDIT_ARG_VNODE1(fp->f_vnode); 3304 VOP_UNLOCK(fp->f_vnode, 0); 3305#endif 3306 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3307 fdrop(fp, td); 3308 return (error); 3309} 3310 3311/* 3312 * Truncate a file given its path name. 3313 */ 3314#ifndef _SYS_SYSPROTO_H_ 3315struct truncate_args { 3316 char *path; 3317 int pad; 3318 off_t length; 3319}; 3320#endif 3321int 3322sys_truncate(td, uap) 3323 struct thread *td; 3324 register struct truncate_args /* { 3325 char *path; 3326 int pad; 3327 off_t length; 3328 } */ *uap; 3329{ 3330 3331 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3332} 3333 3334int 3335kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3336{ 3337 struct mount *mp; 3338 struct vnode *vp; 3339 void *rl_cookie; 3340 struct vattr vattr; 3341 struct nameidata nd; 3342 int error; 3343 3344 if (length < 0) 3345 return(EINVAL); 3346 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3347 if ((error = namei(&nd)) != 0) 3348 return (error); 3349 vp = nd.ni_vp; 3350 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3351 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3352 vn_rangelock_unlock(vp, rl_cookie); 3353 vrele(vp); 3354 return (error); 3355 } 3356 NDFREE(&nd, NDF_ONLY_PNBUF); 3357 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3358 if (vp->v_type == VDIR) 3359 error = EISDIR; 3360#ifdef MAC 3361 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3362 } 3363#endif 3364 else if ((error = vn_writechk(vp)) == 0 && 3365 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3366 VATTR_NULL(&vattr); 3367 vattr.va_size = length; 3368 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3369 } 3370 VOP_UNLOCK(vp, 0); 3371 vn_finished_write(mp); 3372 vn_rangelock_unlock(vp, rl_cookie); 3373 vrele(vp); 3374 return (error); 3375} 3376 3377#if defined(COMPAT_43) 3378/* 3379 * Truncate a file given its path name. 3380 */ 3381#ifndef _SYS_SYSPROTO_H_ 3382struct otruncate_args { 3383 char *path; 3384 long length; 3385}; 3386#endif 3387int 3388otruncate(td, uap) 3389 struct thread *td; 3390 register struct otruncate_args /* { 3391 char *path; 3392 long length; 3393 } */ *uap; 3394{ 3395 struct truncate_args /* { 3396 char *path; 3397 int pad; 3398 off_t length; 3399 } */ nuap; 3400 3401 nuap.path = uap->path; 3402 nuap.length = uap->length; 3403 return (sys_truncate(td, &nuap)); 3404} 3405#endif /* COMPAT_43 */ 3406 3407/* Versions with the pad argument */ 3408int 3409freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3410{ 3411 struct truncate_args ouap; 3412 3413 ouap.path = uap->path; 3414 ouap.length = uap->length; 3415 return (sys_truncate(td, &ouap)); 3416} 3417 3418int 3419freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3420{ 3421 struct ftruncate_args ouap; 3422 3423 ouap.fd = uap->fd; 3424 ouap.length = uap->length; 3425 return (sys_ftruncate(td, &ouap)); 3426} 3427 3428/* 3429 * Sync an open file. 3430 */ 3431#ifndef _SYS_SYSPROTO_H_ 3432struct fsync_args { 3433 int fd; 3434}; 3435#endif 3436int 3437sys_fsync(td, uap) 3438 struct thread *td; 3439 struct fsync_args /* { 3440 int fd; 3441 } */ *uap; 3442{ 3443 struct vnode *vp; 3444 struct mount *mp; 3445 struct file *fp; 3446 cap_rights_t rights; 3447 int error, lock_flags; 3448 3449 AUDIT_ARG_FD(uap->fd); 3450 error = getvnode(td->td_proc->p_fd, uap->fd, 3451 cap_rights_init(&rights, CAP_FSYNC), &fp); 3452 if (error != 0) 3453 return (error); 3454 vp = fp->f_vnode; 3455 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3456 if (error != 0) 3457 goto drop; 3458 if (MNT_SHARED_WRITES(mp) || 3459 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3460 lock_flags = LK_SHARED; 3461 } else { 3462 lock_flags = LK_EXCLUSIVE; 3463 } 3464 vn_lock(vp, lock_flags | LK_RETRY); 3465 AUDIT_ARG_VNODE1(vp); 3466 if (vp->v_object != NULL) { 3467 VM_OBJECT_WLOCK(vp->v_object); 3468 vm_object_page_clean(vp->v_object, 0, 0, 0); 3469 VM_OBJECT_WUNLOCK(vp->v_object); 3470 } 3471 error = VOP_FSYNC(vp, MNT_WAIT, td); 3472 3473 VOP_UNLOCK(vp, 0); 3474 vn_finished_write(mp); 3475drop: 3476 fdrop(fp, td); 3477 return (error); 3478} 3479 3480/* 3481 * Rename files. Source and destination must either both be directories, or 3482 * both not be directories. If target is a directory, it must be empty. 3483 */ 3484#ifndef _SYS_SYSPROTO_H_ 3485struct rename_args { 3486 char *from; 3487 char *to; 3488}; 3489#endif 3490int 3491sys_rename(td, uap) 3492 struct thread *td; 3493 register struct rename_args /* { 3494 char *from; 3495 char *to; 3496 } */ *uap; 3497{ 3498 3499 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3500} 3501 3502#ifndef _SYS_SYSPROTO_H_ 3503struct renameat_args { 3504 int oldfd; 3505 char *old; 3506 int newfd; 3507 char *new; 3508}; 3509#endif 3510int 3511sys_renameat(struct thread *td, struct renameat_args *uap) 3512{ 3513 3514 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3515 UIO_USERSPACE)); 3516} 3517 3518int 3519kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3520{ 3521 3522 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3523} 3524 3525int 3526kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3527 enum uio_seg pathseg) 3528{ 3529 struct mount *mp = NULL; 3530 struct vnode *tvp, *fvp, *tdvp; 3531 struct nameidata fromnd, tond; 3532 cap_rights_t rights; 3533 int error; 3534 3535again: 3536 bwillwrite(); 3537#ifdef MAC 3538 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3539 AUDITVNODE1, pathseg, old, oldfd, 3540 cap_rights_init(&rights, CAP_RENAMEAT), td); 3541#else 3542 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3543 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3544#endif 3545 3546 if ((error = namei(&fromnd)) != 0) 3547 return (error); 3548#ifdef MAC 3549 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3550 fromnd.ni_vp, &fromnd.ni_cnd); 3551 VOP_UNLOCK(fromnd.ni_dvp, 0); 3552 if (fromnd.ni_dvp != fromnd.ni_vp) 3553 VOP_UNLOCK(fromnd.ni_vp, 0); 3554#endif 3555 fvp = fromnd.ni_vp; 3556 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3557 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3558 cap_rights_init(&rights, CAP_LINKAT), td); 3559 if (fromnd.ni_vp->v_type == VDIR) 3560 tond.ni_cnd.cn_flags |= WILLBEDIR; 3561 if ((error = namei(&tond)) != 0) { 3562 /* Translate error code for rename("dir1", "dir2/."). */ 3563 if (error == EISDIR && fvp->v_type == VDIR) 3564 error = EINVAL; 3565 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3566 vrele(fromnd.ni_dvp); 3567 vrele(fvp); 3568 goto out1; 3569 } 3570 tdvp = tond.ni_dvp; 3571 tvp = tond.ni_vp; 3572 error = vn_start_write(fvp, &mp, V_NOWAIT); 3573 if (error != 0) { 3574 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3575 NDFREE(&tond, NDF_ONLY_PNBUF); 3576 if (tvp != NULL) 3577 vput(tvp); 3578 if (tdvp == tvp) 3579 vrele(tdvp); 3580 else 3581 vput(tdvp); 3582 vrele(fromnd.ni_dvp); 3583 vrele(fvp); 3584 vrele(tond.ni_startdir); 3585 if (fromnd.ni_startdir != NULL) 3586 vrele(fromnd.ni_startdir); 3587 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3588 if (error != 0) 3589 return (error); 3590 goto again; 3591 } 3592 if (tvp != NULL) { 3593 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3594 error = ENOTDIR; 3595 goto out; 3596 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3597 error = EISDIR; 3598 goto out; 3599 } 3600#ifdef CAPABILITIES 3601 if (newfd != AT_FDCWD) { 3602 /* 3603 * If the target already exists we require CAP_UNLINKAT 3604 * from 'newfd'. 3605 */ 3606 error = cap_check(&tond.ni_filecaps.fc_rights, 3607 cap_rights_init(&rights, CAP_UNLINKAT)); 3608 if (error != 0) 3609 goto out; 3610 } 3611#endif 3612 } 3613 if (fvp == tdvp) { 3614 error = EINVAL; 3615 goto out; 3616 } 3617 /* 3618 * If the source is the same as the destination (that is, if they 3619 * are links to the same vnode), then there is nothing to do. 3620 */ 3621 if (fvp == tvp) 3622 error = -1; 3623#ifdef MAC 3624 else 3625 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3626 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3627#endif 3628out: 3629 if (error == 0) { 3630 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3631 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3632 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3633 NDFREE(&tond, NDF_ONLY_PNBUF); 3634 } else { 3635 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3636 NDFREE(&tond, NDF_ONLY_PNBUF); 3637 if (tvp != NULL) 3638 vput(tvp); 3639 if (tdvp == tvp) 3640 vrele(tdvp); 3641 else 3642 vput(tdvp); 3643 vrele(fromnd.ni_dvp); 3644 vrele(fvp); 3645 } 3646 vrele(tond.ni_startdir); 3647 vn_finished_write(mp); 3648out1: 3649 if (fromnd.ni_startdir) 3650 vrele(fromnd.ni_startdir); 3651 if (error == -1) 3652 return (0); 3653 return (error); 3654} 3655 3656/* 3657 * Make a directory file. 3658 */ 3659#ifndef _SYS_SYSPROTO_H_ 3660struct mkdir_args { 3661 char *path; 3662 int mode; 3663}; 3664#endif 3665int 3666sys_mkdir(td, uap) 3667 struct thread *td; 3668 register struct mkdir_args /* { 3669 char *path; 3670 int mode; 3671 } */ *uap; 3672{ 3673 3674 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3675} 3676 3677#ifndef _SYS_SYSPROTO_H_ 3678struct mkdirat_args { 3679 int fd; 3680 char *path; 3681 mode_t mode; 3682}; 3683#endif 3684int 3685sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3686{ 3687 3688 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3689} 3690 3691int 3692kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3693{ 3694 3695 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3696} 3697 3698int 3699kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3700 int mode) 3701{ 3702 struct mount *mp; 3703 struct vnode *vp; 3704 struct vattr vattr; 3705 struct nameidata nd; 3706 cap_rights_t rights; 3707 int error; 3708 3709 AUDIT_ARG_MODE(mode); 3710restart: 3711 bwillwrite(); 3712 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3713 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3714 td); 3715 nd.ni_cnd.cn_flags |= WILLBEDIR; 3716 if ((error = namei(&nd)) != 0) 3717 return (error); 3718 vp = nd.ni_vp; 3719 if (vp != NULL) { 3720 NDFREE(&nd, NDF_ONLY_PNBUF); 3721 /* 3722 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3723 * the strange behaviour of leaving the vnode unlocked 3724 * if the target is the same vnode as the parent. 3725 */ 3726 if (vp == nd.ni_dvp) 3727 vrele(nd.ni_dvp); 3728 else 3729 vput(nd.ni_dvp); 3730 vrele(vp); 3731 return (EEXIST); 3732 } 3733 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3734 NDFREE(&nd, NDF_ONLY_PNBUF); 3735 vput(nd.ni_dvp); 3736 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3737 return (error); 3738 goto restart; 3739 } 3740 VATTR_NULL(&vattr); 3741 vattr.va_type = VDIR; 3742 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3743#ifdef MAC 3744 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3745 &vattr); 3746 if (error != 0) 3747 goto out; 3748#endif 3749 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3750#ifdef MAC 3751out: 3752#endif 3753 NDFREE(&nd, NDF_ONLY_PNBUF); 3754 vput(nd.ni_dvp); 3755 if (error == 0) 3756 vput(nd.ni_vp); 3757 vn_finished_write(mp); 3758 return (error); 3759} 3760 3761/* 3762 * Remove a directory file. 3763 */ 3764#ifndef _SYS_SYSPROTO_H_ 3765struct rmdir_args { 3766 char *path; 3767}; 3768#endif 3769int 3770sys_rmdir(td, uap) 3771 struct thread *td; 3772 struct rmdir_args /* { 3773 char *path; 3774 } */ *uap; 3775{ 3776 3777 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3778} 3779 3780int 3781kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3782{ 3783 3784 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3785} 3786 3787int 3788kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3789{ 3790 struct mount *mp; 3791 struct vnode *vp; 3792 struct nameidata nd; 3793 cap_rights_t rights; 3794 int error; 3795 3796restart: 3797 bwillwrite(); 3798 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3799 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3800 if ((error = namei(&nd)) != 0) 3801 return (error); 3802 vp = nd.ni_vp; 3803 if (vp->v_type != VDIR) { 3804 error = ENOTDIR; 3805 goto out; 3806 } 3807 /* 3808 * No rmdir "." please. 3809 */ 3810 if (nd.ni_dvp == vp) { 3811 error = EINVAL; 3812 goto out; 3813 } 3814 /* 3815 * The root of a mounted filesystem cannot be deleted. 3816 */ 3817 if (vp->v_vflag & VV_ROOT) { 3818 error = EBUSY; 3819 goto out; 3820 } 3821#ifdef MAC 3822 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3823 &nd.ni_cnd); 3824 if (error != 0) 3825 goto out; 3826#endif 3827 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3828 NDFREE(&nd, NDF_ONLY_PNBUF); 3829 vput(vp); 3830 if (nd.ni_dvp == vp) 3831 vrele(nd.ni_dvp); 3832 else 3833 vput(nd.ni_dvp); 3834 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3835 return (error); 3836 goto restart; 3837 } 3838 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3839 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3840 vn_finished_write(mp); 3841out: 3842 NDFREE(&nd, NDF_ONLY_PNBUF); 3843 vput(vp); 3844 if (nd.ni_dvp == vp) 3845 vrele(nd.ni_dvp); 3846 else 3847 vput(nd.ni_dvp); 3848 return (error); 3849} 3850 3851#ifdef COMPAT_43 3852/* 3853 * Read a block of directory entries in a filesystem independent format. 3854 */ 3855#ifndef _SYS_SYSPROTO_H_ 3856struct ogetdirentries_args { 3857 int fd; 3858 char *buf; 3859 u_int count; 3860 long *basep; 3861}; 3862#endif 3863int 3864ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3865{ 3866 long loff; 3867 int error; 3868 3869 error = kern_ogetdirentries(td, uap, &loff); 3870 if (error == 0) 3871 error = copyout(&loff, uap->basep, sizeof(long)); 3872 return (error); 3873} 3874 3875int 3876kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3877 long *ploff) 3878{ 3879 struct vnode *vp; 3880 struct file *fp; 3881 struct uio auio, kuio; 3882 struct iovec aiov, kiov; 3883 struct dirent *dp, *edp; 3884 cap_rights_t rights; 3885 caddr_t dirbuf; 3886 int error, eofflag, readcnt; 3887 long loff; 3888 off_t foffset; 3889 3890 /* XXX arbitrary sanity limit on `count'. */ 3891 if (uap->count > 64 * 1024) 3892 return (EINVAL); 3893 error = getvnode(td->td_proc->p_fd, uap->fd, 3894 cap_rights_init(&rights, CAP_READ), &fp); 3895 if (error != 0) 3896 return (error); 3897 if ((fp->f_flag & FREAD) == 0) { 3898 fdrop(fp, td); 3899 return (EBADF); 3900 } 3901 vp = fp->f_vnode; 3902 foffset = foffset_lock(fp, 0); 3903unionread: 3904 if (vp->v_type != VDIR) { 3905 foffset_unlock(fp, foffset, 0); 3906 fdrop(fp, td); 3907 return (EINVAL); 3908 } 3909 aiov.iov_base = uap->buf; 3910 aiov.iov_len = uap->count; 3911 auio.uio_iov = &aiov; 3912 auio.uio_iovcnt = 1; 3913 auio.uio_rw = UIO_READ; 3914 auio.uio_segflg = UIO_USERSPACE; 3915 auio.uio_td = td; 3916 auio.uio_resid = uap->count; 3917 vn_lock(vp, LK_SHARED | LK_RETRY); 3918 loff = auio.uio_offset = foffset; 3919#ifdef MAC 3920 error = mac_vnode_check_readdir(td->td_ucred, vp); 3921 if (error != 0) { 3922 VOP_UNLOCK(vp, 0); 3923 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3924 fdrop(fp, td); 3925 return (error); 3926 } 3927#endif 3928# if (BYTE_ORDER != LITTLE_ENDIAN) 3929 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3930 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3931 NULL, NULL); 3932 foffset = auio.uio_offset; 3933 } else 3934# endif 3935 { 3936 kuio = auio; 3937 kuio.uio_iov = &kiov; 3938 kuio.uio_segflg = UIO_SYSSPACE; 3939 kiov.iov_len = uap->count; 3940 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3941 kiov.iov_base = dirbuf; 3942 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3943 NULL, NULL); 3944 foffset = kuio.uio_offset; 3945 if (error == 0) { 3946 readcnt = uap->count - kuio.uio_resid; 3947 edp = (struct dirent *)&dirbuf[readcnt]; 3948 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3949# if (BYTE_ORDER == LITTLE_ENDIAN) 3950 /* 3951 * The expected low byte of 3952 * dp->d_namlen is our dp->d_type. 3953 * The high MBZ byte of dp->d_namlen 3954 * is our dp->d_namlen. 3955 */ 3956 dp->d_type = dp->d_namlen; 3957 dp->d_namlen = 0; 3958# else 3959 /* 3960 * The dp->d_type is the high byte 3961 * of the expected dp->d_namlen, 3962 * so must be zero'ed. 3963 */ 3964 dp->d_type = 0; 3965# endif 3966 if (dp->d_reclen > 0) { 3967 dp = (struct dirent *) 3968 ((char *)dp + dp->d_reclen); 3969 } else { 3970 error = EIO; 3971 break; 3972 } 3973 } 3974 if (dp >= edp) 3975 error = uiomove(dirbuf, readcnt, &auio); 3976 } 3977 free(dirbuf, M_TEMP); 3978 } 3979 if (error != 0) { 3980 VOP_UNLOCK(vp, 0); 3981 foffset_unlock(fp, foffset, 0); 3982 fdrop(fp, td); 3983 return (error); 3984 } 3985 if (uap->count == auio.uio_resid && 3986 (vp->v_vflag & VV_ROOT) && 3987 (vp->v_mount->mnt_flag & MNT_UNION)) { 3988 struct vnode *tvp = vp; 3989 vp = vp->v_mount->mnt_vnodecovered; 3990 VREF(vp); 3991 fp->f_vnode = vp; 3992 fp->f_data = vp; 3993 foffset = 0; 3994 vput(tvp); 3995 goto unionread; 3996 } 3997 VOP_UNLOCK(vp, 0); 3998 foffset_unlock(fp, foffset, 0); 3999 fdrop(fp, td); 4000 td->td_retval[0] = uap->count - auio.uio_resid; 4001 if (error == 0) 4002 *ploff = loff; 4003 return (error); 4004} 4005#endif /* COMPAT_43 */ 4006 4007/* 4008 * Read a block of directory entries in a filesystem independent format. 4009 */ 4010#ifndef _SYS_SYSPROTO_H_ 4011struct getdirentries_args { 4012 int fd; 4013 char *buf; 4014 u_int count; 4015 long *basep; 4016}; 4017#endif 4018int 4019sys_getdirentries(td, uap) 4020 struct thread *td; 4021 register struct getdirentries_args /* { 4022 int fd; 4023 char *buf; 4024 u_int count; 4025 long *basep; 4026 } */ *uap; 4027{ 4028 long base; 4029 int error; 4030 4031 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4032 NULL, UIO_USERSPACE); 4033 if (error != 0) 4034 return (error); 4035 if (uap->basep != NULL) 4036 error = copyout(&base, uap->basep, sizeof(long)); 4037 return (error); 4038} 4039 4040int 4041kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4042 long *basep, ssize_t *residp, enum uio_seg bufseg) 4043{ 4044 struct vnode *vp; 4045 struct file *fp; 4046 struct uio auio; 4047 struct iovec aiov; 4048 cap_rights_t rights; 4049 long loff; 4050 int error, eofflag; 4051 off_t foffset; 4052 4053 AUDIT_ARG_FD(fd); 4054 if (count > IOSIZE_MAX) 4055 return (EINVAL); 4056 auio.uio_resid = count; 4057 error = getvnode(td->td_proc->p_fd, fd, 4058 cap_rights_init(&rights, CAP_READ), &fp); 4059 if (error != 0) 4060 return (error); 4061 if ((fp->f_flag & FREAD) == 0) { 4062 fdrop(fp, td); 4063 return (EBADF); 4064 } 4065 vp = fp->f_vnode; 4066 foffset = foffset_lock(fp, 0); 4067unionread: 4068 if (vp->v_type != VDIR) { 4069 error = EINVAL; 4070 goto fail; 4071 } 4072 aiov.iov_base = buf; 4073 aiov.iov_len = count; 4074 auio.uio_iov = &aiov; 4075 auio.uio_iovcnt = 1; 4076 auio.uio_rw = UIO_READ; 4077 auio.uio_segflg = bufseg; 4078 auio.uio_td = td; 4079 vn_lock(vp, LK_SHARED | LK_RETRY); 4080 AUDIT_ARG_VNODE1(vp); 4081 loff = auio.uio_offset = foffset; 4082#ifdef MAC 4083 error = mac_vnode_check_readdir(td->td_ucred, vp); 4084 if (error == 0) 4085#endif 4086 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4087 NULL); 4088 foffset = auio.uio_offset; 4089 if (error != 0) { 4090 VOP_UNLOCK(vp, 0); 4091 goto fail; 4092 } 4093 if (count == auio.uio_resid && 4094 (vp->v_vflag & VV_ROOT) && 4095 (vp->v_mount->mnt_flag & MNT_UNION)) { 4096 struct vnode *tvp = vp; 4097 4098 vp = vp->v_mount->mnt_vnodecovered; 4099 VREF(vp); 4100 fp->f_vnode = vp; 4101 fp->f_data = vp; 4102 foffset = 0; 4103 vput(tvp); 4104 goto unionread; 4105 } 4106 VOP_UNLOCK(vp, 0); 4107 *basep = loff; 4108 if (residp != NULL) 4109 *residp = auio.uio_resid; 4110 td->td_retval[0] = count - auio.uio_resid; 4111fail: 4112 foffset_unlock(fp, foffset, 0); 4113 fdrop(fp, td); 4114 return (error); 4115} 4116 4117#ifndef _SYS_SYSPROTO_H_ 4118struct getdents_args { 4119 int fd; 4120 char *buf; 4121 size_t count; 4122}; 4123#endif 4124int 4125sys_getdents(td, uap) 4126 struct thread *td; 4127 register struct getdents_args /* { 4128 int fd; 4129 char *buf; 4130 u_int count; 4131 } */ *uap; 4132{ 4133 struct getdirentries_args ap; 4134 4135 ap.fd = uap->fd; 4136 ap.buf = uap->buf; 4137 ap.count = uap->count; 4138 ap.basep = NULL; 4139 return (sys_getdirentries(td, &ap)); 4140} 4141 4142/* 4143 * Set the mode mask for creation of filesystem nodes. 4144 */ 4145#ifndef _SYS_SYSPROTO_H_ 4146struct umask_args { 4147 int newmask; 4148}; 4149#endif 4150int 4151sys_umask(td, uap) 4152 struct thread *td; 4153 struct umask_args /* { 4154 int newmask; 4155 } */ *uap; 4156{ 4157 register struct filedesc *fdp; 4158 4159 FILEDESC_XLOCK(td->td_proc->p_fd); 4160 fdp = td->td_proc->p_fd; 4161 td->td_retval[0] = fdp->fd_cmask; 4162 fdp->fd_cmask = uap->newmask & ALLPERMS; 4163 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4164 return (0); 4165} 4166 4167/* 4168 * Void all references to file by ripping underlying filesystem away from 4169 * vnode. 4170 */ 4171#ifndef _SYS_SYSPROTO_H_ 4172struct revoke_args { 4173 char *path; 4174}; 4175#endif 4176int 4177sys_revoke(td, uap) 4178 struct thread *td; 4179 register struct revoke_args /* { 4180 char *path; 4181 } */ *uap; 4182{ 4183 struct vnode *vp; 4184 struct vattr vattr; 4185 struct nameidata nd; 4186 int error; 4187 4188 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4189 uap->path, td); 4190 if ((error = namei(&nd)) != 0) 4191 return (error); 4192 vp = nd.ni_vp; 4193 NDFREE(&nd, NDF_ONLY_PNBUF); 4194 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4195 error = EINVAL; 4196 goto out; 4197 } 4198#ifdef MAC 4199 error = mac_vnode_check_revoke(td->td_ucred, vp); 4200 if (error != 0) 4201 goto out; 4202#endif 4203 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4204 if (error != 0) 4205 goto out; 4206 if (td->td_ucred->cr_uid != vattr.va_uid) { 4207 error = priv_check(td, PRIV_VFS_ADMIN); 4208 if (error != 0) 4209 goto out; 4210 } 4211 if (vcount(vp) > 1) 4212 VOP_REVOKE(vp, REVOKEALL); 4213out: 4214 vput(vp); 4215 return (error); 4216} 4217 4218/* 4219 * Convert a user file descriptor to a kernel file entry and check that, if it 4220 * is a capability, the correct rights are present. A reference on the file 4221 * entry is held upon returning. 4222 */ 4223int 4224getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4225{ 4226 struct file *fp; 4227 int error; 4228 4229 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4230 if (error != 0) 4231 return (error); 4232 4233 /* 4234 * The file could be not of the vnode type, or it may be not 4235 * yet fully initialized, in which case the f_vnode pointer 4236 * may be set, but f_ops is still badfileops. E.g., 4237 * devfs_open() transiently create such situation to 4238 * facilitate csw d_fdopen(). 4239 * 4240 * Dupfdopen() handling in kern_openat() installs the 4241 * half-baked file into the process descriptor table, allowing 4242 * other thread to dereference it. Guard against the race by 4243 * checking f_ops. 4244 */ 4245 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4246 fdrop(fp, curthread); 4247 return (EINVAL); 4248 } 4249 *fpp = fp; 4250 return (0); 4251} 4252 4253 4254/* 4255 * Get an (NFS) file handle. 4256 */ 4257#ifndef _SYS_SYSPROTO_H_ 4258struct lgetfh_args { 4259 char *fname; 4260 fhandle_t *fhp; 4261}; 4262#endif 4263int 4264sys_lgetfh(td, uap) 4265 struct thread *td; 4266 register struct lgetfh_args *uap; 4267{ 4268 struct nameidata nd; 4269 fhandle_t fh; 4270 register struct vnode *vp; 4271 int error; 4272 4273 error = priv_check(td, PRIV_VFS_GETFH); 4274 if (error != 0) 4275 return (error); 4276 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4277 uap->fname, td); 4278 error = namei(&nd); 4279 if (error != 0) 4280 return (error); 4281 NDFREE(&nd, NDF_ONLY_PNBUF); 4282 vp = nd.ni_vp; 4283 bzero(&fh, sizeof(fh)); 4284 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4285 error = VOP_VPTOFH(vp, &fh.fh_fid); 4286 vput(vp); 4287 if (error == 0) 4288 error = copyout(&fh, uap->fhp, sizeof (fh)); 4289 return (error); 4290} 4291 4292#ifndef _SYS_SYSPROTO_H_ 4293struct getfh_args { 4294 char *fname; 4295 fhandle_t *fhp; 4296}; 4297#endif 4298int 4299sys_getfh(td, uap) 4300 struct thread *td; 4301 register struct getfh_args *uap; 4302{ 4303 struct nameidata nd; 4304 fhandle_t fh; 4305 register struct vnode *vp; 4306 int error; 4307 4308 error = priv_check(td, PRIV_VFS_GETFH); 4309 if (error != 0) 4310 return (error); 4311 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4312 uap->fname, td); 4313 error = namei(&nd); 4314 if (error != 0) 4315 return (error); 4316 NDFREE(&nd, NDF_ONLY_PNBUF); 4317 vp = nd.ni_vp; 4318 bzero(&fh, sizeof(fh)); 4319 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4320 error = VOP_VPTOFH(vp, &fh.fh_fid); 4321 vput(vp); 4322 if (error == 0) 4323 error = copyout(&fh, uap->fhp, sizeof (fh)); 4324 return (error); 4325} 4326 4327/* 4328 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4329 * open descriptor. 4330 * 4331 * warning: do not remove the priv_check() call or this becomes one giant 4332 * security hole. 4333 */ 4334#ifndef _SYS_SYSPROTO_H_ 4335struct fhopen_args { 4336 const struct fhandle *u_fhp; 4337 int flags; 4338}; 4339#endif 4340int 4341sys_fhopen(td, uap) 4342 struct thread *td; 4343 struct fhopen_args /* { 4344 const struct fhandle *u_fhp; 4345 int flags; 4346 } */ *uap; 4347{ 4348 struct mount *mp; 4349 struct vnode *vp; 4350 struct fhandle fhp; 4351 struct file *fp; 4352 int fmode, error; 4353 int indx; 4354 4355 error = priv_check(td, PRIV_VFS_FHOPEN); 4356 if (error != 0) 4357 return (error); 4358 indx = -1; 4359 fmode = FFLAGS(uap->flags); 4360 /* why not allow a non-read/write open for our lockd? */ 4361 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4362 return (EINVAL); 4363 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4364 if (error != 0) 4365 return(error); 4366 /* find the mount point */ 4367 mp = vfs_busyfs(&fhp.fh_fsid); 4368 if (mp == NULL) 4369 return (ESTALE); 4370 /* now give me my vnode, it gets returned to me locked */ 4371 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4372 vfs_unbusy(mp); 4373 if (error != 0) 4374 return (error); 4375 4376 error = falloc_noinstall(td, &fp); 4377 if (error != 0) { 4378 vput(vp); 4379 return (error); 4380 } 4381 /* 4382 * An extra reference on `fp' has been held for us by 4383 * falloc_noinstall(). 4384 */ 4385 4386#ifdef INVARIANTS 4387 td->td_dupfd = -1; 4388#endif 4389 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4390 if (error != 0) { 4391 KASSERT(fp->f_ops == &badfileops, 4392 ("VOP_OPEN in fhopen() set f_ops")); 4393 KASSERT(td->td_dupfd < 0, 4394 ("fhopen() encountered fdopen()")); 4395 4396 vput(vp); 4397 goto bad; 4398 } 4399#ifdef INVARIANTS 4400 td->td_dupfd = 0; 4401#endif 4402 fp->f_vnode = vp; 4403 fp->f_seqcount = 1; 4404 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4405 &vnops); 4406 VOP_UNLOCK(vp, 0); 4407 if ((fmode & O_TRUNC) != 0) { 4408 error = fo_truncate(fp, 0, td->td_ucred, td); 4409 if (error != 0) 4410 goto bad; 4411 } 4412 4413 error = finstall(td, fp, &indx, fmode, NULL); 4414bad: 4415 fdrop(fp, td); 4416 td->td_retval[0] = indx; 4417 return (error); 4418} 4419 4420/* 4421 * Stat an (NFS) file handle. 4422 */ 4423#ifndef _SYS_SYSPROTO_H_ 4424struct fhstat_args { 4425 struct fhandle *u_fhp; 4426 struct stat *sb; 4427}; 4428#endif 4429int 4430sys_fhstat(td, uap) 4431 struct thread *td; 4432 register struct fhstat_args /* { 4433 struct fhandle *u_fhp; 4434 struct stat *sb; 4435 } */ *uap; 4436{ 4437 struct stat sb; 4438 struct fhandle fh; 4439 int error; 4440 4441 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4442 if (error != 0) 4443 return (error); 4444 error = kern_fhstat(td, fh, &sb); 4445 if (error == 0) 4446 error = copyout(&sb, uap->sb, sizeof(sb)); 4447 return (error); 4448} 4449 4450int 4451kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4452{ 4453 struct mount *mp; 4454 struct vnode *vp; 4455 int error; 4456 4457 error = priv_check(td, PRIV_VFS_FHSTAT); 4458 if (error != 0) 4459 return (error); 4460 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4461 return (ESTALE); 4462 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4463 vfs_unbusy(mp); 4464 if (error != 0) 4465 return (error); 4466 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4467 vput(vp); 4468 return (error); 4469} 4470 4471/* 4472 * Implement fstatfs() for (NFS) file handles. 4473 */ 4474#ifndef _SYS_SYSPROTO_H_ 4475struct fhstatfs_args { 4476 struct fhandle *u_fhp; 4477 struct statfs *buf; 4478}; 4479#endif 4480int 4481sys_fhstatfs(td, uap) 4482 struct thread *td; 4483 struct fhstatfs_args /* { 4484 struct fhandle *u_fhp; 4485 struct statfs *buf; 4486 } */ *uap; 4487{ 4488 struct statfs sf; 4489 fhandle_t fh; 4490 int error; 4491 4492 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4493 if (error != 0) 4494 return (error); 4495 error = kern_fhstatfs(td, fh, &sf); 4496 if (error != 0) 4497 return (error); 4498 return (copyout(&sf, uap->buf, sizeof(sf))); 4499} 4500 4501int 4502kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4503{ 4504 struct statfs *sp; 4505 struct mount *mp; 4506 struct vnode *vp; 4507 int error; 4508 4509 error = priv_check(td, PRIV_VFS_FHSTATFS); 4510 if (error != 0) 4511 return (error); 4512 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4513 return (ESTALE); 4514 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4515 if (error != 0) { 4516 vfs_unbusy(mp); 4517 return (error); 4518 } 4519 vput(vp); 4520 error = prison_canseemount(td->td_ucred, mp); 4521 if (error != 0) 4522 goto out; 4523#ifdef MAC 4524 error = mac_mount_check_stat(td->td_ucred, mp); 4525 if (error != 0) 4526 goto out; 4527#endif 4528 /* 4529 * Set these in case the underlying filesystem fails to do so. 4530 */ 4531 sp = &mp->mnt_stat; 4532 sp->f_version = STATFS_VERSION; 4533 sp->f_namemax = NAME_MAX; 4534 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4535 error = VFS_STATFS(mp, sp); 4536 if (error == 0) 4537 *buf = *sp; 4538out: 4539 vfs_unbusy(mp); 4540 return (error); 4541} 4542 4543int 4544kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4545{ 4546 struct file *fp; 4547 struct mount *mp; 4548 struct vnode *vp; 4549 cap_rights_t rights; 4550 off_t olen, ooffset; 4551 int error; 4552 4553 if (offset < 0 || len <= 0) 4554 return (EINVAL); 4555 /* Check for wrap. */ 4556 if (offset > OFF_MAX - len) 4557 return (EFBIG); 4558 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4559 if (error != 0) 4560 return (error); 4561 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4562 error = ESPIPE; 4563 goto out; 4564 } 4565 if ((fp->f_flag & FWRITE) == 0) { 4566 error = EBADF; 4567 goto out; 4568 } 4569 if (fp->f_type != DTYPE_VNODE) { 4570 error = ENODEV; 4571 goto out; 4572 } 4573 vp = fp->f_vnode; 4574 if (vp->v_type != VREG) { 4575 error = ENODEV; 4576 goto out; 4577 } 4578 4579 /* Allocating blocks may take a long time, so iterate. */ 4580 for (;;) { 4581 olen = len; 4582 ooffset = offset; 4583 4584 bwillwrite(); 4585 mp = NULL; 4586 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4587 if (error != 0) 4588 break; 4589 error = vn_lock(vp, LK_EXCLUSIVE); 4590 if (error != 0) { 4591 vn_finished_write(mp); 4592 break; 4593 } 4594#ifdef MAC 4595 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4596 if (error == 0) 4597#endif 4598 error = VOP_ALLOCATE(vp, &offset, &len); 4599 VOP_UNLOCK(vp, 0); 4600 vn_finished_write(mp); 4601 4602 if (olen + ooffset != offset + len) { 4603 panic("offset + len changed from %jx/%jx to %jx/%jx", 4604 ooffset, olen, offset, len); 4605 } 4606 if (error != 0 || len == 0) 4607 break; 4608 KASSERT(olen > len, ("Iteration did not make progress?")); 4609 maybe_yield(); 4610 } 4611 out: 4612 fdrop(fp, td); 4613 return (error); 4614} 4615 4616int 4617sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4618{ 4619 4620 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4621 uap->len); 4622 return (0); 4623} 4624 4625/* 4626 * Unlike madvise(2), we do not make a best effort to remember every 4627 * possible caching hint. Instead, we remember the last setting with 4628 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4629 * region of any current setting. 4630 */ 4631int 4632kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4633 int advice) 4634{ 4635 struct fadvise_info *fa, *new; 4636 struct file *fp; 4637 struct vnode *vp; 4638 cap_rights_t rights; 4639 off_t end; 4640 int error; 4641 4642 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4643 return (EINVAL); 4644 switch (advice) { 4645 case POSIX_FADV_SEQUENTIAL: 4646 case POSIX_FADV_RANDOM: 4647 case POSIX_FADV_NOREUSE: 4648 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4649 break; 4650 case POSIX_FADV_NORMAL: 4651 case POSIX_FADV_WILLNEED: 4652 case POSIX_FADV_DONTNEED: 4653 new = NULL; 4654 break; 4655 default: 4656 return (EINVAL); 4657 } 4658 /* XXX: CAP_POSIX_FADVISE? */ 4659 error = fget(td, fd, cap_rights_init(&rights), &fp); 4660 if (error != 0) 4661 goto out; 4662 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4663 error = ESPIPE; 4664 goto out; 4665 } 4666 if (fp->f_type != DTYPE_VNODE) { 4667 error = ENODEV; 4668 goto out; 4669 } 4670 vp = fp->f_vnode; 4671 if (vp->v_type != VREG) { 4672 error = ENODEV; 4673 goto out; 4674 } 4675 if (len == 0) 4676 end = OFF_MAX; 4677 else 4678 end = offset + len - 1; 4679 switch (advice) { 4680 case POSIX_FADV_SEQUENTIAL: 4681 case POSIX_FADV_RANDOM: 4682 case POSIX_FADV_NOREUSE: 4683 /* 4684 * Try to merge any existing non-standard region with 4685 * this new region if possible, otherwise create a new 4686 * non-standard region for this request. 4687 */ 4688 mtx_pool_lock(mtxpool_sleep, fp); 4689 fa = fp->f_advice; 4690 if (fa != NULL && fa->fa_advice == advice && 4691 ((fa->fa_start <= end && fa->fa_end >= offset) || 4692 (end != OFF_MAX && fa->fa_start == end + 1) || 4693 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4694 if (offset < fa->fa_start) 4695 fa->fa_start = offset; 4696 if (end > fa->fa_end) 4697 fa->fa_end = end; 4698 } else { 4699 new->fa_advice = advice; 4700 new->fa_start = offset; 4701 new->fa_end = end; 4702 new->fa_prevstart = 0; 4703 new->fa_prevend = 0; 4704 fp->f_advice = new; 4705 new = fa; 4706 } 4707 mtx_pool_unlock(mtxpool_sleep, fp); 4708 break; 4709 case POSIX_FADV_NORMAL: 4710 /* 4711 * If a the "normal" region overlaps with an existing 4712 * non-standard region, trim or remove the 4713 * non-standard region. 4714 */ 4715 mtx_pool_lock(mtxpool_sleep, fp); 4716 fa = fp->f_advice; 4717 if (fa != NULL) { 4718 if (offset <= fa->fa_start && end >= fa->fa_end) { 4719 new = fa; 4720 fp->f_advice = NULL; 4721 } else if (offset <= fa->fa_start && 4722 end >= fa->fa_start) 4723 fa->fa_start = end + 1; 4724 else if (offset <= fa->fa_end && end >= fa->fa_end) 4725 fa->fa_end = offset - 1; 4726 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4727 /* 4728 * If the "normal" region is a middle 4729 * portion of the existing 4730 * non-standard region, just remove 4731 * the whole thing rather than picking 4732 * one side or the other to 4733 * preserve. 4734 */ 4735 new = fa; 4736 fp->f_advice = NULL; 4737 } 4738 } 4739 mtx_pool_unlock(mtxpool_sleep, fp); 4740 break; 4741 case POSIX_FADV_WILLNEED: 4742 case POSIX_FADV_DONTNEED: 4743 error = VOP_ADVISE(vp, offset, end, advice); 4744 break; 4745 } 4746out: 4747 if (fp != NULL) 4748 fdrop(fp, td); 4749 free(new, M_FADVISE); 4750 return (error); 4751} 4752 4753int 4754sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4755{ 4756 4757 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4758 uap->len, uap->advice); 4759 return (0); 4760} 4761