vfs_syscalls.c revision 331643
1/*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: stable/11/sys/kern/vfs_syscalls.c 331643 2018-03-27 18:52:27Z dim $"); 41 42#include "opt_capsicum.h" 43#include "opt_compat.h" 44#include "opt_ktrace.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/bio.h> 49#include <sys/buf.h> 50#include <sys/capsicum.h> 51#include <sys/disk.h> 52#include <sys/sysent.h> 53#include <sys/malloc.h> 54#include <sys/mount.h> 55#include <sys/mutex.h> 56#include <sys/sysproto.h> 57#include <sys/namei.h> 58#include <sys/filedesc.h> 59#include <sys/kernel.h> 60#include <sys/fcntl.h> 61#include <sys/file.h> 62#include <sys/filio.h> 63#include <sys/limits.h> 64#include <sys/linker.h> 65#include <sys/rwlock.h> 66#include <sys/sdt.h> 67#include <sys/stat.h> 68#include <sys/sx.h> 69#include <sys/unistd.h> 70#include <sys/vnode.h> 71#include <sys/priv.h> 72#include <sys/proc.h> 73#include <sys/dirent.h> 74#include <sys/jail.h> 75#include <sys/syscallsubr.h> 76#include <sys/sysctl.h> 77#ifdef KTRACE 78#include <sys/ktrace.h> 79#endif 80 81#include <machine/stdarg.h> 82 83#include <security/audit/audit.h> 84#include <security/mac/mac_framework.h> 85 86#include <vm/vm.h> 87#include <vm/vm_object.h> 88#include <vm/vm_page.h> 89#include <vm/uma.h> 90 91#include <ufs/ufs/quota.h> 92 93MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 94 95SDT_PROVIDER_DEFINE(vfs); 96SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 97SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 98 99static int kern_chflagsat(struct thread *td, int fd, const char *path, 100 enum uio_seg pathseg, u_long flags, int atflag); 101static int setfflags(struct thread *td, struct vnode *, u_long); 102static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 103static int getutimens(const struct timespec *, enum uio_seg, 104 struct timespec *, int *); 105static int setutimes(struct thread *td, struct vnode *, 
106 const struct timespec *, int, int); 107static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 108 struct thread *td); 109 110/* 111 * Sync each mounted filesystem. 112 */ 113#ifndef _SYS_SYSPROTO_H_ 114struct sync_args { 115 int dummy; 116}; 117#endif 118/* ARGSUSED */ 119int 120sys_sync(struct thread *td, struct sync_args *uap) 121{ 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145} 146 147/* 148 * Change filesystem quotas. 
149 */ 150#ifndef _SYS_SYSPROTO_H_ 151struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156}; 157#endif 158int 159sys_quotactl(struct thread *td, struct quotactl_args *uap) 160{ 161 struct mount *mp; 162 struct nameidata nd; 163 int error; 164 165 AUDIT_ARG_CMD(uap->cmd); 166 AUDIT_ARG_UID(uap->uid); 167 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 168 return (EPERM); 169 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 170 uap->path, td); 171 if ((error = namei(&nd)) != 0) 172 return (error); 173 NDFREE(&nd, NDF_ONLY_PNBUF); 174 mp = nd.ni_vp->v_mount; 175 vfs_ref(mp); 176 vput(nd.ni_vp); 177 error = vfs_busy(mp, 0); 178 vfs_rel(mp); 179 if (error != 0) 180 return (error); 181 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 182 183 /* 184 * Since quota on operation typically needs to open quota 185 * file, the Q_QUOTAON handler needs to unbusy the mount point 186 * before calling into namei. Otherwise, unmount might be 187 * started between two vfs_busy() invocations (first is our, 188 * second is from mount point cross-walk code in lookup()), 189 * causing deadlock. 190 * 191 * Require that Q_QUOTAON handles the vfs_busy() reference on 192 * its own, always returning with ubusied mount point. 193 */ 194 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 195 vfs_unbusy(mp); 196 return (error); 197} 198 199/* 200 * Used by statfs conversion routines to scale the block size up if 201 * necessary so that all of the block counts are <= 'max_size'. Note 202 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 203 * value of 'n'. 204 */ 205void 206statfs_scale_blocks(struct statfs *sf, long max_size) 207{ 208 uint64_t count; 209 int shift; 210 211 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 212 213 /* 214 * Attempt to scale the block counts to give a more accurate 215 * overview to userland of the ratio of free space to used 216 * space. 
To do this, find the largest block count and compute 217 * a divisor that lets it fit into a signed integer <= max_size. 218 */ 219 if (sf->f_bavail < 0) 220 count = -sf->f_bavail; 221 else 222 count = sf->f_bavail; 223 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 224 if (count <= max_size) 225 return; 226 227 count >>= flsl(max_size); 228 shift = 0; 229 while (count > 0) { 230 shift++; 231 count >>=1; 232 } 233 234 sf->f_bsize <<= shift; 235 sf->f_blocks >>= shift; 236 sf->f_bfree >>= shift; 237 sf->f_bavail >>= shift; 238} 239 240static int 241kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 242{ 243 struct statfs *sp; 244 int error; 245 246 if (mp == NULL) 247 return (EBADF); 248 error = vfs_busy(mp, 0); 249 vfs_rel(mp); 250 if (error != 0) 251 return (error); 252#ifdef MAC 253 error = mac_mount_check_stat(td->td_ucred, mp); 254 if (error != 0) 255 goto out; 256#endif 257 /* 258 * Set these in case the underlying filesystem fails to do so. 259 */ 260 sp = &mp->mnt_stat; 261 sp->f_version = STATFS_VERSION; 262 sp->f_namemax = NAME_MAX; 263 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 264 error = VFS_STATFS(mp, sp); 265 if (error != 0) 266 goto out; 267 *buf = *sp; 268 if (priv_check(td, PRIV_VFS_GENERATION)) { 269 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 270 prison_enforce_statfs(td->td_ucred, mp, buf); 271 } 272out: 273 vfs_unbusy(mp); 274 return (error); 275} 276 277/* 278 * Get filesystem statistics. 
279 */ 280#ifndef _SYS_SYSPROTO_H_ 281struct statfs_args { 282 char *path; 283 struct statfs *buf; 284}; 285#endif 286int 287sys_statfs(struct thread *td, struct statfs_args *uap) 288{ 289 struct statfs *sfp; 290 int error; 291 292 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 293 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 294 if (error == 0) 295 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 296 free(sfp, M_STATFS); 297 return (error); 298} 299 300int 301kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 302 struct statfs *buf) 303{ 304 struct mount *mp; 305 struct nameidata nd; 306 int error; 307 308 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 309 pathseg, path, td); 310 error = namei(&nd); 311 if (error != 0) 312 return (error); 313 mp = nd.ni_vp->v_mount; 314 vfs_ref(mp); 315 NDFREE(&nd, NDF_ONLY_PNBUF); 316 vput(nd.ni_vp); 317 return (kern_do_statfs(td, mp, buf)); 318} 319 320/* 321 * Get filesystem statistics. 322 */ 323#ifndef _SYS_SYSPROTO_H_ 324struct fstatfs_args { 325 int fd; 326 struct statfs *buf; 327}; 328#endif 329int 330sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 331{ 332 struct statfs *sfp; 333 int error; 334 335 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 336 error = kern_fstatfs(td, uap->fd, sfp); 337 if (error == 0) 338 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 339 free(sfp, M_STATFS); 340 return (error); 341} 342 343int 344kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 345{ 346 struct file *fp; 347 struct mount *mp; 348 struct vnode *vp; 349 cap_rights_t rights; 350 int error; 351 352 AUDIT_ARG_FD(fd); 353 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 354 if (error != 0) 355 return (error); 356 vp = fp->f_vnode; 357 vn_lock(vp, LK_SHARED | LK_RETRY); 358#ifdef AUDIT 359 AUDIT_ARG_VNODE1(vp); 360#endif 361 mp = vp->v_mount; 362 if (mp != NULL) 363 vfs_ref(mp); 364 VOP_UNLOCK(vp, 0); 365 fdrop(fp, td); 
366 return (kern_do_statfs(td, mp, buf)); 367} 368 369/* 370 * Get statistics on all filesystems. 371 */ 372#ifndef _SYS_SYSPROTO_H_ 373struct getfsstat_args { 374 struct statfs *buf; 375 long bufsize; 376 int mode; 377}; 378#endif 379int 380sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 381{ 382 size_t count; 383 int error; 384 385 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 386 return (EINVAL); 387 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 388 UIO_USERSPACE, uap->mode); 389 if (error == 0) 390 td->td_retval[0] = count; 391 return (error); 392} 393 394/* 395 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 396 * The caller is responsible for freeing memory which will be allocated 397 * in '*buf'. 398 */ 399int 400kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 401 size_t *countp, enum uio_seg bufseg, int mode) 402{ 403 struct mount *mp, *nmp; 404 struct statfs *sfsp, *sp, *sptmp, *tofree; 405 size_t count, maxcount; 406 int error; 407 408 switch (mode) { 409 case MNT_WAIT: 410 case MNT_NOWAIT: 411 break; 412 default: 413 return (EINVAL); 414 } 415restart: 416 maxcount = bufsize / sizeof(struct statfs); 417 if (bufsize == 0) { 418 sfsp = NULL; 419 tofree = NULL; 420 } else if (bufseg == UIO_USERSPACE) { 421 sfsp = *buf; 422 tofree = NULL; 423 } else /* if (bufseg == UIO_SYSSPACE) */ { 424 count = 0; 425 mtx_lock(&mountlist_mtx); 426 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 427 count++; 428 } 429 mtx_unlock(&mountlist_mtx); 430 if (maxcount > count) 431 maxcount = count; 432 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 433 M_STATFS, M_WAITOK); 434 } 435 count = 0; 436 mtx_lock(&mountlist_mtx); 437 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 438 if (prison_canseemount(td->td_ucred, mp) != 0) { 439 nmp = TAILQ_NEXT(mp, mnt_list); 440 continue; 441 } 442#ifdef MAC 443 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 444 nmp = TAILQ_NEXT(mp, mnt_list); 445 continue; 446 } 
447#endif 448 if (mode == MNT_WAIT) { 449 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 450 /* 451 * If vfs_busy() failed, and MBF_NOWAIT 452 * wasn't passed, then the mp is gone. 453 * Furthermore, because of MBF_MNTLSTLOCK, 454 * the mountlist_mtx was dropped. We have 455 * no other choice than to start over. 456 */ 457 mtx_unlock(&mountlist_mtx); 458 free(tofree, M_STATFS); 459 goto restart; 460 } 461 } else { 462 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 463 nmp = TAILQ_NEXT(mp, mnt_list); 464 continue; 465 } 466 } 467 if (sfsp != NULL && count < maxcount) { 468 sp = &mp->mnt_stat; 469 /* 470 * Set these in case the underlying filesystem 471 * fails to do so. 472 */ 473 sp->f_version = STATFS_VERSION; 474 sp->f_namemax = NAME_MAX; 475 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 476 /* 477 * If MNT_NOWAIT is specified, do not refresh 478 * the fsstat cache. 479 */ 480 if (mode != MNT_NOWAIT) { 481 error = VFS_STATFS(mp, sp); 482 if (error != 0) { 483 mtx_lock(&mountlist_mtx); 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 vfs_unbusy(mp); 486 continue; 487 } 488 } 489 if (priv_check(td, PRIV_VFS_GENERATION)) { 490 sptmp = malloc(sizeof(struct statfs), M_STATFS, 491 M_WAITOK); 492 *sptmp = *sp; 493 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 494 prison_enforce_statfs(td->td_ucred, mp, sptmp); 495 sp = sptmp; 496 } else 497 sptmp = NULL; 498 if (bufseg == UIO_SYSSPACE) { 499 bcopy(sp, sfsp, sizeof(*sp)); 500 free(sptmp, M_STATFS); 501 } else /* if (bufseg == UIO_USERSPACE) */ { 502 error = copyout(sp, sfsp, sizeof(*sp)); 503 free(sptmp, M_STATFS); 504 if (error != 0) { 505 vfs_unbusy(mp); 506 return (error); 507 } 508 } 509 sfsp++; 510 } 511 count++; 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 } 516 mtx_unlock(&mountlist_mtx); 517 if (sfsp != NULL && count > maxcount) 518 *countp = maxcount; 519 else 520 *countp = count; 521 return (0); 522} 523 524#ifdef COMPAT_FREEBSD4 525/* 526 * Get old format filesystem 
statistics. 527 */ 528static void cvtstatfs(struct statfs *, struct ostatfs *); 529 530#ifndef _SYS_SYSPROTO_H_ 531struct freebsd4_statfs_args { 532 char *path; 533 struct ostatfs *buf; 534}; 535#endif 536int 537freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 538{ 539 struct ostatfs osb; 540 struct statfs *sfp; 541 int error; 542 543 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 544 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 545 if (error == 0) { 546 cvtstatfs(sfp, &osb); 547 error = copyout(&osb, uap->buf, sizeof(osb)); 548 } 549 free(sfp, M_STATFS); 550 return (error); 551} 552 553/* 554 * Get filesystem statistics. 555 */ 556#ifndef _SYS_SYSPROTO_H_ 557struct freebsd4_fstatfs_args { 558 int fd; 559 struct ostatfs *buf; 560}; 561#endif 562int 563freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 564{ 565 struct ostatfs osb; 566 struct statfs *sfp; 567 int error; 568 569 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 570 error = kern_fstatfs(td, uap->fd, sfp); 571 if (error == 0) { 572 cvtstatfs(sfp, &osb); 573 error = copyout(&osb, uap->buf, sizeof(osb)); 574 } 575 free(sfp, M_STATFS); 576 return (error); 577} 578 579/* 580 * Get statistics on all filesystems. 
581 */ 582#ifndef _SYS_SYSPROTO_H_ 583struct freebsd4_getfsstat_args { 584 struct ostatfs *buf; 585 long bufsize; 586 int mode; 587}; 588#endif 589int 590freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 591{ 592 struct statfs *buf, *sp; 593 struct ostatfs osb; 594 size_t count, size; 595 int error; 596 597 if (uap->bufsize < 0) 598 return (EINVAL); 599 count = uap->bufsize / sizeof(struct ostatfs); 600 if (count > SIZE_MAX / sizeof(struct statfs)) 601 return (EINVAL); 602 size = count * sizeof(struct statfs); 603 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 604 uap->mode); 605 td->td_retval[0] = count; 606 if (size != 0) { 607 sp = buf; 608 while (count != 0 && error == 0) { 609 cvtstatfs(sp, &osb); 610 error = copyout(&osb, uap->buf, sizeof(osb)); 611 sp++; 612 uap->buf++; 613 count--; 614 } 615 free(buf, M_STATFS); 616 } 617 return (error); 618} 619 620/* 621 * Implement fstatfs() for (NFS) file handles. 622 */ 623#ifndef _SYS_SYSPROTO_H_ 624struct freebsd4_fhstatfs_args { 625 struct fhandle *u_fhp; 626 struct ostatfs *buf; 627}; 628#endif 629int 630freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 631{ 632 struct ostatfs osb; 633 struct statfs *sfp; 634 fhandle_t fh; 635 int error; 636 637 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 638 if (error != 0) 639 return (error); 640 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 641 error = kern_fhstatfs(td, fh, sfp); 642 if (error == 0) { 643 cvtstatfs(sfp, &osb); 644 error = copyout(&osb, uap->buf, sizeof(osb)); 645 } 646 free(sfp, M_STATFS); 647 return (error); 648} 649 650/* 651 * Convert a new format statfs structure to an old format statfs structure. 
652 */ 653static void 654cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 655{ 656 657 statfs_scale_blocks(nsp, LONG_MAX); 658 bzero(osp, sizeof(*osp)); 659 osp->f_bsize = nsp->f_bsize; 660 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 661 osp->f_blocks = nsp->f_blocks; 662 osp->f_bfree = nsp->f_bfree; 663 osp->f_bavail = nsp->f_bavail; 664 osp->f_files = MIN(nsp->f_files, LONG_MAX); 665 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 666 osp->f_owner = nsp->f_owner; 667 osp->f_type = nsp->f_type; 668 osp->f_flags = nsp->f_flags; 669 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 670 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 671 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 672 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 673 strlcpy(osp->f_fstypename, nsp->f_fstypename, 674 MIN(MFSNAMELEN, OMFSNAMELEN)); 675 strlcpy(osp->f_mntonname, nsp->f_mntonname, 676 MIN(MNAMELEN, OMNAMELEN)); 677 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 678 MIN(MNAMELEN, OMNAMELEN)); 679 osp->f_fsid = nsp->f_fsid; 680} 681#endif /* COMPAT_FREEBSD4 */ 682 683/* 684 * Change current working directory to a given file descriptor. 
685 */ 686#ifndef _SYS_SYSPROTO_H_ 687struct fchdir_args { 688 int fd; 689}; 690#endif 691int 692sys_fchdir(struct thread *td, struct fchdir_args *uap) 693{ 694 struct vnode *vp, *tdp; 695 struct mount *mp; 696 struct file *fp; 697 cap_rights_t rights; 698 int error; 699 700 AUDIT_ARG_FD(uap->fd); 701 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 702 &fp); 703 if (error != 0) 704 return (error); 705 vp = fp->f_vnode; 706 vrefact(vp); 707 fdrop(fp, td); 708 vn_lock(vp, LK_SHARED | LK_RETRY); 709 AUDIT_ARG_VNODE1(vp); 710 error = change_dir(vp, td); 711 while (!error && (mp = vp->v_mountedhere) != NULL) { 712 if (vfs_busy(mp, 0)) 713 continue; 714 error = VFS_ROOT(mp, LK_SHARED, &tdp); 715 vfs_unbusy(mp); 716 if (error != 0) 717 break; 718 vput(vp); 719 vp = tdp; 720 } 721 if (error != 0) { 722 vput(vp); 723 return (error); 724 } 725 VOP_UNLOCK(vp, 0); 726 pwd_chdir(td, vp); 727 return (0); 728} 729 730/* 731 * Change current working directory (``.''). 732 */ 733#ifndef _SYS_SYSPROTO_H_ 734struct chdir_args { 735 char *path; 736}; 737#endif 738int 739sys_chdir(struct thread *td, struct chdir_args *uap) 740{ 741 742 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 743} 744 745int 746kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 747{ 748 struct nameidata nd; 749 int error; 750 751 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 752 pathseg, path, td); 753 if ((error = namei(&nd)) != 0) 754 return (error); 755 if ((error = change_dir(nd.ni_vp, td)) != 0) { 756 vput(nd.ni_vp); 757 NDFREE(&nd, NDF_ONLY_PNBUF); 758 return (error); 759 } 760 VOP_UNLOCK(nd.ni_vp, 0); 761 NDFREE(&nd, NDF_ONLY_PNBUF); 762 pwd_chdir(td, nd.ni_vp); 763 return (0); 764} 765 766/* 767 * Change notion of root (``/'') directory. 
768 */ 769#ifndef _SYS_SYSPROTO_H_ 770struct chroot_args { 771 char *path; 772}; 773#endif 774int 775sys_chroot(struct thread *td, struct chroot_args *uap) 776{ 777 struct nameidata nd; 778 int error; 779 780 error = priv_check(td, PRIV_VFS_CHROOT); 781 if (error != 0) 782 return (error); 783 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 784 UIO_USERSPACE, uap->path, td); 785 error = namei(&nd); 786 if (error != 0) 787 goto error; 788 error = change_dir(nd.ni_vp, td); 789 if (error != 0) 790 goto e_vunlock; 791#ifdef MAC 792 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 793 if (error != 0) 794 goto e_vunlock; 795#endif 796 VOP_UNLOCK(nd.ni_vp, 0); 797 error = pwd_chroot(td, nd.ni_vp); 798 vrele(nd.ni_vp); 799 NDFREE(&nd, NDF_ONLY_PNBUF); 800 return (error); 801e_vunlock: 802 vput(nd.ni_vp); 803error: 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806} 807 808/* 809 * Common routine for chroot and chdir. Callers must provide a locked vnode 810 * instance. 
811 */ 812int 813change_dir(struct vnode *vp, struct thread *td) 814{ 815#ifdef MAC 816 int error; 817#endif 818 819 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 820 if (vp->v_type != VDIR) 821 return (ENOTDIR); 822#ifdef MAC 823 error = mac_vnode_check_chdir(td->td_ucred, vp); 824 if (error != 0) 825 return (error); 826#endif 827 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 828} 829 830static __inline void 831flags_to_rights(int flags, cap_rights_t *rightsp) 832{ 833 834 if (flags & O_EXEC) { 835 cap_rights_set(rightsp, CAP_FEXECVE); 836 } else { 837 switch ((flags & O_ACCMODE)) { 838 case O_RDONLY: 839 cap_rights_set(rightsp, CAP_READ); 840 break; 841 case O_RDWR: 842 cap_rights_set(rightsp, CAP_READ); 843 /* FALLTHROUGH */ 844 case O_WRONLY: 845 cap_rights_set(rightsp, CAP_WRITE); 846 if (!(flags & (O_APPEND | O_TRUNC))) 847 cap_rights_set(rightsp, CAP_SEEK); 848 break; 849 } 850 } 851 852 if (flags & O_CREAT) 853 cap_rights_set(rightsp, CAP_CREATE); 854 855 if (flags & O_TRUNC) 856 cap_rights_set(rightsp, CAP_FTRUNCATE); 857 858 if (flags & (O_SYNC | O_FSYNC)) 859 cap_rights_set(rightsp, CAP_FSYNC); 860 861 if (flags & (O_EXLOCK | O_SHLOCK)) 862 cap_rights_set(rightsp, CAP_FLOCK); 863} 864 865/* 866 * Check permissions, allocate an open file structure, and call the device 867 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/*
 * open(2): openat() relative to the current working directory.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2): records the directory descriptor for audit and defers to
 * kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Open 'path' (interpreted according to 'pathseg') relative to directory
 * descriptor 'fd' with open(2) 'flags' and creation 'mode'.
 *
 * On success the new descriptor number is stored in td->td_retval[0] and
 * 0 is returned.  EINVAL is returned when O_EXEC is combined with any
 * access mode bit or when both access mode bits are set; other errors
 * come from file allocation, vn_open(), truncation or descriptor
 * installation.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see the 'bad' label. */
	indx = -1;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights required on 'fd' for the lookup: CAP_LOOKUP plus flags. */
	cap_rights_init(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and set by finstall() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Creation mode: apply the umask, keep permission bits, drop sticky. */
	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() has set 'indx' itself. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
		    DTYPE_VNODE, vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Old creat(): an open for writing with O_CREAT | O_TRUNC.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
	char	*path;
	int	mode;
	int	dev;
};
#endif
/*
 * mknod(2): mknodat() relative to the current working directory.
 */
int
sys_mknod(struct thread *td, struct mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2): thin wrapper around kern_mknodat().
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

/*
 * Create a character device, block device or whiteout entry at 'path'
 * relative to directory descriptor 'fd'.  A FIFO request with dev == 0
 * is forwarded to kern_mkfifoat(); any other file type yields EINVAL.
 *
 * Device nodes require PRIV_VFS_MKNOD_DEV (and dev != VNOVAL), whiteouts
 * require PRIV_VFS_MKNOD_WHT.  Returns EEXIST if the name already exists.
 */
int
kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode, int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Privilege and argument checks, by requested file type. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: release everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		/* Permission bits only, filtered through the umask. */
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were rejected by the switch above. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * A write could not be started without waiting.  Release
		 * the lookup state, wait, then redo the lookup from scratch.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not handed to anyone: release it. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2): mkfifoat() relative to the current working directory.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2): thin wrapper around kern_mkfifoat().
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO at 'path' relative to directory descriptor 'fd', with the
 * permission bits of 'mode' filtered through the process umask.  Returns
 * EEXIST if the name already exists, otherwise the lookup, MAC or
 * VOP_MKNOD() error.
 */
int
kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: release everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * A write could not be started without waiting.  Release
		 * the lookup state, wait, then redo the lookup from scratch.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The new vnode is not handed to anyone: release it. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2): both paths are relative to the current working directory and
 * the source path is looked up with FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2): AT_SYMLINK_FOLLOW is the only flag accepted (EINVAL
 * otherwise); it selects FOLLOW rather than NOFOLLOW for the source
 * lookup.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if (flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
}

/* Run-time knobs (default off) consulted by can_hardlink(). */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether 'cred' may hard-link to 'vp' under the hardlink_check_*
 * knobs.  Returns 0 when both knobs are off.  Otherwise a uid mismatch
 * (uid knob) or lack of membership in the file's group (gid knob) requires
 * PRIV_VFS_LINK, and the VOP_GETATTR() or privilege-check error is
 * returned on failure.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	/* Skip the attribute fetch when neither check is enabled. */
	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	return (0);
}

int
1357kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1358 enum uio_seg segflg, int follow) 1359{ 1360 struct vnode *vp; 1361 struct mount *mp; 1362 struct nameidata nd; 1363 cap_rights_t rights; 1364 int error; 1365 1366again: 1367 bwillwrite(); 1368 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1369 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1370 1371 if ((error = namei(&nd)) != 0) 1372 return (error); 1373 NDFREE(&nd, NDF_ONLY_PNBUF); 1374 vp = nd.ni_vp; 1375 if (vp->v_type == VDIR) { 1376 vrele(vp); 1377 return (EPERM); /* POSIX */ 1378 } 1379 NDINIT_ATRIGHTS(&nd, CREATE, 1380 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1381 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1382 if ((error = namei(&nd)) == 0) { 1383 if (nd.ni_vp != NULL) { 1384 NDFREE(&nd, NDF_ONLY_PNBUF); 1385 if (nd.ni_dvp == nd.ni_vp) 1386 vrele(nd.ni_dvp); 1387 else 1388 vput(nd.ni_dvp); 1389 vrele(nd.ni_vp); 1390 vrele(vp); 1391 return (EEXIST); 1392 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1393 /* 1394 * Cross-device link. No need to recheck 1395 * vp->v_type, since it cannot change, except 1396 * to VBAD. 
1397 */ 1398 NDFREE(&nd, NDF_ONLY_PNBUF); 1399 vput(nd.ni_dvp); 1400 vrele(vp); 1401 return (EXDEV); 1402 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1403 error = can_hardlink(vp, td->td_ucred); 1404#ifdef MAC 1405 if (error == 0) 1406 error = mac_vnode_check_link(td->td_ucred, 1407 nd.ni_dvp, vp, &nd.ni_cnd); 1408#endif 1409 if (error != 0) { 1410 vput(vp); 1411 vput(nd.ni_dvp); 1412 NDFREE(&nd, NDF_ONLY_PNBUF); 1413 return (error); 1414 } 1415 error = vn_start_write(vp, &mp, V_NOWAIT); 1416 if (error != 0) { 1417 vput(vp); 1418 vput(nd.ni_dvp); 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 error = vn_start_write(NULL, &mp, 1421 V_XSLEEP | PCATCH); 1422 if (error != 0) 1423 return (error); 1424 goto again; 1425 } 1426 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1427 VOP_UNLOCK(vp, 0); 1428 vput(nd.ni_dvp); 1429 vn_finished_write(mp); 1430 NDFREE(&nd, NDF_ONLY_PNBUF); 1431 } else { 1432 vput(nd.ni_dvp); 1433 NDFREE(&nd, NDF_ONLY_PNBUF); 1434 vrele(vp); 1435 goto again; 1436 } 1437 } 1438 vrele(vp); 1439 return (error); 1440} 1441 1442/* 1443 * Make a symbolic link. 
1444 */ 1445#ifndef _SYS_SYSPROTO_H_ 1446struct symlink_args { 1447 char *path; 1448 char *link; 1449}; 1450#endif 1451int 1452sys_symlink(struct thread *td, struct symlink_args *uap) 1453{ 1454 1455 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1456 UIO_USERSPACE)); 1457} 1458 1459#ifndef _SYS_SYSPROTO_H_ 1460struct symlinkat_args { 1461 char *path; 1462 int fd; 1463 char *path2; 1464}; 1465#endif 1466int 1467sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1468{ 1469 1470 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1471 UIO_USERSPACE)); 1472} 1473 1474int 1475kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1476 enum uio_seg segflg) 1477{ 1478 struct mount *mp; 1479 struct vattr vattr; 1480 char *syspath; 1481 struct nameidata nd; 1482 int error; 1483 cap_rights_t rights; 1484 1485 if (segflg == UIO_SYSSPACE) { 1486 syspath = path1; 1487 } else { 1488 syspath = uma_zalloc(namei_zone, M_WAITOK); 1489 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1490 goto out; 1491 } 1492 AUDIT_ARG_TEXT(syspath); 1493restart: 1494 bwillwrite(); 1495 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1496 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1497 td); 1498 if ((error = namei(&nd)) != 0) 1499 goto out; 1500 if (nd.ni_vp) { 1501 NDFREE(&nd, NDF_ONLY_PNBUF); 1502 if (nd.ni_vp == nd.ni_dvp) 1503 vrele(nd.ni_dvp); 1504 else 1505 vput(nd.ni_dvp); 1506 vrele(nd.ni_vp); 1507 error = EEXIST; 1508 goto out; 1509 } 1510 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1511 NDFREE(&nd, NDF_ONLY_PNBUF); 1512 vput(nd.ni_dvp); 1513 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1514 goto out; 1515 goto restart; 1516 } 1517 VATTR_NULL(&vattr); 1518 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1519#ifdef MAC 1520 vattr.va_type = VLNK; 1521 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1522 &vattr); 1523 if (error != 
0) 1524 goto out2; 1525#endif 1526 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1527 if (error == 0) 1528 vput(nd.ni_vp); 1529#ifdef MAC 1530out2: 1531#endif 1532 NDFREE(&nd, NDF_ONLY_PNBUF); 1533 vput(nd.ni_dvp); 1534 vn_finished_write(mp); 1535out: 1536 if (segflg != UIO_SYSSPACE) 1537 uma_zfree(namei_zone, syspath); 1538 return (error); 1539} 1540 1541/* 1542 * Delete a whiteout from the filesystem. 1543 */ 1544#ifndef _SYS_SYSPROTO_H_ 1545struct undelete_args { 1546 char *path; 1547}; 1548#endif 1549int 1550sys_undelete(struct thread *td, struct undelete_args *uap) 1551{ 1552 struct mount *mp; 1553 struct nameidata nd; 1554 int error; 1555 1556restart: 1557 bwillwrite(); 1558 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1559 UIO_USERSPACE, uap->path, td); 1560 error = namei(&nd); 1561 if (error != 0) 1562 return (error); 1563 1564 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1565 NDFREE(&nd, NDF_ONLY_PNBUF); 1566 if (nd.ni_vp == nd.ni_dvp) 1567 vrele(nd.ni_dvp); 1568 else 1569 vput(nd.ni_dvp); 1570 if (nd.ni_vp) 1571 vrele(nd.ni_vp); 1572 return (EEXIST); 1573 } 1574 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1575 NDFREE(&nd, NDF_ONLY_PNBUF); 1576 vput(nd.ni_dvp); 1577 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1578 return (error); 1579 goto restart; 1580 } 1581 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 vput(nd.ni_dvp); 1584 vn_finished_write(mp); 1585 return (error); 1586} 1587 1588/* 1589 * Delete a name from the filesystem. 
1590 */ 1591#ifndef _SYS_SYSPROTO_H_ 1592struct unlink_args { 1593 char *path; 1594}; 1595#endif 1596int 1597sys_unlink(struct thread *td, struct unlink_args *uap) 1598{ 1599 1600 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1601} 1602 1603#ifndef _SYS_SYSPROTO_H_ 1604struct unlinkat_args { 1605 int fd; 1606 char *path; 1607 int flag; 1608}; 1609#endif 1610int 1611sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1612{ 1613 int flag = uap->flag; 1614 int fd = uap->fd; 1615 char *path = uap->path; 1616 1617 if (flag & ~AT_REMOVEDIR) 1618 return (EINVAL); 1619 1620 if (flag & AT_REMOVEDIR) 1621 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1622 else 1623 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1624} 1625 1626int 1627kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1628 ino_t oldinum) 1629{ 1630 struct mount *mp; 1631 struct vnode *vp; 1632 struct nameidata nd; 1633 struct stat sb; 1634 cap_rights_t rights; 1635 int error; 1636 1637restart: 1638 bwillwrite(); 1639 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1640 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1641 if ((error = namei(&nd)) != 0) 1642 return (error == EINVAL ? EPERM : error); 1643 vp = nd.ni_vp; 1644 if (vp->v_type == VDIR && oldinum == 0) { 1645 error = EPERM; /* POSIX */ 1646 } else if (oldinum != 0 && 1647 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1648 sb.st_ino != oldinum) { 1649 error = EIDRM; /* Identifier removed */ 1650 } else { 1651 /* 1652 * The root of a mounted filesystem cannot be deleted. 1653 * 1654 * XXX: can this only be a VDIR case? 
1655 */ 1656 if (vp->v_vflag & VV_ROOT) 1657 error = EBUSY; 1658 } 1659 if (error == 0) { 1660 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1661 NDFREE(&nd, NDF_ONLY_PNBUF); 1662 vput(nd.ni_dvp); 1663 if (vp == nd.ni_dvp) 1664 vrele(vp); 1665 else 1666 vput(vp); 1667 if ((error = vn_start_write(NULL, &mp, 1668 V_XSLEEP | PCATCH)) != 0) 1669 return (error); 1670 goto restart; 1671 } 1672#ifdef MAC 1673 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1674 &nd.ni_cnd); 1675 if (error != 0) 1676 goto out; 1677#endif 1678 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1679 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1680#ifdef MAC 1681out: 1682#endif 1683 vn_finished_write(mp); 1684 } 1685 NDFREE(&nd, NDF_ONLY_PNBUF); 1686 vput(nd.ni_dvp); 1687 if (vp == nd.ni_dvp) 1688 vrele(vp); 1689 else 1690 vput(vp); 1691 return (error); 1692} 1693 1694/* 1695 * Reposition read/write file offset. 1696 */ 1697#ifndef _SYS_SYSPROTO_H_ 1698struct lseek_args { 1699 int fd; 1700 int pad; 1701 off_t offset; 1702 int whence; 1703}; 1704#endif 1705int 1706sys_lseek(struct thread *td, struct lseek_args *uap) 1707{ 1708 1709 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1710} 1711 1712int 1713kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1714{ 1715 struct file *fp; 1716 cap_rights_t rights; 1717 int error; 1718 1719 AUDIT_ARG_FD(fd); 1720 error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1721 if (error != 0) 1722 return (error); 1723 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1724 fo_seek(fp, offset, whence, td) : ESPIPE; 1725 fdrop(fp, td); 1726 return (error); 1727} 1728 1729#if defined(COMPAT_43) 1730/* 1731 * Reposition read/write file offset. 
1732 */ 1733#ifndef _SYS_SYSPROTO_H_ 1734struct olseek_args { 1735 int fd; 1736 long offset; 1737 int whence; 1738}; 1739#endif 1740int 1741olseek(struct thread *td, struct olseek_args *uap) 1742{ 1743 1744 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1745} 1746#endif /* COMPAT_43 */ 1747 1748#if defined(COMPAT_FREEBSD6) 1749/* Version with the 'pad' argument */ 1750int 1751freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1752{ 1753 1754 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1755} 1756#endif 1757 1758/* 1759 * Check access permissions using passed credentials. 1760 */ 1761static int 1762vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1763 struct thread *td) 1764{ 1765 accmode_t accmode; 1766 int error; 1767 1768 /* Flags == 0 means only check for existence. */ 1769 if (user_flags == 0) 1770 return (0); 1771 1772 accmode = 0; 1773 if (user_flags & R_OK) 1774 accmode |= VREAD; 1775 if (user_flags & W_OK) 1776 accmode |= VWRITE; 1777 if (user_flags & X_OK) 1778 accmode |= VEXEC; 1779#ifdef MAC 1780 error = mac_vnode_check_access(cred, vp, accmode); 1781 if (error != 0) 1782 return (error); 1783#endif 1784 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1785 error = VOP_ACCESS(vp, accmode, cred, td); 1786 return (error); 1787} 1788 1789/* 1790 * Check access permissions using "real" credentials. 
1791 */ 1792#ifndef _SYS_SYSPROTO_H_ 1793struct access_args { 1794 char *path; 1795 int amode; 1796}; 1797#endif 1798int 1799sys_access(struct thread *td, struct access_args *uap) 1800{ 1801 1802 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1803 0, uap->amode)); 1804} 1805 1806#ifndef _SYS_SYSPROTO_H_ 1807struct faccessat_args { 1808 int dirfd; 1809 char *path; 1810 int amode; 1811 int flag; 1812} 1813#endif 1814int 1815sys_faccessat(struct thread *td, struct faccessat_args *uap) 1816{ 1817 1818 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1819 uap->amode)); 1820} 1821 1822int 1823kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1824 int flag, int amode) 1825{ 1826 struct ucred *cred, *usecred; 1827 struct vnode *vp; 1828 struct nameidata nd; 1829 cap_rights_t rights; 1830 int error; 1831 1832 if (flag & ~AT_EACCESS) 1833 return (EINVAL); 1834 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1835 return (EINVAL); 1836 1837 /* 1838 * Create and modify a temporary credential instead of one that 1839 * is potentially shared (if we need one). 
1840 */ 1841 cred = td->td_ucred; 1842 if ((flag & AT_EACCESS) == 0 && 1843 ((cred->cr_uid != cred->cr_ruid || 1844 cred->cr_rgid != cred->cr_groups[0]))) { 1845 usecred = crdup(cred); 1846 usecred->cr_uid = cred->cr_ruid; 1847 usecred->cr_groups[0] = cred->cr_rgid; 1848 td->td_ucred = usecred; 1849 } else 1850 usecred = cred; 1851 AUDIT_ARG_VALUE(amode); 1852 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1853 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1854 td); 1855 if ((error = namei(&nd)) != 0) 1856 goto out; 1857 vp = nd.ni_vp; 1858 1859 error = vn_access(vp, amode, usecred, td); 1860 NDFREE(&nd, NDF_ONLY_PNBUF); 1861 vput(vp); 1862out: 1863 if (usecred != cred) { 1864 td->td_ucred = cred; 1865 crfree(usecred); 1866 } 1867 return (error); 1868} 1869 1870/* 1871 * Check access permissions using "effective" credentials. 1872 */ 1873#ifndef _SYS_SYSPROTO_H_ 1874struct eaccess_args { 1875 char *path; 1876 int amode; 1877}; 1878#endif 1879int 1880sys_eaccess(struct thread *td, struct eaccess_args *uap) 1881{ 1882 1883 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1884 AT_EACCESS, uap->amode)); 1885} 1886 1887#if defined(COMPAT_43) 1888/* 1889 * Get file status; this version follows links. 1890 */ 1891#ifndef _SYS_SYSPROTO_H_ 1892struct ostat_args { 1893 char *path; 1894 struct ostat *ub; 1895}; 1896#endif 1897int 1898ostat(struct thread *td, struct ostat_args *uap) 1899{ 1900 struct stat sb; 1901 struct ostat osb; 1902 int error; 1903 1904 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1905 &sb, NULL); 1906 if (error != 0) 1907 return (error); 1908 cvtstat(&sb, &osb); 1909 return (copyout(&osb, uap->ub, sizeof (osb))); 1910} 1911 1912/* 1913 * Get file status; this version does not follow links. 
1914 */ 1915#ifndef _SYS_SYSPROTO_H_ 1916struct olstat_args { 1917 char *path; 1918 struct ostat *ub; 1919}; 1920#endif 1921int 1922olstat(struct thread *td, struct olstat_args *uap) 1923{ 1924 struct stat sb; 1925 struct ostat osb; 1926 int error; 1927 1928 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 1929 UIO_USERSPACE, &sb, NULL); 1930 if (error != 0) 1931 return (error); 1932 cvtstat(&sb, &osb); 1933 return (copyout(&osb, uap->ub, sizeof (osb))); 1934} 1935 1936/* 1937 * Convert from an old to a new stat structure. 1938 */ 1939void 1940cvtstat(struct stat *st, struct ostat *ost) 1941{ 1942 1943 bzero(ost, sizeof(*ost)); 1944 ost->st_dev = st->st_dev; 1945 ost->st_ino = st->st_ino; 1946 ost->st_mode = st->st_mode; 1947 ost->st_nlink = st->st_nlink; 1948 ost->st_uid = st->st_uid; 1949 ost->st_gid = st->st_gid; 1950 ost->st_rdev = st->st_rdev; 1951 if (st->st_size < (quad_t)1 << 32) 1952 ost->st_size = st->st_size; 1953 else 1954 ost->st_size = -2; 1955 ost->st_atim = st->st_atim; 1956 ost->st_mtim = st->st_mtim; 1957 ost->st_ctim = st->st_ctim; 1958 ost->st_blksize = st->st_blksize; 1959 ost->st_blocks = st->st_blocks; 1960 ost->st_flags = st->st_flags; 1961 ost->st_gen = st->st_gen; 1962} 1963#endif /* COMPAT_43 */ 1964 1965/* 1966 * Get file status; this version follows links. 
1967 */ 1968#ifndef _SYS_SYSPROTO_H_ 1969struct stat_args { 1970 char *path; 1971 struct stat *ub; 1972}; 1973#endif 1974int 1975sys_stat(struct thread *td, struct stat_args *uap) 1976{ 1977 struct stat sb; 1978 int error; 1979 1980 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1981 &sb, NULL); 1982 if (error == 0) 1983 error = copyout(&sb, uap->ub, sizeof (sb)); 1984 return (error); 1985} 1986 1987#ifndef _SYS_SYSPROTO_H_ 1988struct fstatat_args { 1989 int fd; 1990 char *path; 1991 struct stat *buf; 1992 int flag; 1993} 1994#endif 1995int 1996sys_fstatat(struct thread *td, struct fstatat_args *uap) 1997{ 1998 struct stat sb; 1999 int error; 2000 2001 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2002 UIO_USERSPACE, &sb, NULL); 2003 if (error == 0) 2004 error = copyout(&sb, uap->buf, sizeof (sb)); 2005 return (error); 2006} 2007 2008int 2009kern_statat(struct thread *td, int flag, int fd, char *path, 2010 enum uio_seg pathseg, struct stat *sbp, 2011 void (*hook)(struct vnode *vp, struct stat *sbp)) 2012{ 2013 struct nameidata nd; 2014 struct stat sb; 2015 cap_rights_t rights; 2016 int error; 2017 2018 if (flag & ~AT_SYMLINK_NOFOLLOW) 2019 return (EINVAL); 2020 2021 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2022 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2023 cap_rights_init(&rights, CAP_FSTAT), td); 2024 2025 if ((error = namei(&nd)) != 0) 2026 return (error); 2027 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2028 if (error == 0) { 2029 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2030 if (S_ISREG(sb.st_mode)) 2031 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2032 if (__predict_false(hook != NULL)) 2033 hook(nd.ni_vp, &sb); 2034 } 2035 NDFREE(&nd, NDF_ONLY_PNBUF); 2036 vput(nd.ni_vp); 2037 if (error != 0) 2038 return (error); 2039 *sbp = sb; 2040#ifdef KTRACE 2041 if (KTRPOINT(td, KTR_STRUCT)) 2042 ktrstat(&sb); 2043#endif 2044 return (0); 2045} 2046 2047/* 2048 * Get file status; this version does not follow links. 2049 */ 2050#ifndef _SYS_SYSPROTO_H_ 2051struct lstat_args { 2052 char *path; 2053 struct stat *ub; 2054}; 2055#endif 2056int 2057sys_lstat(struct thread *td, struct lstat_args *uap) 2058{ 2059 struct stat sb; 2060 int error; 2061 2062 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2063 UIO_USERSPACE, &sb, NULL); 2064 if (error == 0) 2065 error = copyout(&sb, uap->ub, sizeof (sb)); 2066 return (error); 2067} 2068 2069/* 2070 * Implementation of the NetBSD [l]stat() functions. 
2071 */ 2072void 2073cvtnstat( struct stat *sb, struct nstat *nsb) 2074{ 2075 2076 bzero(nsb, sizeof *nsb); 2077 nsb->st_dev = sb->st_dev; 2078 nsb->st_ino = sb->st_ino; 2079 nsb->st_mode = sb->st_mode; 2080 nsb->st_nlink = sb->st_nlink; 2081 nsb->st_uid = sb->st_uid; 2082 nsb->st_gid = sb->st_gid; 2083 nsb->st_rdev = sb->st_rdev; 2084 nsb->st_atim = sb->st_atim; 2085 nsb->st_mtim = sb->st_mtim; 2086 nsb->st_ctim = sb->st_ctim; 2087 nsb->st_size = sb->st_size; 2088 nsb->st_blocks = sb->st_blocks; 2089 nsb->st_blksize = sb->st_blksize; 2090 nsb->st_flags = sb->st_flags; 2091 nsb->st_gen = sb->st_gen; 2092 nsb->st_birthtim = sb->st_birthtim; 2093} 2094 2095#ifndef _SYS_SYSPROTO_H_ 2096struct nstat_args { 2097 char *path; 2098 struct nstat *ub; 2099}; 2100#endif 2101int 2102sys_nstat(struct thread *td, struct nstat_args *uap) 2103{ 2104 struct stat sb; 2105 struct nstat nsb; 2106 int error; 2107 2108 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2109 &sb, NULL); 2110 if (error != 0) 2111 return (error); 2112 cvtnstat(&sb, &nsb); 2113 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2114} 2115 2116/* 2117 * NetBSD lstat. Get file status; this version does not follow links. 2118 */ 2119#ifndef _SYS_SYSPROTO_H_ 2120struct lstat_args { 2121 char *path; 2122 struct stat *ub; 2123}; 2124#endif 2125int 2126sys_nlstat(struct thread *td, struct nlstat_args *uap) 2127{ 2128 struct stat sb; 2129 struct nstat nsb; 2130 int error; 2131 2132 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2133 UIO_USERSPACE, &sb, NULL); 2134 if (error != 0) 2135 return (error); 2136 cvtnstat(&sb, &nsb); 2137 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2138} 2139 2140/* 2141 * Get configurable pathname variables. 
2142 */ 2143#ifndef _SYS_SYSPROTO_H_ 2144struct pathconf_args { 2145 char *path; 2146 int name; 2147}; 2148#endif 2149int 2150sys_pathconf(struct thread *td, struct pathconf_args *uap) 2151{ 2152 2153 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2154} 2155 2156#ifndef _SYS_SYSPROTO_H_ 2157struct lpathconf_args { 2158 char *path; 2159 int name; 2160}; 2161#endif 2162int 2163sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2164{ 2165 2166 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2167 NOFOLLOW)); 2168} 2169 2170int 2171kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2172 u_long flags) 2173{ 2174 struct nameidata nd; 2175 int error; 2176 2177 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2178 pathseg, path, td); 2179 if ((error = namei(&nd)) != 0) 2180 return (error); 2181 NDFREE(&nd, NDF_ONLY_PNBUF); 2182 2183 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2184 vput(nd.ni_vp); 2185 return (error); 2186} 2187 2188/* 2189 * Return target name of a symbolic link. 
2190 */ 2191#ifndef _SYS_SYSPROTO_H_ 2192struct readlink_args { 2193 char *path; 2194 char *buf; 2195 size_t count; 2196}; 2197#endif 2198int 2199sys_readlink(struct thread *td, struct readlink_args *uap) 2200{ 2201 2202 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2203 uap->buf, UIO_USERSPACE, uap->count)); 2204} 2205#ifndef _SYS_SYSPROTO_H_ 2206struct readlinkat_args { 2207 int fd; 2208 char *path; 2209 char *buf; 2210 size_t bufsize; 2211}; 2212#endif 2213int 2214sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2215{ 2216 2217 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2218 uap->buf, UIO_USERSPACE, uap->bufsize)); 2219} 2220 2221int 2222kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2223 char *buf, enum uio_seg bufseg, size_t count) 2224{ 2225 struct vnode *vp; 2226 struct iovec aiov; 2227 struct uio auio; 2228 struct nameidata nd; 2229 int error; 2230 2231 if (count > IOSIZE_MAX) 2232 return (EINVAL); 2233 2234 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2235 pathseg, path, fd, td); 2236 2237 if ((error = namei(&nd)) != 0) 2238 return (error); 2239 NDFREE(&nd, NDF_ONLY_PNBUF); 2240 vp = nd.ni_vp; 2241#ifdef MAC 2242 error = mac_vnode_check_readlink(td->td_ucred, vp); 2243 if (error != 0) { 2244 vput(vp); 2245 return (error); 2246 } 2247#endif 2248 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2249 error = EINVAL; 2250 else { 2251 aiov.iov_base = buf; 2252 aiov.iov_len = count; 2253 auio.uio_iov = &aiov; 2254 auio.uio_iovcnt = 1; 2255 auio.uio_offset = 0; 2256 auio.uio_rw = UIO_READ; 2257 auio.uio_segflg = bufseg; 2258 auio.uio_td = td; 2259 auio.uio_resid = count; 2260 error = VOP_READLINK(vp, &auio, td->td_ucred); 2261 td->td_retval[0] = count - auio.uio_resid; 2262 } 2263 vput(vp); 2264 return (error); 2265} 2266 2267/* 2268 * Common implementation code for chflags() and fchflags(). 
2269 */ 2270static int 2271setfflags(struct thread *td, struct vnode *vp, u_long flags) 2272{ 2273 struct mount *mp; 2274 struct vattr vattr; 2275 int error; 2276 2277 /* We can't support the value matching VNOVAL. */ 2278 if (flags == VNOVAL) 2279 return (EOPNOTSUPP); 2280 2281 /* 2282 * Prevent non-root users from setting flags on devices. When 2283 * a device is reused, users can retain ownership of the device 2284 * if they are allowed to set flags and programs assume that 2285 * chown can't fail when done as root. 2286 */ 2287 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2288 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2289 if (error != 0) 2290 return (error); 2291 } 2292 2293 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2294 return (error); 2295 VATTR_NULL(&vattr); 2296 vattr.va_flags = flags; 2297 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2298#ifdef MAC 2299 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2300 if (error == 0) 2301#endif 2302 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2303 VOP_UNLOCK(vp, 0); 2304 vn_finished_write(mp); 2305 return (error); 2306} 2307 2308/* 2309 * Change flags of a file given a path name. 
2310 */ 2311#ifndef _SYS_SYSPROTO_H_ 2312struct chflags_args { 2313 const char *path; 2314 u_long flags; 2315}; 2316#endif 2317int 2318sys_chflags(struct thread *td, struct chflags_args *uap) 2319{ 2320 2321 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2322 uap->flags, 0)); 2323} 2324 2325#ifndef _SYS_SYSPROTO_H_ 2326struct chflagsat_args { 2327 int fd; 2328 const char *path; 2329 u_long flags; 2330 int atflag; 2331} 2332#endif 2333int 2334sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2335{ 2336 int fd = uap->fd; 2337 const char *path = uap->path; 2338 u_long flags = uap->flags; 2339 int atflag = uap->atflag; 2340 2341 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2342 return (EINVAL); 2343 2344 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2345} 2346 2347/* 2348 * Same as chflags() but doesn't follow symlinks. 2349 */ 2350#ifndef _SYS_SYSPROTO_H_ 2351struct lchflags_args { 2352 const char *path; 2353 u_long flags; 2354}; 2355#endif 2356int 2357sys_lchflags(struct thread *td, struct lchflags_args *uap) 2358{ 2359 2360 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2361 uap->flags, AT_SYMLINK_NOFOLLOW)); 2362} 2363 2364static int 2365kern_chflagsat(struct thread *td, int fd, const char *path, 2366 enum uio_seg pathseg, u_long flags, int atflag) 2367{ 2368 struct nameidata nd; 2369 cap_rights_t rights; 2370 int error, follow; 2371 2372 AUDIT_ARG_FFLAGS(flags); 2373 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2374 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2375 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2376 if ((error = namei(&nd)) != 0) 2377 return (error); 2378 NDFREE(&nd, NDF_ONLY_PNBUF); 2379 error = setfflags(td, nd.ni_vp, flags); 2380 vrele(nd.ni_vp); 2381 return (error); 2382} 2383 2384/* 2385 * Change flags of a file given a file descriptor. 
2386 */ 2387#ifndef _SYS_SYSPROTO_H_ 2388struct fchflags_args { 2389 int fd; 2390 u_long flags; 2391}; 2392#endif 2393int 2394sys_fchflags(struct thread *td, struct fchflags_args *uap) 2395{ 2396 struct file *fp; 2397 cap_rights_t rights; 2398 int error; 2399 2400 AUDIT_ARG_FD(uap->fd); 2401 AUDIT_ARG_FFLAGS(uap->flags); 2402 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2403 &fp); 2404 if (error != 0) 2405 return (error); 2406#ifdef AUDIT 2407 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2408 AUDIT_ARG_VNODE1(fp->f_vnode); 2409 VOP_UNLOCK(fp->f_vnode, 0); 2410#endif 2411 error = setfflags(td, fp->f_vnode, uap->flags); 2412 fdrop(fp, td); 2413 return (error); 2414} 2415 2416/* 2417 * Common implementation code for chmod(), lchmod() and fchmod(). 2418 */ 2419int 2420setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2421{ 2422 struct mount *mp; 2423 struct vattr vattr; 2424 int error; 2425 2426 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2427 return (error); 2428 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2429 VATTR_NULL(&vattr); 2430 vattr.va_mode = mode & ALLPERMS; 2431#ifdef MAC 2432 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2433 if (error == 0) 2434#endif 2435 error = VOP_SETATTR(vp, &vattr, cred); 2436 VOP_UNLOCK(vp, 0); 2437 vn_finished_write(mp); 2438 return (error); 2439} 2440 2441/* 2442 * Change mode of a file given path name. 
2443 */ 2444#ifndef _SYS_SYSPROTO_H_ 2445struct chmod_args { 2446 char *path; 2447 int mode; 2448}; 2449#endif 2450int 2451sys_chmod(struct thread *td, struct chmod_args *uap) 2452{ 2453 2454 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2455 uap->mode, 0)); 2456} 2457 2458#ifndef _SYS_SYSPROTO_H_ 2459struct fchmodat_args { 2460 int dirfd; 2461 char *path; 2462 mode_t mode; 2463 int flag; 2464} 2465#endif 2466int 2467sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2468{ 2469 int flag = uap->flag; 2470 int fd = uap->fd; 2471 char *path = uap->path; 2472 mode_t mode = uap->mode; 2473 2474 if (flag & ~AT_SYMLINK_NOFOLLOW) 2475 return (EINVAL); 2476 2477 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2478} 2479 2480/* 2481 * Change mode of a file given path name (don't follow links.) 2482 */ 2483#ifndef _SYS_SYSPROTO_H_ 2484struct lchmod_args { 2485 char *path; 2486 int mode; 2487}; 2488#endif 2489int 2490sys_lchmod(struct thread *td, struct lchmod_args *uap) 2491{ 2492 2493 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2494 uap->mode, AT_SYMLINK_NOFOLLOW)); 2495} 2496 2497int 2498kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2499 mode_t mode, int flag) 2500{ 2501 struct nameidata nd; 2502 cap_rights_t rights; 2503 int error, follow; 2504 2505 AUDIT_ARG_MODE(mode); 2506 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2507 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2508 cap_rights_init(&rights, CAP_FCHMOD), td); 2509 if ((error = namei(&nd)) != 0) 2510 return (error); 2511 NDFREE(&nd, NDF_ONLY_PNBUF); 2512 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2513 vrele(nd.ni_vp); 2514 return (error); 2515} 2516 2517/* 2518 * Change mode of a file given a file descriptor. 
2519 */ 2520#ifndef _SYS_SYSPROTO_H_ 2521struct fchmod_args { 2522 int fd; 2523 int mode; 2524}; 2525#endif 2526int 2527sys_fchmod(struct thread *td, struct fchmod_args *uap) 2528{ 2529 struct file *fp; 2530 cap_rights_t rights; 2531 int error; 2532 2533 AUDIT_ARG_FD(uap->fd); 2534 AUDIT_ARG_MODE(uap->mode); 2535 2536 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2537 if (error != 0) 2538 return (error); 2539 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2540 fdrop(fp, td); 2541 return (error); 2542} 2543 2544/* 2545 * Common implementation for chown(), lchown(), and fchown() 2546 */ 2547int 2548setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2549 gid_t gid) 2550{ 2551 struct mount *mp; 2552 struct vattr vattr; 2553 int error; 2554 2555 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2556 return (error); 2557 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2558 VATTR_NULL(&vattr); 2559 vattr.va_uid = uid; 2560 vattr.va_gid = gid; 2561#ifdef MAC 2562 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2563 vattr.va_gid); 2564 if (error == 0) 2565#endif 2566 error = VOP_SETATTR(vp, &vattr, cred); 2567 VOP_UNLOCK(vp, 0); 2568 vn_finished_write(mp); 2569 return (error); 2570} 2571 2572/* 2573 * Set ownership given a path name. 
2574 */ 2575#ifndef _SYS_SYSPROTO_H_ 2576struct chown_args { 2577 char *path; 2578 int uid; 2579 int gid; 2580}; 2581#endif 2582int 2583sys_chown(struct thread *td, struct chown_args *uap) 2584{ 2585 2586 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2587 uap->gid, 0)); 2588} 2589 2590#ifndef _SYS_SYSPROTO_H_ 2591struct fchownat_args { 2592 int fd; 2593 const char * path; 2594 uid_t uid; 2595 gid_t gid; 2596 int flag; 2597}; 2598#endif 2599int 2600sys_fchownat(struct thread *td, struct fchownat_args *uap) 2601{ 2602 int flag; 2603 2604 flag = uap->flag; 2605 if (flag & ~AT_SYMLINK_NOFOLLOW) 2606 return (EINVAL); 2607 2608 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2609 uap->gid, uap->flag)); 2610} 2611 2612int 2613kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2614 int uid, int gid, int flag) 2615{ 2616 struct nameidata nd; 2617 cap_rights_t rights; 2618 int error, follow; 2619 2620 AUDIT_ARG_OWNER(uid, gid); 2621 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2622 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2623 cap_rights_init(&rights, CAP_FCHOWN), td); 2624 2625 if ((error = namei(&nd)) != 0) 2626 return (error); 2627 NDFREE(&nd, NDF_ONLY_PNBUF); 2628 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2629 vrele(nd.ni_vp); 2630 return (error); 2631} 2632 2633/* 2634 * Set ownership given a path name, do not cross symlinks. 2635 */ 2636#ifndef _SYS_SYSPROTO_H_ 2637struct lchown_args { 2638 char *path; 2639 int uid; 2640 int gid; 2641}; 2642#endif 2643int 2644sys_lchown(struct thread *td, struct lchown_args *uap) 2645{ 2646 2647 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2648 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2649} 2650 2651/* 2652 * Set ownership given a file descriptor. 
2653 */ 2654#ifndef _SYS_SYSPROTO_H_ 2655struct fchown_args { 2656 int fd; 2657 int uid; 2658 int gid; 2659}; 2660#endif 2661int 2662sys_fchown(struct thread *td, struct fchown_args *uap) 2663{ 2664 struct file *fp; 2665 cap_rights_t rights; 2666 int error; 2667 2668 AUDIT_ARG_FD(uap->fd); 2669 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2670 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2671 if (error != 0) 2672 return (error); 2673 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2674 fdrop(fp, td); 2675 return (error); 2676} 2677 2678/* 2679 * Common implementation code for utimes(), lutimes(), and futimes(). 2680 */ 2681static int 2682getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2683 struct timespec *tsp) 2684{ 2685 struct timeval tv[2]; 2686 const struct timeval *tvp; 2687 int error; 2688 2689 if (usrtvp == NULL) { 2690 vfs_timestamp(&tsp[0]); 2691 tsp[1] = tsp[0]; 2692 } else { 2693 if (tvpseg == UIO_SYSSPACE) { 2694 tvp = usrtvp; 2695 } else { 2696 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2697 return (error); 2698 tvp = tv; 2699 } 2700 2701 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2702 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2703 return (EINVAL); 2704 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2705 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2706 } 2707 return (0); 2708} 2709 2710/* 2711 * Common implementation code for futimens(), utimensat(). 
2712 */ 2713#define UTIMENS_NULL 0x1 2714#define UTIMENS_EXIT 0x2 2715static int 2716getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2717 struct timespec *tsp, int *retflags) 2718{ 2719 struct timespec tsnow; 2720 int error; 2721 2722 vfs_timestamp(&tsnow); 2723 *retflags = 0; 2724 if (usrtsp == NULL) { 2725 tsp[0] = tsnow; 2726 tsp[1] = tsnow; 2727 *retflags |= UTIMENS_NULL; 2728 return (0); 2729 } 2730 if (tspseg == UIO_SYSSPACE) { 2731 tsp[0] = usrtsp[0]; 2732 tsp[1] = usrtsp[1]; 2733 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2734 return (error); 2735 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2736 *retflags |= UTIMENS_EXIT; 2737 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2738 *retflags |= UTIMENS_NULL; 2739 if (tsp[0].tv_nsec == UTIME_OMIT) 2740 tsp[0].tv_sec = VNOVAL; 2741 else if (tsp[0].tv_nsec == UTIME_NOW) 2742 tsp[0] = tsnow; 2743 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2744 return (EINVAL); 2745 if (tsp[1].tv_nsec == UTIME_OMIT) 2746 tsp[1].tv_sec = VNOVAL; 2747 else if (tsp[1].tv_nsec == UTIME_NOW) 2748 tsp[1] = tsnow; 2749 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2750 return (EINVAL); 2751 2752 return (0); 2753} 2754 2755/* 2756 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2757 * and utimensat(). 
2758 */ 2759static int 2760setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 2761 int numtimes, int nullflag) 2762{ 2763 struct mount *mp; 2764 struct vattr vattr; 2765 int error, setbirthtime; 2766 2767 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2768 return (error); 2769 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2770 setbirthtime = 0; 2771 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2772 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2773 setbirthtime = 1; 2774 VATTR_NULL(&vattr); 2775 vattr.va_atime = ts[0]; 2776 vattr.va_mtime = ts[1]; 2777 if (setbirthtime) 2778 vattr.va_birthtime = ts[1]; 2779 if (numtimes > 2) 2780 vattr.va_birthtime = ts[2]; 2781 if (nullflag) 2782 vattr.va_vaflags |= VA_UTIMES_NULL; 2783#ifdef MAC 2784 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 2785 vattr.va_mtime); 2786#endif 2787 if (error == 0) 2788 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2789 VOP_UNLOCK(vp, 0); 2790 vn_finished_write(mp); 2791 return (error); 2792} 2793 2794/* 2795 * Set the access and modification times of a file. 
2796 */ 2797#ifndef _SYS_SYSPROTO_H_ 2798struct utimes_args { 2799 char *path; 2800 struct timeval *tptr; 2801}; 2802#endif 2803int 2804sys_utimes(struct thread *td, struct utimes_args *uap) 2805{ 2806 2807 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2808 uap->tptr, UIO_USERSPACE)); 2809} 2810 2811#ifndef _SYS_SYSPROTO_H_ 2812struct futimesat_args { 2813 int fd; 2814 const char * path; 2815 const struct timeval * times; 2816}; 2817#endif 2818int 2819sys_futimesat(struct thread *td, struct futimesat_args *uap) 2820{ 2821 2822 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 2823 uap->times, UIO_USERSPACE)); 2824} 2825 2826int 2827kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2828 struct timeval *tptr, enum uio_seg tptrseg) 2829{ 2830 struct nameidata nd; 2831 struct timespec ts[2]; 2832 cap_rights_t rights; 2833 int error; 2834 2835 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2836 return (error); 2837 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 2838 cap_rights_init(&rights, CAP_FUTIMES), td); 2839 2840 if ((error = namei(&nd)) != 0) 2841 return (error); 2842 NDFREE(&nd, NDF_ONLY_PNBUF); 2843 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2844 vrele(nd.ni_vp); 2845 return (error); 2846} 2847 2848/* 2849 * Set the access and modification times of a file. 
2850 */ 2851#ifndef _SYS_SYSPROTO_H_ 2852struct lutimes_args { 2853 char *path; 2854 struct timeval *tptr; 2855}; 2856#endif 2857int 2858sys_lutimes(struct thread *td, struct lutimes_args *uap) 2859{ 2860 2861 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 2862 UIO_USERSPACE)); 2863} 2864 2865int 2866kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 2867 struct timeval *tptr, enum uio_seg tptrseg) 2868{ 2869 struct timespec ts[2]; 2870 struct nameidata nd; 2871 int error; 2872 2873 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2874 return (error); 2875 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 2876 if ((error = namei(&nd)) != 0) 2877 return (error); 2878 NDFREE(&nd, NDF_ONLY_PNBUF); 2879 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2880 vrele(nd.ni_vp); 2881 return (error); 2882} 2883 2884/* 2885 * Set the access and modification times of a file. 2886 */ 2887#ifndef _SYS_SYSPROTO_H_ 2888struct futimes_args { 2889 int fd; 2890 struct timeval *tptr; 2891}; 2892#endif 2893int 2894sys_futimes(struct thread *td, struct futimes_args *uap) 2895{ 2896 2897 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 2898} 2899 2900int 2901kern_futimes(struct thread *td, int fd, struct timeval *tptr, 2902 enum uio_seg tptrseg) 2903{ 2904 struct timespec ts[2]; 2905 struct file *fp; 2906 cap_rights_t rights; 2907 int error; 2908 2909 AUDIT_ARG_FD(fd); 2910 error = getutimes(tptr, tptrseg, ts); 2911 if (error != 0) 2912 return (error); 2913 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2914 if (error != 0) 2915 return (error); 2916#ifdef AUDIT 2917 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2918 AUDIT_ARG_VNODE1(fp->f_vnode); 2919 VOP_UNLOCK(fp->f_vnode, 0); 2920#endif 2921 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 2922 fdrop(fp, td); 2923 return (error); 2924} 2925 2926int 2927sys_futimens(struct thread *td, struct futimens_args *uap) 2928{ 2929 2930 return 
(kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 2931} 2932 2933int 2934kern_futimens(struct thread *td, int fd, struct timespec *tptr, 2935 enum uio_seg tptrseg) 2936{ 2937 struct timespec ts[2]; 2938 struct file *fp; 2939 cap_rights_t rights; 2940 int error, flags; 2941 2942 AUDIT_ARG_FD(fd); 2943 error = getutimens(tptr, tptrseg, ts, &flags); 2944 if (error != 0) 2945 return (error); 2946 if (flags & UTIMENS_EXIT) 2947 return (0); 2948 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2949 if (error != 0) 2950 return (error); 2951#ifdef AUDIT 2952 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2953 AUDIT_ARG_VNODE1(fp->f_vnode); 2954 VOP_UNLOCK(fp->f_vnode, 0); 2955#endif 2956 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 2957 fdrop(fp, td); 2958 return (error); 2959} 2960 2961int 2962sys_utimensat(struct thread *td, struct utimensat_args *uap) 2963{ 2964 2965 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 2966 uap->times, UIO_USERSPACE, uap->flag)); 2967} 2968 2969int 2970kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2971 struct timespec *tptr, enum uio_seg tptrseg, int flag) 2972{ 2973 struct nameidata nd; 2974 struct timespec ts[2]; 2975 cap_rights_t rights; 2976 int error, flags; 2977 2978 if (flag & ~AT_SYMLINK_NOFOLLOW) 2979 return (EINVAL); 2980 2981 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 2982 return (error); 2983 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2984 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 2985 cap_rights_init(&rights, CAP_FUTIMES), td); 2986 if ((error = namei(&nd)) != 0) 2987 return (error); 2988 /* 2989 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 2990 * POSIX states: 2991 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 2992 * "Search permission is denied by a component of the path prefix." 
2993 */ 2994 NDFREE(&nd, NDF_ONLY_PNBUF); 2995 if ((flags & UTIMENS_EXIT) == 0) 2996 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 2997 vrele(nd.ni_vp); 2998 return (error); 2999} 3000 3001/* 3002 * Truncate a file given its path name. 3003 */ 3004#ifndef _SYS_SYSPROTO_H_ 3005struct truncate_args { 3006 char *path; 3007 int pad; 3008 off_t length; 3009}; 3010#endif 3011int 3012sys_truncate(struct thread *td, struct truncate_args *uap) 3013{ 3014 3015 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3016} 3017 3018int 3019kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3020{ 3021 struct mount *mp; 3022 struct vnode *vp; 3023 void *rl_cookie; 3024 struct vattr vattr; 3025 struct nameidata nd; 3026 int error; 3027 3028 if (length < 0) 3029 return(EINVAL); 3030 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3031 if ((error = namei(&nd)) != 0) 3032 return (error); 3033 vp = nd.ni_vp; 3034 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3035 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3036 vn_rangelock_unlock(vp, rl_cookie); 3037 vrele(vp); 3038 return (error); 3039 } 3040 NDFREE(&nd, NDF_ONLY_PNBUF); 3041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3042 if (vp->v_type == VDIR) 3043 error = EISDIR; 3044#ifdef MAC 3045 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3046 } 3047#endif 3048 else if ((error = vn_writechk(vp)) == 0 && 3049 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3050 VATTR_NULL(&vattr); 3051 vattr.va_size = length; 3052 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3053 } 3054 VOP_UNLOCK(vp, 0); 3055 vn_finished_write(mp); 3056 vn_rangelock_unlock(vp, rl_cookie); 3057 vrele(vp); 3058 return (error); 3059} 3060 3061#if defined(COMPAT_43) 3062/* 3063 * Truncate a file given its path name. 
3064 */ 3065#ifndef _SYS_SYSPROTO_H_ 3066struct otruncate_args { 3067 char *path; 3068 long length; 3069}; 3070#endif 3071int 3072otruncate(struct thread *td, struct otruncate_args *uap) 3073{ 3074 3075 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3076} 3077#endif /* COMPAT_43 */ 3078 3079#if defined(COMPAT_FREEBSD6) 3080/* Versions with the pad argument */ 3081int 3082freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3083{ 3084 3085 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3086} 3087 3088int 3089freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3090{ 3091 3092 return (kern_ftruncate(td, uap->fd, uap->length)); 3093} 3094#endif 3095 3096int 3097kern_fsync(struct thread *td, int fd, bool fullsync) 3098{ 3099 struct vnode *vp; 3100 struct mount *mp; 3101 struct file *fp; 3102 cap_rights_t rights; 3103 int error, lock_flags; 3104 3105 AUDIT_ARG_FD(fd); 3106 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3107 if (error != 0) 3108 return (error); 3109 vp = fp->f_vnode; 3110#if 0 3111 if (!fullsync) 3112 /* XXXKIB: compete outstanding aio writes */; 3113#endif 3114 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3115 if (error != 0) 3116 goto drop; 3117 if (MNT_SHARED_WRITES(mp) || 3118 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3119 lock_flags = LK_SHARED; 3120 } else { 3121 lock_flags = LK_EXCLUSIVE; 3122 } 3123 vn_lock(vp, lock_flags | LK_RETRY); 3124 AUDIT_ARG_VNODE1(vp); 3125 if (vp->v_object != NULL) { 3126 VM_OBJECT_WLOCK(vp->v_object); 3127 vm_object_page_clean(vp->v_object, 0, 0, 0); 3128 VM_OBJECT_WUNLOCK(vp->v_object); 3129 } 3130 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3131 VOP_UNLOCK(vp, 0); 3132 vn_finished_write(mp); 3133drop: 3134 fdrop(fp, td); 3135 return (error); 3136} 3137 3138/* 3139 * Sync an open file. 
3140 */ 3141#ifndef _SYS_SYSPROTO_H_ 3142struct fsync_args { 3143 int fd; 3144}; 3145#endif 3146int 3147sys_fsync(struct thread *td, struct fsync_args *uap) 3148{ 3149 3150 return (kern_fsync(td, uap->fd, true)); 3151} 3152 3153int 3154sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3155{ 3156 3157 return (kern_fsync(td, uap->fd, false)); 3158} 3159 3160/* 3161 * Rename files. Source and destination must either both be directories, or 3162 * both not be directories. If target is a directory, it must be empty. 3163 */ 3164#ifndef _SYS_SYSPROTO_H_ 3165struct rename_args { 3166 char *from; 3167 char *to; 3168}; 3169#endif 3170int 3171sys_rename(struct thread *td, struct rename_args *uap) 3172{ 3173 3174 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3175 uap->to, UIO_USERSPACE)); 3176} 3177 3178#ifndef _SYS_SYSPROTO_H_ 3179struct renameat_args { 3180 int oldfd; 3181 char *old; 3182 int newfd; 3183 char *new; 3184}; 3185#endif 3186int 3187sys_renameat(struct thread *td, struct renameat_args *uap) 3188{ 3189 3190 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3191 UIO_USERSPACE)); 3192} 3193 3194int 3195kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3196 enum uio_seg pathseg) 3197{ 3198 struct mount *mp = NULL; 3199 struct vnode *tvp, *fvp, *tdvp; 3200 struct nameidata fromnd, tond; 3201 cap_rights_t rights; 3202 int error; 3203 3204again: 3205 bwillwrite(); 3206#ifdef MAC 3207 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3208 AUDITVNODE1, pathseg, old, oldfd, 3209 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3210#else 3211 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3212 pathseg, old, oldfd, 3213 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3214#endif 3215 3216 if ((error = namei(&fromnd)) != 0) 3217 return (error); 3218#ifdef MAC 3219 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3220 fromnd.ni_vp, 
&fromnd.ni_cnd); 3221 VOP_UNLOCK(fromnd.ni_dvp, 0); 3222 if (fromnd.ni_dvp != fromnd.ni_vp) 3223 VOP_UNLOCK(fromnd.ni_vp, 0); 3224#endif 3225 fvp = fromnd.ni_vp; 3226 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3227 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3228 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3229 if (fromnd.ni_vp->v_type == VDIR) 3230 tond.ni_cnd.cn_flags |= WILLBEDIR; 3231 if ((error = namei(&tond)) != 0) { 3232 /* Translate error code for rename("dir1", "dir2/."). */ 3233 if (error == EISDIR && fvp->v_type == VDIR) 3234 error = EINVAL; 3235 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3236 vrele(fromnd.ni_dvp); 3237 vrele(fvp); 3238 goto out1; 3239 } 3240 tdvp = tond.ni_dvp; 3241 tvp = tond.ni_vp; 3242 error = vn_start_write(fvp, &mp, V_NOWAIT); 3243 if (error != 0) { 3244 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3245 NDFREE(&tond, NDF_ONLY_PNBUF); 3246 if (tvp != NULL) 3247 vput(tvp); 3248 if (tdvp == tvp) 3249 vrele(tdvp); 3250 else 3251 vput(tdvp); 3252 vrele(fromnd.ni_dvp); 3253 vrele(fvp); 3254 vrele(tond.ni_startdir); 3255 if (fromnd.ni_startdir != NULL) 3256 vrele(fromnd.ni_startdir); 3257 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3258 if (error != 0) 3259 return (error); 3260 goto again; 3261 } 3262 if (tvp != NULL) { 3263 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3264 error = ENOTDIR; 3265 goto out; 3266 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3267 error = EISDIR; 3268 goto out; 3269 } 3270#ifdef CAPABILITIES 3271 if (newfd != AT_FDCWD) { 3272 /* 3273 * If the target already exists we require CAP_UNLINKAT 3274 * from 'newfd'. 
3275 */ 3276 error = cap_check(&tond.ni_filecaps.fc_rights, 3277 cap_rights_init(&rights, CAP_UNLINKAT)); 3278 if (error != 0) 3279 goto out; 3280 } 3281#endif 3282 } 3283 if (fvp == tdvp) { 3284 error = EINVAL; 3285 goto out; 3286 } 3287 /* 3288 * If the source is the same as the destination (that is, if they 3289 * are links to the same vnode), then there is nothing to do. 3290 */ 3291 if (fvp == tvp) 3292 error = -1; 3293#ifdef MAC 3294 else 3295 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3296 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3297#endif 3298out: 3299 if (error == 0) { 3300 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3301 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3302 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3303 NDFREE(&tond, NDF_ONLY_PNBUF); 3304 } else { 3305 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3306 NDFREE(&tond, NDF_ONLY_PNBUF); 3307 if (tvp != NULL) 3308 vput(tvp); 3309 if (tdvp == tvp) 3310 vrele(tdvp); 3311 else 3312 vput(tdvp); 3313 vrele(fromnd.ni_dvp); 3314 vrele(fvp); 3315 } 3316 vrele(tond.ni_startdir); 3317 vn_finished_write(mp); 3318out1: 3319 if (fromnd.ni_startdir) 3320 vrele(fromnd.ni_startdir); 3321 if (error == -1) 3322 return (0); 3323 return (error); 3324} 3325 3326/* 3327 * Make a directory file. 
3328 */ 3329#ifndef _SYS_SYSPROTO_H_ 3330struct mkdir_args { 3331 char *path; 3332 int mode; 3333}; 3334#endif 3335int 3336sys_mkdir(struct thread *td, struct mkdir_args *uap) 3337{ 3338 3339 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3340 uap->mode)); 3341} 3342 3343#ifndef _SYS_SYSPROTO_H_ 3344struct mkdirat_args { 3345 int fd; 3346 char *path; 3347 mode_t mode; 3348}; 3349#endif 3350int 3351sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3352{ 3353 3354 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3355} 3356 3357int 3358kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3359 int mode) 3360{ 3361 struct mount *mp; 3362 struct vnode *vp; 3363 struct vattr vattr; 3364 struct nameidata nd; 3365 cap_rights_t rights; 3366 int error; 3367 3368 AUDIT_ARG_MODE(mode); 3369restart: 3370 bwillwrite(); 3371 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3372 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3373 td); 3374 nd.ni_cnd.cn_flags |= WILLBEDIR; 3375 if ((error = namei(&nd)) != 0) 3376 return (error); 3377 vp = nd.ni_vp; 3378 if (vp != NULL) { 3379 NDFREE(&nd, NDF_ONLY_PNBUF); 3380 /* 3381 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3382 * the strange behaviour of leaving the vnode unlocked 3383 * if the target is the same vnode as the parent. 
3384 */ 3385 if (vp == nd.ni_dvp) 3386 vrele(nd.ni_dvp); 3387 else 3388 vput(nd.ni_dvp); 3389 vrele(vp); 3390 return (EEXIST); 3391 } 3392 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3393 NDFREE(&nd, NDF_ONLY_PNBUF); 3394 vput(nd.ni_dvp); 3395 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3396 return (error); 3397 goto restart; 3398 } 3399 VATTR_NULL(&vattr); 3400 vattr.va_type = VDIR; 3401 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3402#ifdef MAC 3403 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3404 &vattr); 3405 if (error != 0) 3406 goto out; 3407#endif 3408 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3409#ifdef MAC 3410out: 3411#endif 3412 NDFREE(&nd, NDF_ONLY_PNBUF); 3413 vput(nd.ni_dvp); 3414 if (error == 0) 3415 vput(nd.ni_vp); 3416 vn_finished_write(mp); 3417 return (error); 3418} 3419 3420/* 3421 * Remove a directory file. 3422 */ 3423#ifndef _SYS_SYSPROTO_H_ 3424struct rmdir_args { 3425 char *path; 3426}; 3427#endif 3428int 3429sys_rmdir(struct thread *td, struct rmdir_args *uap) 3430{ 3431 3432 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3433} 3434 3435int 3436kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3437{ 3438 struct mount *mp; 3439 struct vnode *vp; 3440 struct nameidata nd; 3441 cap_rights_t rights; 3442 int error; 3443 3444restart: 3445 bwillwrite(); 3446 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3447 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3448 if ((error = namei(&nd)) != 0) 3449 return (error); 3450 vp = nd.ni_vp; 3451 if (vp->v_type != VDIR) { 3452 error = ENOTDIR; 3453 goto out; 3454 } 3455 /* 3456 * No rmdir "." please. 3457 */ 3458 if (nd.ni_dvp == vp) { 3459 error = EINVAL; 3460 goto out; 3461 } 3462 /* 3463 * The root of a mounted filesystem cannot be deleted. 
3464 */ 3465 if (vp->v_vflag & VV_ROOT) { 3466 error = EBUSY; 3467 goto out; 3468 } 3469#ifdef MAC 3470 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3471 &nd.ni_cnd); 3472 if (error != 0) 3473 goto out; 3474#endif 3475 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3476 NDFREE(&nd, NDF_ONLY_PNBUF); 3477 vput(vp); 3478 if (nd.ni_dvp == vp) 3479 vrele(nd.ni_dvp); 3480 else 3481 vput(nd.ni_dvp); 3482 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3483 return (error); 3484 goto restart; 3485 } 3486 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3487 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3488 vn_finished_write(mp); 3489out: 3490 NDFREE(&nd, NDF_ONLY_PNBUF); 3491 vput(vp); 3492 if (nd.ni_dvp == vp) 3493 vrele(nd.ni_dvp); 3494 else 3495 vput(nd.ni_dvp); 3496 return (error); 3497} 3498 3499#ifdef COMPAT_43 3500/* 3501 * Read a block of directory entries in a filesystem independent format. 3502 */ 3503#ifndef _SYS_SYSPROTO_H_ 3504struct ogetdirentries_args { 3505 int fd; 3506 char *buf; 3507 u_int count; 3508 long *basep; 3509}; 3510#endif 3511int 3512ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3513{ 3514 long loff; 3515 int error; 3516 3517 error = kern_ogetdirentries(td, uap, &loff); 3518 if (error == 0) 3519 error = copyout(&loff, uap->basep, sizeof(long)); 3520 return (error); 3521} 3522 3523int 3524kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3525 long *ploff) 3526{ 3527 struct vnode *vp; 3528 struct file *fp; 3529 struct uio auio, kuio; 3530 struct iovec aiov, kiov; 3531 struct dirent *dp, *edp; 3532 cap_rights_t rights; 3533 caddr_t dirbuf; 3534 int error, eofflag, readcnt; 3535 long loff; 3536 off_t foffset; 3537 3538 /* XXX arbitrary sanity limit on `count'. 
*/ 3539 if (uap->count > 64 * 1024) 3540 return (EINVAL); 3541 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3542 if (error != 0) 3543 return (error); 3544 if ((fp->f_flag & FREAD) == 0) { 3545 fdrop(fp, td); 3546 return (EBADF); 3547 } 3548 vp = fp->f_vnode; 3549 foffset = foffset_lock(fp, 0); 3550unionread: 3551 if (vp->v_type != VDIR) { 3552 foffset_unlock(fp, foffset, 0); 3553 fdrop(fp, td); 3554 return (EINVAL); 3555 } 3556 aiov.iov_base = uap->buf; 3557 aiov.iov_len = uap->count; 3558 auio.uio_iov = &aiov; 3559 auio.uio_iovcnt = 1; 3560 auio.uio_rw = UIO_READ; 3561 auio.uio_segflg = UIO_USERSPACE; 3562 auio.uio_td = td; 3563 auio.uio_resid = uap->count; 3564 vn_lock(vp, LK_SHARED | LK_RETRY); 3565 loff = auio.uio_offset = foffset; 3566#ifdef MAC 3567 error = mac_vnode_check_readdir(td->td_ucred, vp); 3568 if (error != 0) { 3569 VOP_UNLOCK(vp, 0); 3570 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3571 fdrop(fp, td); 3572 return (error); 3573 } 3574#endif 3575# if (BYTE_ORDER != LITTLE_ENDIAN) 3576 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3577 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3578 NULL, NULL); 3579 foffset = auio.uio_offset; 3580 } else 3581# endif 3582 { 3583 kuio = auio; 3584 kuio.uio_iov = &kiov; 3585 kuio.uio_segflg = UIO_SYSSPACE; 3586 kiov.iov_len = uap->count; 3587 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3588 kiov.iov_base = dirbuf; 3589 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3590 NULL, NULL); 3591 foffset = kuio.uio_offset; 3592 if (error == 0) { 3593 readcnt = uap->count - kuio.uio_resid; 3594 edp = (struct dirent *)&dirbuf[readcnt]; 3595 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3596# if (BYTE_ORDER == LITTLE_ENDIAN) 3597 /* 3598 * The expected low byte of 3599 * dp->d_namlen is our dp->d_type. 3600 * The high MBZ byte of dp->d_namlen 3601 * is our dp->d_namlen. 
3602 */ 3603 dp->d_type = dp->d_namlen; 3604 dp->d_namlen = 0; 3605# else 3606 /* 3607 * The dp->d_type is the high byte 3608 * of the expected dp->d_namlen, 3609 * so must be zero'ed. 3610 */ 3611 dp->d_type = 0; 3612# endif 3613 if (dp->d_reclen > 0) { 3614 dp = (struct dirent *) 3615 ((char *)dp + dp->d_reclen); 3616 } else { 3617 error = EIO; 3618 break; 3619 } 3620 } 3621 if (dp >= edp) 3622 error = uiomove(dirbuf, readcnt, &auio); 3623 } 3624 free(dirbuf, M_TEMP); 3625 } 3626 if (error != 0) { 3627 VOP_UNLOCK(vp, 0); 3628 foffset_unlock(fp, foffset, 0); 3629 fdrop(fp, td); 3630 return (error); 3631 } 3632 if (uap->count == auio.uio_resid && 3633 (vp->v_vflag & VV_ROOT) && 3634 (vp->v_mount->mnt_flag & MNT_UNION)) { 3635 struct vnode *tvp = vp; 3636 vp = vp->v_mount->mnt_vnodecovered; 3637 VREF(vp); 3638 fp->f_vnode = vp; 3639 fp->f_data = vp; 3640 foffset = 0; 3641 vput(tvp); 3642 goto unionread; 3643 } 3644 VOP_UNLOCK(vp, 0); 3645 foffset_unlock(fp, foffset, 0); 3646 fdrop(fp, td); 3647 td->td_retval[0] = uap->count - auio.uio_resid; 3648 if (error == 0) 3649 *ploff = loff; 3650 return (error); 3651} 3652#endif /* COMPAT_43 */ 3653 3654/* 3655 * Read a block of directory entries in a filesystem independent format. 
3656 */ 3657#ifndef _SYS_SYSPROTO_H_ 3658struct getdirentries_args { 3659 int fd; 3660 char *buf; 3661 u_int count; 3662 long *basep; 3663}; 3664#endif 3665int 3666sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3667{ 3668 long base; 3669 int error; 3670 3671 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3672 NULL, UIO_USERSPACE); 3673 if (error != 0) 3674 return (error); 3675 if (uap->basep != NULL) 3676 error = copyout(&base, uap->basep, sizeof(long)); 3677 return (error); 3678} 3679 3680int 3681kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3682 long *basep, ssize_t *residp, enum uio_seg bufseg) 3683{ 3684 struct vnode *vp; 3685 struct file *fp; 3686 struct uio auio; 3687 struct iovec aiov; 3688 cap_rights_t rights; 3689 long loff; 3690 int error, eofflag; 3691 off_t foffset; 3692 3693 AUDIT_ARG_FD(fd); 3694 if (count > IOSIZE_MAX) 3695 return (EINVAL); 3696 auio.uio_resid = count; 3697 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3698 if (error != 0) 3699 return (error); 3700 if ((fp->f_flag & FREAD) == 0) { 3701 fdrop(fp, td); 3702 return (EBADF); 3703 } 3704 vp = fp->f_vnode; 3705 foffset = foffset_lock(fp, 0); 3706unionread: 3707 if (vp->v_type != VDIR) { 3708 error = EINVAL; 3709 goto fail; 3710 } 3711 aiov.iov_base = buf; 3712 aiov.iov_len = count; 3713 auio.uio_iov = &aiov; 3714 auio.uio_iovcnt = 1; 3715 auio.uio_rw = UIO_READ; 3716 auio.uio_segflg = bufseg; 3717 auio.uio_td = td; 3718 vn_lock(vp, LK_SHARED | LK_RETRY); 3719 AUDIT_ARG_VNODE1(vp); 3720 loff = auio.uio_offset = foffset; 3721#ifdef MAC 3722 error = mac_vnode_check_readdir(td->td_ucred, vp); 3723 if (error == 0) 3724#endif 3725 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3726 NULL); 3727 foffset = auio.uio_offset; 3728 if (error != 0) { 3729 VOP_UNLOCK(vp, 0); 3730 goto fail; 3731 } 3732 if (count == auio.uio_resid && 3733 (vp->v_vflag & VV_ROOT) && 3734 (vp->v_mount->mnt_flag & MNT_UNION)) 
{ 3735 struct vnode *tvp = vp; 3736 3737 vp = vp->v_mount->mnt_vnodecovered; 3738 VREF(vp); 3739 fp->f_vnode = vp; 3740 fp->f_data = vp; 3741 foffset = 0; 3742 vput(tvp); 3743 goto unionread; 3744 } 3745 VOP_UNLOCK(vp, 0); 3746 *basep = loff; 3747 if (residp != NULL) 3748 *residp = auio.uio_resid; 3749 td->td_retval[0] = count - auio.uio_resid; 3750fail: 3751 foffset_unlock(fp, foffset, 0); 3752 fdrop(fp, td); 3753 return (error); 3754} 3755 3756#ifndef _SYS_SYSPROTO_H_ 3757struct getdents_args { 3758 int fd; 3759 char *buf; 3760 size_t count; 3761}; 3762#endif 3763int 3764sys_getdents(struct thread *td, struct getdents_args *uap) 3765{ 3766 struct getdirentries_args ap; 3767 3768 ap.fd = uap->fd; 3769 ap.buf = uap->buf; 3770 ap.count = uap->count; 3771 ap.basep = NULL; 3772 return (sys_getdirentries(td, &ap)); 3773} 3774 3775/* 3776 * Set the mode mask for creation of filesystem nodes. 3777 */ 3778#ifndef _SYS_SYSPROTO_H_ 3779struct umask_args { 3780 int newmask; 3781}; 3782#endif 3783int 3784sys_umask(struct thread *td, struct umask_args *uap) 3785{ 3786 struct filedesc *fdp; 3787 3788 fdp = td->td_proc->p_fd; 3789 FILEDESC_XLOCK(fdp); 3790 td->td_retval[0] = fdp->fd_cmask; 3791 fdp->fd_cmask = uap->newmask & ALLPERMS; 3792 FILEDESC_XUNLOCK(fdp); 3793 return (0); 3794} 3795 3796/* 3797 * Void all references to file by ripping underlying filesystem away from 3798 * vnode. 
3799 */ 3800#ifndef _SYS_SYSPROTO_H_ 3801struct revoke_args { 3802 char *path; 3803}; 3804#endif 3805int 3806sys_revoke(struct thread *td, struct revoke_args *uap) 3807{ 3808 struct vnode *vp; 3809 struct vattr vattr; 3810 struct nameidata nd; 3811 int error; 3812 3813 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3814 uap->path, td); 3815 if ((error = namei(&nd)) != 0) 3816 return (error); 3817 vp = nd.ni_vp; 3818 NDFREE(&nd, NDF_ONLY_PNBUF); 3819 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 3820 error = EINVAL; 3821 goto out; 3822 } 3823#ifdef MAC 3824 error = mac_vnode_check_revoke(td->td_ucred, vp); 3825 if (error != 0) 3826 goto out; 3827#endif 3828 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 3829 if (error != 0) 3830 goto out; 3831 if (td->td_ucred->cr_uid != vattr.va_uid) { 3832 error = priv_check(td, PRIV_VFS_ADMIN); 3833 if (error != 0) 3834 goto out; 3835 } 3836 if (vcount(vp) > 1) 3837 VOP_REVOKE(vp, REVOKEALL); 3838out: 3839 vput(vp); 3840 return (error); 3841} 3842 3843/* 3844 * Convert a user file descriptor to a kernel file entry and check that, if it 3845 * is a capability, the correct rights are present. A reference on the file 3846 * entry is held upon returning. 3847 */ 3848int 3849getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 3850{ 3851 struct file *fp; 3852 int error; 3853 3854 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 3855 if (error != 0) 3856 return (error); 3857 3858 /* 3859 * The file could be not of the vnode type, or it may be not 3860 * yet fully initialized, in which case the f_vnode pointer 3861 * may be set, but f_ops is still badfileops. E.g., 3862 * devfs_open() transiently create such situation to 3863 * facilitate csw d_fdopen(). 3864 * 3865 * Dupfdopen() handling in kern_openat() installs the 3866 * half-baked file into the process descriptor table, allowing 3867 * other thread to dereference it. Guard against the race by 3868 * checking f_ops. 
3869 */ 3870 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 3871 fdrop(fp, td); 3872 return (EINVAL); 3873 } 3874 *fpp = fp; 3875 return (0); 3876} 3877 3878 3879/* 3880 * Get an (NFS) file handle. 3881 */ 3882#ifndef _SYS_SYSPROTO_H_ 3883struct lgetfh_args { 3884 char *fname; 3885 fhandle_t *fhp; 3886}; 3887#endif 3888int 3889sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 3890{ 3891 struct nameidata nd; 3892 fhandle_t fh; 3893 struct vnode *vp; 3894 int error; 3895 3896 error = priv_check(td, PRIV_VFS_GETFH); 3897 if (error != 0) 3898 return (error); 3899 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3900 uap->fname, td); 3901 error = namei(&nd); 3902 if (error != 0) 3903 return (error); 3904 NDFREE(&nd, NDF_ONLY_PNBUF); 3905 vp = nd.ni_vp; 3906 bzero(&fh, sizeof(fh)); 3907 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3908 error = VOP_VPTOFH(vp, &fh.fh_fid); 3909 vput(vp); 3910 if (error == 0) 3911 error = copyout(&fh, uap->fhp, sizeof (fh)); 3912 return (error); 3913} 3914 3915#ifndef _SYS_SYSPROTO_H_ 3916struct getfh_args { 3917 char *fname; 3918 fhandle_t *fhp; 3919}; 3920#endif 3921int 3922sys_getfh(struct thread *td, struct getfh_args *uap) 3923{ 3924 struct nameidata nd; 3925 fhandle_t fh; 3926 struct vnode *vp; 3927 int error; 3928 3929 error = priv_check(td, PRIV_VFS_GETFH); 3930 if (error != 0) 3931 return (error); 3932 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3933 uap->fname, td); 3934 error = namei(&nd); 3935 if (error != 0) 3936 return (error); 3937 NDFREE(&nd, NDF_ONLY_PNBUF); 3938 vp = nd.ni_vp; 3939 bzero(&fh, sizeof(fh)); 3940 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3941 error = VOP_VPTOFH(vp, &fh.fh_fid); 3942 vput(vp); 3943 if (error == 0) 3944 error = copyout(&fh, uap->fhp, sizeof (fh)); 3945 return (error); 3946} 3947 3948/* 3949 * syscall for the rpc.lockd to use to translate a NFS file handle into an 3950 * open descriptor. 
3951 * 3952 * warning: do not remove the priv_check() call or this becomes one giant 3953 * security hole. 3954 */ 3955#ifndef _SYS_SYSPROTO_H_ 3956struct fhopen_args { 3957 const struct fhandle *u_fhp; 3958 int flags; 3959}; 3960#endif 3961int 3962sys_fhopen(struct thread *td, struct fhopen_args *uap) 3963{ 3964 struct mount *mp; 3965 struct vnode *vp; 3966 struct fhandle fhp; 3967 struct file *fp; 3968 int fmode, error; 3969 int indx; 3970 3971 error = priv_check(td, PRIV_VFS_FHOPEN); 3972 if (error != 0) 3973 return (error); 3974 indx = -1; 3975 fmode = FFLAGS(uap->flags); 3976 /* why not allow a non-read/write open for our lockd? */ 3977 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3978 return (EINVAL); 3979 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3980 if (error != 0) 3981 return(error); 3982 /* find the mount point */ 3983 mp = vfs_busyfs(&fhp.fh_fsid); 3984 if (mp == NULL) 3985 return (ESTALE); 3986 /* now give me my vnode, it gets returned to me locked */ 3987 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 3988 vfs_unbusy(mp); 3989 if (error != 0) 3990 return (error); 3991 3992 error = falloc_noinstall(td, &fp); 3993 if (error != 0) { 3994 vput(vp); 3995 return (error); 3996 } 3997 /* 3998 * An extra reference on `fp' has been held for us by 3999 * falloc_noinstall(). 
4000 */ 4001 4002#ifdef INVARIANTS 4003 td->td_dupfd = -1; 4004#endif 4005 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4006 if (error != 0) { 4007 KASSERT(fp->f_ops == &badfileops, 4008 ("VOP_OPEN in fhopen() set f_ops")); 4009 KASSERT(td->td_dupfd < 0, 4010 ("fhopen() encountered fdopen()")); 4011 4012 vput(vp); 4013 goto bad; 4014 } 4015#ifdef INVARIANTS 4016 td->td_dupfd = 0; 4017#endif 4018 fp->f_vnode = vp; 4019 fp->f_seqcount = 1; 4020 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4021 &vnops); 4022 VOP_UNLOCK(vp, 0); 4023 if ((fmode & O_TRUNC) != 0) { 4024 error = fo_truncate(fp, 0, td->td_ucred, td); 4025 if (error != 0) 4026 goto bad; 4027 } 4028 4029 error = finstall(td, fp, &indx, fmode, NULL); 4030bad: 4031 fdrop(fp, td); 4032 td->td_retval[0] = indx; 4033 return (error); 4034} 4035 4036/* 4037 * Stat an (NFS) file handle. 4038 */ 4039#ifndef _SYS_SYSPROTO_H_ 4040struct fhstat_args { 4041 struct fhandle *u_fhp; 4042 struct stat *sb; 4043}; 4044#endif 4045int 4046sys_fhstat(struct thread *td, struct fhstat_args *uap) 4047{ 4048 struct stat sb; 4049 struct fhandle fh; 4050 int error; 4051 4052 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4053 if (error != 0) 4054 return (error); 4055 error = kern_fhstat(td, fh, &sb); 4056 if (error == 0) 4057 error = copyout(&sb, uap->sb, sizeof(sb)); 4058 return (error); 4059} 4060 4061int 4062kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4063{ 4064 struct mount *mp; 4065 struct vnode *vp; 4066 int error; 4067 4068 error = priv_check(td, PRIV_VFS_FHSTAT); 4069 if (error != 0) 4070 return (error); 4071 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4072 return (ESTALE); 4073 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4074 vfs_unbusy(mp); 4075 if (error != 0) 4076 return (error); 4077 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4078 vput(vp); 4079 return (error); 4080} 4081 4082/* 4083 * Implement fstatfs() for (NFS) file handles. 
4084 */ 4085#ifndef _SYS_SYSPROTO_H_ 4086struct fhstatfs_args { 4087 struct fhandle *u_fhp; 4088 struct statfs *buf; 4089}; 4090#endif 4091int 4092sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4093{ 4094 struct statfs *sfp; 4095 fhandle_t fh; 4096 int error; 4097 4098 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4099 if (error != 0) 4100 return (error); 4101 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4102 error = kern_fhstatfs(td, fh, sfp); 4103 if (error == 0) 4104 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4105 free(sfp, M_STATFS); 4106 return (error); 4107} 4108 4109int 4110kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4111{ 4112 struct statfs *sp; 4113 struct mount *mp; 4114 struct vnode *vp; 4115 int error; 4116 4117 error = priv_check(td, PRIV_VFS_FHSTATFS); 4118 if (error != 0) 4119 return (error); 4120 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4121 return (ESTALE); 4122 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4123 if (error != 0) { 4124 vfs_unbusy(mp); 4125 return (error); 4126 } 4127 vput(vp); 4128 error = prison_canseemount(td->td_ucred, mp); 4129 if (error != 0) 4130 goto out; 4131#ifdef MAC 4132 error = mac_mount_check_stat(td->td_ucred, mp); 4133 if (error != 0) 4134 goto out; 4135#endif 4136 /* 4137 * Set these in case the underlying filesystem fails to do so. 4138 */ 4139 sp = &mp->mnt_stat; 4140 sp->f_version = STATFS_VERSION; 4141 sp->f_namemax = NAME_MAX; 4142 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4143 error = VFS_STATFS(mp, sp); 4144 if (error == 0) 4145 *buf = *sp; 4146out: 4147 vfs_unbusy(mp); 4148 return (error); 4149} 4150 4151int 4152kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4153{ 4154 struct file *fp; 4155 struct mount *mp; 4156 struct vnode *vp; 4157 cap_rights_t rights; 4158 off_t olen, ooffset; 4159 int error; 4160 4161 if (offset < 0 || len <= 0) 4162 return (EINVAL); 4163 /* Check for wrap. 
*/ 4164 if (offset > OFF_MAX - len) 4165 return (EFBIG); 4166 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4167 if (error != 0) 4168 return (error); 4169 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4170 error = ESPIPE; 4171 goto out; 4172 } 4173 if ((fp->f_flag & FWRITE) == 0) { 4174 error = EBADF; 4175 goto out; 4176 } 4177 if (fp->f_type != DTYPE_VNODE) { 4178 error = ENODEV; 4179 goto out; 4180 } 4181 vp = fp->f_vnode; 4182 if (vp->v_type != VREG) { 4183 error = ENODEV; 4184 goto out; 4185 } 4186 4187 /* Allocating blocks may take a long time, so iterate. */ 4188 for (;;) { 4189 olen = len; 4190 ooffset = offset; 4191 4192 bwillwrite(); 4193 mp = NULL; 4194 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4195 if (error != 0) 4196 break; 4197 error = vn_lock(vp, LK_EXCLUSIVE); 4198 if (error != 0) { 4199 vn_finished_write(mp); 4200 break; 4201 } 4202#ifdef MAC 4203 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4204 if (error == 0) 4205#endif 4206 error = VOP_ALLOCATE(vp, &offset, &len); 4207 VOP_UNLOCK(vp, 0); 4208 vn_finished_write(mp); 4209 4210 if (olen + ooffset != offset + len) { 4211 panic("offset + len changed from %jx/%jx to %jx/%jx", 4212 ooffset, olen, offset, len); 4213 } 4214 if (error != 0 || len == 0) 4215 break; 4216 KASSERT(olen > len, ("Iteration did not make progress?")); 4217 maybe_yield(); 4218 } 4219 out: 4220 fdrop(fp, td); 4221 return (error); 4222} 4223 4224int 4225sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4226{ 4227 int error; 4228 4229 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4230 return (kern_posix_error(td, error)); 4231} 4232 4233/* 4234 * Unlike madvise(2), we do not make a best effort to remember every 4235 * possible caching hint. Instead, we remember the last setting with 4236 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4237 * region of any current setting. 
4238 */ 4239int 4240kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4241 int advice) 4242{ 4243 struct fadvise_info *fa, *new; 4244 struct file *fp; 4245 struct vnode *vp; 4246 cap_rights_t rights; 4247 off_t end; 4248 int error; 4249 4250 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4251 return (EINVAL); 4252 switch (advice) { 4253 case POSIX_FADV_SEQUENTIAL: 4254 case POSIX_FADV_RANDOM: 4255 case POSIX_FADV_NOREUSE: 4256 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4257 break; 4258 case POSIX_FADV_NORMAL: 4259 case POSIX_FADV_WILLNEED: 4260 case POSIX_FADV_DONTNEED: 4261 new = NULL; 4262 break; 4263 default: 4264 return (EINVAL); 4265 } 4266 /* XXX: CAP_POSIX_FADVISE? */ 4267 error = fget(td, fd, cap_rights_init(&rights), &fp); 4268 if (error != 0) 4269 goto out; 4270 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4271 error = ESPIPE; 4272 goto out; 4273 } 4274 if (fp->f_type != DTYPE_VNODE) { 4275 error = ENODEV; 4276 goto out; 4277 } 4278 vp = fp->f_vnode; 4279 if (vp->v_type != VREG) { 4280 error = ENODEV; 4281 goto out; 4282 } 4283 if (len == 0) 4284 end = OFF_MAX; 4285 else 4286 end = offset + len - 1; 4287 switch (advice) { 4288 case POSIX_FADV_SEQUENTIAL: 4289 case POSIX_FADV_RANDOM: 4290 case POSIX_FADV_NOREUSE: 4291 /* 4292 * Try to merge any existing non-standard region with 4293 * this new region if possible, otherwise create a new 4294 * non-standard region for this request. 
4295 */ 4296 mtx_pool_lock(mtxpool_sleep, fp); 4297 fa = fp->f_advice; 4298 if (fa != NULL && fa->fa_advice == advice && 4299 ((fa->fa_start <= end && fa->fa_end >= offset) || 4300 (end != OFF_MAX && fa->fa_start == end + 1) || 4301 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4302 if (offset < fa->fa_start) 4303 fa->fa_start = offset; 4304 if (end > fa->fa_end) 4305 fa->fa_end = end; 4306 } else { 4307 new->fa_advice = advice; 4308 new->fa_start = offset; 4309 new->fa_end = end; 4310 fp->f_advice = new; 4311 new = fa; 4312 } 4313 mtx_pool_unlock(mtxpool_sleep, fp); 4314 break; 4315 case POSIX_FADV_NORMAL: 4316 /* 4317 * If a the "normal" region overlaps with an existing 4318 * non-standard region, trim or remove the 4319 * non-standard region. 4320 */ 4321 mtx_pool_lock(mtxpool_sleep, fp); 4322 fa = fp->f_advice; 4323 if (fa != NULL) { 4324 if (offset <= fa->fa_start && end >= fa->fa_end) { 4325 new = fa; 4326 fp->f_advice = NULL; 4327 } else if (offset <= fa->fa_start && 4328 end >= fa->fa_start) 4329 fa->fa_start = end + 1; 4330 else if (offset <= fa->fa_end && end >= fa->fa_end) 4331 fa->fa_end = offset - 1; 4332 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4333 /* 4334 * If the "normal" region is a middle 4335 * portion of the existing 4336 * non-standard region, just remove 4337 * the whole thing rather than picking 4338 * one side or the other to 4339 * preserve. 
4340 */ 4341 new = fa; 4342 fp->f_advice = NULL; 4343 } 4344 } 4345 mtx_pool_unlock(mtxpool_sleep, fp); 4346 break; 4347 case POSIX_FADV_WILLNEED: 4348 case POSIX_FADV_DONTNEED: 4349 error = VOP_ADVISE(vp, offset, end, advice); 4350 break; 4351 } 4352out: 4353 if (fp != NULL) 4354 fdrop(fp, td); 4355 free(new, M_FADVISE); 4356 return (error); 4357} 4358 4359int 4360sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4361{ 4362 int error; 4363 4364 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4365 uap->advice); 4366 return (kern_posix_error(td, error)); 4367} 4368