vfs_syscalls.c revision 269283
155714Skris/*- 255714Skris * Copyright (c) 1989, 1993 355714Skris * The Regents of the University of California. All rights reserved. 455714Skris * (c) UNIX System Laboratories, Inc. 555714Skris * All or some portions of this file are derived from material licensed 655714Skris * to the University of California by American Telephone and Telegraph 755714Skris * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8280304Sjkim * the permission of UNIX System Laboratories, Inc. 955714Skris * 1055714Skris * Redistribution and use in source and binary forms, with or without 1155714Skris * modification, are permitted provided that the following conditions 1255714Skris * are met: 1355714Skris * 1. Redistributions of source code must retain the above copyright 1455714Skris * notice, this list of conditions and the following disclaimer. 15280304Sjkim * 2. Redistributions in binary form must reproduce the above copyright 1655714Skris * notice, this list of conditions and the following disclaimer in the 1755714Skris * documentation and/or other materials provided with the distribution. 1855714Skris * 4. Neither the name of the University nor the names of its contributors 1955714Skris * may be used to endorse or promote products derived from this software 2055714Skris * without specific prior written permission. 2155714Skris * 22280304Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2355714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2455714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2555714Skris * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2655714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2755714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2855714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2955714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3055714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3155714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3255714Skris * SUCH DAMAGE. 3355714Skris * 3455714Skris * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 3555714Skris */ 3655714Skris 37280304Sjkim#include <sys/cdefs.h> 3855714Skris__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_syscalls.c 269283 2014-07-30 03:56:17Z kib $"); 3955714Skris 40280304Sjkim#include "opt_capsicum.h" 4155714Skris#include "opt_compat.h" 4255714Skris#include "opt_kdtrace.h" 4355714Skris#include "opt_ktrace.h" 4455714Skris 4555714Skris#include <sys/param.h> 4655714Skris#include <sys/systm.h> 4755714Skris#include <sys/bio.h> 4855714Skris#include <sys/buf.h> 4955714Skris#include <sys/capability.h> 5055714Skris#include <sys/disk.h> 5155714Skris#include <sys/sysent.h> 52280304Sjkim#include <sys/malloc.h> 5355714Skris#include <sys/mount.h> 5455714Skris#include <sys/mutex.h> 5555714Skris#include <sys/sysproto.h> 5655714Skris#include <sys/namei.h> 5755714Skris#include <sys/filedesc.h> 5855714Skris#include <sys/kernel.h> 5955714Skris#include <sys/fcntl.h> 6055714Skris#include <sys/file.h> 6155714Skris#include <sys/filio.h> 6255714Skris#include <sys/limits.h> 6355714Skris#include <sys/linker.h> 6455714Skris#include <sys/rwlock.h> 6555714Skris#include <sys/sdt.h> 6655714Skris#include <sys/stat.h> 6755714Skris#include <sys/sx.h> 6855714Skris#include <sys/unistd.h> 69280304Sjkim#include <sys/vnode.h> 7055714Skris#include <sys/priv.h> 7159191Skris#include <sys/proc.h> 7259191Skris#include <sys/dirent.h> 7355714Skris#include <sys/jail.h> 74280304Sjkim#include <sys/syscallsubr.h> 75280304Sjkim#include <sys/sysctl.h> 76280304Sjkim#ifdef KTRACE 77280304Sjkim#include <sys/ktrace.h> 7855714Skris#endif 79280304Sjkim 8055714Skris#include <machine/stdarg.h> 81280304Sjkim 82280304Sjkim#include <security/audit/audit.h> 83280304Sjkim#include <security/mac/mac_framework.h> 8455714Skris 85280304Sjkim#include <vm/vm.h> 8655714Skris#include <vm/vm_object.h> 87280304Sjkim#include <vm/vm_page.h> 88280304Sjkim#include <vm/uma.h> 8955714Skris 90280304Sjkim#include <ufs/ufs/quota.h> 91280304Sjkim 92109998SmarkmMALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93280304Sjkim 94280304SjkimSDT_PROVIDER_DEFINE(vfs); 95280304SjkimSDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96280304SjkimSDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 9768651Skris 98280304Sjkimstatic int chroot_refuse_vdir_fds(struct filedesc *fdp); 99280304Sjkimstatic int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100280304Sjkimstatic int kern_chflags(struct thread *td, const char *path, 101280304Sjkim enum uio_seg pathseg, u_long flags); 102280304Sjkimstatic int kern_chflagsat(struct thread *td, int fd, const char *path, 103280304Sjkim enum uio_seg pathseg, u_long flags, int atflag); 104280304Sjkimstatic int setfflags(struct thread *td, struct vnode *, u_long); 105280304Sjkimstatic int setutimes(struct thread *td, struct vnode *, 106280304Sjkim const struct timespec *, int, int); 107280304Sjkimstatic int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 10855714Skris struct thread *td); 109280304Sjkim 110280304Sjkim/* 111280304Sjkim * The module initialization routine for POSIX asynchronous I/O will 112280304Sjkim * set this to the version of AIO that it implements. (Zero means 113280304Sjkim * that it is not implemented.) This value is used here by pathconf() 114280304Sjkim * and in kern_descrip.c by fpathconf(). 115280304Sjkim */ 116280304Sjkimint async_io_version; 117280304Sjkim 118280304Sjkim#ifdef DEBUG 119280304Sjkimstatic int syncprt = 0; 120280304SjkimSYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 121280304Sjkim#endif 122 123/* 124 * Sync each mounted filesystem. 125 */ 126#ifndef _SYS_SYSPROTO_H_ 127struct sync_args { 128 int dummy; 129}; 130#endif 131/* ARGSUSED */ 132int 133sys_sync(td, uap) 134 struct thread *td; 135 struct sync_args *uap; 136{ 137 struct mount *mp, *nmp; 138 int save; 139 140 mtx_lock(&mountlist_mtx); 141 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 142 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 143 nmp = TAILQ_NEXT(mp, mnt_list); 144 continue; 145 } 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 save = curthread_pflags_set(TDP_SYNCIO); 149 vfs_msync(mp, MNT_NOWAIT); 150 VFS_SYNC(mp, MNT_NOWAIT); 151 curthread_pflags_restore(save); 152 vn_finished_write(mp); 153 } 154 mtx_lock(&mountlist_mtx); 155 nmp = TAILQ_NEXT(mp, mnt_list); 156 vfs_unbusy(mp); 157 } 158 mtx_unlock(&mountlist_mtx); 159 return (0); 160} 161 162/* 163 * Change filesystem quotas. 164 */ 165#ifndef _SYS_SYSPROTO_H_ 166struct quotactl_args { 167 char *path; 168 int cmd; 169 int uid; 170 caddr_t arg; 171}; 172#endif 173int 174sys_quotactl(td, uap) 175 struct thread *td; 176 register struct quotactl_args /* { 177 char *path; 178 int cmd; 179 int uid; 180 caddr_t arg; 181 } */ *uap; 182{ 183 struct mount *mp; 184 struct nameidata nd; 185 int error; 186 187 AUDIT_ARG_CMD(uap->cmd); 188 AUDIT_ARG_UID(uap->uid); 189 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 190 return (EPERM); 191 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 192 uap->path, td); 193 if ((error = namei(&nd)) != 0) 194 return (error); 195 NDFREE(&nd, NDF_ONLY_PNBUF); 196 mp = nd.ni_vp->v_mount; 197 vfs_ref(mp); 198 vput(nd.ni_vp); 199 error = vfs_busy(mp, 0); 200 vfs_rel(mp); 201 if (error != 0) 202 return (error); 203 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 204 205 /* 206 * Since quota on operation typically needs to open quota 207 * file, the Q_QUOTAON handler needs to unbusy the mount point 208 * before calling into namei. Otherwise, unmount might be 209 * started between two vfs_busy() invocations (first is our, 210 * second is from mount point cross-walk code in lookup()), 211 * causing deadlock. 212 * 213 * Require that Q_QUOTAON handles the vfs_busy() reference on 214 * its own, always returning with ubusied mount point. 215 */ 216 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 217 vfs_unbusy(mp); 218 return (error); 219} 220 221/* 222 * Used by statfs conversion routines to scale the block size up if 223 * necessary so that all of the block counts are <= 'max_size'. Note 224 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 225 * value of 'n'. 226 */ 227void 228statfs_scale_blocks(struct statfs *sf, long max_size) 229{ 230 uint64_t count; 231 int shift; 232 233 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 234 235 /* 236 * Attempt to scale the block counts to give a more accurate 237 * overview to userland of the ratio of free space to used 238 * space. To do this, find the largest block count and compute 239 * a divisor that lets it fit into a signed integer <= max_size. 240 */ 241 if (sf->f_bavail < 0) 242 count = -sf->f_bavail; 243 else 244 count = sf->f_bavail; 245 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 246 if (count <= max_size) 247 return; 248 249 count >>= flsl(max_size); 250 shift = 0; 251 while (count > 0) { 252 shift++; 253 count >>=1; 254 } 255 256 sf->f_bsize <<= shift; 257 sf->f_blocks >>= shift; 258 sf->f_bfree >>= shift; 259 sf->f_bavail >>= shift; 260} 261 262/* 263 * Get filesystem statistics. 264 */ 265#ifndef _SYS_SYSPROTO_H_ 266struct statfs_args { 267 char *path; 268 struct statfs *buf; 269}; 270#endif 271int 272sys_statfs(td, uap) 273 struct thread *td; 274 register struct statfs_args /* { 275 char *path; 276 struct statfs *buf; 277 } */ *uap; 278{ 279 struct statfs sf; 280 int error; 281 282 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 283 if (error == 0) 284 error = copyout(&sf, uap->buf, sizeof(sf)); 285 return (error); 286} 287 288int 289kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 290 struct statfs *buf) 291{ 292 struct mount *mp; 293 struct statfs *sp, sb; 294 struct nameidata nd; 295 int error; 296 297 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 298 pathseg, path, td); 299 error = namei(&nd); 300 if (error != 0) 301 return (error); 302 mp = nd.ni_vp->v_mount; 303 vfs_ref(mp); 304 NDFREE(&nd, NDF_ONLY_PNBUF); 305 vput(nd.ni_vp); 306 error = vfs_busy(mp, 0); 307 vfs_rel(mp); 308 if (error != 0) 309 return (error); 310#ifdef MAC 311 error = mac_mount_check_stat(td->td_ucred, mp); 312 if (error != 0) 313 goto out; 314#endif 315 /* 316 * Set these in case the underlying filesystem fails to do so. 317 */ 318 sp = &mp->mnt_stat; 319 sp->f_version = STATFS_VERSION; 320 sp->f_namemax = NAME_MAX; 321 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 322 error = VFS_STATFS(mp, sp); 323 if (error != 0) 324 goto out; 325 if (priv_check(td, PRIV_VFS_GENERATION)) { 326 bcopy(sp, &sb, sizeof(sb)); 327 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 328 prison_enforce_statfs(td->td_ucred, mp, &sb); 329 sp = &sb; 330 } 331 *buf = *sp; 332out: 333 vfs_unbusy(mp); 334 return (error); 335} 336 337/* 338 * Get filesystem statistics. 339 */ 340#ifndef _SYS_SYSPROTO_H_ 341struct fstatfs_args { 342 int fd; 343 struct statfs *buf; 344}; 345#endif 346int 347sys_fstatfs(td, uap) 348 struct thread *td; 349 register struct fstatfs_args /* { 350 int fd; 351 struct statfs *buf; 352 } */ *uap; 353{ 354 struct statfs sf; 355 int error; 356 357 error = kern_fstatfs(td, uap->fd, &sf); 358 if (error == 0) 359 error = copyout(&sf, uap->buf, sizeof(sf)); 360 return (error); 361} 362 363int 364kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 365{ 366 struct file *fp; 367 struct mount *mp; 368 struct statfs *sp, sb; 369 struct vnode *vp; 370 cap_rights_t rights; 371 int error; 372 373 AUDIT_ARG_FD(fd); 374 error = getvnode(td->td_proc->p_fd, fd, 375 cap_rights_init(&rights, CAP_FSTATFS), &fp); 376 if (error != 0) 377 return (error); 378 vp = fp->f_vnode; 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380#ifdef AUDIT 381 AUDIT_ARG_VNODE1(vp); 382#endif 383 mp = vp->v_mount; 384 if (mp) 385 vfs_ref(mp); 386 VOP_UNLOCK(vp, 0); 387 fdrop(fp, td); 388 if (mp == NULL) { 389 error = EBADF; 390 goto out; 391 } 392 error = vfs_busy(mp, 0); 393 vfs_rel(mp); 394 if (error != 0) 395 return (error); 396#ifdef MAC 397 error = mac_mount_check_stat(td->td_ucred, mp); 398 if (error != 0) 399 goto out; 400#endif 401 /* 402 * Set these in case the underlying filesystem fails to do so. 403 */ 404 sp = &mp->mnt_stat; 405 sp->f_version = STATFS_VERSION; 406 sp->f_namemax = NAME_MAX; 407 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 408 error = VFS_STATFS(mp, sp); 409 if (error != 0) 410 goto out; 411 if (priv_check(td, PRIV_VFS_GENERATION)) { 412 bcopy(sp, &sb, sizeof(sb)); 413 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 414 prison_enforce_statfs(td->td_ucred, mp, &sb); 415 sp = &sb; 416 } 417 *buf = *sp; 418out: 419 if (mp) 420 vfs_unbusy(mp); 421 return (error); 422} 423 424/* 425 * Get statistics on all filesystems. 426 */ 427#ifndef _SYS_SYSPROTO_H_ 428struct getfsstat_args { 429 struct statfs *buf; 430 long bufsize; 431 int flags; 432}; 433#endif 434int 435sys_getfsstat(td, uap) 436 struct thread *td; 437 register struct getfsstat_args /* { 438 struct statfs *buf; 439 long bufsize; 440 int flags; 441 } */ *uap; 442{ 443 444 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 445 uap->flags)); 446} 447 448/* 449 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 450 * The caller is responsible for freeing memory which will be allocated 451 * in '*buf'. 452 */ 453int 454kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 455 enum uio_seg bufseg, int flags) 456{ 457 struct mount *mp, *nmp; 458 struct statfs *sfsp, *sp, sb; 459 size_t count, maxcount; 460 int error; 461 462 maxcount = bufsize / sizeof(struct statfs); 463 if (bufsize == 0) 464 sfsp = NULL; 465 else if (bufseg == UIO_USERSPACE) 466 sfsp = *buf; 467 else /* if (bufseg == UIO_SYSSPACE) */ { 468 count = 0; 469 mtx_lock(&mountlist_mtx); 470 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 471 count++; 472 } 473 mtx_unlock(&mountlist_mtx); 474 if (maxcount > count) 475 maxcount = count; 476 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 477 M_WAITOK); 478 } 479 count = 0; 480 mtx_lock(&mountlist_mtx); 481 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 482 if (prison_canseemount(td->td_ucred, mp) != 0) { 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 continue; 485 } 486#ifdef MAC 487 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 488 nmp = TAILQ_NEXT(mp, mnt_list); 489 continue; 490 } 491#endif 492 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 493 nmp = TAILQ_NEXT(mp, mnt_list); 494 continue; 495 } 496 if (sfsp && count < maxcount) { 497 sp = &mp->mnt_stat; 498 /* 499 * Set these in case the underlying filesystem 500 * fails to do so. 501 */ 502 sp->f_version = STATFS_VERSION; 503 sp->f_namemax = NAME_MAX; 504 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 505 /* 506 * If MNT_NOWAIT or MNT_LAZY is specified, do not 507 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 508 * overrides MNT_WAIT. 509 */ 510 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 511 (flags & MNT_WAIT)) && 512 (error = VFS_STATFS(mp, sp))) { 513 mtx_lock(&mountlist_mtx); 514 nmp = TAILQ_NEXT(mp, mnt_list); 515 vfs_unbusy(mp); 516 continue; 517 } 518 if (priv_check(td, PRIV_VFS_GENERATION)) { 519 bcopy(sp, &sb, sizeof(sb)); 520 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 521 prison_enforce_statfs(td->td_ucred, mp, &sb); 522 sp = &sb; 523 } 524 if (bufseg == UIO_SYSSPACE) 525 bcopy(sp, sfsp, sizeof(*sp)); 526 else /* if (bufseg == UIO_USERSPACE) */ { 527 error = copyout(sp, sfsp, sizeof(*sp)); 528 if (error != 0) { 529 vfs_unbusy(mp); 530 return (error); 531 } 532 } 533 sfsp++; 534 } 535 count++; 536 mtx_lock(&mountlist_mtx); 537 nmp = TAILQ_NEXT(mp, mnt_list); 538 vfs_unbusy(mp); 539 } 540 mtx_unlock(&mountlist_mtx); 541 if (sfsp && count > maxcount) 542 td->td_retval[0] = maxcount; 543 else 544 td->td_retval[0] = count; 545 return (0); 546} 547 548#ifdef COMPAT_FREEBSD4 549/* 550 * Get old format filesystem statistics. 551 */ 552static void cvtstatfs(struct statfs *, struct ostatfs *); 553 554#ifndef _SYS_SYSPROTO_H_ 555struct freebsd4_statfs_args { 556 char *path; 557 struct ostatfs *buf; 558}; 559#endif 560int 561freebsd4_statfs(td, uap) 562 struct thread *td; 563 struct freebsd4_statfs_args /* { 564 char *path; 565 struct ostatfs *buf; 566 } */ *uap; 567{ 568 struct ostatfs osb; 569 struct statfs sf; 570 int error; 571 572 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 573 if (error != 0) 574 return (error); 575 cvtstatfs(&sf, &osb); 576 return (copyout(&osb, uap->buf, sizeof(osb))); 577} 578 579/* 580 * Get filesystem statistics. 581 */ 582#ifndef _SYS_SYSPROTO_H_ 583struct freebsd4_fstatfs_args { 584 int fd; 585 struct ostatfs *buf; 586}; 587#endif 588int 589freebsd4_fstatfs(td, uap) 590 struct thread *td; 591 struct freebsd4_fstatfs_args /* { 592 int fd; 593 struct ostatfs *buf; 594 } */ *uap; 595{ 596 struct ostatfs osb; 597 struct statfs sf; 598 int error; 599 600 error = kern_fstatfs(td, uap->fd, &sf); 601 if (error != 0) 602 return (error); 603 cvtstatfs(&sf, &osb); 604 return (copyout(&osb, uap->buf, sizeof(osb))); 605} 606 607/* 608 * Get statistics on all filesystems. 609 */ 610#ifndef _SYS_SYSPROTO_H_ 611struct freebsd4_getfsstat_args { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615}; 616#endif 617int 618freebsd4_getfsstat(td, uap) 619 struct thread *td; 620 register struct freebsd4_getfsstat_args /* { 621 struct ostatfs *buf; 622 long bufsize; 623 int flags; 624 } */ *uap; 625{ 626 struct statfs *buf, *sp; 627 struct ostatfs osb; 628 size_t count, size; 629 int error; 630 631 count = uap->bufsize / sizeof(struct ostatfs); 632 size = count * sizeof(struct statfs); 633 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 634 if (size > 0) { 635 count = td->td_retval[0]; 636 sp = buf; 637 while (count > 0 && error == 0) { 638 cvtstatfs(sp, &osb); 639 error = copyout(&osb, uap->buf, sizeof(osb)); 640 sp++; 641 uap->buf++; 642 count--; 643 } 644 free(buf, M_TEMP); 645 } 646 return (error); 647} 648 649/* 650 * Implement fstatfs() for (NFS) file handles. 651 */ 652#ifndef _SYS_SYSPROTO_H_ 653struct freebsd4_fhstatfs_args { 654 struct fhandle *u_fhp; 655 struct ostatfs *buf; 656}; 657#endif 658int 659freebsd4_fhstatfs(td, uap) 660 struct thread *td; 661 struct freebsd4_fhstatfs_args /* { 662 struct fhandle *u_fhp; 663 struct ostatfs *buf; 664 } */ *uap; 665{ 666 struct ostatfs osb; 667 struct statfs sf; 668 fhandle_t fh; 669 int error; 670 671 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 672 if (error != 0) 673 return (error); 674 error = kern_fhstatfs(td, fh, &sf); 675 if (error != 0) 676 return (error); 677 cvtstatfs(&sf, &osb); 678 return (copyout(&osb, uap->buf, sizeof(osb))); 679} 680 681/* 682 * Convert a new format statfs structure to an old format statfs structure. 683 */ 684static void 685cvtstatfs(nsp, osp) 686 struct statfs *nsp; 687 struct ostatfs *osp; 688{ 689 690 statfs_scale_blocks(nsp, LONG_MAX); 691 bzero(osp, sizeof(*osp)); 692 osp->f_bsize = nsp->f_bsize; 693 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 694 osp->f_blocks = nsp->f_blocks; 695 osp->f_bfree = nsp->f_bfree; 696 osp->f_bavail = nsp->f_bavail; 697 osp->f_files = MIN(nsp->f_files, LONG_MAX); 698 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 699 osp->f_owner = nsp->f_owner; 700 osp->f_type = nsp->f_type; 701 osp->f_flags = nsp->f_flags; 702 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 703 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 704 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 705 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 706 strlcpy(osp->f_fstypename, nsp->f_fstypename, 707 MIN(MFSNAMELEN, OMFSNAMELEN)); 708 strlcpy(osp->f_mntonname, nsp->f_mntonname, 709 MIN(MNAMELEN, OMNAMELEN)); 710 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 711 MIN(MNAMELEN, OMNAMELEN)); 712 osp->f_fsid = nsp->f_fsid; 713} 714#endif /* COMPAT_FREEBSD4 */ 715 716/* 717 * Change current working directory to a given file descriptor. 718 */ 719#ifndef _SYS_SYSPROTO_H_ 720struct fchdir_args { 721 int fd; 722}; 723#endif 724int 725sys_fchdir(td, uap) 726 struct thread *td; 727 struct fchdir_args /* { 728 int fd; 729 } */ *uap; 730{ 731 register struct filedesc *fdp = td->td_proc->p_fd; 732 struct vnode *vp, *tdp, *vpold; 733 struct mount *mp; 734 struct file *fp; 735 cap_rights_t rights; 736 int error; 737 738 AUDIT_ARG_FD(uap->fd); 739 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 740 &fp); 741 if (error != 0) 742 return (error); 743 vp = fp->f_vnode; 744 VREF(vp); 745 fdrop(fp, td); 746 vn_lock(vp, LK_SHARED | LK_RETRY); 747 AUDIT_ARG_VNODE1(vp); 748 error = change_dir(vp, td); 749 while (!error && (mp = vp->v_mountedhere) != NULL) { 750 if (vfs_busy(mp, 0)) 751 continue; 752 error = VFS_ROOT(mp, LK_SHARED, &tdp); 753 vfs_unbusy(mp); 754 if (error != 0) 755 break; 756 vput(vp); 757 vp = tdp; 758 } 759 if (error != 0) { 760 vput(vp); 761 return (error); 762 } 763 VOP_UNLOCK(vp, 0); 764 FILEDESC_XLOCK(fdp); 765 vpold = fdp->fd_cdir; 766 fdp->fd_cdir = vp; 767 FILEDESC_XUNLOCK(fdp); 768 vrele(vpold); 769 return (0); 770} 771 772/* 773 * Change current working directory (``.''). 774 */ 775#ifndef _SYS_SYSPROTO_H_ 776struct chdir_args { 777 char *path; 778}; 779#endif 780int 781sys_chdir(td, uap) 782 struct thread *td; 783 struct chdir_args /* { 784 char *path; 785 } */ *uap; 786{ 787 788 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 789} 790 791int 792kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 793{ 794 register struct filedesc *fdp = td->td_proc->p_fd; 795 struct nameidata nd; 796 struct vnode *vp; 797 int error; 798 799 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 800 pathseg, path, td); 801 if ((error = namei(&nd)) != 0) 802 return (error); 803 if ((error = change_dir(nd.ni_vp, td)) != 0) { 804 vput(nd.ni_vp); 805 NDFREE(&nd, NDF_ONLY_PNBUF); 806 return (error); 807 } 808 VOP_UNLOCK(nd.ni_vp, 0); 809 NDFREE(&nd, NDF_ONLY_PNBUF); 810 FILEDESC_XLOCK(fdp); 811 vp = fdp->fd_cdir; 812 fdp->fd_cdir = nd.ni_vp; 813 FILEDESC_XUNLOCK(fdp); 814 vrele(vp); 815 return (0); 816} 817 818/* 819 * Helper function for raised chroot(2) security function: Refuse if 820 * any filedescriptors are open directories. 821 */ 822static int 823chroot_refuse_vdir_fds(fdp) 824 struct filedesc *fdp; 825{ 826 struct vnode *vp; 827 struct file *fp; 828 int fd; 829 830 FILEDESC_LOCK_ASSERT(fdp); 831 832 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 833 fp = fget_locked(fdp, fd); 834 if (fp == NULL) 835 continue; 836 if (fp->f_type == DTYPE_VNODE) { 837 vp = fp->f_vnode; 838 if (vp->v_type == VDIR) 839 return (EPERM); 840 } 841 } 842 return (0); 843} 844 845/* 846 * This sysctl determines if we will allow a process to chroot(2) if it 847 * has a directory open: 848 * 0: disallowed for all processes. 849 * 1: allowed for processes that were not already chroot(2)'ed. 850 * 2: allowed for all processes. 851 */ 852 853static int chroot_allow_open_directories = 1; 854 855SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 856 &chroot_allow_open_directories, 0, 857 "Allow a process to chroot(2) if it has a directory open"); 858 859/* 860 * Change notion of root (``/'') directory. 861 */ 862#ifndef _SYS_SYSPROTO_H_ 863struct chroot_args { 864 char *path; 865}; 866#endif 867int 868sys_chroot(td, uap) 869 struct thread *td; 870 struct chroot_args /* { 871 char *path; 872 } */ *uap; 873{ 874 struct nameidata nd; 875 int error; 876 877 error = priv_check(td, PRIV_VFS_CHROOT); 878 if (error != 0) 879 return (error); 880 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 881 UIO_USERSPACE, uap->path, td); 882 error = namei(&nd); 883 if (error != 0) 884 goto error; 885 error = change_dir(nd.ni_vp, td); 886 if (error != 0) 887 goto e_vunlock; 888#ifdef MAC 889 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 890 if (error != 0) 891 goto e_vunlock; 892#endif 893 VOP_UNLOCK(nd.ni_vp, 0); 894 error = change_root(nd.ni_vp, td); 895 vrele(nd.ni_vp); 896 NDFREE(&nd, NDF_ONLY_PNBUF); 897 return (error); 898e_vunlock: 899 vput(nd.ni_vp); 900error: 901 NDFREE(&nd, NDF_ONLY_PNBUF); 902 return (error); 903} 904 905/* 906 * Common routine for chroot and chdir. Callers must provide a locked vnode 907 * instance. 908 */ 909int 910change_dir(vp, td) 911 struct vnode *vp; 912 struct thread *td; 913{ 914#ifdef MAC 915 int error; 916#endif 917 918 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 919 if (vp->v_type != VDIR) 920 return (ENOTDIR); 921#ifdef MAC 922 error = mac_vnode_check_chdir(td->td_ucred, vp); 923 if (error != 0) 924 return (error); 925#endif 926 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 927} 928 929/* 930 * Common routine for kern_chroot() and jail_attach(). The caller is 931 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 932 * authorize this operation. 933 */ 934int 935change_root(vp, td) 936 struct vnode *vp; 937 struct thread *td; 938{ 939 struct filedesc *fdp; 940 struct vnode *oldvp; 941 int error; 942 943 fdp = td->td_proc->p_fd; 944 FILEDESC_XLOCK(fdp); 945 if (chroot_allow_open_directories == 0 || 946 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 947 error = chroot_refuse_vdir_fds(fdp); 948 if (error != 0) { 949 FILEDESC_XUNLOCK(fdp); 950 return (error); 951 } 952 } 953 oldvp = fdp->fd_rdir; 954 fdp->fd_rdir = vp; 955 VREF(fdp->fd_rdir); 956 if (!fdp->fd_jdir) { 957 fdp->fd_jdir = vp; 958 VREF(fdp->fd_jdir); 959 } 960 FILEDESC_XUNLOCK(fdp); 961 vrele(oldvp); 962 return (0); 963} 964 965static __inline void 966flags_to_rights(int flags, cap_rights_t *rightsp) 967{ 968 969 if (flags & O_EXEC) { 970 cap_rights_set(rightsp, CAP_FEXECVE); 971 } else { 972 switch ((flags & O_ACCMODE)) { 973 case O_RDONLY: 974 cap_rights_set(rightsp, CAP_READ); 975 break; 976 case O_RDWR: 977 cap_rights_set(rightsp, CAP_READ); 978 /* FALLTHROUGH */ 979 case O_WRONLY: 980 cap_rights_set(rightsp, CAP_WRITE); 981 if (!(flags & (O_APPEND | O_TRUNC))) 982 cap_rights_set(rightsp, CAP_SEEK); 983 break; 984 } 985 } 986 987 if (flags & O_CREAT) 988 cap_rights_set(rightsp, CAP_CREATE); 989 990 if (flags & O_TRUNC) 991 cap_rights_set(rightsp, CAP_FTRUNCATE); 992 993 if (flags & (O_SYNC | O_FSYNC)) 994 cap_rights_set(rightsp, CAP_FSYNC); 995 996 if (flags & (O_EXLOCK | O_SHLOCK)) 997 cap_rights_set(rightsp, CAP_FLOCK); 998} 999 1000/* 1001 * Check permissions, allocate an open file structure, and call the device 1002 * open routine if any. 1003 */ 1004#ifndef _SYS_SYSPROTO_H_ 1005struct open_args { 1006 char *path; 1007 int flags; 1008 int mode; 1009}; 1010#endif 1011int 1012sys_open(td, uap) 1013 struct thread *td; 1014 register struct open_args /* { 1015 char *path; 1016 int flags; 1017 int mode; 1018 } */ *uap; 1019{ 1020 1021 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1022} 1023 1024#ifndef _SYS_SYSPROTO_H_ 1025struct openat_args { 1026 int fd; 1027 char *path; 1028 int flag; 1029 int mode; 1030}; 1031#endif 1032int 1033sys_openat(struct thread *td, struct openat_args *uap) 1034{ 1035 1036 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1037 uap->mode)); 1038} 1039 1040int 1041kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1042 int mode) 1043{ 1044 1045 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1046} 1047 1048int 1049kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1050 int flags, int mode) 1051{ 1052 struct proc *p = td->td_proc; 1053 struct filedesc *fdp = p->p_fd; 1054 struct file *fp; 1055 struct vnode *vp; 1056 struct nameidata nd; 1057 cap_rights_t rights; 1058 int cmode, error, indx; 1059 1060 indx = -1; 1061 1062 AUDIT_ARG_FFLAGS(flags); 1063 AUDIT_ARG_MODE(mode); 1064 /* XXX: audit dirfd */ 1065 cap_rights_init(&rights, CAP_LOOKUP); 1066 flags_to_rights(flags, &rights); 1067 /* 1068 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1069 * may be specified. 1070 */ 1071 if (flags & O_EXEC) { 1072 if (flags & O_ACCMODE) 1073 return (EINVAL); 1074 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1075 return (EINVAL); 1076 } else { 1077 flags = FFLAGS(flags); 1078 } 1079 1080 /* 1081 * Allocate the file descriptor, but don't install a descriptor yet. 1082 */ 1083 error = falloc_noinstall(td, &fp); 1084 if (error != 0) 1085 return (error); 1086 /* 1087 * An extra reference on `fp' has been held for us by 1088 * falloc_noinstall(). 1089 */ 1090 /* Set the flags early so the finit in devfs can pick them up. */ 1091 fp->f_flag = flags & FMASK; 1092 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1093 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1094 &rights, td); 1095 td->td_dupfd = -1; /* XXX check for fdopen */ 1096 error = vn_open(&nd, &flags, cmode, fp); 1097 if (error != 0) { 1098 /* 1099 * If the vn_open replaced the method vector, something 1100 * wonderous happened deep below and we just pass it up 1101 * pretending we know what we do. 1102 */ 1103 if (error == ENXIO && fp->f_ops != &badfileops) 1104 goto success; 1105 1106 /* 1107 * Handle special fdopen() case. bleh. 1108 * 1109 * Don't do this for relative (capability) lookups; we don't 1110 * understand exactly what would happen, and we don't think 1111 * that it ever should. 1112 */ 1113 if (nd.ni_strictrelative == 0 && 1114 (error == ENODEV || error == ENXIO) && 1115 td->td_dupfd >= 0) { 1116 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1117 &indx); 1118 if (error == 0) 1119 goto success; 1120 } 1121 1122 goto bad; 1123 } 1124 td->td_dupfd = 0; 1125 NDFREE(&nd, NDF_ONLY_PNBUF); 1126 vp = nd.ni_vp; 1127 1128 /* 1129 * Store the vnode, for any f_type. Typically, the vnode use 1130 * count is decremented by direct call to vn_closefile() for 1131 * files that switched type in the cdevsw fdopen() method. 1132 */ 1133 fp->f_vnode = vp; 1134 /* 1135 * If the file wasn't claimed by devfs bind it to the normal 1136 * vnode operations here. 1137 */ 1138 if (fp->f_ops == &badfileops) { 1139 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1140 fp->f_seqcount = 1; 1141 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1142 DTYPE_VNODE, vp, &vnops); 1143 } 1144 1145 VOP_UNLOCK(vp, 0); 1146 if (flags & O_TRUNC) { 1147 error = fo_truncate(fp, 0, td->td_ucred, td); 1148 if (error != 0) 1149 goto bad; 1150 } 1151success: 1152 /* 1153 * If we haven't already installed the FD (for dupfdopen), do so now. 1154 */ 1155 if (indx == -1) { 1156 struct filecaps *fcaps; 1157 1158#ifdef CAPABILITIES 1159 if (nd.ni_strictrelative == 1) 1160 fcaps = &nd.ni_filecaps; 1161 else 1162#endif 1163 fcaps = NULL; 1164 error = finstall(td, fp, &indx, flags, fcaps); 1165 /* On success finstall() consumes fcaps. */ 1166 if (error != 0) { 1167 filecaps_free(&nd.ni_filecaps); 1168 goto bad; 1169 } 1170 } else { 1171 filecaps_free(&nd.ni_filecaps); 1172 } 1173 1174 /* 1175 * Release our private reference, leaving the one associated with 1176 * the descriptor table intact. 1177 */ 1178 fdrop(fp, td); 1179 td->td_retval[0] = indx; 1180 return (0); 1181bad: 1182 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1183 fdrop(fp, td); 1184 return (error); 1185} 1186 1187#ifdef COMPAT_43 1188/* 1189 * Create a file. 1190 */ 1191#ifndef _SYS_SYSPROTO_H_ 1192struct ocreat_args { 1193 char *path; 1194 int mode; 1195}; 1196#endif 1197int 1198ocreat(td, uap) 1199 struct thread *td; 1200 register struct ocreat_args /* { 1201 char *path; 1202 int mode; 1203 } */ *uap; 1204{ 1205 1206 return (kern_open(td, uap->path, UIO_USERSPACE, 1207 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1208} 1209#endif /* COMPAT_43 */ 1210 1211/* 1212 * Create a special file. 1213 */ 1214#ifndef _SYS_SYSPROTO_H_ 1215struct mknod_args { 1216 char *path; 1217 int mode; 1218 int dev; 1219}; 1220#endif 1221int 1222sys_mknod(td, uap) 1223 struct thread *td; 1224 register struct mknod_args /* { 1225 char *path; 1226 int mode; 1227 int dev; 1228 } */ *uap; 1229{ 1230 1231 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1232} 1233 1234#ifndef _SYS_SYSPROTO_H_ 1235struct mknodat_args { 1236 int fd; 1237 char *path; 1238 mode_t mode; 1239 dev_t dev; 1240}; 1241#endif 1242int 1243sys_mknodat(struct thread *td, struct mknodat_args *uap) 1244{ 1245 1246 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1247 uap->dev)); 1248} 1249 1250int 1251kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1252 int dev) 1253{ 1254 1255 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1256} 1257 1258int 1259kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1260 int mode, int dev) 1261{ 1262 struct vnode *vp; 1263 struct mount *mp; 1264 struct vattr vattr; 1265 struct nameidata nd; 1266 cap_rights_t rights; 1267 int error, whiteout = 0; 1268 1269 AUDIT_ARG_MODE(mode); 1270 AUDIT_ARG_DEV(dev); 1271 switch (mode & S_IFMT) { 1272 case S_IFCHR: 1273 case S_IFBLK: 1274 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1275 break; 1276 case S_IFMT: 1277 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1278 break; 1279 case S_IFWHT: 1280 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1281 break; 1282 case S_IFIFO: 1283 if (dev == 0) 1284 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1285 /* FALLTHROUGH */ 1286 default: 1287 error = EINVAL; 1288 break; 1289 } 1290 if (error != 0) 1291 return (error); 1292restart: 1293 bwillwrite(); 1294 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1295 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1296 if ((error = namei(&nd)) != 0) 1297 return (error); 1298 vp = nd.ni_vp; 1299 if (vp != NULL) { 1300 NDFREE(&nd, NDF_ONLY_PNBUF); 1301 if (vp == nd.ni_dvp) 1302 vrele(nd.ni_dvp); 1303 else 1304 vput(nd.ni_dvp); 1305 vrele(vp); 1306 return (EEXIST); 1307 } else { 1308 VATTR_NULL(&vattr); 1309 vattr.va_mode = (mode & ALLPERMS) & 1310 ~td->td_proc->p_fd->fd_cmask; 1311 vattr.va_rdev = dev; 1312 whiteout = 0; 1313 1314 switch (mode & S_IFMT) { 1315 case S_IFMT: /* used by badsect to flag bad sectors */ 1316 vattr.va_type = VBAD; 1317 break; 1318 case S_IFCHR: 1319 vattr.va_type = VCHR; 1320 break; 1321 case S_IFBLK: 1322 vattr.va_type = VBLK; 1323 break; 1324 case S_IFWHT: 1325 whiteout = 1; 1326 break; 1327 default: 1328 panic("kern_mknod: invalid mode"); 1329 } 1330 } 1331 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1332 NDFREE(&nd, NDF_ONLY_PNBUF); 1333 vput(nd.ni_dvp); 1334 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1335 return (error); 1336 goto restart; 1337 } 1338#ifdef MAC 1339 if (error == 0 && !whiteout) 1340 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1341 &nd.ni_cnd, &vattr); 1342#endif 1343 if (error == 0) { 1344 if (whiteout) 1345 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1346 else { 1347 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1348 &nd.ni_cnd, &vattr); 1349 if (error == 0) 1350 vput(nd.ni_vp); 1351 } 1352 } 1353 NDFREE(&nd, NDF_ONLY_PNBUF); 1354 vput(nd.ni_dvp); 1355 vn_finished_write(mp); 1356 return (error); 1357} 1358 1359/* 1360 * Create a named pipe. 1361 */ 1362#ifndef _SYS_SYSPROTO_H_ 1363struct mkfifo_args { 1364 char *path; 1365 int mode; 1366}; 1367#endif 1368int 1369sys_mkfifo(td, uap) 1370 struct thread *td; 1371 register struct mkfifo_args /* { 1372 char *path; 1373 int mode; 1374 } */ *uap; 1375{ 1376 1377 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1378} 1379 1380#ifndef _SYS_SYSPROTO_H_ 1381struct mkfifoat_args { 1382 int fd; 1383 char *path; 1384 mode_t mode; 1385}; 1386#endif 1387int 1388sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1389{ 1390 1391 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1392 uap->mode)); 1393} 1394 1395int 1396kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1397{ 1398 1399 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1400} 1401 1402int 1403kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1404 int mode) 1405{ 1406 struct mount *mp; 1407 struct vattr vattr; 1408 struct nameidata nd; 1409 cap_rights_t rights; 1410 int error; 1411 1412 AUDIT_ARG_MODE(mode); 1413restart: 1414 bwillwrite(); 1415 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1416 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1417 if ((error = namei(&nd)) != 0) 1418 return (error); 1419 if (nd.ni_vp != NULL) { 1420 NDFREE(&nd, NDF_ONLY_PNBUF); 1421 if (nd.ni_vp == nd.ni_dvp) 1422 vrele(nd.ni_dvp); 1423 else 1424 vput(nd.ni_dvp); 1425 vrele(nd.ni_vp); 1426 return (EEXIST); 1427 } 1428 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1429 NDFREE(&nd, NDF_ONLY_PNBUF); 1430 vput(nd.ni_dvp); 1431 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1432 return (error); 1433 goto restart; 1434 } 1435 VATTR_NULL(&vattr); 1436 vattr.va_type = VFIFO; 1437 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1438#ifdef MAC 1439 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1440 &vattr); 1441 if (error != 0) 1442 goto out; 1443#endif 1444 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1445 if (error == 0) 1446 vput(nd.ni_vp); 1447#ifdef MAC 1448out: 1449#endif 1450 vput(nd.ni_dvp); 1451 vn_finished_write(mp); 1452 NDFREE(&nd, NDF_ONLY_PNBUF); 1453 return (error); 1454} 1455 1456/* 1457 * Make a hard file link. 1458 */ 1459#ifndef _SYS_SYSPROTO_H_ 1460struct link_args { 1461 char *path; 1462 char *link; 1463}; 1464#endif 1465int 1466sys_link(td, uap) 1467 struct thread *td; 1468 register struct link_args /* { 1469 char *path; 1470 char *link; 1471 } */ *uap; 1472{ 1473 1474 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1475} 1476 1477#ifndef _SYS_SYSPROTO_H_ 1478struct linkat_args { 1479 int fd1; 1480 char *path1; 1481 int fd2; 1482 char *path2; 1483 int flag; 1484}; 1485#endif 1486int 1487sys_linkat(struct thread *td, struct linkat_args *uap) 1488{ 1489 int flag; 1490 1491 flag = uap->flag; 1492 if (flag & ~AT_SYMLINK_FOLLOW) 1493 return (EINVAL); 1494 1495 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1496 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1497} 1498 1499int hardlink_check_uid = 0; 1500SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1501 &hardlink_check_uid, 0, 1502 "Unprivileged processes cannot create hard links to files owned by other " 1503 "users"); 1504static int hardlink_check_gid = 0; 1505SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1506 &hardlink_check_gid, 0, 1507 "Unprivileged processes cannot create hard links to files owned by other " 1508 "groups"); 1509 1510static int 1511can_hardlink(struct vnode *vp, struct ucred *cred) 1512{ 1513 struct vattr va; 1514 int error; 1515 1516 if (!hardlink_check_uid && !hardlink_check_gid) 1517 return (0); 1518 1519 error = VOP_GETATTR(vp, &va, cred); 1520 if (error != 0) 1521 return (error); 1522 1523 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1524 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1525 if (error != 0) 1526 return (error); 1527 } 1528 1529 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1530 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1531 if (error != 0) 1532 return (error); 1533 } 1534 1535 return (0); 1536} 1537 1538int 1539kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1540{ 1541 1542 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1543} 1544 1545int 1546kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1547 enum uio_seg segflg, int follow) 1548{ 1549 struct vnode *vp; 1550 struct mount *mp; 1551 struct nameidata nd; 1552 cap_rights_t rights; 1553 int error; 1554 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558again: 1559 if ((error = namei(&nd)) != 0) 1560 return (error); 1561 NDFREE(&nd, NDF_ONLY_PNBUF); 1562 vp = nd.ni_vp; 1563 if (vp->v_type == VDIR) { 1564 vrele(vp); 1565 return (EPERM); /* POSIX */ 1566 } 1567 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1568 vrele(vp); 1569 return (error); 1570 } 1571 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1572 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1573 if ((error = namei(&nd)) == 0) { 1574 if (nd.ni_vp != NULL) { 1575 if (nd.ni_dvp == nd.ni_vp) 1576 vrele(nd.ni_dvp); 1577 else 1578 vput(nd.ni_dvp); 1579 vrele(nd.ni_vp); 1580 error = EEXIST; 1581 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1582 /* 1583 * Check for cross-device links. No need to 1584 * recheck vp->v_type, since it cannot change 1585 * for non-doomed vnode. 1586 */ 1587 if (nd.ni_dvp->v_mount != vp->v_mount) 1588 error = EXDEV; 1589 else 1590 error = can_hardlink(vp, td->td_ucred); 1591 if (error == 0) 1592#ifdef MAC 1593 error = mac_vnode_check_link(td->td_ucred, 1594 nd.ni_dvp, vp, &nd.ni_cnd); 1595 if (error == 0) 1596#endif 1597 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1598 VOP_UNLOCK(vp, 0); 1599 vput(nd.ni_dvp); 1600 } else { 1601 vput(nd.ni_dvp); 1602 NDFREE(&nd, NDF_ONLY_PNBUF); 1603 vrele(vp); 1604 vn_finished_write(mp); 1605 goto again; 1606 } 1607 NDFREE(&nd, NDF_ONLY_PNBUF); 1608 } 1609 vrele(vp); 1610 vn_finished_write(mp); 1611 return (error); 1612} 1613 1614/* 1615 * Make a symbolic link. 1616 */ 1617#ifndef _SYS_SYSPROTO_H_ 1618struct symlink_args { 1619 char *path; 1620 char *link; 1621}; 1622#endif 1623int 1624sys_symlink(td, uap) 1625 struct thread *td; 1626 register struct symlink_args /* { 1627 char *path; 1628 char *link; 1629 } */ *uap; 1630{ 1631 1632 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1633} 1634 1635#ifndef _SYS_SYSPROTO_H_ 1636struct symlinkat_args { 1637 char *path; 1638 int fd; 1639 char *path2; 1640}; 1641#endif 1642int 1643sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1644{ 1645 1646 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1647 UIO_USERSPACE)); 1648} 1649 1650int 1651kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1652{ 1653 1654 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1655} 1656 1657int 1658kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1659 enum uio_seg segflg) 1660{ 1661 struct mount *mp; 1662 struct vattr vattr; 1663 char *syspath; 1664 struct nameidata nd; 1665 int error; 1666 cap_rights_t rights; 1667 1668 if (segflg == UIO_SYSSPACE) { 1669 syspath = path1; 1670 } else { 1671 syspath = uma_zalloc(namei_zone, M_WAITOK); 1672 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1673 goto out; 1674 } 1675 AUDIT_ARG_TEXT(syspath); 1676restart: 1677 bwillwrite(); 1678 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1679 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1680 if ((error = namei(&nd)) != 0) 1681 goto out; 1682 if (nd.ni_vp) { 1683 NDFREE(&nd, NDF_ONLY_PNBUF); 1684 if (nd.ni_vp == nd.ni_dvp) 1685 vrele(nd.ni_dvp); 1686 else 1687 vput(nd.ni_dvp); 1688 vrele(nd.ni_vp); 1689 error = EEXIST; 1690 goto out; 1691 } 1692 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1693 NDFREE(&nd, NDF_ONLY_PNBUF); 1694 vput(nd.ni_dvp); 1695 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1696 goto out; 1697 goto restart; 1698 } 1699 VATTR_NULL(&vattr); 1700 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1701#ifdef MAC 1702 vattr.va_type = VLNK; 1703 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1704 &vattr); 1705 if (error != 0) 1706 goto out2; 1707#endif 1708 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1709 if (error == 0) 1710 vput(nd.ni_vp); 1711#ifdef MAC 1712out2: 1713#endif 1714 NDFREE(&nd, NDF_ONLY_PNBUF); 1715 vput(nd.ni_dvp); 1716 vn_finished_write(mp); 1717out: 1718 if (segflg != UIO_SYSSPACE) 1719 uma_zfree(namei_zone, syspath); 1720 return (error); 1721} 1722 1723/* 1724 * Delete a whiteout from the filesystem. 1725 */ 1726int 1727sys_undelete(td, uap) 1728 struct thread *td; 1729 register struct undelete_args /* { 1730 char *path; 1731 } */ *uap; 1732{ 1733 struct mount *mp; 1734 struct nameidata nd; 1735 int error; 1736 1737restart: 1738 bwillwrite(); 1739 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1740 UIO_USERSPACE, uap->path, td); 1741 error = namei(&nd); 1742 if (error != 0) 1743 return (error); 1744 1745 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1746 NDFREE(&nd, NDF_ONLY_PNBUF); 1747 if (nd.ni_vp == nd.ni_dvp) 1748 vrele(nd.ni_dvp); 1749 else 1750 vput(nd.ni_dvp); 1751 if (nd.ni_vp) 1752 vrele(nd.ni_vp); 1753 return (EEXIST); 1754 } 1755 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1756 NDFREE(&nd, NDF_ONLY_PNBUF); 1757 vput(nd.ni_dvp); 1758 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1759 return (error); 1760 goto restart; 1761 } 1762 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1763 NDFREE(&nd, NDF_ONLY_PNBUF); 1764 vput(nd.ni_dvp); 1765 vn_finished_write(mp); 1766 return (error); 1767} 1768 1769/* 1770 * Delete a name from the filesystem. 1771 */ 1772#ifndef _SYS_SYSPROTO_H_ 1773struct unlink_args { 1774 char *path; 1775}; 1776#endif 1777int 1778sys_unlink(td, uap) 1779 struct thread *td; 1780 struct unlink_args /* { 1781 char *path; 1782 } */ *uap; 1783{ 1784 1785 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1786} 1787 1788#ifndef _SYS_SYSPROTO_H_ 1789struct unlinkat_args { 1790 int fd; 1791 char *path; 1792 int flag; 1793}; 1794#endif 1795int 1796sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1797{ 1798 int flag = uap->flag; 1799 int fd = uap->fd; 1800 char *path = uap->path; 1801 1802 if (flag & ~AT_REMOVEDIR) 1803 return (EINVAL); 1804 1805 if (flag & AT_REMOVEDIR) 1806 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1807 else 1808 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1809} 1810 1811int 1812kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1813{ 1814 1815 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1816} 1817 1818int 1819kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1820 ino_t oldinum) 1821{ 1822 struct mount *mp; 1823 struct vnode *vp; 1824 struct nameidata nd; 1825 struct stat sb; 1826 cap_rights_t rights; 1827 int error; 1828 1829restart: 1830 bwillwrite(); 1831 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1832 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1833 if ((error = namei(&nd)) != 0) 1834 return (error == EINVAL ? EPERM : error); 1835 vp = nd.ni_vp; 1836 if (vp->v_type == VDIR && oldinum == 0) { 1837 error = EPERM; /* POSIX */ 1838 } else if (oldinum != 0 && 1839 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1840 sb.st_ino != oldinum) { 1841 error = EIDRM; /* Identifier removed */ 1842 } else { 1843 /* 1844 * The root of a mounted filesystem cannot be deleted. 1845 * 1846 * XXX: can this only be a VDIR case? 1847 */ 1848 if (vp->v_vflag & VV_ROOT) 1849 error = EBUSY; 1850 } 1851 if (error == 0) { 1852 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1853 NDFREE(&nd, NDF_ONLY_PNBUF); 1854 vput(nd.ni_dvp); 1855 if (vp == nd.ni_dvp) 1856 vrele(vp); 1857 else 1858 vput(vp); 1859 if ((error = vn_start_write(NULL, &mp, 1860 V_XSLEEP | PCATCH)) != 0) 1861 return (error); 1862 goto restart; 1863 } 1864#ifdef MAC 1865 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1866 &nd.ni_cnd); 1867 if (error != 0) 1868 goto out; 1869#endif 1870 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1871 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1872#ifdef MAC 1873out: 1874#endif 1875 vn_finished_write(mp); 1876 } 1877 NDFREE(&nd, NDF_ONLY_PNBUF); 1878 vput(nd.ni_dvp); 1879 if (vp == nd.ni_dvp) 1880 vrele(vp); 1881 else 1882 vput(vp); 1883 return (error); 1884} 1885 1886/* 1887 * Reposition read/write file offset. 1888 */ 1889#ifndef _SYS_SYSPROTO_H_ 1890struct lseek_args { 1891 int fd; 1892 int pad; 1893 off_t offset; 1894 int whence; 1895}; 1896#endif 1897int 1898sys_lseek(td, uap) 1899 struct thread *td; 1900 register struct lseek_args /* { 1901 int fd; 1902 int pad; 1903 off_t offset; 1904 int whence; 1905 } */ *uap; 1906{ 1907 struct file *fp; 1908 cap_rights_t rights; 1909 int error; 1910 1911 AUDIT_ARG_FD(uap->fd); 1912 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1913 if (error != 0) 1914 return (error); 1915 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1916 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1917 fdrop(fp, td); 1918 return (error); 1919} 1920 1921#if defined(COMPAT_43) 1922/* 1923 * Reposition read/write file offset. 1924 */ 1925#ifndef _SYS_SYSPROTO_H_ 1926struct olseek_args { 1927 int fd; 1928 long offset; 1929 int whence; 1930}; 1931#endif 1932int 1933olseek(td, uap) 1934 struct thread *td; 1935 register struct olseek_args /* { 1936 int fd; 1937 long offset; 1938 int whence; 1939 } */ *uap; 1940{ 1941 struct lseek_args /* { 1942 int fd; 1943 int pad; 1944 off_t offset; 1945 int whence; 1946 } */ nuap; 1947 1948 nuap.fd = uap->fd; 1949 nuap.offset = uap->offset; 1950 nuap.whence = uap->whence; 1951 return (sys_lseek(td, &nuap)); 1952} 1953#endif /* COMPAT_43 */ 1954 1955/* Version with the 'pad' argument */ 1956int 1957freebsd6_lseek(td, uap) 1958 struct thread *td; 1959 register struct freebsd6_lseek_args *uap; 1960{ 1961 struct lseek_args ouap; 1962 1963 ouap.fd = uap->fd; 1964 ouap.offset = uap->offset; 1965 ouap.whence = uap->whence; 1966 return (sys_lseek(td, &ouap)); 1967} 1968 1969/* 1970 * Check access permissions using passed credentials. 1971 */ 1972static int 1973vn_access(vp, user_flags, cred, td) 1974 struct vnode *vp; 1975 int user_flags; 1976 struct ucred *cred; 1977 struct thread *td; 1978{ 1979 accmode_t accmode; 1980 int error; 1981 1982 /* Flags == 0 means only check for existence. */ 1983 error = 0; 1984 if (user_flags) { 1985 accmode = 0; 1986 if (user_flags & R_OK) 1987 accmode |= VREAD; 1988 if (user_flags & W_OK) 1989 accmode |= VWRITE; 1990 if (user_flags & X_OK) 1991 accmode |= VEXEC; 1992#ifdef MAC 1993 error = mac_vnode_check_access(cred, vp, accmode); 1994 if (error != 0) 1995 return (error); 1996#endif 1997 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1998 error = VOP_ACCESS(vp, accmode, cred, td); 1999 } 2000 return (error); 2001} 2002 2003/* 2004 * Check access permissions using "real" credentials. 2005 */ 2006#ifndef _SYS_SYSPROTO_H_ 2007struct access_args { 2008 char *path; 2009 int amode; 2010}; 2011#endif 2012int 2013sys_access(td, uap) 2014 struct thread *td; 2015 register struct access_args /* { 2016 char *path; 2017 int amode; 2018 } */ *uap; 2019{ 2020 2021 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2022} 2023 2024#ifndef _SYS_SYSPROTO_H_ 2025struct faccessat_args { 2026 int dirfd; 2027 char *path; 2028 int amode; 2029 int flag; 2030} 2031#endif 2032int 2033sys_faccessat(struct thread *td, struct faccessat_args *uap) 2034{ 2035 2036 if (uap->flag & ~AT_EACCESS) 2037 return (EINVAL); 2038 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2039 uap->amode)); 2040} 2041 2042int 2043kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2044{ 2045 2046 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2047} 2048 2049int 2050kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2051 int flag, int amode) 2052{ 2053 struct ucred *cred, *tmpcred; 2054 struct vnode *vp; 2055 struct nameidata nd; 2056 cap_rights_t rights; 2057 int error; 2058 2059 /* 2060 * Create and modify a temporary credential instead of one that 2061 * is potentially shared. 2062 */ 2063 if (!(flag & AT_EACCESS)) { 2064 cred = td->td_ucred; 2065 tmpcred = crdup(cred); 2066 tmpcred->cr_uid = cred->cr_ruid; 2067 tmpcred->cr_groups[0] = cred->cr_rgid; 2068 td->td_ucred = tmpcred; 2069 } else 2070 cred = tmpcred = td->td_ucred; 2071 AUDIT_ARG_VALUE(amode); 2072 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2073 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2074 td); 2075 if ((error = namei(&nd)) != 0) 2076 goto out1; 2077 vp = nd.ni_vp; 2078 2079 error = vn_access(vp, amode, tmpcred, td); 2080 NDFREE(&nd, NDF_ONLY_PNBUF); 2081 vput(vp); 2082out1: 2083 if (!(flag & AT_EACCESS)) { 2084 td->td_ucred = cred; 2085 crfree(tmpcred); 2086 } 2087 return (error); 2088} 2089 2090/* 2091 * Check access permissions using "effective" credentials. 2092 */ 2093#ifndef _SYS_SYSPROTO_H_ 2094struct eaccess_args { 2095 char *path; 2096 int amode; 2097}; 2098#endif 2099int 2100sys_eaccess(td, uap) 2101 struct thread *td; 2102 register struct eaccess_args /* { 2103 char *path; 2104 int amode; 2105 } */ *uap; 2106{ 2107 2108 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2109} 2110 2111int 2112kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2113{ 2114 2115 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2116} 2117 2118#if defined(COMPAT_43) 2119/* 2120 * Get file status; this version follows links. 2121 */ 2122#ifndef _SYS_SYSPROTO_H_ 2123struct ostat_args { 2124 char *path; 2125 struct ostat *ub; 2126}; 2127#endif 2128int 2129ostat(td, uap) 2130 struct thread *td; 2131 register struct ostat_args /* { 2132 char *path; 2133 struct ostat *ub; 2134 } */ *uap; 2135{ 2136 struct stat sb; 2137 struct ostat osb; 2138 int error; 2139 2140 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2141 if (error != 0) 2142 return (error); 2143 cvtstat(&sb, &osb); 2144 return (copyout(&osb, uap->ub, sizeof (osb))); 2145} 2146 2147/* 2148 * Get file status; this version does not follow links. 2149 */ 2150#ifndef _SYS_SYSPROTO_H_ 2151struct olstat_args { 2152 char *path; 2153 struct ostat *ub; 2154}; 2155#endif 2156int 2157olstat(td, uap) 2158 struct thread *td; 2159 register struct olstat_args /* { 2160 char *path; 2161 struct ostat *ub; 2162 } */ *uap; 2163{ 2164 struct stat sb; 2165 struct ostat osb; 2166 int error; 2167 2168 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2169 if (error != 0) 2170 return (error); 2171 cvtstat(&sb, &osb); 2172 return (copyout(&osb, uap->ub, sizeof (osb))); 2173} 2174 2175/* 2176 * Convert from an old to a new stat structure. 2177 */ 2178void 2179cvtstat(st, ost) 2180 struct stat *st; 2181 struct ostat *ost; 2182{ 2183 2184 ost->st_dev = st->st_dev; 2185 ost->st_ino = st->st_ino; 2186 ost->st_mode = st->st_mode; 2187 ost->st_nlink = st->st_nlink; 2188 ost->st_uid = st->st_uid; 2189 ost->st_gid = st->st_gid; 2190 ost->st_rdev = st->st_rdev; 2191 if (st->st_size < (quad_t)1 << 32) 2192 ost->st_size = st->st_size; 2193 else 2194 ost->st_size = -2; 2195 ost->st_atim = st->st_atim; 2196 ost->st_mtim = st->st_mtim; 2197 ost->st_ctim = st->st_ctim; 2198 ost->st_blksize = st->st_blksize; 2199 ost->st_blocks = st->st_blocks; 2200 ost->st_flags = st->st_flags; 2201 ost->st_gen = st->st_gen; 2202} 2203#endif /* COMPAT_43 */ 2204 2205/* 2206 * Get file status; this version follows links. 2207 */ 2208#ifndef _SYS_SYSPROTO_H_ 2209struct stat_args { 2210 char *path; 2211 struct stat *ub; 2212}; 2213#endif 2214int 2215sys_stat(td, uap) 2216 struct thread *td; 2217 register struct stat_args /* { 2218 char *path; 2219 struct stat *ub; 2220 } */ *uap; 2221{ 2222 struct stat sb; 2223 int error; 2224 2225 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2226 if (error == 0) 2227 error = copyout(&sb, uap->ub, sizeof (sb)); 2228 return (error); 2229} 2230 2231#ifndef _SYS_SYSPROTO_H_ 2232struct fstatat_args { 2233 int fd; 2234 char *path; 2235 struct stat *buf; 2236 int flag; 2237} 2238#endif 2239int 2240sys_fstatat(struct thread *td, struct fstatat_args *uap) 2241{ 2242 struct stat sb; 2243 int error; 2244 2245 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2246 UIO_USERSPACE, &sb); 2247 if (error == 0) 2248 error = copyout(&sb, uap->buf, sizeof (sb)); 2249 return (error); 2250} 2251 2252int 2253kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2254{ 2255 2256 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2257} 2258 2259int 2260kern_statat(struct thread *td, int flag, int fd, char *path, 2261 enum uio_seg pathseg, struct stat *sbp) 2262{ 2263 2264 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2265} 2266 2267int 2268kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2269 enum uio_seg pathseg, struct stat *sbp, 2270 void (*hook)(struct vnode *vp, struct stat *sbp)) 2271{ 2272 struct nameidata nd; 2273 struct stat sb; 2274 cap_rights_t rights; 2275 int error; 2276 2277 if (flag & ~AT_SYMLINK_NOFOLLOW) 2278 return (EINVAL); 2279 2280 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2281 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2282 cap_rights_init(&rights, CAP_FSTAT), td); 2283 2284 if ((error = namei(&nd)) != 0) 2285 return (error); 2286 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2287 if (error == 0) { 2288 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2289 if (S_ISREG(sb.st_mode)) 2290 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2291 if (__predict_false(hook != NULL)) 2292 hook(nd.ni_vp, &sb); 2293 } 2294 NDFREE(&nd, NDF_ONLY_PNBUF); 2295 vput(nd.ni_vp); 2296 if (error != 0) 2297 return (error); 2298 *sbp = sb; 2299#ifdef KTRACE 2300 if (KTRPOINT(td, KTR_STRUCT)) 2301 ktrstat(&sb); 2302#endif 2303 return (0); 2304} 2305 2306/* 2307 * Get file status; this version does not follow links. 2308 */ 2309#ifndef _SYS_SYSPROTO_H_ 2310struct lstat_args { 2311 char *path; 2312 struct stat *ub; 2313}; 2314#endif 2315int 2316sys_lstat(td, uap) 2317 struct thread *td; 2318 register struct lstat_args /* { 2319 char *path; 2320 struct stat *ub; 2321 } */ *uap; 2322{ 2323 struct stat sb; 2324 int error; 2325 2326 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2327 if (error == 0) 2328 error = copyout(&sb, uap->ub, sizeof (sb)); 2329 return (error); 2330} 2331 2332int 2333kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2334{ 2335 2336 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2337 sbp)); 2338} 2339 2340/* 2341 * Implementation of the NetBSD [l]stat() functions. 2342 */ 2343void 2344cvtnstat(sb, nsb) 2345 struct stat *sb; 2346 struct nstat *nsb; 2347{ 2348 2349 bzero(nsb, sizeof *nsb); 2350 nsb->st_dev = sb->st_dev; 2351 nsb->st_ino = sb->st_ino; 2352 nsb->st_mode = sb->st_mode; 2353 nsb->st_nlink = sb->st_nlink; 2354 nsb->st_uid = sb->st_uid; 2355 nsb->st_gid = sb->st_gid; 2356 nsb->st_rdev = sb->st_rdev; 2357 nsb->st_atim = sb->st_atim; 2358 nsb->st_mtim = sb->st_mtim; 2359 nsb->st_ctim = sb->st_ctim; 2360 nsb->st_size = sb->st_size; 2361 nsb->st_blocks = sb->st_blocks; 2362 nsb->st_blksize = sb->st_blksize; 2363 nsb->st_flags = sb->st_flags; 2364 nsb->st_gen = sb->st_gen; 2365 nsb->st_birthtim = sb->st_birthtim; 2366} 2367 2368#ifndef _SYS_SYSPROTO_H_ 2369struct nstat_args { 2370 char *path; 2371 struct nstat *ub; 2372}; 2373#endif 2374int 2375sys_nstat(td, uap) 2376 struct thread *td; 2377 register struct nstat_args /* { 2378 char *path; 2379 struct nstat *ub; 2380 } */ *uap; 2381{ 2382 struct stat sb; 2383 struct nstat nsb; 2384 int error; 2385 2386 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2387 if (error != 0) 2388 return (error); 2389 cvtnstat(&sb, &nsb); 2390 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2391} 2392 2393/* 2394 * NetBSD lstat. Get file status; this version does not follow links. 2395 */ 2396#ifndef _SYS_SYSPROTO_H_ 2397struct lstat_args { 2398 char *path; 2399 struct stat *ub; 2400}; 2401#endif 2402int 2403sys_nlstat(td, uap) 2404 struct thread *td; 2405 register struct nlstat_args /* { 2406 char *path; 2407 struct nstat *ub; 2408 } */ *uap; 2409{ 2410 struct stat sb; 2411 struct nstat nsb; 2412 int error; 2413 2414 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2415 if (error != 0) 2416 return (error); 2417 cvtnstat(&sb, &nsb); 2418 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2419} 2420 2421/* 2422 * Get configurable pathname variables. 2423 */ 2424#ifndef _SYS_SYSPROTO_H_ 2425struct pathconf_args { 2426 char *path; 2427 int name; 2428}; 2429#endif 2430int 2431sys_pathconf(td, uap) 2432 struct thread *td; 2433 register struct pathconf_args /* { 2434 char *path; 2435 int name; 2436 } */ *uap; 2437{ 2438 2439 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2440} 2441 2442#ifndef _SYS_SYSPROTO_H_ 2443struct lpathconf_args { 2444 char *path; 2445 int name; 2446}; 2447#endif 2448int 2449sys_lpathconf(td, uap) 2450 struct thread *td; 2451 register struct lpathconf_args /* { 2452 char *path; 2453 int name; 2454 } */ *uap; 2455{ 2456 2457 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2458 NOFOLLOW)); 2459} 2460 2461int 2462kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2463 u_long flags) 2464{ 2465 struct nameidata nd; 2466 int error; 2467 2468 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2469 pathseg, path, td); 2470 if ((error = namei(&nd)) != 0) 2471 return (error); 2472 NDFREE(&nd, NDF_ONLY_PNBUF); 2473 2474 /* If asynchronous I/O is available, it works for all files. */ 2475 if (name == _PC_ASYNC_IO) 2476 td->td_retval[0] = async_io_version; 2477 else 2478 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2479 vput(nd.ni_vp); 2480 return (error); 2481} 2482 2483/* 2484 * Return target name of a symbolic link. 2485 */ 2486#ifndef _SYS_SYSPROTO_H_ 2487struct readlink_args { 2488 char *path; 2489 char *buf; 2490 size_t count; 2491}; 2492#endif 2493int 2494sys_readlink(td, uap) 2495 struct thread *td; 2496 register struct readlink_args /* { 2497 char *path; 2498 char *buf; 2499 size_t count; 2500 } */ *uap; 2501{ 2502 2503 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2504 UIO_USERSPACE, uap->count)); 2505} 2506#ifndef _SYS_SYSPROTO_H_ 2507struct readlinkat_args { 2508 int fd; 2509 char *path; 2510 char *buf; 2511 size_t bufsize; 2512}; 2513#endif 2514int 2515sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2516{ 2517 2518 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2519 uap->buf, UIO_USERSPACE, uap->bufsize)); 2520} 2521 2522int 2523kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2524 enum uio_seg bufseg, size_t count) 2525{ 2526 2527 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2528 count)); 2529} 2530 2531int 2532kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2533 char *buf, enum uio_seg bufseg, size_t count) 2534{ 2535 struct vnode *vp; 2536 struct iovec aiov; 2537 struct uio auio; 2538 struct nameidata nd; 2539 int error; 2540 2541 if (count > IOSIZE_MAX) 2542 return (EINVAL); 2543 2544 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2545 pathseg, path, fd, td); 2546 2547 if ((error = namei(&nd)) != 0) 2548 return (error); 2549 NDFREE(&nd, NDF_ONLY_PNBUF); 2550 vp = nd.ni_vp; 2551#ifdef MAC 2552 error = mac_vnode_check_readlink(td->td_ucred, vp); 2553 if (error != 0) { 2554 vput(vp); 2555 return (error); 2556 } 2557#endif 2558 if (vp->v_type != VLNK) 2559 error = EINVAL; 2560 else { 2561 aiov.iov_base = buf; 2562 aiov.iov_len = count; 2563 auio.uio_iov = &aiov; 2564 auio.uio_iovcnt = 1; 2565 auio.uio_offset = 0; 2566 auio.uio_rw = UIO_READ; 2567 auio.uio_segflg = bufseg; 2568 auio.uio_td = td; 2569 auio.uio_resid = count; 2570 error = VOP_READLINK(vp, &auio, td->td_ucred); 2571 td->td_retval[0] = count - auio.uio_resid; 2572 } 2573 vput(vp); 2574 return (error); 2575} 2576 2577/* 2578 * Common implementation code for chflags() and fchflags(). 2579 */ 2580static int 2581setfflags(td, vp, flags) 2582 struct thread *td; 2583 struct vnode *vp; 2584 u_long flags; 2585{ 2586 struct mount *mp; 2587 struct vattr vattr; 2588 int error; 2589 2590 /* We can't support the value matching VNOVAL. */ 2591 if (flags == VNOVAL) 2592 return (EOPNOTSUPP); 2593 2594 /* 2595 * Prevent non-root users from setting flags on devices. When 2596 * a device is reused, users can retain ownership of the device 2597 * if they are allowed to set flags and programs assume that 2598 * chown can't fail when done as root. 2599 */ 2600 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2601 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2602 if (error != 0) 2603 return (error); 2604 } 2605 2606 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2607 return (error); 2608 VATTR_NULL(&vattr); 2609 vattr.va_flags = flags; 2610 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2611#ifdef MAC 2612 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2613 if (error == 0) 2614#endif 2615 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2616 VOP_UNLOCK(vp, 0); 2617 vn_finished_write(mp); 2618 return (error); 2619} 2620 2621/* 2622 * Change flags of a file given a path name. 2623 */ 2624#ifndef _SYS_SYSPROTO_H_ 2625struct chflags_args { 2626 const char *path; 2627 u_long flags; 2628}; 2629#endif 2630int 2631sys_chflags(td, uap) 2632 struct thread *td; 2633 register struct chflags_args /* { 2634 const char *path; 2635 u_long flags; 2636 } */ *uap; 2637{ 2638 2639 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2640} 2641 2642#ifndef _SYS_SYSPROTO_H_ 2643struct chflagsat_args { 2644 int fd; 2645 const char *path; 2646 u_long flags; 2647 int atflag; 2648} 2649#endif 2650int 2651sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2652{ 2653 int fd = uap->fd; 2654 const char *path = uap->path; 2655 u_long flags = uap->flags; 2656 int atflag = uap->atflag; 2657 2658 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2659 return (EINVAL); 2660 2661 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2662} 2663 2664static int 2665kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2666 u_long flags) 2667{ 2668 2669 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2670} 2671 2672/* 2673 * Same as chflags() but doesn't follow symlinks. 2674 */ 2675int 2676sys_lchflags(td, uap) 2677 struct thread *td; 2678 register struct lchflags_args /* { 2679 const char *path; 2680 u_long flags; 2681 } */ *uap; 2682{ 2683 2684 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2685 uap->flags, AT_SYMLINK_NOFOLLOW)); 2686} 2687 2688static int 2689kern_chflagsat(struct thread *td, int fd, const char *path, 2690 enum uio_seg pathseg, u_long flags, int atflag) 2691{ 2692 struct nameidata nd; 2693 cap_rights_t rights; 2694 int error, follow; 2695 2696 AUDIT_ARG_FFLAGS(flags); 2697 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2698 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2699 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2700 if ((error = namei(&nd)) != 0) 2701 return (error); 2702 NDFREE(&nd, NDF_ONLY_PNBUF); 2703 error = setfflags(td, nd.ni_vp, flags); 2704 vrele(nd.ni_vp); 2705 return (error); 2706} 2707 2708/* 2709 * Change flags of a file given a file descriptor. 2710 */ 2711#ifndef _SYS_SYSPROTO_H_ 2712struct fchflags_args { 2713 int fd; 2714 u_long flags; 2715}; 2716#endif 2717int 2718sys_fchflags(td, uap) 2719 struct thread *td; 2720 register struct fchflags_args /* { 2721 int fd; 2722 u_long flags; 2723 } */ *uap; 2724{ 2725 struct file *fp; 2726 cap_rights_t rights; 2727 int error; 2728 2729 AUDIT_ARG_FD(uap->fd); 2730 AUDIT_ARG_FFLAGS(uap->flags); 2731 error = getvnode(td->td_proc->p_fd, uap->fd, 2732 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2733 if (error != 0) 2734 return (error); 2735#ifdef AUDIT 2736 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2737 AUDIT_ARG_VNODE1(fp->f_vnode); 2738 VOP_UNLOCK(fp->f_vnode, 0); 2739#endif 2740 error = setfflags(td, fp->f_vnode, uap->flags); 2741 fdrop(fp, td); 2742 return (error); 2743} 2744 2745/* 2746 * Common implementation code for chmod(), lchmod() and fchmod(). 2747 */ 2748int 2749setfmode(td, cred, vp, mode) 2750 struct thread *td; 2751 struct ucred *cred; 2752 struct vnode *vp; 2753 int mode; 2754{ 2755 struct mount *mp; 2756 struct vattr vattr; 2757 int error; 2758 2759 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2760 return (error); 2761 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2762 VATTR_NULL(&vattr); 2763 vattr.va_mode = mode & ALLPERMS; 2764#ifdef MAC 2765 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2766 if (error == 0) 2767#endif 2768 error = VOP_SETATTR(vp, &vattr, cred); 2769 VOP_UNLOCK(vp, 0); 2770 vn_finished_write(mp); 2771 return (error); 2772} 2773 2774/* 2775 * Change mode of a file given path name. 2776 */ 2777#ifndef _SYS_SYSPROTO_H_ 2778struct chmod_args { 2779 char *path; 2780 int mode; 2781}; 2782#endif 2783int 2784sys_chmod(td, uap) 2785 struct thread *td; 2786 register struct chmod_args /* { 2787 char *path; 2788 int mode; 2789 } */ *uap; 2790{ 2791 2792 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2793} 2794 2795#ifndef _SYS_SYSPROTO_H_ 2796struct fchmodat_args { 2797 int dirfd; 2798 char *path; 2799 mode_t mode; 2800 int flag; 2801} 2802#endif 2803int 2804sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2805{ 2806 int flag = uap->flag; 2807 int fd = uap->fd; 2808 char *path = uap->path; 2809 mode_t mode = uap->mode; 2810 2811 if (flag & ~AT_SYMLINK_NOFOLLOW) 2812 return (EINVAL); 2813 2814 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2815} 2816 2817int 2818kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2819{ 2820 2821 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2822} 2823 2824/* 2825 * Change mode of a file given path name (don't follow links.) 2826 */ 2827#ifndef _SYS_SYSPROTO_H_ 2828struct lchmod_args { 2829 char *path; 2830 int mode; 2831}; 2832#endif 2833int 2834sys_lchmod(td, uap) 2835 struct thread *td; 2836 register struct lchmod_args /* { 2837 char *path; 2838 int mode; 2839 } */ *uap; 2840{ 2841 2842 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2843 uap->mode, AT_SYMLINK_NOFOLLOW)); 2844} 2845 2846int 2847kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2848 mode_t mode, int flag) 2849{ 2850 struct nameidata nd; 2851 cap_rights_t rights; 2852 int error, follow; 2853 2854 AUDIT_ARG_MODE(mode); 2855 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2856 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2857 cap_rights_init(&rights, CAP_FCHMOD), td); 2858 if ((error = namei(&nd)) != 0) 2859 return (error); 2860 NDFREE(&nd, NDF_ONLY_PNBUF); 2861 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2862 vrele(nd.ni_vp); 2863 return (error); 2864} 2865 2866/* 2867 * Change mode of a file given a file descriptor. 2868 */ 2869#ifndef _SYS_SYSPROTO_H_ 2870struct fchmod_args { 2871 int fd; 2872 int mode; 2873}; 2874#endif 2875int 2876sys_fchmod(struct thread *td, struct fchmod_args *uap) 2877{ 2878 struct file *fp; 2879 cap_rights_t rights; 2880 int error; 2881 2882 AUDIT_ARG_FD(uap->fd); 2883 AUDIT_ARG_MODE(uap->mode); 2884 2885 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2886 if (error != 0) 2887 return (error); 2888 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2889 fdrop(fp, td); 2890 return (error); 2891} 2892 2893/* 2894 * Common implementation for chown(), lchown(), and fchown() 2895 */ 2896int 2897setfown(td, cred, vp, uid, gid) 2898 struct thread *td; 2899 struct ucred *cred; 2900 struct vnode *vp; 2901 uid_t uid; 2902 gid_t gid; 2903{ 2904 struct mount *mp; 2905 struct vattr vattr; 2906 int error; 2907 2908 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2909 return (error); 2910 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2911 VATTR_NULL(&vattr); 2912 vattr.va_uid = uid; 2913 vattr.va_gid = gid; 2914#ifdef MAC 2915 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2916 vattr.va_gid); 2917 if (error == 0) 2918#endif 2919 error = VOP_SETATTR(vp, &vattr, cred); 2920 VOP_UNLOCK(vp, 0); 2921 vn_finished_write(mp); 2922 return (error); 2923} 2924 2925/* 2926 * Set ownership given a path name. 2927 */ 2928#ifndef _SYS_SYSPROTO_H_ 2929struct chown_args { 2930 char *path; 2931 int uid; 2932 int gid; 2933}; 2934#endif 2935int 2936sys_chown(td, uap) 2937 struct thread *td; 2938 register struct chown_args /* { 2939 char *path; 2940 int uid; 2941 int gid; 2942 } */ *uap; 2943{ 2944 2945 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2946} 2947 2948#ifndef _SYS_SYSPROTO_H_ 2949struct fchownat_args { 2950 int fd; 2951 const char * path; 2952 uid_t uid; 2953 gid_t gid; 2954 int flag; 2955}; 2956#endif 2957int 2958sys_fchownat(struct thread *td, struct fchownat_args *uap) 2959{ 2960 int flag; 2961 2962 flag = uap->flag; 2963 if (flag & ~AT_SYMLINK_NOFOLLOW) 2964 return (EINVAL); 2965 2966 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2967 uap->gid, uap->flag)); 2968} 2969 2970int 2971kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2972 int gid) 2973{ 2974 2975 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2976} 2977 2978int 2979kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2980 int uid, int gid, int flag) 2981{ 2982 struct nameidata nd; 2983 cap_rights_t rights; 2984 int error, follow; 2985 2986 AUDIT_ARG_OWNER(uid, gid); 2987 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2988 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2989 cap_rights_init(&rights, CAP_FCHOWN), td); 2990 2991 if ((error = namei(&nd)) != 0) 2992 return (error); 2993 NDFREE(&nd, NDF_ONLY_PNBUF); 2994 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2995 vrele(nd.ni_vp); 2996 return (error); 2997} 2998 2999/* 3000 * Set ownership given a path name, do not cross symlinks. 3001 */ 3002#ifndef _SYS_SYSPROTO_H_ 3003struct lchown_args { 3004 char *path; 3005 int uid; 3006 int gid; 3007}; 3008#endif 3009int 3010sys_lchown(td, uap) 3011 struct thread *td; 3012 register struct lchown_args /* { 3013 char *path; 3014 int uid; 3015 int gid; 3016 } */ *uap; 3017{ 3018 3019 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3020} 3021 3022int 3023kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3024 int gid) 3025{ 3026 3027 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3028 AT_SYMLINK_NOFOLLOW)); 3029} 3030 3031/* 3032 * Set ownership given a file descriptor. 3033 */ 3034#ifndef _SYS_SYSPROTO_H_ 3035struct fchown_args { 3036 int fd; 3037 int uid; 3038 int gid; 3039}; 3040#endif 3041int 3042sys_fchown(td, uap) 3043 struct thread *td; 3044 register struct fchown_args /* { 3045 int fd; 3046 int uid; 3047 int gid; 3048 } */ *uap; 3049{ 3050 struct file *fp; 3051 cap_rights_t rights; 3052 int error; 3053 3054 AUDIT_ARG_FD(uap->fd); 3055 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3056 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3057 if (error != 0) 3058 return (error); 3059 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3060 fdrop(fp, td); 3061 return (error); 3062} 3063 3064/* 3065 * Common implementation code for utimes(), lutimes(), and futimes(). 3066 */ 3067static int 3068getutimes(usrtvp, tvpseg, tsp) 3069 const struct timeval *usrtvp; 3070 enum uio_seg tvpseg; 3071 struct timespec *tsp; 3072{ 3073 struct timeval tv[2]; 3074 const struct timeval *tvp; 3075 int error; 3076 3077 if (usrtvp == NULL) { 3078 vfs_timestamp(&tsp[0]); 3079 tsp[1] = tsp[0]; 3080 } else { 3081 if (tvpseg == UIO_SYSSPACE) { 3082 tvp = usrtvp; 3083 } else { 3084 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3085 return (error); 3086 tvp = tv; 3087 } 3088 3089 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3090 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3091 return (EINVAL); 3092 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3093 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3094 } 3095 return (0); 3096} 3097 3098/* 3099 * Common implementation code for utimes(), lutimes(), and futimes(). 3100 */ 3101static int 3102setutimes(td, vp, ts, numtimes, nullflag) 3103 struct thread *td; 3104 struct vnode *vp; 3105 const struct timespec *ts; 3106 int numtimes; 3107 int nullflag; 3108{ 3109 struct mount *mp; 3110 struct vattr vattr; 3111 int error, setbirthtime; 3112 3113 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3114 return (error); 3115 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3116 setbirthtime = 0; 3117 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3118 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3119 setbirthtime = 1; 3120 VATTR_NULL(&vattr); 3121 vattr.va_atime = ts[0]; 3122 vattr.va_mtime = ts[1]; 3123 if (setbirthtime) 3124 vattr.va_birthtime = ts[1]; 3125 if (numtimes > 2) 3126 vattr.va_birthtime = ts[2]; 3127 if (nullflag) 3128 vattr.va_vaflags |= VA_UTIMES_NULL; 3129#ifdef MAC 3130 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3131 vattr.va_mtime); 3132#endif 3133 if (error == 0) 3134 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3135 VOP_UNLOCK(vp, 0); 3136 vn_finished_write(mp); 3137 return (error); 3138} 3139 3140/* 3141 * Set the access and modification times of a file. 3142 */ 3143#ifndef _SYS_SYSPROTO_H_ 3144struct utimes_args { 3145 char *path; 3146 struct timeval *tptr; 3147}; 3148#endif 3149int 3150sys_utimes(td, uap) 3151 struct thread *td; 3152 register struct utimes_args /* { 3153 char *path; 3154 struct timeval *tptr; 3155 } */ *uap; 3156{ 3157 3158 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3159 UIO_USERSPACE)); 3160} 3161 3162#ifndef _SYS_SYSPROTO_H_ 3163struct futimesat_args { 3164 int fd; 3165 const char * path; 3166 const struct timeval * times; 3167}; 3168#endif 3169int 3170sys_futimesat(struct thread *td, struct futimesat_args *uap) 3171{ 3172 3173 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3174 uap->times, UIO_USERSPACE)); 3175} 3176 3177int 3178kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3179 struct timeval *tptr, enum uio_seg tptrseg) 3180{ 3181 3182 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3183} 3184 3185int 3186kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3187 struct timeval *tptr, enum uio_seg tptrseg) 3188{ 3189 struct nameidata nd; 3190 struct timespec ts[2]; 3191 cap_rights_t rights; 3192 int error; 3193 3194 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3195 return (error); 3196 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3197 cap_rights_init(&rights, CAP_FUTIMES), td); 3198 3199 if ((error = namei(&nd)) != 0) 3200 return (error); 3201 NDFREE(&nd, NDF_ONLY_PNBUF); 3202 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3203 vrele(nd.ni_vp); 3204 return (error); 3205} 3206 3207/* 3208 * Set the access and modification times of a file. 3209 */ 3210#ifndef _SYS_SYSPROTO_H_ 3211struct lutimes_args { 3212 char *path; 3213 struct timeval *tptr; 3214}; 3215#endif 3216int 3217sys_lutimes(td, uap) 3218 struct thread *td; 3219 register struct lutimes_args /* { 3220 char *path; 3221 struct timeval *tptr; 3222 } */ *uap; 3223{ 3224 3225 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3226 UIO_USERSPACE)); 3227} 3228 3229int 3230kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3231 struct timeval *tptr, enum uio_seg tptrseg) 3232{ 3233 struct timespec ts[2]; 3234 struct nameidata nd; 3235 int error; 3236 3237 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3238 return (error); 3239 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3240 if ((error = namei(&nd)) != 0) 3241 return (error); 3242 NDFREE(&nd, NDF_ONLY_PNBUF); 3243 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3244 vrele(nd.ni_vp); 3245 return (error); 3246} 3247 3248/* 3249 * Set the access and modification times of a file. 3250 */ 3251#ifndef _SYS_SYSPROTO_H_ 3252struct futimes_args { 3253 int fd; 3254 struct timeval *tptr; 3255}; 3256#endif 3257int 3258sys_futimes(td, uap) 3259 struct thread *td; 3260 register struct futimes_args /* { 3261 int fd; 3262 struct timeval *tptr; 3263 } */ *uap; 3264{ 3265 3266 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3267} 3268 3269int 3270kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3271 enum uio_seg tptrseg) 3272{ 3273 struct timespec ts[2]; 3274 struct file *fp; 3275 cap_rights_t rights; 3276 int error; 3277 3278 AUDIT_ARG_FD(fd); 3279 error = getutimes(tptr, tptrseg, ts); 3280 if (error != 0) 3281 return (error); 3282 error = getvnode(td->td_proc->p_fd, fd, 3283 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3284 if (error != 0) 3285 return (error); 3286#ifdef AUDIT 3287 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3288 AUDIT_ARG_VNODE1(fp->f_vnode); 3289 VOP_UNLOCK(fp->f_vnode, 0); 3290#endif 3291 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3292 fdrop(fp, td); 3293 return (error); 3294} 3295 3296/* 3297 * Truncate a file given its path name. 3298 */ 3299#ifndef _SYS_SYSPROTO_H_ 3300struct truncate_args { 3301 char *path; 3302 int pad; 3303 off_t length; 3304}; 3305#endif 3306int 3307sys_truncate(td, uap) 3308 struct thread *td; 3309 register struct truncate_args /* { 3310 char *path; 3311 int pad; 3312 off_t length; 3313 } */ *uap; 3314{ 3315 3316 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3317} 3318 3319int 3320kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3321{ 3322 struct mount *mp; 3323 struct vnode *vp; 3324 void *rl_cookie; 3325 struct vattr vattr; 3326 struct nameidata nd; 3327 int error; 3328 3329 if (length < 0) 3330 return(EINVAL); 3331 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3332 if ((error = namei(&nd)) != 0) 3333 return (error); 3334 vp = nd.ni_vp; 3335 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3336 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3337 vn_rangelock_unlock(vp, rl_cookie); 3338 vrele(vp); 3339 return (error); 3340 } 3341 NDFREE(&nd, NDF_ONLY_PNBUF); 3342 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3343 if (vp->v_type == VDIR) 3344 error = EISDIR; 3345#ifdef MAC 3346 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3347 } 3348#endif 3349 else if ((error = vn_writechk(vp)) == 0 && 3350 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3351 VATTR_NULL(&vattr); 3352 vattr.va_size = length; 3353 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3354 } 3355 VOP_UNLOCK(vp, 0); 3356 vn_finished_write(mp); 3357 vn_rangelock_unlock(vp, rl_cookie); 3358 vrele(vp); 3359 return (error); 3360} 3361 3362#if defined(COMPAT_43) 3363/* 3364 * Truncate a file given its path name. 3365 */ 3366#ifndef _SYS_SYSPROTO_H_ 3367struct otruncate_args { 3368 char *path; 3369 long length; 3370}; 3371#endif 3372int 3373otruncate(td, uap) 3374 struct thread *td; 3375 register struct otruncate_args /* { 3376 char *path; 3377 long length; 3378 } */ *uap; 3379{ 3380 struct truncate_args /* { 3381 char *path; 3382 int pad; 3383 off_t length; 3384 } */ nuap; 3385 3386 nuap.path = uap->path; 3387 nuap.length = uap->length; 3388 return (sys_truncate(td, &nuap)); 3389} 3390#endif /* COMPAT_43 */ 3391 3392/* Versions with the pad argument */ 3393int 3394freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3395{ 3396 struct truncate_args ouap; 3397 3398 ouap.path = uap->path; 3399 ouap.length = uap->length; 3400 return (sys_truncate(td, &ouap)); 3401} 3402 3403int 3404freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3405{ 3406 struct ftruncate_args ouap; 3407 3408 ouap.fd = uap->fd; 3409 ouap.length = uap->length; 3410 return (sys_ftruncate(td, &ouap)); 3411} 3412 3413/* 3414 * Sync an open file. 3415 */ 3416#ifndef _SYS_SYSPROTO_H_ 3417struct fsync_args { 3418 int fd; 3419}; 3420#endif 3421int 3422sys_fsync(td, uap) 3423 struct thread *td; 3424 struct fsync_args /* { 3425 int fd; 3426 } */ *uap; 3427{ 3428 struct vnode *vp; 3429 struct mount *mp; 3430 struct file *fp; 3431 cap_rights_t rights; 3432 int error, lock_flags; 3433 3434 AUDIT_ARG_FD(uap->fd); 3435 error = getvnode(td->td_proc->p_fd, uap->fd, 3436 cap_rights_init(&rights, CAP_FSYNC), &fp); 3437 if (error != 0) 3438 return (error); 3439 vp = fp->f_vnode; 3440 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3441 if (error != 0) 3442 goto drop; 3443 if (MNT_SHARED_WRITES(mp) || 3444 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3445 lock_flags = LK_SHARED; 3446 } else { 3447 lock_flags = LK_EXCLUSIVE; 3448 } 3449 vn_lock(vp, lock_flags | LK_RETRY); 3450 AUDIT_ARG_VNODE1(vp); 3451 if (vp->v_object != NULL) { 3452 VM_OBJECT_WLOCK(vp->v_object); 3453 vm_object_page_clean(vp->v_object, 0, 0, 0); 3454 VM_OBJECT_WUNLOCK(vp->v_object); 3455 } 3456 error = VOP_FSYNC(vp, MNT_WAIT, td); 3457 3458 VOP_UNLOCK(vp, 0); 3459 vn_finished_write(mp); 3460drop: 3461 fdrop(fp, td); 3462 return (error); 3463} 3464 3465/* 3466 * Rename files. Source and destination must either both be directories, or 3467 * both not be directories. If target is a directory, it must be empty. 3468 */ 3469#ifndef _SYS_SYSPROTO_H_ 3470struct rename_args { 3471 char *from; 3472 char *to; 3473}; 3474#endif 3475int 3476sys_rename(td, uap) 3477 struct thread *td; 3478 register struct rename_args /* { 3479 char *from; 3480 char *to; 3481 } */ *uap; 3482{ 3483 3484 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3485} 3486 3487#ifndef _SYS_SYSPROTO_H_ 3488struct renameat_args { 3489 int oldfd; 3490 char *old; 3491 int newfd; 3492 char *new; 3493}; 3494#endif 3495int 3496sys_renameat(struct thread *td, struct renameat_args *uap) 3497{ 3498 3499 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3500 UIO_USERSPACE)); 3501} 3502 3503int 3504kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3505{ 3506 3507 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3508} 3509 3510int 3511kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3512 enum uio_seg pathseg) 3513{ 3514 struct mount *mp = NULL; 3515 struct vnode *tvp, *fvp, *tdvp; 3516 struct nameidata fromnd, tond; 3517 cap_rights_t rights; 3518 int error; 3519 3520 bwillwrite(); 3521#ifdef MAC 3522 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3523 AUDITVNODE1, pathseg, old, oldfd, 3524 cap_rights_init(&rights, CAP_RENAMEAT), td); 3525#else 3526 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3527 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3528#endif 3529 3530 if ((error = namei(&fromnd)) != 0) 3531 return (error); 3532#ifdef MAC 3533 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3534 fromnd.ni_vp, &fromnd.ni_cnd); 3535 VOP_UNLOCK(fromnd.ni_dvp, 0); 3536 if (fromnd.ni_dvp != fromnd.ni_vp) 3537 VOP_UNLOCK(fromnd.ni_vp, 0); 3538#endif 3539 fvp = fromnd.ni_vp; 3540 if (error == 0) 3541 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3542 if (error != 0) { 3543 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3544 vrele(fromnd.ni_dvp); 3545 vrele(fvp); 3546 goto out1; 3547 } 3548 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3549 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3550 cap_rights_init(&rights, CAP_LINKAT), td); 3551 if (fromnd.ni_vp->v_type == VDIR) 3552 tond.ni_cnd.cn_flags |= WILLBEDIR; 3553 if ((error = namei(&tond)) != 0) { 3554 /* Translate error code for rename("dir1", "dir2/."). */ 3555 if (error == EISDIR && fvp->v_type == VDIR) 3556 error = EINVAL; 3557 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3558 vrele(fromnd.ni_dvp); 3559 vrele(fvp); 3560 vn_finished_write(mp); 3561 goto out1; 3562 } 3563 tdvp = tond.ni_dvp; 3564 tvp = tond.ni_vp; 3565 if (tvp != NULL) { 3566 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3567 error = ENOTDIR; 3568 goto out; 3569 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3570 error = EISDIR; 3571 goto out; 3572 } 3573#ifdef CAPABILITIES 3574 if (newfd != AT_FDCWD) { 3575 /* 3576 * If the target already exists we require CAP_UNLINKAT 3577 * from 'newfd'. 3578 */ 3579 error = cap_check(&tond.ni_filecaps.fc_rights, 3580 cap_rights_init(&rights, CAP_UNLINKAT)); 3581 if (error != 0) 3582 goto out; 3583 } 3584#endif 3585 } 3586 if (fvp == tdvp) { 3587 error = EINVAL; 3588 goto out; 3589 } 3590 /* 3591 * If the source is the same as the destination (that is, if they 3592 * are links to the same vnode), then there is nothing to do. 3593 */ 3594 if (fvp == tvp) 3595 error = -1; 3596#ifdef MAC 3597 else 3598 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3599 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3600#endif 3601out: 3602 if (error == 0) { 3603 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3604 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3605 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3606 NDFREE(&tond, NDF_ONLY_PNBUF); 3607 } else { 3608 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3609 NDFREE(&tond, NDF_ONLY_PNBUF); 3610 if (tvp != NULL) 3611 vput(tvp); 3612 if (tdvp == tvp) 3613 vrele(tdvp); 3614 else 3615 vput(tdvp); 3616 vrele(fromnd.ni_dvp); 3617 vrele(fvp); 3618 } 3619 vrele(tond.ni_startdir); 3620 vn_finished_write(mp); 3621out1: 3622 if (fromnd.ni_startdir) 3623 vrele(fromnd.ni_startdir); 3624 if (error == -1) 3625 return (0); 3626 return (error); 3627} 3628 3629/* 3630 * Make a directory file. 3631 */ 3632#ifndef _SYS_SYSPROTO_H_ 3633struct mkdir_args { 3634 char *path; 3635 int mode; 3636}; 3637#endif 3638int 3639sys_mkdir(td, uap) 3640 struct thread *td; 3641 register struct mkdir_args /* { 3642 char *path; 3643 int mode; 3644 } */ *uap; 3645{ 3646 3647 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3648} 3649 3650#ifndef _SYS_SYSPROTO_H_ 3651struct mkdirat_args { 3652 int fd; 3653 char *path; 3654 mode_t mode; 3655}; 3656#endif 3657int 3658sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3659{ 3660 3661 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3662} 3663 3664int 3665kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3666{ 3667 3668 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3669} 3670 3671int 3672kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3673 int mode) 3674{ 3675 struct mount *mp; 3676 struct vnode *vp; 3677 struct vattr vattr; 3678 struct nameidata nd; 3679 cap_rights_t rights; 3680 int error; 3681 3682 AUDIT_ARG_MODE(mode); 3683restart: 3684 bwillwrite(); 3685 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3686 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3687 nd.ni_cnd.cn_flags |= WILLBEDIR; 3688 if ((error = namei(&nd)) != 0) 3689 return (error); 3690 vp = nd.ni_vp; 3691 if (vp != NULL) { 3692 NDFREE(&nd, NDF_ONLY_PNBUF); 3693 /* 3694 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3695 * the strange behaviour of leaving the vnode unlocked 3696 * if the target is the same vnode as the parent. 3697 */ 3698 if (vp == nd.ni_dvp) 3699 vrele(nd.ni_dvp); 3700 else 3701 vput(nd.ni_dvp); 3702 vrele(vp); 3703 return (EEXIST); 3704 } 3705 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3706 NDFREE(&nd, NDF_ONLY_PNBUF); 3707 vput(nd.ni_dvp); 3708 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3709 return (error); 3710 goto restart; 3711 } 3712 VATTR_NULL(&vattr); 3713 vattr.va_type = VDIR; 3714 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3715#ifdef MAC 3716 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3717 &vattr); 3718 if (error != 0) 3719 goto out; 3720#endif 3721 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3722#ifdef MAC 3723out: 3724#endif 3725 NDFREE(&nd, NDF_ONLY_PNBUF); 3726 vput(nd.ni_dvp); 3727 if (error == 0) 3728 vput(nd.ni_vp); 3729 vn_finished_write(mp); 3730 return (error); 3731} 3732 3733/* 3734 * Remove a directory file. 3735 */ 3736#ifndef _SYS_SYSPROTO_H_ 3737struct rmdir_args { 3738 char *path; 3739}; 3740#endif 3741int 3742sys_rmdir(td, uap) 3743 struct thread *td; 3744 struct rmdir_args /* { 3745 char *path; 3746 } */ *uap; 3747{ 3748 3749 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3750} 3751 3752int 3753kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3754{ 3755 3756 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3757} 3758 3759int 3760kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3761{ 3762 struct mount *mp; 3763 struct vnode *vp; 3764 struct nameidata nd; 3765 cap_rights_t rights; 3766 int error; 3767 3768restart: 3769 bwillwrite(); 3770 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3771 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3772 if ((error = namei(&nd)) != 0) 3773 return (error); 3774 vp = nd.ni_vp; 3775 if (vp->v_type != VDIR) { 3776 error = ENOTDIR; 3777 goto out; 3778 } 3779 /* 3780 * No rmdir "." please. 3781 */ 3782 if (nd.ni_dvp == vp) { 3783 error = EINVAL; 3784 goto out; 3785 } 3786 /* 3787 * The root of a mounted filesystem cannot be deleted. 3788 */ 3789 if (vp->v_vflag & VV_ROOT) { 3790 error = EBUSY; 3791 goto out; 3792 } 3793#ifdef MAC 3794 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3795 &nd.ni_cnd); 3796 if (error != 0) 3797 goto out; 3798#endif 3799 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3800 NDFREE(&nd, NDF_ONLY_PNBUF); 3801 vput(vp); 3802 if (nd.ni_dvp == vp) 3803 vrele(nd.ni_dvp); 3804 else 3805 vput(nd.ni_dvp); 3806 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3807 return (error); 3808 goto restart; 3809 } 3810 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3811 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3812 vn_finished_write(mp); 3813out: 3814 NDFREE(&nd, NDF_ONLY_PNBUF); 3815 vput(vp); 3816 if (nd.ni_dvp == vp) 3817 vrele(nd.ni_dvp); 3818 else 3819 vput(nd.ni_dvp); 3820 return (error); 3821} 3822 3823#ifdef COMPAT_43 3824/* 3825 * Read a block of directory entries in a filesystem independent format. 3826 */ 3827#ifndef _SYS_SYSPROTO_H_ 3828struct ogetdirentries_args { 3829 int fd; 3830 char *buf; 3831 u_int count; 3832 long *basep; 3833}; 3834#endif 3835int 3836ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3837{ 3838 long loff; 3839 int error; 3840 3841 error = kern_ogetdirentries(td, uap, &loff); 3842 if (error == 0) 3843 error = copyout(&loff, uap->basep, sizeof(long)); 3844 return (error); 3845} 3846 3847int 3848kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3849 long *ploff) 3850{ 3851 struct vnode *vp; 3852 struct file *fp; 3853 struct uio auio, kuio; 3854 struct iovec aiov, kiov; 3855 struct dirent *dp, *edp; 3856 cap_rights_t rights; 3857 caddr_t dirbuf; 3858 int error, eofflag, readcnt; 3859 long loff; 3860 off_t foffset; 3861 3862 /* XXX arbitrary sanity limit on `count'. */ 3863 if (uap->count > 64 * 1024) 3864 return (EINVAL); 3865 error = getvnode(td->td_proc->p_fd, uap->fd, 3866 cap_rights_init(&rights, CAP_READ), &fp); 3867 if (error != 0) 3868 return (error); 3869 if ((fp->f_flag & FREAD) == 0) { 3870 fdrop(fp, td); 3871 return (EBADF); 3872 } 3873 vp = fp->f_vnode; 3874 foffset = foffset_lock(fp, 0); 3875unionread: 3876 if (vp->v_type != VDIR) { 3877 foffset_unlock(fp, foffset, 0); 3878 fdrop(fp, td); 3879 return (EINVAL); 3880 } 3881 aiov.iov_base = uap->buf; 3882 aiov.iov_len = uap->count; 3883 auio.uio_iov = &aiov; 3884 auio.uio_iovcnt = 1; 3885 auio.uio_rw = UIO_READ; 3886 auio.uio_segflg = UIO_USERSPACE; 3887 auio.uio_td = td; 3888 auio.uio_resid = uap->count; 3889 vn_lock(vp, LK_SHARED | LK_RETRY); 3890 loff = auio.uio_offset = foffset; 3891#ifdef MAC 3892 error = mac_vnode_check_readdir(td->td_ucred, vp); 3893 if (error != 0) { 3894 VOP_UNLOCK(vp, 0); 3895 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3896 fdrop(fp, td); 3897 return (error); 3898 } 3899#endif 3900# if (BYTE_ORDER != LITTLE_ENDIAN) 3901 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3902 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3903 NULL, NULL); 3904 foffset = auio.uio_offset; 3905 } else 3906# endif 3907 { 3908 kuio = auio; 3909 kuio.uio_iov = &kiov; 3910 kuio.uio_segflg = UIO_SYSSPACE; 3911 kiov.iov_len = uap->count; 3912 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3913 kiov.iov_base = dirbuf; 3914 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3915 NULL, NULL); 3916 foffset = kuio.uio_offset; 3917 if (error == 0) { 3918 readcnt = uap->count - kuio.uio_resid; 3919 edp = (struct dirent *)&dirbuf[readcnt]; 3920 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3921# if (BYTE_ORDER == LITTLE_ENDIAN) 3922 /* 3923 * The expected low byte of 3924 * dp->d_namlen is our dp->d_type. 3925 * The high MBZ byte of dp->d_namlen 3926 * is our dp->d_namlen. 3927 */ 3928 dp->d_type = dp->d_namlen; 3929 dp->d_namlen = 0; 3930# else 3931 /* 3932 * The dp->d_type is the high byte 3933 * of the expected dp->d_namlen, 3934 * so must be zero'ed. 3935 */ 3936 dp->d_type = 0; 3937# endif 3938 if (dp->d_reclen > 0) { 3939 dp = (struct dirent *) 3940 ((char *)dp + dp->d_reclen); 3941 } else { 3942 error = EIO; 3943 break; 3944 } 3945 } 3946 if (dp >= edp) 3947 error = uiomove(dirbuf, readcnt, &auio); 3948 } 3949 free(dirbuf, M_TEMP); 3950 } 3951 if (error != 0) { 3952 VOP_UNLOCK(vp, 0); 3953 foffset_unlock(fp, foffset, 0); 3954 fdrop(fp, td); 3955 return (error); 3956 } 3957 if (uap->count == auio.uio_resid && 3958 (vp->v_vflag & VV_ROOT) && 3959 (vp->v_mount->mnt_flag & MNT_UNION)) { 3960 struct vnode *tvp = vp; 3961 vp = vp->v_mount->mnt_vnodecovered; 3962 VREF(vp); 3963 fp->f_vnode = vp; 3964 fp->f_data = vp; 3965 foffset = 0; 3966 vput(tvp); 3967 goto unionread; 3968 } 3969 VOP_UNLOCK(vp, 0); 3970 foffset_unlock(fp, foffset, 0); 3971 fdrop(fp, td); 3972 td->td_retval[0] = uap->count - auio.uio_resid; 3973 if (error == 0) 3974 *ploff = loff; 3975 return (error); 3976} 3977#endif /* COMPAT_43 */ 3978 3979/* 3980 * Read a block of directory entries in a filesystem independent format. 3981 */ 3982#ifndef _SYS_SYSPROTO_H_ 3983struct getdirentries_args { 3984 int fd; 3985 char *buf; 3986 u_int count; 3987 long *basep; 3988}; 3989#endif 3990int 3991sys_getdirentries(td, uap) 3992 struct thread *td; 3993 register struct getdirentries_args /* { 3994 int fd; 3995 char *buf; 3996 u_int count; 3997 long *basep; 3998 } */ *uap; 3999{ 4000 long base; 4001 int error; 4002 4003 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4004 NULL, UIO_USERSPACE); 4005 if (error != 0) 4006 return (error); 4007 if (uap->basep != NULL) 4008 error = copyout(&base, uap->basep, sizeof(long)); 4009 return (error); 4010} 4011 4012int 4013kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4014 long *basep, ssize_t *residp, enum uio_seg bufseg) 4015{ 4016 struct vnode *vp; 4017 struct file *fp; 4018 struct uio auio; 4019 struct iovec aiov; 4020 cap_rights_t rights; 4021 long loff; 4022 int error, eofflag; 4023 off_t foffset; 4024 4025 AUDIT_ARG_FD(fd); 4026 if (count > IOSIZE_MAX) 4027 return (EINVAL); 4028 auio.uio_resid = count; 4029 error = getvnode(td->td_proc->p_fd, fd, 4030 cap_rights_init(&rights, CAP_READ), &fp); 4031 if (error != 0) 4032 return (error); 4033 if ((fp->f_flag & FREAD) == 0) { 4034 fdrop(fp, td); 4035 return (EBADF); 4036 } 4037 vp = fp->f_vnode; 4038 foffset = foffset_lock(fp, 0); 4039unionread: 4040 if (vp->v_type != VDIR) { 4041 error = EINVAL; 4042 goto fail; 4043 } 4044 aiov.iov_base = buf; 4045 aiov.iov_len = count; 4046 auio.uio_iov = &aiov; 4047 auio.uio_iovcnt = 1; 4048 auio.uio_rw = UIO_READ; 4049 auio.uio_segflg = bufseg; 4050 auio.uio_td = td; 4051 vn_lock(vp, LK_SHARED | LK_RETRY); 4052 AUDIT_ARG_VNODE1(vp); 4053 loff = auio.uio_offset = foffset; 4054#ifdef MAC 4055 error = mac_vnode_check_readdir(td->td_ucred, vp); 4056 if (error == 0) 4057#endif 4058 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4059 NULL); 4060 foffset = auio.uio_offset; 4061 if (error != 0) { 4062 VOP_UNLOCK(vp, 0); 4063 goto fail; 4064 } 4065 if (count == auio.uio_resid && 4066 (vp->v_vflag & VV_ROOT) && 4067 (vp->v_mount->mnt_flag & MNT_UNION)) { 4068 struct vnode *tvp = vp; 4069 4070 vp = vp->v_mount->mnt_vnodecovered; 4071 VREF(vp); 4072 fp->f_vnode = vp; 4073 fp->f_data = vp; 4074 foffset = 0; 4075 vput(tvp); 4076 goto unionread; 4077 } 4078 VOP_UNLOCK(vp, 0); 4079 *basep = loff; 4080 if (residp != NULL) 4081 *residp = auio.uio_resid; 4082 td->td_retval[0] = count - auio.uio_resid; 4083fail: 4084 foffset_unlock(fp, foffset, 0); 4085 fdrop(fp, td); 4086 return (error); 4087} 4088 4089#ifndef _SYS_SYSPROTO_H_ 4090struct getdents_args { 4091 int fd; 4092 char *buf; 4093 size_t count; 4094}; 4095#endif 4096int 4097sys_getdents(td, uap) 4098 struct thread *td; 4099 register struct getdents_args /* { 4100 int fd; 4101 char *buf; 4102 u_int count; 4103 } */ *uap; 4104{ 4105 struct getdirentries_args ap; 4106 4107 ap.fd = uap->fd; 4108 ap.buf = uap->buf; 4109 ap.count = uap->count; 4110 ap.basep = NULL; 4111 return (sys_getdirentries(td, &ap)); 4112} 4113 4114/* 4115 * Set the mode mask for creation of filesystem nodes. 4116 */ 4117#ifndef _SYS_SYSPROTO_H_ 4118struct umask_args { 4119 int newmask; 4120}; 4121#endif 4122int 4123sys_umask(td, uap) 4124 struct thread *td; 4125 struct umask_args /* { 4126 int newmask; 4127 } */ *uap; 4128{ 4129 register struct filedesc *fdp; 4130 4131 FILEDESC_XLOCK(td->td_proc->p_fd); 4132 fdp = td->td_proc->p_fd; 4133 td->td_retval[0] = fdp->fd_cmask; 4134 fdp->fd_cmask = uap->newmask & ALLPERMS; 4135 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4136 return (0); 4137} 4138 4139/* 4140 * Void all references to file by ripping underlying filesystem away from 4141 * vnode. 4142 */ 4143#ifndef _SYS_SYSPROTO_H_ 4144struct revoke_args { 4145 char *path; 4146}; 4147#endif 4148int 4149sys_revoke(td, uap) 4150 struct thread *td; 4151 register struct revoke_args /* { 4152 char *path; 4153 } */ *uap; 4154{ 4155 struct vnode *vp; 4156 struct vattr vattr; 4157 struct nameidata nd; 4158 int error; 4159 4160 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4161 uap->path, td); 4162 if ((error = namei(&nd)) != 0) 4163 return (error); 4164 vp = nd.ni_vp; 4165 NDFREE(&nd, NDF_ONLY_PNBUF); 4166 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4167 error = EINVAL; 4168 goto out; 4169 } 4170#ifdef MAC 4171 error = mac_vnode_check_revoke(td->td_ucred, vp); 4172 if (error != 0) 4173 goto out; 4174#endif 4175 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4176 if (error != 0) 4177 goto out; 4178 if (td->td_ucred->cr_uid != vattr.va_uid) { 4179 error = priv_check(td, PRIV_VFS_ADMIN); 4180 if (error != 0) 4181 goto out; 4182 } 4183 if (vcount(vp) > 1) 4184 VOP_REVOKE(vp, REVOKEALL); 4185out: 4186 vput(vp); 4187 return (error); 4188} 4189 4190/* 4191 * Convert a user file descriptor to a kernel file entry and check that, if it 4192 * is a capability, the correct rights are present. A reference on the file 4193 * entry is held upon returning. 4194 */ 4195int 4196getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4197{ 4198 struct file *fp; 4199 int error; 4200 4201 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4202 if (error != 0) 4203 return (error); 4204 4205 /* 4206 * The file could be not of the vnode type, or it may be not 4207 * yet fully initialized, in which case the f_vnode pointer 4208 * may be set, but f_ops is still badfileops. E.g., 4209 * devfs_open() transiently create such situation to 4210 * facilitate csw d_fdopen(). 4211 * 4212 * Dupfdopen() handling in kern_openat() installs the 4213 * half-baked file into the process descriptor table, allowing 4214 * other thread to dereference it. Guard against the race by 4215 * checking f_ops. 4216 */ 4217 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4218 fdrop(fp, curthread); 4219 return (EINVAL); 4220 } 4221 *fpp = fp; 4222 return (0); 4223} 4224 4225 4226/* 4227 * Get an (NFS) file handle. 4228 */ 4229#ifndef _SYS_SYSPROTO_H_ 4230struct lgetfh_args { 4231 char *fname; 4232 fhandle_t *fhp; 4233}; 4234#endif 4235int 4236sys_lgetfh(td, uap) 4237 struct thread *td; 4238 register struct lgetfh_args *uap; 4239{ 4240 struct nameidata nd; 4241 fhandle_t fh; 4242 register struct vnode *vp; 4243 int error; 4244 4245 error = priv_check(td, PRIV_VFS_GETFH); 4246 if (error != 0) 4247 return (error); 4248 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4249 uap->fname, td); 4250 error = namei(&nd); 4251 if (error != 0) 4252 return (error); 4253 NDFREE(&nd, NDF_ONLY_PNBUF); 4254 vp = nd.ni_vp; 4255 bzero(&fh, sizeof(fh)); 4256 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4257 error = VOP_VPTOFH(vp, &fh.fh_fid); 4258 vput(vp); 4259 if (error == 0) 4260 error = copyout(&fh, uap->fhp, sizeof (fh)); 4261 return (error); 4262} 4263 4264#ifndef _SYS_SYSPROTO_H_ 4265struct getfh_args { 4266 char *fname; 4267 fhandle_t *fhp; 4268}; 4269#endif 4270int 4271sys_getfh(td, uap) 4272 struct thread *td; 4273 register struct getfh_args *uap; 4274{ 4275 struct nameidata nd; 4276 fhandle_t fh; 4277 register struct vnode *vp; 4278 int error; 4279 4280 error = priv_check(td, PRIV_VFS_GETFH); 4281 if (error != 0) 4282 return (error); 4283 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4284 uap->fname, td); 4285 error = namei(&nd); 4286 if (error != 0) 4287 return (error); 4288 NDFREE(&nd, NDF_ONLY_PNBUF); 4289 vp = nd.ni_vp; 4290 bzero(&fh, sizeof(fh)); 4291 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4292 error = VOP_VPTOFH(vp, &fh.fh_fid); 4293 vput(vp); 4294 if (error == 0) 4295 error = copyout(&fh, uap->fhp, sizeof (fh)); 4296 return (error); 4297} 4298 4299/* 4300 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4301 * open descriptor. 4302 * 4303 * warning: do not remove the priv_check() call or this becomes one giant 4304 * security hole. 4305 */ 4306#ifndef _SYS_SYSPROTO_H_ 4307struct fhopen_args { 4308 const struct fhandle *u_fhp; 4309 int flags; 4310}; 4311#endif 4312int 4313sys_fhopen(td, uap) 4314 struct thread *td; 4315 struct fhopen_args /* { 4316 const struct fhandle *u_fhp; 4317 int flags; 4318 } */ *uap; 4319{ 4320 struct mount *mp; 4321 struct vnode *vp; 4322 struct fhandle fhp; 4323 struct file *fp; 4324 int fmode, error; 4325 int indx; 4326 4327 error = priv_check(td, PRIV_VFS_FHOPEN); 4328 if (error != 0) 4329 return (error); 4330 indx = -1; 4331 fmode = FFLAGS(uap->flags); 4332 /* why not allow a non-read/write open for our lockd? */ 4333 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4334 return (EINVAL); 4335 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4336 if (error != 0) 4337 return(error); 4338 /* find the mount point */ 4339 mp = vfs_busyfs(&fhp.fh_fsid); 4340 if (mp == NULL) 4341 return (ESTALE); 4342 /* now give me my vnode, it gets returned to me locked */ 4343 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4344 vfs_unbusy(mp); 4345 if (error != 0) 4346 return (error); 4347 4348 error = falloc_noinstall(td, &fp); 4349 if (error != 0) { 4350 vput(vp); 4351 return (error); 4352 } 4353 /* 4354 * An extra reference on `fp' has been held for us by 4355 * falloc_noinstall(). 4356 */ 4357 4358#ifdef INVARIANTS 4359 td->td_dupfd = -1; 4360#endif 4361 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4362 if (error != 0) { 4363 KASSERT(fp->f_ops == &badfileops, 4364 ("VOP_OPEN in fhopen() set f_ops")); 4365 KASSERT(td->td_dupfd < 0, 4366 ("fhopen() encountered fdopen()")); 4367 4368 vput(vp); 4369 goto bad; 4370 } 4371#ifdef INVARIANTS 4372 td->td_dupfd = 0; 4373#endif 4374 fp->f_vnode = vp; 4375 fp->f_seqcount = 1; 4376 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4377 &vnops); 4378 VOP_UNLOCK(vp, 0); 4379 if ((fmode & O_TRUNC) != 0) { 4380 error = fo_truncate(fp, 0, td->td_ucred, td); 4381 if (error != 0) 4382 goto bad; 4383 } 4384 4385 error = finstall(td, fp, &indx, fmode, NULL); 4386bad: 4387 fdrop(fp, td); 4388 td->td_retval[0] = indx; 4389 return (error); 4390} 4391 4392/* 4393 * Stat an (NFS) file handle. 4394 */ 4395#ifndef _SYS_SYSPROTO_H_ 4396struct fhstat_args { 4397 struct fhandle *u_fhp; 4398 struct stat *sb; 4399}; 4400#endif 4401int 4402sys_fhstat(td, uap) 4403 struct thread *td; 4404 register struct fhstat_args /* { 4405 struct fhandle *u_fhp; 4406 struct stat *sb; 4407 } */ *uap; 4408{ 4409 struct stat sb; 4410 struct fhandle fh; 4411 int error; 4412 4413 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4414 if (error != 0) 4415 return (error); 4416 error = kern_fhstat(td, fh, &sb); 4417 if (error == 0) 4418 error = copyout(&sb, uap->sb, sizeof(sb)); 4419 return (error); 4420} 4421 4422int 4423kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4424{ 4425 struct mount *mp; 4426 struct vnode *vp; 4427 int error; 4428 4429 error = priv_check(td, PRIV_VFS_FHSTAT); 4430 if (error != 0) 4431 return (error); 4432 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4433 return (ESTALE); 4434 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4435 vfs_unbusy(mp); 4436 if (error != 0) 4437 return (error); 4438 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4439 vput(vp); 4440 return (error); 4441} 4442 4443/* 4444 * Implement fstatfs() for (NFS) file handles. 4445 */ 4446#ifndef _SYS_SYSPROTO_H_ 4447struct fhstatfs_args { 4448 struct fhandle *u_fhp; 4449 struct statfs *buf; 4450}; 4451#endif 4452int 4453sys_fhstatfs(td, uap) 4454 struct thread *td; 4455 struct fhstatfs_args /* { 4456 struct fhandle *u_fhp; 4457 struct statfs *buf; 4458 } */ *uap; 4459{ 4460 struct statfs sf; 4461 fhandle_t fh; 4462 int error; 4463 4464 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4465 if (error != 0) 4466 return (error); 4467 error = kern_fhstatfs(td, fh, &sf); 4468 if (error != 0) 4469 return (error); 4470 return (copyout(&sf, uap->buf, sizeof(sf))); 4471} 4472 4473int 4474kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4475{ 4476 struct statfs *sp; 4477 struct mount *mp; 4478 struct vnode *vp; 4479 int error; 4480 4481 error = priv_check(td, PRIV_VFS_FHSTATFS); 4482 if (error != 0) 4483 return (error); 4484 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4485 return (ESTALE); 4486 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4487 if (error != 0) { 4488 vfs_unbusy(mp); 4489 return (error); 4490 } 4491 vput(vp); 4492 error = prison_canseemount(td->td_ucred, mp); 4493 if (error != 0) 4494 goto out; 4495#ifdef MAC 4496 error = mac_mount_check_stat(td->td_ucred, mp); 4497 if (error != 0) 4498 goto out; 4499#endif 4500 /* 4501 * Set these in case the underlying filesystem fails to do so. 4502 */ 4503 sp = &mp->mnt_stat; 4504 sp->f_version = STATFS_VERSION; 4505 sp->f_namemax = NAME_MAX; 4506 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4507 error = VFS_STATFS(mp, sp); 4508 if (error == 0) 4509 *buf = *sp; 4510out: 4511 vfs_unbusy(mp); 4512 return (error); 4513} 4514 4515int 4516kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4517{ 4518 struct file *fp; 4519 struct mount *mp; 4520 struct vnode *vp; 4521 cap_rights_t rights; 4522 off_t olen, ooffset; 4523 int error; 4524 4525 fp = NULL; 4526 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4527 if (error != 0) 4528 goto out; 4529 4530 switch (fp->f_type) { 4531 case DTYPE_VNODE: 4532 break; 4533 case DTYPE_PIPE: 4534 case DTYPE_FIFO: 4535 error = ESPIPE; 4536 goto out; 4537 default: 4538 error = ENODEV; 4539 goto out; 4540 } 4541 if ((fp->f_flag & FWRITE) == 0) { 4542 error = EBADF; 4543 goto out; 4544 } 4545 vp = fp->f_vnode; 4546 if (vp->v_type != VREG) { 4547 error = ENODEV; 4548 goto out; 4549 } 4550 if (offset < 0 || len <= 0) { 4551 error = EINVAL; 4552 goto out; 4553 } 4554 /* Check for wrap. */ 4555 if (offset > OFF_MAX - len) { 4556 error = EFBIG; 4557 goto out; 4558 } 4559 4560 /* Allocating blocks may take a long time, so iterate. */ 4561 for (;;) { 4562 olen = len; 4563 ooffset = offset; 4564 4565 bwillwrite(); 4566 mp = NULL; 4567 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4568 if (error != 0) 4569 break; 4570 error = vn_lock(vp, LK_EXCLUSIVE); 4571 if (error != 0) { 4572 vn_finished_write(mp); 4573 break; 4574 } 4575#ifdef MAC 4576 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4577 if (error == 0) 4578#endif 4579 error = VOP_ALLOCATE(vp, &offset, &len); 4580 VOP_UNLOCK(vp, 0); 4581 vn_finished_write(mp); 4582 4583 if (olen + ooffset != offset + len) { 4584 panic("offset + len changed from %jx/%jx to %jx/%jx", 4585 ooffset, olen, offset, len); 4586 } 4587 if (error != 0 || len == 0) 4588 break; 4589 KASSERT(olen > len, ("Iteration did not make progress?")); 4590 maybe_yield(); 4591 } 4592 out: 4593 if (fp != NULL) 4594 fdrop(fp, td); 4595 return (error); 4596} 4597 4598int 4599sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4600{ 4601 4602 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4603 uap->len); 4604 return (0); 4605} 4606 4607/* 4608 * Unlike madvise(2), we do not make a best effort to remember every 4609 * possible caching hint. Instead, we remember the last setting with 4610 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4611 * region of any current setting. 4612 */ 4613int 4614kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4615 int advice) 4616{ 4617 struct fadvise_info *fa, *new; 4618 struct file *fp; 4619 struct vnode *vp; 4620 cap_rights_t rights; 4621 off_t end; 4622 int error; 4623 4624 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4625 return (EINVAL); 4626 switch (advice) { 4627 case POSIX_FADV_SEQUENTIAL: 4628 case POSIX_FADV_RANDOM: 4629 case POSIX_FADV_NOREUSE: 4630 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4631 break; 4632 case POSIX_FADV_NORMAL: 4633 case POSIX_FADV_WILLNEED: 4634 case POSIX_FADV_DONTNEED: 4635 new = NULL; 4636 break; 4637 default: 4638 return (EINVAL); 4639 } 4640 /* XXX: CAP_POSIX_FADVISE? */ 4641 error = fget(td, fd, cap_rights_init(&rights), &fp); 4642 if (error != 0) 4643 goto out; 4644 4645 switch (fp->f_type) { 4646 case DTYPE_VNODE: 4647 break; 4648 case DTYPE_PIPE: 4649 case DTYPE_FIFO: 4650 error = ESPIPE; 4651 goto out; 4652 default: 4653 error = ENODEV; 4654 goto out; 4655 } 4656 vp = fp->f_vnode; 4657 if (vp->v_type != VREG) { 4658 error = ENODEV; 4659 goto out; 4660 } 4661 if (len == 0) 4662 end = OFF_MAX; 4663 else 4664 end = offset + len - 1; 4665 switch (advice) { 4666 case POSIX_FADV_SEQUENTIAL: 4667 case POSIX_FADV_RANDOM: 4668 case POSIX_FADV_NOREUSE: 4669 /* 4670 * Try to merge any existing non-standard region with 4671 * this new region if possible, otherwise create a new 4672 * non-standard region for this request. 4673 */ 4674 mtx_pool_lock(mtxpool_sleep, fp); 4675 fa = fp->f_advice; 4676 if (fa != NULL && fa->fa_advice == advice && 4677 ((fa->fa_start <= end && fa->fa_end >= offset) || 4678 (end != OFF_MAX && fa->fa_start == end + 1) || 4679 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4680 if (offset < fa->fa_start) 4681 fa->fa_start = offset; 4682 if (end > fa->fa_end) 4683 fa->fa_end = end; 4684 } else { 4685 new->fa_advice = advice; 4686 new->fa_start = offset; 4687 new->fa_end = end; 4688 new->fa_prevstart = 0; 4689 new->fa_prevend = 0; 4690 fp->f_advice = new; 4691 new = fa; 4692 } 4693 mtx_pool_unlock(mtxpool_sleep, fp); 4694 break; 4695 case POSIX_FADV_NORMAL: 4696 /* 4697 * If a the "normal" region overlaps with an existing 4698 * non-standard region, trim or remove the 4699 * non-standard region. 4700 */ 4701 mtx_pool_lock(mtxpool_sleep, fp); 4702 fa = fp->f_advice; 4703 if (fa != NULL) { 4704 if (offset <= fa->fa_start && end >= fa->fa_end) { 4705 new = fa; 4706 fp->f_advice = NULL; 4707 } else if (offset <= fa->fa_start && 4708 end >= fa->fa_start) 4709 fa->fa_start = end + 1; 4710 else if (offset <= fa->fa_end && end >= fa->fa_end) 4711 fa->fa_end = offset - 1; 4712 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4713 /* 4714 * If the "normal" region is a middle 4715 * portion of the existing 4716 * non-standard region, just remove 4717 * the whole thing rather than picking 4718 * one side or the other to 4719 * preserve. 4720 */ 4721 new = fa; 4722 fp->f_advice = NULL; 4723 } 4724 } 4725 mtx_pool_unlock(mtxpool_sleep, fp); 4726 break; 4727 case POSIX_FADV_WILLNEED: 4728 case POSIX_FADV_DONTNEED: 4729 error = VOP_ADVISE(vp, offset, end, advice); 4730 break; 4731 } 4732out: 4733 if (fp != NULL) 4734 fdrop(fp, td); 4735 free(new, M_FADVISE); 4736 return (error); 4737} 4738 4739int 4740sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4741{ 4742 4743 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4744 uap->len, uap->advice); 4745 return (0); 4746} 4747