1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD$"); 37 38/* 39 * These functions support the macros and help fiddle mbuf chains for 40 * the nfs op functions. 
They do things like create the rpc header and 41 * copy data between mbuf chains and uio lists. 42 */ 43 44#include "opt_kdtrace.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/bio.h> 50#include <sys/buf.h> 51#include <sys/proc.h> 52#include <sys/mount.h> 53#include <sys/vnode.h> 54#include <sys/namei.h> 55#include <sys/mbuf.h> 56#include <sys/socket.h> 57#include <sys/stat.h> 58#include <sys/malloc.h> 59#include <sys/rwlock.h> 60#include <sys/sysent.h> 61#include <sys/syscall.h> 62#include <sys/sysproto.h> 63#include <sys/taskqueue.h> 64 65#include <vm/vm.h> 66#include <vm/vm_object.h> 67#include <vm/vm_extern.h> 68#include <vm/uma.h> 69 70#include <nfs/nfsproto.h> 71#include <nfsclient/nfs.h> 72#include <nfsclient/nfsnode.h> 73#include <nfs/nfs_kdtrace.h> 74#include <nfs/xdr_subs.h> 75#include <nfsclient/nfsm_subs.h> 76#include <nfsclient/nfsmount.h> 77 78#include <netinet/in.h> 79 80/* 81 * Note that stdarg.h and the ANSI style va_start macro is used for both 82 * ANSI and traditional C compilers. 
 */
#include <machine/stdarg.h>

#ifdef KDTRACE_HOOKS
dtrace_nfsclient_attrcache_flush_probe_func_t
		dtrace_nfsclient_attrcache_flush_done_probe;
uint32_t	nfsclient_attrcache_flush_done_id;

dtrace_nfsclient_attrcache_get_hit_probe_func_t
		dtrace_nfsclient_attrcache_get_hit_probe;
uint32_t	nfsclient_attrcache_get_hit_id;

dtrace_nfsclient_attrcache_get_miss_probe_func_t
		dtrace_nfsclient_attrcache_get_miss_probe;
uint32_t	nfsclient_attrcache_get_miss_id;

dtrace_nfsclient_attrcache_load_probe_func_t
		dtrace_nfsclient_attrcache_load_done_probe;
uint32_t	nfsclient_attrcache_load_done_id;
#endif /* KDTRACE_HOOKS */

/*
 * Data items converted to xdr at startup, since they are constant
 * This is kinda hokey, but may save a little time doing byte swaps
 */
u_int32_t nfs_xdrneg1;
u_int32_t nfs_true, nfs_false;

/* And other global data */
static u_int32_t nfs_xid = 0;
/* NFSv2 fa_type -> vtype translation table (indexed by wire value 0..7). */
static enum vtype nv2tov_type[8] = {
	VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON
};

int nfs_ticks;
int nfs_pbuf_freecnt = -1;	/* start out unlimited */

struct nfs_bufq nfs_bufq;
static struct mtx nfs_xid_mtx;	/* protects nfs_xid */
struct task nfs_nfsiodnew_task;

/*
 * and the reverse mapping from generic to Version 2 procedure numbers
 */
int nfsv2_procid[NFS_NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

LIST_HEAD(nfsnodehashhead, nfsnode);

/*
 * Generate the next RPC transaction id (xid).
 *
 * The first call seeds nfs_xid from random(); thereafter ids increment
 * monotonically under nfs_xid_mtx, skipping 0 so a zero xid is never
 * handed out.  Returns the new, nonzero xid.
 */
u_int32_t
nfs_xid_gen(void)
{
	uint32_t xid;

	mtx_lock(&nfs_xid_mtx);

	/* Get a pretty random xid to start with */
	if (!nfs_xid)
		nfs_xid = random();
	/*
	 * Skip zero xid if it should ever happen.
	 */
	if (++nfs_xid == 0)
		nfs_xid++;
	xid = nfs_xid;
	mtx_unlock(&nfs_xid_mtx);
	return xid;
}

/*
 * copies a uio scatter/gather list to an mbuf chain.
 * NOTE: can only handle iovcnt == 1
 *
 * Appends siz bytes from uiop to the chain ending at *mq, allocating
 * cluster or plain mbufs as needed, then XDR-pads the data to a 4-byte
 * boundary with zero bytes.  On return *mq is the (possibly new) tail
 * mbuf and *bpos points just past the last byte written.
 */
int
nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos)
{
	char *uiocp;
	struct mbuf *mp, *mp2;
	int xfer, left, mlen;
	int uiosiz, clflg, rem;
	char *cp;

	KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1"));

	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
		clflg = 1;
	else
		clflg = 0;
	/* Number of zero bytes needed to round up to a 4-byte XDR boundary. */
	rem = nfsm_rndup(siz) - siz;
	mp = mp2 = *mq;
	while (siz > 0) {
		left = uiop->uio_iov->iov_len;
		uiocp = uiop->uio_iov->iov_base;
		if (left > siz)
			left = siz;
		uiosiz = left;
		while (left > 0) {
			mlen = M_TRAILINGSPACE(mp);
			if (mlen == 0) {
				/* Tail is full; extend the chain. */
				if (clflg)
					mp = m_getcl(M_WAITOK, MT_DATA, 0);
				else
					mp = m_get(M_WAITOK, MT_DATA);
				mp2->m_next = mp;
				mp2 = mp;
				mlen = M_TRAILINGSPACE(mp);
			}
			xfer = (left > mlen) ? mlen : left;
#ifdef notdef
			/* Not Yet.. */
			if (uiop->uio_iov->iov_op != NULL)
				(*(uiop->uio_iov->iov_op))
				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else
#endif
			/* Kernel source copies directly; user source via copyin(). */
			if (uiop->uio_segflg == UIO_SYSSPACE)
				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else
				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			mp->m_len += xfer;
			left -= xfer;
			uiocp += xfer;
			uiop->uio_offset += xfer;
			uiop->uio_resid -= xfer;
		}
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + uiosiz;
		uiop->uio_iov->iov_len -= uiosiz;
		siz -= uiosiz;
	}
	if (rem > 0) {
		/* Write the XDR pad bytes, growing the chain if needed. */
		if (rem > M_TRAILINGSPACE(mp)) {
			mp = m_get(M_WAITOK, MT_DATA);
			mp2->m_next = mp;
		}
		cp = mtod(mp, caddr_t)+mp->m_len;
		for (left = 0; left < rem; left++)
			*cp++ = '\0';
		mp->m_len += rem;
		*bpos = cp;
	} else
		*bpos = mtod(mp, caddr_t)+mp->m_len;
	*mq = mp;
	return (0);
}

/*
 * Copy a string into mbufs for the hard cases...
 *
 * Emits the XDR length word followed by siz bytes of cp, spilling into
 * newly allocated mbufs when the current one (*mb) lacks space.  On
 * return *mb/*bpos point at the new chain tail.  Always returns 0.
 */
int
nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz)
{
	struct mbuf *m1 = NULL, *m2;
	long left, xfer, len, tlen;
	u_int32_t *tl;
	int putsize;

	putsize = 1;	/* length word not yet emitted */
	m2 = *mb;
	left = M_TRAILINGSPACE(m2);
	if (left > 0) {
		/* Emit the XDR length word into the current mbuf. */
		tl = ((u_int32_t *)(*bpos));
		*tl++ = txdr_unsigned(siz);
		putsize = 0;
		left -= NFSX_UNSIGNED;
		m2->m_len += NFSX_UNSIGNED;
		if (left > 0) {
			bcopy(cp, (caddr_t) tl, left);
			siz -= left;
			cp += left;
			m2->m_len += left;
			left = 0;
		}
	}
	/* Loop around adding mbufs */
	while (siz > 0) {
		if (siz > MLEN) {
			m1 = m_getcl(M_WAITOK, MT_DATA, 0);
			m1->m_len = MCLBYTES;
		} else {
			m1 = m_get(M_WAITOK, MT_DATA);
			m1->m_len = MLEN;
		}
		m2->m_next = m1;
		m2 = m1;
		tl = mtod(m1, u_int32_t *);
		tlen = 0;
		if (putsize) {
			/* Length word still owed; it goes first in this mbuf. */
			*tl++ = txdr_unsigned(siz);
			m1->m_len -= NFSX_UNSIGNED;
			tlen = NFSX_UNSIGNED;
			putsize = 0;
		}
		if (siz < m1->m_len) {
			/* Last chunk: round up and zero the XDR pad word. */
			len = nfsm_rndup(siz);
			xfer = siz;
			if (xfer < len)
				*(tl+(xfer>>2)) = 0;
		} else {
			xfer = len = m1->m_len;
		}
		bcopy(cp, (caddr_t) tl, xfer);
		m1->m_len = len+tlen;
		siz -= xfer;
		cp += xfer;
	}
	*mb = m1;
	*bpos = mtod(m1, caddr_t)+m1->m_len;
	return (0);
}

/*
 * Called once to initialize data structures...
 *
 * Sets up the nfsmount UMA zone, the pre-XDR'd constants, nfs_ticks,
 * the nfsiod arrays, the nfsnode hash table, and the module mutexes
 * and task.  Always returns 0.
 */
int
nfs_init(struct vfsconf *vfsp)
{
	int i;

	nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nfs_true = txdr_unsigned(TRUE);
	nfs_false = txdr_unsigned(FALSE);
	nfs_xdrneg1 = txdr_unsigned(-1);
	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
	if (nfs_ticks < 1)
		nfs_ticks = 1;
	/* Ensure async daemons disabled */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
		nfs_iodwant[i] = NFSIOD_NOT_AVAILABLE;
		nfs_iodmount[i] = NULL;
	}
	nfs_nhinit();			/* Init the nfsnode table */

	/*
	 * Initialize reply list and start timer
	 */
	mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
	mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF);
	TASK_INIT(&nfs_nfsiodnew_task, 0, nfs_nfsiodnew_tq, NULL);

	nfs_pbuf_freecnt = nswbuf / 2 + 1;

	return (0);
}

/*
 * Module teardown: stop all nfsiod threads, then release the nfsnode
 * table and the nfsmount zone.  Always returns 0.
 */
int
nfs_uninit(struct vfsconf *vfsp)
{
	int i;

	/*
	 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
	 * any sleeping nfsiods so they check nfs_iodmax and exit.
	 * Drain nfsiodnew task before we wait for them to finish.
	 */
	mtx_lock(&nfs_iod_mtx);
	nfs_iodmax = 0;
	mtx_unlock(&nfs_iod_mtx);
	taskqueue_drain(taskqueue_thread, &nfs_nfsiodnew_task);
	mtx_lock(&nfs_iod_mtx);
	for (i = 0; i < nfs_numasync; i++)
		if (nfs_iodwant[i] == NFSIOD_AVAILABLE)
			wakeup(&nfs_iodwant[i]);
	/* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
	while (nfs_numasync)
		msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
	mtx_unlock(&nfs_iod_mtx);
	nfs_nhuninit();
	uma_zdestroy(nfsmount_zone);
	return (0);
}

/*
 * Acquire the per-node directory cookie lock (NDIRCOOKIELK flag),
 * sleeping on n_flag until it is free.
 */
void
nfs_dircookie_lock(struct nfsnode *np)
{
	mtx_lock(&np->n_mtx);
	while (np->n_flag & NDIRCOOKIELK)
		(void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
	np->n_flag |= NDIRCOOKIELK;
	mtx_unlock(&np->n_mtx);
}

/*
 * Release the directory cookie lock and wake any waiters.
 */
void
nfs_dircookie_unlock(struct nfsnode *np)
{
	mtx_lock(&np->n_mtx);
	np->n_flag &= ~NDIRCOOKIELK;
	wakeup(&np->n_flag);
	mtx_unlock(&np->n_mtx);
}

/*
 * Upgrade a shared vnode lock to exclusive if necessary.
 * Returns the previous lock state so the caller can restore it
 * later with nfs_downgrade_vnlock().
 */
int
nfs_upgrade_vnlock(struct vnode *vp)
{
	int old_lock;

	ASSERT_VOP_LOCKED(vp, "nfs_upgrade_vnlock");
	old_lock = VOP_ISLOCKED(vp);
	if (old_lock != LK_EXCLUSIVE) {
		KASSERT(old_lock == LK_SHARED,
		    ("nfs_upgrade_vnlock: wrong old_lock %d", old_lock));
		/* Upgrade to exclusive lock, this might block */
		vn_lock(vp, LK_UPGRADE | LK_RETRY);
	}
	return (old_lock);
}

/*
 * Undo nfs_upgrade_vnlock(): drop back to a shared lock if that is
 * what the caller originally held.
 */
void
nfs_downgrade_vnlock(struct vnode *vp, int old_lock)
{
	if (old_lock != LK_EXCLUSIVE) {
		KASSERT(old_lock == LK_SHARED, ("wrong old_lock %d", old_lock));
		/* Downgrade from exclusive lock. */
		vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
	}
}

/*
 * printf() wrapper that serializes output under Giant.
 */
void
nfs_printf(const char *fmt, ...)
{
	va_list ap;

	mtx_lock(&Giant);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
	mtx_unlock(&Giant);
}

/*
 * Attribute cache routines.
 * nfs_loadattrcache() - loads or updates the cache contents from attributes
 *	that are on the mbuf list
 * nfs_getattrcache() - returns valid attributes if found in cache, returns
 *	error otherwise
 */

/*
 * Load the attribute cache (that lives in the nfsnode entry) with
 * the values on the mbuf list and
 * Iff vap not NULL
 * copy the attributes to *vaper
 *
 * *mdp/*dposp are advanced past the wire fattr.  Returns 0 on success
 * or EBADRPC if the fattr cannot be dissected from the mbuf chain.
 * With dontshrink set, a server-reported size smaller than n_size is
 * ignored and the cached attributes are invalidated instead.
 */
int
nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
    struct vattr *vaper, int dontshrink)
{
	struct vnode *vp = *vpp;
	struct vattr *vap;
	struct nfs_fattr *fp;
	struct nfsnode *np = NULL;
	int32_t t1;
	caddr_t cp2;
	int rdev;
	struct mbuf *md;
	enum vtype vtyp;
	u_short vmode;
	struct timespec mtime, mtime_save;
	int v3 = NFS_ISV3(vp);
	int error = 0;
	u_quad_t nsize;
	int setnsize;

	md = *mdp;
	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
	cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_WAITOK);
	if (cp2 == NULL) {
		error = EBADRPC;
		goto out;
	}
	fp = (struct nfs_fattr *)cp2;
	if (v3) {
		vtyp = nfsv3tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
		    fxdr_unsigned(int, fp->fa3_rdev.specdata2));
		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
	} else {
		vtyp = nfsv2tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		/*
		 * XXX
		 *
		 * The duplicate information returned in fa_type and fa_mode
		 * is an ambiguity in the NFS version 2 protocol.
		 *
		 * VREG should be taken literally as a regular file.  If a
		 * server intends to return some type information differently
		 * in the upper bits of the mode field (e.g. for sockets, or
		 * FIFOs), NFSv2 mandates fa_type to be VNON.  Anyway, we
		 * leave the examination of the mode bits even in the VREG
		 * case to avoid breakage for bogus servers, but we make sure
		 * that there are actually type bits set in the upper part of
		 * fa_mode (and failing that, trust the va_type field).
		 *
		 * NFSv3 cleared the issue, and requires fa_mode to not
		 * contain any type information (while also introducing
		 * sockets and FIFOs for fa_type).
		 */
		if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
			vtyp = IFTOVT(vmode);
		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);

		/*
		 * Really ugly NFSv2 kludge.
		 */
		if (vtyp == VCHR && rdev == 0xffffffff)
			vtyp = VFIFO;
	}

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	mtx_lock(&np->n_mtx);
	if (vp->v_type != vtyp) {
		vp->v_type = vtyp;
		if (vp->v_type == VFIFO)
			vp->v_op = &nfs_fifoops;
		np->n_mtime = mtime;
	}
	vap = &np->n_vattr;
	vap->va_type = vtyp;
	vap->va_mode = (vmode & 07777);
	vap->va_rdev = rdev;
	mtime_save = vap->va_mtime;	/* old mtime, for staleness check below */
	vap->va_mtime = mtime;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	if (v3) {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_hyper(&fp->fa3_size);
		vap->va_blocksize = NFS_FABLKSIZE;
		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
		vap->va_fileid = fxdr_unsigned(int32_t,
		    fp->fa3_fileid.nfsuquad[1]);
		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
		fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
		vap->va_flags = 0;
		vap->va_filerev = 0;
	} else {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
		vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
		vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
		    * NFS_FABLKSIZE;
		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
		vap->va_flags = 0;
		vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
		    fp->fa2_ctime.nfsv2_sec);
		vap->va_ctime.tv_nsec = 0;
		/* NFSv2 has no va_gen; the ctime usec field is reused for it. */
		vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec);
		vap->va_filerev = 0;
	}
	np->n_attrstamp = time_second;
	setnsize = 0;
	nsize = 0;
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (dontshrink && vap->va_size < np->n_size) {
				/*
				 * We've been told not to shrink the file;
				 * zero np->n_attrstamp to indicate that
				 * the attributes are stale.
				 */
				vap->va_size = np->n_size;
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				vnode_pager_setsize(vp, np->n_size);
			} else if (np->n_flag & NMODIFIED) {
				/*
				 * We've modified the file: Use the larger
				 * of our size, and the server's size.
				 */
				if (vap->va_size < np->n_size) {
					vap->va_size = np->n_size;
				} else {
					np->n_size = vap->va_size;
					np->n_flag |= NSIZECHANGED;
				}
				vnode_pager_setsize(vp, np->n_size);
			} else if (vap->va_size < np->n_size) {
				/*
				 * When shrinking the size, the call to
				 * vnode_pager_setsize() cannot be done
				 * with the mutex held, so delay it until
				 * after the mtx_unlock call.
				 */
				nsize = np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				setnsize = 1;
			} else {
				np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				vnode_pager_setsize(vp, np->n_size);
			}
		} else {
			np->n_size = vap->va_size;
		}
	}
	/*
	 * The following checks are added to prevent a race between (say)
	 * a READDIR+ and a WRITE.
	 * READDIR+, WRITE requests sent out.
	 * READDIR+ resp, WRITE resp received on client.
	 * However, the WRITE resp was handled before the READDIR+ resp
	 * causing the post op attrs from the write to be loaded first
	 * and the attrs from the READDIR+ to be loaded later. If this
	 * happens, we have stale attrs loaded into the attrcache.
	 * We detect this by checking for the mtime moving backwards.
	 * We invalidate the attrcache when this happens.
	 */
	if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
		/* mtime went backwards; loaded attrs are stale */
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	if (vaper != NULL) {
		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
		/* Locally cached access/update times override the server's. */
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_atim, vaper->va_mtime = np->n_mtim;
		}
	}

#ifdef KDTRACE_HOOKS
	if (np->n_attrstamp != 0)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, &np->n_vattr, 0);
#endif
	mtx_unlock(&np->n_mtx);
	if (setnsize)
		vnode_pager_setsize(vp, nsize);
out:
#ifdef KDTRACE_HOOKS
	if (error)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, NULL, error);
#endif
	return (error);
}

#ifdef NFS_ACDEBUG
#include <sys/sysctl.h>
SYSCTL_DECL(_vfs_oldnfs);
static int nfs_acdebug;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0,
    "Toggle acdebug (attribute cache debug) flag");
#endif

/*
 * Check the time stamp
 * If the cache is valid, copy contents to *vap and return 0
 * otherwise return an error
 *
 * The cache timeout scales with the file's age (one tenth of the time
 * since last modification), clamped to the mount's ac{reg,dir}{min,max}
 * bounds.  Returns ENOENT on a cache miss.
 */
int
nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
{
	struct nfsnode *np;
	struct vattr *vap;
	struct nfsmount *nmp;
	int timeo;

	np = VTONFS(vp);
	vap = &np->n_vattr;
	nmp = VFSTONFS(vp->v_mount);
#ifdef NFS_ACDEBUG
	mtx_lock(&Giant);	/* nfs_printf() */
#endif
	mtx_lock(&np->n_mtx);
	/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
	timeo = (time_second - np->n_mtime.tv_sec) / 10;

#ifdef NFS_ACDEBUG
	if (nfs_acdebug>1)
		nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
#endif

	if (vap->va_type == VDIR) {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
			timeo = nmp->nm_acdirmin;
		else if (timeo > nmp->nm_acdirmax)
			timeo = nmp->nm_acdirmax;
	} else {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
			timeo = nmp->nm_acregmin;
		else if (timeo > nmp->nm_acregmax)
			timeo = nmp->nm_acregmax;
	}

#ifdef NFS_ACDEBUG
	if (nfs_acdebug > 2)
		nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
		    nmp->nm_acregmin, nmp->nm_acregmax,
		    nmp->nm_acdirmin, nmp->nm_acdirmax);

	if (nfs_acdebug)
		nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
		    (time_second - np->n_attrstamp), timeo);
#endif

	if ((time_second - np->n_attrstamp) >= timeo) {
		nfsstats.attrcache_misses++;
		mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
		mtx_unlock(&Giant);	/* nfs_printf() */
#endif
		KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
		return (ENOENT);
	}
	nfsstats.attrcache_hits++;
	/* Reconcile cached size with our notion, as in nfs_loadattrcache(). */
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (np->n_flag & NMODIFIED) {
				if (vap->va_size < np->n_size)
					vap->va_size = np->n_size;
				else
					np->n_size = vap->va_size;
			} else {
				np->n_size = vap->va_size;
			}
			vnode_pager_setsize(vp, np->n_size);
		} else {
			np->n_size = vap->va_size;
		}
	}
	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
	/* Locally cached access/update times override the server's. */
	if (np->n_flag & NCHG) {
		if (np->n_flag & NACC)
			vaper->va_atime = np->n_atim;
		if (np->n_flag & NUPD)
			vaper->va_mtime = np->n_mtim;
	}
	mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
	mtx_unlock(&Giant);	/* nfs_printf() */
#endif
	KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap);
	return (0);
}

/*
 * Purge all cached information about an NFS vnode including name
 * cache entries, the attribute cache, and the access cache. This is
 * called when an NFS request for a node fails with a stale
 * filehandle.
 */
void
nfs_purgecache(struct vnode *vp)
{
	struct nfsnode *np;
	int i;

	np = VTONFS(vp);
	cache_purge(vp);
	mtx_lock(&np->n_mtx);
	/* Zero stamps invalidate the attr cache and every access cache slot. */
	np->n_attrstamp = 0;
	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
		np->n_accesscache[i].stamp = 0;
	KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
	mtx_unlock(&np->n_mtx);
}

static nfsuint64 nfs_nullcookie = { { 0, 0 } };
/*
 * This function finds the directory cookie that corresponds to the
 * logical byte offset given.
 *
 * Cookies are stored NFSNUMCOOKIES per nfsdmap block, one per
 * NFS_DIRBLKSIZ of directory offset; offset 0 maps to the shared null
 * cookie.  With add set, missing blocks are allocated on the way;
 * otherwise NULL is returned when the offset is beyond what is cached.
 */
nfsuint64 *
nfs_getcookie(struct nfsnode *np, off_t off, int add)
{
	struct nfsdmap *dp, *dp2;
	int pos;
	nfsuint64 *retval = NULL;

	pos = (uoff_t)off / NFS_DIRBLKSIZ;
	if (pos == 0 || off < 0) {
		KASSERT(!add, ("nfs getcookie add at <= 0"));
		return (&nfs_nullcookie);
	}
	pos--;
	dp = LIST_FIRST(&np->n_cookies);
	if (!dp) {
		if (add) {
			dp = malloc(sizeof (struct nfsdmap),
			    M_NFSDIROFF, M_WAITOK);
			dp->ndm_eocookie = 0;
			LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
		} else
			goto out;
	}
	/* Walk (and possibly extend) the block list to the right block. */
	while (pos >= NFSNUMCOOKIES) {
		pos -= NFSNUMCOOKIES;
		if (LIST_NEXT(dp, ndm_list)) {
			if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
			    pos >= dp->ndm_eocookie)
				goto out;
			dp = LIST_NEXT(dp, ndm_list);
		} else if (add) {
			dp2 = malloc(sizeof (struct nfsdmap),
			    M_NFSDIROFF, M_WAITOK);
			dp2->ndm_eocookie = 0;
			LIST_INSERT_AFTER(dp, dp2, ndm_list);
			dp = dp2;
		} else
			goto out;
	}
	if (pos >= dp->ndm_eocookie) {
		if (add)
			dp->ndm_eocookie = pos + 1;
		else
			goto out;
	}
	retval = &dp->ndm_cookies[pos];
out:
	return (retval);
}

/*
 * Invalidate cached directory information, except for the actual directory
 * blocks (which are invalidated separately).
 * Done mainly to avoid the use of stale offset cookies.
 */
void
nfs_invaldir(struct vnode *vp)
{
	struct nfsnode *np = VTONFS(vp);

	KASSERT(vp->v_type == VDIR, ("nfs: invaldir not dir"));
	nfs_dircookie_lock(np);
	np->n_direofoffset = 0;
	np->n_cookieverf.nfsuquad[0] = 0;
	np->n_cookieverf.nfsuquad[1] = 0;
	/* Truncate the cookie list to empty; blocks stay allocated. */
	if (LIST_FIRST(&np->n_cookies))
		LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0;
	nfs_dircookie_unlock(np);
}

/*
 * The write verifier has changed (probably due to a server reboot), so all
 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
 * mount point.
 *
 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
 * writes are not clusterable.
 */
void
nfs_clearcommit(struct mount *mp)
{
	struct vnode *vp, *nvp;
	struct buf *bp, *nbp;
	struct bufobj *bo;

	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
		bo = &vp->v_bufobj;
		/* Hold the vnode so it can't recycle while we scan its bufs. */
		vholdl(vp);
		VI_UNLOCK(vp);
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
		}
		BO_UNLOCK(bo);
		vdrop(vp);
	}
}

/*
 * Helper functions for former macros.  Some of these should be
 * moved to their callers.
891 */ 892 893int 894nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f, 895 struct mbuf **md, caddr_t *dpos) 896{ 897 struct nfsnode *ttnp; 898 struct vnode *ttvp; 899 nfsfh_t *ttfhp; 900 u_int32_t *tl; 901 int ttfhsize; 902 int t1; 903 904 if (v3) { 905 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 906 if (tl == NULL) 907 return EBADRPC; 908 *f = fxdr_unsigned(int, *tl); 909 } else 910 *f = 1; 911 if (*f) { 912 t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos); 913 if (t1 != 0) 914 return t1; 915 t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE); 916 if (t1 != 0) 917 return t1; 918 *v = NFSTOV(ttnp); 919 } 920 if (v3) { 921 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 922 if (tl == NULL) 923 return EBADRPC; 924 if (*f) 925 *f = fxdr_unsigned(int, *tl); 926 else if (fxdr_unsigned(int, *tl)) 927 nfsm_adv_xx(NFSX_V3FATTR, md, dpos); 928 } 929 if (*f) { 930 ttvp = *v; 931 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0); 932 if (t1) 933 return t1; 934 *v = ttvp; 935 } 936 return 0; 937} 938 939int 940nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos) 941{ 942 u_int32_t *tl; 943 944 if (v3) { 945 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 946 if (tl == NULL) 947 return EBADRPC; 948 *s = fxdr_unsigned(int, *tl); 949 if (*s <= 0 || *s > NFSX_V3FHMAX) 950 return EBADRPC; 951 } else 952 *s = NFSX_V2FH; 953 *f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos); 954 if (*f == NULL) 955 return EBADRPC; 956 else 957 return 0; 958} 959 960 961int 962nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, 963 caddr_t *dpos) 964{ 965 int t1; 966 967 struct vnode *ttvp = *v; 968 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0); 969 if (t1 != 0) 970 return t1; 971 *v = ttvp; 972 return 0; 973} 974 975int 976nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va, 977 struct mbuf **md, caddr_t *dpos) 978{ 979 u_int32_t *tl; 980 int t1; 981 982 struct vnode *ttvp = *v; 983 tl = nfsm_dissect_xx(NFSX_UNSIGNED, 
md, dpos); 984 if (tl == NULL) 985 return EBADRPC; 986 *f = fxdr_unsigned(int, *tl); 987 if (*f != 0) { 988 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 1); 989 if (t1 != 0) { 990 *f = 0; 991 return t1; 992 } 993 *v = ttvp; 994 } 995 return 0; 996} 997 998int 999nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos) 1000{ 1001 u_int32_t *tl; 1002 int ttattrf, ttretf = 0; 1003 int t1; 1004 1005 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1006 if (tl == NULL) 1007 return EBADRPC; 1008 if (*tl == nfs_true) { 1009 tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos); 1010 if (tl == NULL) 1011 return EBADRPC; 1012 mtx_lock(&(VTONFS(*v))->n_mtx); 1013 if (*f) 1014 ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && 1015 VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); 1016 mtx_unlock(&(VTONFS(*v))->n_mtx); 1017 } 1018 t1 = nfsm_postop_attr_xx(v, &ttattrf, NULL, md, dpos); 1019 if (t1) 1020 return t1; 1021 if (*f) 1022 *f = ttretf; 1023 else 1024 *f = ttattrf; 1025 return 0; 1026} 1027 1028int 1029nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos) 1030{ 1031 u_int32_t *tl; 1032 int t1; 1033 1034 if (s > m) 1035 return ENAMETOOLONG; 1036 t1 = nfsm_rndup(s) + NFSX_UNSIGNED; 1037 if (t1 <= M_TRAILINGSPACE(*mb)) { 1038 tl = nfsm_build_xx(t1, mb, bpos); 1039 *tl++ = txdr_unsigned(s); 1040 *(tl + ((t1 >> 2) - 2)) = 0; 1041 bcopy(a, tl, s); 1042 } else { 1043 t1 = nfsm_strtmbuf(mb, bpos, a, s); 1044 if (t1 != 0) 1045 return t1; 1046 } 1047 return 0; 1048} 1049 1050int 1051nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos) 1052{ 1053 u_int32_t *tl; 1054 int t1; 1055 caddr_t cp; 1056 1057 if (v3) { 1058 t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; 1059 if (t1 < M_TRAILINGSPACE(*mb)) { 1060 tl = nfsm_build_xx(t1, mb, bpos); 1061 *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); 1062 *(tl + ((t1 >> 2) - 2)) = 0; 1063 bcopy(VTONFS(v)->n_fhp, tl, VTONFS(v)->n_fhsize); 
1064 } else { 1065 t1 = nfsm_strtmbuf(mb, bpos, 1066 (const char *)VTONFS(v)->n_fhp, 1067 VTONFS(v)->n_fhsize); 1068 if (t1 != 0) 1069 return t1; 1070 } 1071 } else { 1072 cp = nfsm_build_xx(NFSX_V2FH, mb, bpos); 1073 bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH); 1074 } 1075 return 0; 1076} 1077 1078void 1079nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb, 1080 caddr_t *bpos) 1081{ 1082 u_int32_t *tl; 1083 1084 if (va->va_mode != (mode_t)VNOVAL) { 1085 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1086 *tl++ = nfs_true; 1087 *tl = txdr_unsigned(va->va_mode); 1088 } else { 1089 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1090 *tl = nfs_false; 1091 } 1092 if (full && va->va_uid != (uid_t)VNOVAL) { 1093 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1094 *tl++ = nfs_true; 1095 *tl = txdr_unsigned(va->va_uid); 1096 } else { 1097 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1098 *tl = nfs_false; 1099 } 1100 if (full && va->va_gid != (gid_t)VNOVAL) { 1101 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1102 *tl++ = nfs_true; 1103 *tl = txdr_unsigned(va->va_gid); 1104 } else { 1105 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1106 *tl = nfs_false; 1107 } 1108 if (full && va->va_size != VNOVAL) { 1109 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1110 *tl++ = nfs_true; 1111 txdr_hyper(va->va_size, tl); 1112 } else { 1113 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1114 *tl = nfs_false; 1115 } 1116 if (va->va_atime.tv_sec != VNOVAL) { 1117 if ((va->va_vaflags & VA_UTIMES_NULL) == 0) { 1118 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1119 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1120 txdr_nfsv3time(&va->va_atime, tl); 1121 } else { 1122 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1123 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1124 } 1125 } else { 1126 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1127 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1128 } 1129 if (va->va_mtime.tv_sec != VNOVAL) { 1130 if ((va->va_vaflags & 
VA_UTIMES_NULL) == 0) { 1131 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1132 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1133 txdr_nfsv3time(&va->va_mtime, tl); 1134 } else { 1135 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1136 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1137 } 1138 } else { 1139 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1140 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1141 } 1142} 1143