/*
 * Copyright (c) 1999-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	hfs_vfsops.c
 *	derived from @(#)ufs_vfsops.c	8.8 (Berkeley) 5/20/95
 *
 *	(c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
 *
 *	hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 * 72 */ 73#include <sys/param.h> 74#include <sys/systm.h> 75#include <sys/kauth.h> 76 77#include <sys/ubc.h> 78#include <sys/ubc_internal.h> 79#include <sys/vnode_internal.h> 80#include <sys/mount_internal.h> 81#include <sys/sysctl.h> 82#include <sys/malloc.h> 83#include <sys/stat.h> 84#include <sys/quota.h> 85#include <sys/disk.h> 86#include <sys/paths.h> 87#include <sys/utfconv.h> 88#include <sys/kdebug.h> 89#include <sys/fslog.h> 90#include <sys/ubc.h> 91#include <sys/buf_internal.h> 92 93#include <kern/locks.h> 94 95#include <vfs/vfs_journal.h> 96 97#include <miscfs/specfs/specdev.h> 98#include <hfs/hfs_mount.h> 99 100#include <libkern/crypto/md5.h> 101#include <uuid/uuid.h> 102 103#include "hfs.h" 104#include "hfs_catalog.h" 105#include "hfs_cnode.h" 106#include "hfs_dbg.h" 107#include "hfs_endian.h" 108#include "hfs_hotfiles.h" 109#include "hfs_quota.h" 110#include "hfs_btreeio.h" 111 112#include "hfscommon/headers/FileMgrInternal.h" 113#include "hfscommon/headers/BTreesInternal.h" 114 115#if CONFIG_PROTECT 116#include <sys/cprotect.h> 117#endif 118 119#if CONFIG_HFS_ALLOC_RBTREE 120#include "hfscommon/headers/HybridAllocator.h" 121#endif 122 123#define HFS_MOUNT_DEBUG 1 124 125#if HFS_DIAGNOSTIC 126int hfs_dbg_all = 0; 127int hfs_dbg_err = 0; 128#endif 129 130/* Enable/disable debugging code for live volume resizing */ 131int hfs_resize_debug = 0; 132 133lck_grp_attr_t * hfs_group_attr; 134lck_attr_t * hfs_lock_attr; 135lck_grp_t * hfs_mutex_group; 136lck_grp_t * hfs_rwlock_group; 137lck_grp_t * hfs_spinlock_group; 138 139extern struct vnodeopv_desc hfs_vnodeop_opv_desc; 140extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; 141 142/* not static so we can re-use in hfs_readwrite.c for build_path calls */ 143int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); 144 145static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); 146static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, 
struct vnode **vpp, vfs_context_t context); 147static int hfs_flushfiles(struct mount *, int, struct proc *); 148static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); 149static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp); 150static int hfs_init(struct vfsconf *vfsp); 151static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context); 152static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); 153static int hfs_start(struct mount *mp, int flags, vfs_context_t context); 154static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); 155static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec); 156static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); 157static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context); 158static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context); 159 160void hfs_initialize_allocator (struct hfsmount *hfsmp); 161int hfs_teardown_allocator (struct hfsmount *hfsmp); 162void hfs_unmap_blocks (struct hfsmount *hfsmp); 163 164int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); 165int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context); 166int hfs_reload(struct mount *mp); 167int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context); 168int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context); 169int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 170 user_addr_t newp, size_t newlen, vfs_context_t context); 171int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); 172 173/* 174 * Called by vfs_mountroot when mounting HFS Plus as 
root. 175 */ 176 177int 178hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) 179{ 180 struct hfsmount *hfsmp; 181 ExtendedVCB *vcb; 182 struct vfsstatfs *vfsp; 183 int error; 184 185 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) { 186 if (HFS_MOUNT_DEBUG) { 187 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n", 188 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device")); 189 } 190 return (error); 191 } 192 193 /* Init hfsmp */ 194 hfsmp = VFSTOHFS(mp); 195 196 hfsmp->hfs_uid = UNKNOWNUID; 197 hfsmp->hfs_gid = UNKNOWNGID; 198 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */ 199 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */ 200 201 /* Establish the free block reserve. */ 202 vcb = HFSTOVCB(hfsmp); 203 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100; 204 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize); 205 206 vfsp = vfs_statfs(mp); 207 (void)hfs_statfs(mp, vfsp, NULL); 208 209 return (0); 210} 211 212 213/* 214 * VFS Operations. 215 * 216 * mount system call 217 */ 218 219int 220hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context) 221{ 222 struct proc *p = vfs_context_proc(context); 223 struct hfsmount *hfsmp = NULL; 224 struct hfs_mount_args args; 225 int retval = E_NONE; 226 u_int32_t cmdflags; 227 228 if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) { 229 if (HFS_MOUNT_DEBUG) { 230 printf("hfs_mount: copyin returned %d for fs\n", retval); 231 } 232 return (retval); 233 } 234 cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS; 235 if (cmdflags & MNT_UPDATE) { 236 hfsmp = VFSTOHFS(mp); 237 238 /* Reload incore data after an fsck. 
*/ 239 if (cmdflags & MNT_RELOAD) { 240 if (vfs_isrdonly(mp)) { 241 int error = hfs_reload(mp); 242 if (error && HFS_MOUNT_DEBUG) { 243 printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN); 244 } 245 return error; 246 } 247 else { 248 if (HFS_MOUNT_DEBUG) { 249 printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN); 250 } 251 return (EINVAL); 252 } 253 } 254 255 /* Change to a read-only file system. */ 256 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && 257 vfs_isrdonly(mp)) { 258 int flags; 259 260 /* Set flag to indicate that a downgrade to read-only 261 * is in progress and therefore block any further 262 * modifications to the file system. 263 */ 264 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 265 hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE; 266 hfsmp->hfs_downgrading_proc = current_thread(); 267 hfs_unlock_global (hfsmp); 268 269 /* use VFS_SYNC to push out System (btree) files */ 270 retval = VFS_SYNC(mp, MNT_WAIT, context); 271 if (retval && ((cmdflags & MNT_FORCE) == 0)) { 272 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; 273 hfsmp->hfs_downgrading_proc = NULL; 274 if (HFS_MOUNT_DEBUG) { 275 printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN); 276 } 277 goto out; 278 } 279 280 flags = WRITECLOSE; 281 if (cmdflags & MNT_FORCE) 282 flags |= FORCECLOSE; 283 284 if ((retval = hfs_flushfiles(mp, flags, p))) { 285 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; 286 hfsmp->hfs_downgrading_proc = NULL; 287 if (HFS_MOUNT_DEBUG) { 288 printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN); 289 } 290 goto out; 291 } 292 293 /* mark the volume cleanly unmounted */ 294 hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask; 295 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 296 hfsmp->hfs_flags |= HFS_READ_ONLY; 297 298 /* 299 * Close down the journal. 
300 * 301 * NOTE: It is critically important to close down the journal 302 * and have it issue all pending I/O prior to calling VNOP_FSYNC below. 303 * In a journaled environment it is expected that the journal be 304 * the only actor permitted to issue I/O for metadata blocks in HFS. 305 * If we were to call VNOP_FSYNC prior to closing down the journal, 306 * we would inadvertantly issue (and wait for) the I/O we just 307 * initiated above as part of the flushvolumeheader call. 308 * 309 * To avoid this, we follow the same order of operations as in 310 * unmount and issue the journal_close prior to calling VNOP_FSYNC. 311 */ 312 313 if (hfsmp->jnl) { 314 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 315 316 journal_close(hfsmp->jnl); 317 hfsmp->jnl = NULL; 318 319 // Note: we explicitly don't want to shutdown 320 // access to the jvp because we may need 321 // it later if we go back to being read-write. 322 323 hfs_unlock_global (hfsmp); 324 } 325 326 327 /* 328 * Write out any pending I/O still outstanding against the device node 329 * now that the journal has been closed. 330 */ 331 if (!retval) { 332 if (vnode_mount(hfsmp->hfs_devvp) == mp) { 333 retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p); 334 } else { 335 vnode_get(hfsmp->hfs_devvp); 336 retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); 337 vnode_put(hfsmp->hfs_devvp); 338 } 339 } 340 341 if (retval) { 342 if (HFS_MOUNT_DEBUG) { 343 printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN); 344 } 345 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; 346 hfsmp->hfs_downgrading_proc = NULL; 347 hfsmp->hfs_flags &= ~HFS_READ_ONLY; 348 goto out; 349 } 350 351#if CONFIG_HFS_ALLOC_RBTREE 352 (void) hfs_teardown_allocator(hfsmp); 353#endif 354 hfsmp->hfs_downgrading_proc = NULL; 355 } 356 357 /* Change to a writable file system. 
*/ 358 if (vfs_iswriteupgrade(mp)) { 359#if CONFIG_HFS_ALLOC_RBTREE 360 thread_t allocator_thread; 361#endif 362 363 /* 364 * On inconsistent disks, do not allow read-write mount 365 * unless it is the boot volume being mounted. 366 */ 367 if (!(vfs_flags(mp) & MNT_ROOTFS) && 368 (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) { 369 if (HFS_MOUNT_DEBUG) { 370 printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN)); 371 } 372 retval = EINVAL; 373 goto out; 374 } 375 376 // If the journal was shut-down previously because we were 377 // asked to be read-only, let's start it back up again now 378 379 if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) 380 && hfsmp->jnl == NULL 381 && hfsmp->jvp != NULL) { 382 int jflags; 383 384 if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) { 385 jflags = JOURNAL_RESET; 386 } else { 387 jflags = 0; 388 } 389 390 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 391 392 hfsmp->jnl = journal_open(hfsmp->jvp, 393 (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, 394 hfsmp->jnl_size, 395 hfsmp->hfs_devvp, 396 hfsmp->hfs_logical_block_size, 397 jflags, 398 0, 399 hfs_sync_metadata, hfsmp->hfs_mp); 400 401 /* 402 * Set up the trim callback function so that we can add 403 * recently freed extents to the free extent cache once 404 * the transaction that freed them is written to the 405 * journal on disk. 406 */ 407 if (hfsmp->jnl) 408 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); 409 410 hfs_unlock_global (hfsmp); 411 412 if (hfsmp->jnl == NULL) { 413 if (HFS_MOUNT_DEBUG) { 414 printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN)); 415 } 416 retval = EINVAL; 417 goto out; 418 } else { 419 hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET; 420 } 421 422 } 423 424 /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. 
*/ 425 retval = hfs_erase_unused_nodes(hfsmp); 426 if (retval != E_NONE) { 427 if (HFS_MOUNT_DEBUG) { 428 printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN); 429 } 430 goto out; 431 } 432 433 /* If this mount point was downgraded from read-write 434 * to read-only, clear that information as we are now 435 * moving back to read-write. 436 */ 437 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; 438 hfsmp->hfs_downgrading_proc = NULL; 439 440 /* mark the volume dirty (clear clean unmount bit) */ 441 hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask; 442 443 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 444 if (retval != E_NONE) { 445 if (HFS_MOUNT_DEBUG) { 446 printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN); 447 } 448 goto out; 449 } 450 451 /* Only clear HFS_READ_ONLY after a successful write */ 452 hfsmp->hfs_flags &= ~HFS_READ_ONLY; 453 454 455 if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) { 456 /* Setup private/hidden directories for hardlinks. */ 457 hfs_privatedir_init(hfsmp, FILE_HARDLINKS); 458 hfs_privatedir_init(hfsmp, DIR_HARDLINKS); 459 460 hfs_remove_orphans(hfsmp); 461 462 /* 463 * Allow hot file clustering if conditions allow. 464 */ 465 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && 466 ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { 467 (void) hfs_recording_init(hfsmp); 468 } 469 /* Force ACLs on HFS+ file systems. */ 470 if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) { 471 vfs_setextendedsecurity(HFSTOVFS(hfsmp)); 472 } 473 } 474 475#if CONFIG_HFS_ALLOC_RBTREE 476 /* 477 * Like the normal mount case, we need to handle creation of the allocation red-black tree 478 * if we're upgrading from read-only to read-write. 479 * 480 * We spawn a thread to create the pair of red-black trees for this volume. 481 * However, in so doing, we must be careful to ensure that if this thread is still 482 * running after mount has finished, it doesn't interfere with an unmount. 
Specifically, 483 * we'll need to set a bit that indicates we're in progress building the trees here. 484 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that 485 * notifies the tree generation code that an unmount is waiting. Also, mark the extent 486 * tree flags that the allocator is enabled for use before we spawn the thread that will start 487 * scanning the RB tree. 488 * 489 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only), 490 * which has not previously encountered a bad error on the red-black tree code. Also, don't 491 * try to re-build a tree that already exists. 492 * 493 * When this is enabled, we must re-integrate the above function into our bitmap iteration 494 * so that we accurately send TRIMs down to the underlying disk device as needed. 495 */ 496 497 if (hfsmp->extent_tree_flags == 0) { 498 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED); 499 /* Initialize EOF counter so that the thread can assume it started at initial values */ 500 hfsmp->offset_block_end = 0; 501 502 InitTree(hfsmp); 503 504 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread); 505 thread_deallocate(allocator_thread); 506 } 507 508#endif 509 } 510 511 /* Update file system parameters. 
*/ 512 retval = hfs_changefs(mp, &args); 513 if (retval && HFS_MOUNT_DEBUG) { 514 printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN); 515 } 516 517 } else /* not an update request */ { 518 519 /* Set the mount flag to indicate that we support volfs */ 520 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); 521 522 retval = hfs_mountfs(devvp, mp, &args, 0, context); 523 if (retval && HFS_MOUNT_DEBUG) { 524 printf("hfs_mount: hfs_mountfs returned %d\n", retval); 525 } 526#if CONFIG_PROTECT 527 /* 528 * If above mount call was successful, and this mount is content protection 529 * enabled, then verify the on-disk EA on the root to ensure that the filesystem 530 * is of a suitable vintage to allow the mount to proceed. 531 */ 532 if ((retval == 0) && (cp_fs_protected (mp))) { 533 int err = 0; 534 535 struct cp_root_xattr *xattr = NULL; 536 MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); 537 if (xattr == NULL) { 538 err = ENOMEM; 539 goto badalloc; 540 } 541 bzero (xattr, sizeof(struct cp_root_xattr)); 542 hfsmp = vfs_fsprivate(mp); 543 544 /* go get the EA to get the version information */ 545 err = cp_getrootxattr (hfsmp, xattr); 546 /* 547 * If there was no EA there, then write one out. 548 * Assuming EA is not present on the root means 549 * this is an erase install or a very old FS 550 */ 551 if (err == ENOATTR) { 552 printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); 553 bzero(xattr, sizeof(struct cp_root_xattr)); 554 xattr->major_version = CP_NEW_MAJOR_VERS; 555 xattr->minor_version = CP_MINOR_VERS; 556 xattr->flags = 0; 557 558 err = cp_setrootxattr (hfsmp, xattr); 559 } 560 561 /* 562 * For any other error, including having an out of date CP version in the 563 * EA, or for an error out of cp_setrootxattr, deny the mount 564 * and do not proceed further. 
565 */ 566 if (err || (xattr->major_version != CP_NEW_MAJOR_VERS && xattr->major_version != CP_PREV_MAJOR_VERS)) { 567 /* Deny the mount and tear down. */ 568 retval = EPERM; 569 (void) hfs_unmount (mp, MNT_FORCE, context); 570 } 571 printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version); 572badalloc: 573 if(xattr) { 574 FREE(xattr, M_TEMP); 575 } 576 } 577#endif 578 } 579out: 580 if (retval == 0) { 581 (void)hfs_statfs(mp, vfs_statfs(mp), context); 582 } 583 return (retval); 584} 585 586 587struct hfs_changefs_cargs { 588 struct hfsmount *hfsmp; 589 int namefix; 590 int permfix; 591 int permswitch; 592}; 593 594static int 595hfs_changefs_callback(struct vnode *vp, void *cargs) 596{ 597 ExtendedVCB *vcb; 598 struct cnode *cp; 599 struct cat_desc cndesc; 600 struct cat_attr cnattr; 601 struct hfs_changefs_cargs *args; 602 int lockflags; 603 int error; 604 605 args = (struct hfs_changefs_cargs *)cargs; 606 607 cp = VTOC(vp); 608 vcb = HFSTOVCB(args->hfsmp); 609 610 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 611 error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL); 612 hfs_systemfile_unlock(args->hfsmp, lockflags); 613 if (error) { 614 /* 615 * If we couldn't find this guy skip to the next one 616 */ 617 if (args->namefix) 618 cache_purge(vp); 619 620 return (VNODE_RETURNED); 621 } 622 /* 623 * Get the real uid/gid and perm mask from disk. 624 */ 625 if (args->permswitch || args->permfix) { 626 cp->c_uid = cnattr.ca_uid; 627 cp->c_gid = cnattr.ca_gid; 628 cp->c_mode = cnattr.ca_mode; 629 } 630 /* 631 * If we're switching name converters then... 632 * Remove the existing entry from the namei cache. 633 * Update name to one based on new encoder. 
634 */ 635 if (args->namefix) { 636 cache_purge(vp); 637 replace_desc(cp, &cndesc); 638 639 if (cndesc.cd_cnid == kHFSRootFolderID) { 640 strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1); 641 cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding; 642 } 643 } else { 644 cat_releasedesc(&cndesc); 645 } 646 return (VNODE_RETURNED); 647} 648 649/* Change fs mount parameters */ 650static int 651hfs_changefs(struct mount *mp, struct hfs_mount_args *args) 652{ 653 int retval = 0; 654 int namefix, permfix, permswitch; 655 struct hfsmount *hfsmp; 656 ExtendedVCB *vcb; 657 hfs_to_unicode_func_t get_unicode_func; 658 unicode_to_hfs_func_t get_hfsname_func; 659 u_int32_t old_encoding = 0; 660 struct hfs_changefs_cargs cargs; 661 u_int32_t mount_flags; 662 663 hfsmp = VFSTOHFS(mp); 664 vcb = HFSTOVCB(hfsmp); 665 mount_flags = (unsigned int)vfs_flags(mp); 666 667 hfsmp->hfs_flags |= HFS_IN_CHANGEFS; 668 669 permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) && 670 ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) || 671 (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) && 672 (mount_flags & MNT_UNKNOWNPERMISSIONS))); 673 674 /* The root filesystem must operate with actual permissions: */ 675 if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) { 676 vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */ 677 retval = EINVAL; 678 goto exit; 679 } 680 if (mount_flags & MNT_UNKNOWNPERMISSIONS) 681 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; 682 else 683 hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS; 684 685 namefix = permfix = 0; 686 687 /* 688 * Tracking of hot files requires up-to-date access times. So if 689 * access time updates are disabled, we must also disable hot files. 
690 */ 691 if (mount_flags & MNT_NOATIME) { 692 (void) hfs_recording_suspend(hfsmp); 693 } 694 695 /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */ 696 if (args->hfs_timezone.tz_minuteswest != VNOVAL) { 697 gTimeZone = args->hfs_timezone; 698 } 699 700 /* Change the default uid, gid and/or mask */ 701 if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) { 702 hfsmp->hfs_uid = args->hfs_uid; 703 if (vcb->vcbSigWord == kHFSPlusSigWord) 704 ++permfix; 705 } 706 if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) { 707 hfsmp->hfs_gid = args->hfs_gid; 708 if (vcb->vcbSigWord == kHFSPlusSigWord) 709 ++permfix; 710 } 711 if (args->hfs_mask != (mode_t)VNOVAL) { 712 if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) { 713 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS; 714 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS; 715 if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES)) 716 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE); 717 if (vcb->vcbSigWord == kHFSPlusSigWord) 718 ++permfix; 719 } 720 } 721 722 /* Change the hfs encoding value (hfs only) */ 723 if ((vcb->vcbSigWord == kHFSSigWord) && 724 (args->hfs_encoding != (u_int32_t)VNOVAL) && 725 (hfsmp->hfs_encoding != args->hfs_encoding)) { 726 727 retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func); 728 if (retval) 729 goto exit; 730 731 /* 732 * Connect the new hfs_get_unicode converter but leave 733 * the old hfs_get_hfsname converter in place so that 734 * we can lookup existing vnodes to get their correctly 735 * encoded names. 736 * 737 * When we're all finished, we can then connect the new 738 * hfs_get_hfsname converter and release our interest 739 * in the old converters. 
740 */ 741 hfsmp->hfs_get_unicode = get_unicode_func; 742 old_encoding = hfsmp->hfs_encoding; 743 hfsmp->hfs_encoding = args->hfs_encoding; 744 ++namefix; 745 } 746 747 if (!(namefix || permfix || permswitch)) 748 goto exit; 749 750 /* XXX 3762912 hack to support HFS filesystem 'owner' */ 751 if (permfix) 752 vfs_setowner(mp, 753 hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid, 754 hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid); 755 756 /* 757 * For each active vnode fix things that changed 758 * 759 * Note that we can visit a vnode more than once 760 * and we can race with fsync. 761 * 762 * hfs_changefs_callback will be called for each vnode 763 * hung off of this mount point 764 * 765 * The vnode will be properly referenced and unreferenced 766 * around the callback 767 */ 768 cargs.hfsmp = hfsmp; 769 cargs.namefix = namefix; 770 cargs.permfix = permfix; 771 cargs.permswitch = permswitch; 772 773 vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs); 774 775 /* 776 * If we're switching name converters we can now 777 * connect the new hfs_get_hfsname converter and 778 * release our interest in the old converters. 
779 */ 780 if (namefix) { 781 hfsmp->hfs_get_hfsname = get_hfsname_func; 782 vcb->volumeNameEncodingHint = args->hfs_encoding; 783 (void) hfs_relconverter(old_encoding); 784 } 785exit: 786 hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS; 787 return (retval); 788} 789 790 791struct hfs_reload_cargs { 792 struct hfsmount *hfsmp; 793 int error; 794}; 795 796static int 797hfs_reload_callback(struct vnode *vp, void *cargs) 798{ 799 struct cnode *cp; 800 struct hfs_reload_cargs *args; 801 int lockflags; 802 803 args = (struct hfs_reload_cargs *)cargs; 804 /* 805 * flush all the buffers associated with this node 806 */ 807 (void) buf_invalidateblks(vp, 0, 0, 0); 808 809 cp = VTOC(vp); 810 /* 811 * Remove any directory hints 812 */ 813 if (vnode_isdir(vp)) 814 hfs_reldirhints(cp, 0); 815 816 /* 817 * Re-read cnode data for all active vnodes (non-metadata files). 818 */ 819 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) { 820 struct cat_fork *datafork; 821 struct cat_desc desc; 822 823 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL; 824 825 /* lookup by fileID since name could have changed */ 826 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 827 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork); 828 hfs_systemfile_unlock(args->hfsmp, lockflags); 829 if (args->error) { 830 return (VNODE_RETURNED_DONE); 831 } 832 833 /* update cnode's catalog descriptor */ 834 (void) replace_desc(cp, &desc); 835 } 836 return (VNODE_RETURNED); 837} 838 839/* 840 * Reload all incore data for a filesystem (used after running fsck on 841 * the root filesystem and finding things to fix). The filesystem must 842 * be mounted read-only. 843 * 844 * Things to do to update the mount: 845 * invalidate all cached meta-data. 846 * invalidate all inactive vnodes. 847 * invalidate all cached file data. 848 * re-read volume header from disk. 
 * re-load meta-file info (extents, file size).
 * re-load B-tree header data.
 * re-read cnode data for all active vnodes.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	/* HFS standard (wrapper-less) volumes cannot be reloaded this way. */
	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 * A dirty buffer here means somebody wrote meta-data after we decided
	 * the volume was clean enough to reload, which is a fatal inconsistency.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/*
	 * Do a quick sanity check: signature, version, and allocation block
	 * size must still describe the same HFS+/HFSX volume we mounted.
	 */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Refresh the in-core VCB from the on-disk (big-endian) volume header. */
	vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb = SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	/* Extents overflow B-tree file */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);

	/* Catalog B-tree file */
	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes B-tree file (optional; may not exist on this volume) */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock =
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount =
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	/* Allocation bitmap file */
	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;	/* points into the released buffer; do not use past here */

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name (stored in the root folder's catalog record) */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}


/*
 * hfs_syncer - periodic metadata flush callback.
 *
 * Invoked via the hfsmp->hfs_syncer thread_call (see the reschedule below).
 * Flushes the journal for journaled volumes, or does a full hfs_sync()
 * otherwise, and adaptively tunes hfs_max_pending_io based on the observed
 * drain rate of pending writes.  On completion it decrements
 * hfs_sync_scheduled / hfs_sync_incomplete and wakes any waiters, unless it
 * rescheduled itself (in which case it returns early without decrementing).
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

	struct hfsmount *hfsmp = arg0;
	clock_sec_t secs;
	clock_usec_t usecs;
	uint32_t delay = HFS_META_DELAY;
	uint64_t now;
	/* NOTE(review): no_max is written below but never read in this function;
	 * presumably inspected externally (debugger) or vestigial -- confirm. */
	static int no_max=1;

	clock_get_calendar_microtime(&secs, &usecs);
	now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;	/* microseconds */

	//
	// If the amount of pending writes is more than our limit, wait
	// for 2/3 of it to drain and then flush the journal.
	//
	if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
		int counter=0;
		uint64_t pending_io, start, rate = 0;

		no_max = 0;

		hfs_start_transaction(hfsmp);	// so we hold off any new i/o's

		pending_io = hfsmp->hfs_mp->mnt_pending_write_size;

		clock_get_calendar_microtime(&secs, &usecs);
		start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

		// poll (up to 500 iterations) until 2/3 of the pending i/o drains
		while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
			tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
		}

		if (counter >= 500) {
			printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
		}

		if (hfsmp->jnl) {
			journal_flush(hfsmp->jnl, FALSE);
		} else {
			hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
		}

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;
		if (now != start) {	// guard against divide-by-zero on a zero-length interval
			rate = ((pending_io * 1000000ULL) / (now - start));	// yields bytes per second
		}

		hfs_end_transaction(hfsmp);

		//
		// If a reasonable amount of time elapsed then check the
		// i/o rate.  If it's taking less than 1 second or more
		// than 2 seconds, adjust hfs_max_pending_io so that we
		// will allow about 1.5 seconds of i/o to queue up.
		//
		if (((now - start) >= 300000) && (rate != 0)) {
			// scale = drain time in hundredths of a second
			uint64_t scale = (pending_io * 100) / rate;

			if (scale < 100 || scale > 200) {
				// set it so that it should take about 1.5 seconds to drain
				hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
			}
		}

	} else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
		   || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
		       && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
		       && (hfsmp->hfs_active_threads == 0)
		       && (hfsmp->hfs_global_lock_nesting == 0))) {

		//
		// Flush the journal if more than 5 seconds elapsed since
		// the last sync OR we have not sync'ed recently and the
		// last sync request time was more than 100 milliseconds
		// ago and no one is in the middle of a transaction right
		// now.  Else we defer the sync and reschedule it.
		//
		if (hfsmp->jnl) {
			hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

			journal_flush(hfsmp->jnl, FALSE);

			hfs_unlock_global (hfsmp);
		} else {
			hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
		}

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;

	} else if (hfsmp->hfs_active_threads == 0) {
		// Too soon to sync and no transactions active: defer by
		// rescheduling ourselves HFS_META_DELAY milliseconds out.
		uint64_t deadline;

		clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
		thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);

		// note: we intentionally return early here and do not
		// decrement the sync_scheduled and sync_incomplete
		// variables because we rescheduled the timer.

		return;
	}

	//
	// NOTE: we decrement these *after* we're done the journal_flush() since
	// it can take a significant amount of time and so we don't want more
	// callbacks scheduled until we're done this one.
	//
	OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
	OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
	wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}


extern int IOBSDIsMediaEjectable( const char *cdev_name );

/*
 * Initialization code for Red-Black Tree Allocator
 *
 * This function will build the two red-black trees necessary for allocating space
 * from the metadata zone as well as normal allocations.  Currently, we use
 * an advisory read to get most of the data into the buffer cache.
 * This function is intended to be run in a separate thread so as not to slow down mount.
 *
 */

void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t err;

	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree assumes that the bitmap lock is held when you call the function.
	 * It will drop and re-acquire the lock periodically as needed to let other allocations
	 * through.  It returns with the bitmap lock held.  Since we only maintain one tree,
	 * we don't need to specify a start block (always starts at 0).
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
	if (err) {
		goto bailout;
	}
	/* Mark offset tree as built */
	hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;

bailout:
	/*
	 * GenerateTree may drop the bitmap lock during operation in order to give other
	 * threads a chance to allocate blocks, but it will always return with the lock held, so
	 * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (err != 0) {
		/* Wakeup any waiters on the allocation bitmap lock */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, flags);
#else
#pragma unused (hfsmp)
#endif
}

/*
 * Issue unmap (trim) requests for the free blocks of the volume.
 * Takes the bitmap lock exclusively so journal transactions are held
 * off for the duration; unmap errors are deliberately ignored.
 */
void hfs_unmap_blocks (struct hfsmount *hfsmp) {
	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * UnmapBlocks assumes that the bitmap lock is held when you call the function.
	 * We don't care if there were any error issuing unmaps yet.
	 */
	(void) UnmapBlocks(hfsmp);

	hfs_systemfile_unlock(hfsmp, flags);
}


/*
 * Teardown code for the Red-Black Tree allocator.
 * This function consolidates the code which serializes with respect
 * to a thread that may be potentially still building the tree when we need to begin
 * tearing it down.  Since the red-black tree may not be live when we enter this function
 * we return:
 *		1 -> Tree was live.
 *		0 -> Tree was not active at time of call.
 */

int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int rb_used = 0;

#if CONFIG_HFS_ALLOC_RBTREE

	int flags = 0;

	/*
	 * Check to see if the tree-generation is still on-going.
	 * If it is, then block until it's done.
1240 */ 1241 1242 flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); 1243 1244 1245 while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) { 1246 hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT; 1247 1248 lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE, 1249 &hfsmp->extent_tree_flags, THREAD_UNINT); 1250 } 1251 1252 if (hfs_isrbtree_active (hfsmp)) { 1253 rb_used = 1; 1254 1255 /* Tear down the RB Trees while we have the bitmap locked */ 1256 DestroyTrees(hfsmp); 1257 1258 } 1259 1260 hfs_systemfile_unlock(hfsmp, flags); 1261#else 1262 #pragma unused (hfsmp) 1263#endif 1264 return rb_used; 1265 1266} 1267 1268static int hfs_root_unmounted_cleanly = 0; 1269 1270SYSCTL_DECL(_vfs_generic); 1271SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly"); 1272 1273/* 1274 * Common code for mount and mountroot 1275 */ 1276int 1277hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, 1278 int journal_replay_only, vfs_context_t context) 1279{ 1280 struct proc *p = vfs_context_proc(context); 1281 int retval = E_NONE; 1282 struct hfsmount *hfsmp = NULL; 1283 struct buf *bp; 1284 dev_t dev; 1285 HFSMasterDirectoryBlock *mdbp = NULL; 1286 int ronly; 1287#if QUOTA 1288 int i; 1289#endif 1290 int mntwrapper; 1291 kauth_cred_t cred; 1292 u_int64_t disksize; 1293 daddr64_t log_blkcnt; 1294 u_int32_t log_blksize; 1295 u_int32_t phys_blksize; 1296 u_int32_t minblksize; 1297 u_int32_t iswritable; 1298 daddr64_t mdb_offset; 1299 int isvirtual = 0; 1300 int isroot = 0; 1301 u_int32_t device_features = 0; 1302 int isssd; 1303#if CONFIG_HFS_ALLOC_RBTREE 1304 thread_t allocator_thread; 1305#endif 1306 1307 if (args == NULL) { 1308 /* only hfs_mountroot passes us NULL as the 'args' argument */ 1309 isroot = 1; 1310 } 1311 1312 ronly = vfs_isrdonly(mp); 1313 dev = vnode_specrdev(devvp); 1314 cred = p ? 
vfs_context_ucred(context) : NOCRED; 1315 mntwrapper = 0; 1316 1317 bp = NULL; 1318 hfsmp = NULL; 1319 mdbp = NULL; 1320 minblksize = kHFSBlockSize; 1321 1322 /* Advisory locking should be handled at the VFS layer */ 1323 vfs_setlocklocal(mp); 1324 1325 /* Get the logical block size (treated as physical block size everywhere) */ 1326 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) { 1327 if (HFS_MOUNT_DEBUG) { 1328 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n"); 1329 } 1330 retval = ENXIO; 1331 goto error_exit; 1332 } 1333 if (log_blksize == 0 || log_blksize > 1024*1024*1024) { 1334 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize); 1335 retval = ENXIO; 1336 goto error_exit; 1337 } 1338 1339 /* Get the physical block size. */ 1340 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context); 1341 if (retval) { 1342 if ((retval != ENOTSUP) && (retval != ENOTTY)) { 1343 if (HFS_MOUNT_DEBUG) { 1344 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n"); 1345 } 1346 retval = ENXIO; 1347 goto error_exit; 1348 } 1349 /* If device does not support this ioctl, assume that physical 1350 * block size is same as logical block size 1351 */ 1352 phys_blksize = log_blksize; 1353 } 1354 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) { 1355 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize); 1356 retval = ENXIO; 1357 goto error_exit; 1358 } 1359 1360 /* Switch to 512 byte sectors (temporarily) */ 1361 if (log_blksize > 512) { 1362 u_int32_t size512 = 512; 1363 1364 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) { 1365 if (HFS_MOUNT_DEBUG) { 1366 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n"); 1367 } 1368 retval = ENXIO; 1369 goto error_exit; 1370 } 1371 } 1372 /* Get the number of 512 byte physical blocks. 
*/ 1373 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { 1374 /* resetting block size may fail if getting block count did */ 1375 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context); 1376 if (HFS_MOUNT_DEBUG) { 1377 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n"); 1378 } 1379 retval = ENXIO; 1380 goto error_exit; 1381 } 1382 /* Compute an accurate disk size (i.e. within 512 bytes) */ 1383 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512; 1384 1385 /* 1386 * On Tiger it is not necessary to switch the device 1387 * block size to be 4k if there are more than 31-bits 1388 * worth of blocks but to insure compatibility with 1389 * pre-Tiger systems we have to do it. 1390 * 1391 * If the device size is not a multiple of 4K (8 * 512), then 1392 * switching the logical block size isn't going to help because 1393 * we will be unable to write the alternate volume header. 1394 * In this case, just leave the logical block size unchanged. 1395 */ 1396 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) { 1397 minblksize = log_blksize = 4096; 1398 if (phys_blksize < log_blksize) 1399 phys_blksize = log_blksize; 1400 } 1401 1402 /* 1403 * The cluster layer is not currently prepared to deal with a logical 1404 * block size larger than the system's page size. (It can handle 1405 * blocks per page, but not multiple pages per block.) So limit the 1406 * logical block size to the page size. 1407 */ 1408 if (log_blksize > PAGE_SIZE) 1409 log_blksize = PAGE_SIZE; 1410 1411 /* Now switch to our preferred physical block size. */ 1412 if (log_blksize > 512) { 1413 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { 1414 if (HFS_MOUNT_DEBUG) { 1415 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n"); 1416 } 1417 retval = ENXIO; 1418 goto error_exit; 1419 } 1420 /* Get the count of physical blocks. 
*/ 1421 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { 1422 if (HFS_MOUNT_DEBUG) { 1423 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n"); 1424 } 1425 retval = ENXIO; 1426 goto error_exit; 1427 } 1428 } 1429 /* 1430 * At this point: 1431 * minblksize is the minimum physical block size 1432 * log_blksize has our preferred physical block size 1433 * log_blkcnt has the total number of physical blocks 1434 */ 1435 1436 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize); 1437 if ((retval = (int)buf_meta_bread(devvp, 1438 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), 1439 phys_blksize, cred, &bp))) { 1440 if (HFS_MOUNT_DEBUG) { 1441 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval); 1442 } 1443 goto error_exit; 1444 } 1445 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK); 1446 if (mdbp == NULL) { 1447 retval = ENOMEM; 1448 if (HFS_MOUNT_DEBUG) { 1449 printf("hfs_mountfs: MALLOC failed\n"); 1450 } 1451 goto error_exit; 1452 } 1453 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize); 1454 buf_brelse(bp); 1455 bp = NULL; 1456 1457 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK); 1458 if (hfsmp == NULL) { 1459 if (HFS_MOUNT_DEBUG) { 1460 printf("hfs_mountfs: MALLOC (2) failed\n"); 1461 } 1462 retval = ENOMEM; 1463 goto error_exit; 1464 } 1465 bzero(hfsmp, sizeof(struct hfsmount)); 1466 1467 hfs_chashinit_finish(hfsmp); 1468 1469 /* 1470 * See if the disk supports unmap (trim). 1471 * 1472 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field 1473 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves. 1474 */ 1475 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) { 1476 if (device_features & DK_FEATURE_UNMAP) { 1477 hfsmp->hfs_flags |= HFS_UNMAP; 1478 } 1479 } 1480 1481 /* 1482 * See if the disk is a solid state device, too. 
We need this to decide what to do about 1483 * hotfiles. 1484 */ 1485 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) { 1486 if (isssd) { 1487 hfsmp->hfs_flags |= HFS_SSD; 1488 } 1489 } 1490 1491 1492 /* 1493 * Init the volume information structure 1494 */ 1495 1496 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr); 1497 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr); 1498 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr); 1499 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr); 1500 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr); 1501 1502 vfs_setfsprivate(mp, hfsmp); 1503 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */ 1504 hfsmp->hfs_raw_dev = vnode_specrdev(devvp); 1505 hfsmp->hfs_devvp = devvp; 1506 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */ 1507 hfsmp->hfs_logical_block_size = log_blksize; 1508 hfsmp->hfs_logical_block_count = log_blkcnt; 1509 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt; 1510 hfsmp->hfs_physical_block_size = phys_blksize; 1511 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize); 1512 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; 1513 if (ronly) 1514 hfsmp->hfs_flags |= HFS_READ_ONLY; 1515 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) 1516 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; 1517 1518#if QUOTA 1519 for (i = 0; i < MAXQUOTAS; i++) 1520 dqfileinit(&hfsmp->hfs_qfiles[i]); 1521#endif 1522 1523 if (args) { 1524 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid; 1525 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID; 1526 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? 
UNKNOWNGID : args->hfs_gid; 1527 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID; 1528 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ 1529 if (args->hfs_mask != (mode_t)VNOVAL) { 1530 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS; 1531 if (args->flags & HFSFSMNT_NOXONFILES) { 1532 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE); 1533 } else { 1534 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS; 1535 } 1536 } else { 1537 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */ 1538 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */ 1539 } 1540 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER)) 1541 mntwrapper = 1; 1542 } else { 1543 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */ 1544 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) { 1545 hfsmp->hfs_uid = UNKNOWNUID; 1546 hfsmp->hfs_gid = UNKNOWNGID; 1547 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ 1548 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */ 1549 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */ 1550 } 1551 } 1552 1553 /* Find out if disk media is writable. 
*/ 1554 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) { 1555 if (iswritable) 1556 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; 1557 else 1558 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; 1559 } 1560 1561 // record the current time at which we're mounting this volume 1562 struct timeval tv; 1563 microtime(&tv); 1564 hfsmp->hfs_mount_time = tv.tv_sec; 1565 1566 /* Mount a standard HFS disk */ 1567 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && 1568 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { 1569#if CONFIG_HFS_STD 1570 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */ 1571 if (vfs_isrdwr(mp)) { 1572 retval = EROFS; 1573 goto error_exit; 1574 } 1575 1576 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n"); 1577 1578 /* Treat it as if it's read-only and not writeable */ 1579 hfsmp->hfs_flags |= HFS_READ_ONLY; 1580 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; 1581 1582 /* If only journal replay is requested, exit immediately */ 1583 if (journal_replay_only) { 1584 retval = 0; 1585 goto error_exit; 1586 } 1587 1588 if ((vfs_flags(mp) & MNT_ROOTFS)) { 1589 retval = EINVAL; /* Cannot root from HFS standard disks */ 1590 goto error_exit; 1591 } 1592 /* HFS disks can only use 512 byte physical blocks */ 1593 if (log_blksize > kHFSBlockSize) { 1594 log_blksize = kHFSBlockSize; 1595 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { 1596 retval = ENXIO; 1597 goto error_exit; 1598 } 1599 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { 1600 retval = ENXIO; 1601 goto error_exit; 1602 } 1603 hfsmp->hfs_logical_block_size = log_blksize; 1604 hfsmp->hfs_logical_block_count = log_blkcnt; 1605 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt; 1606 hfsmp->hfs_physical_block_size = log_blksize; 1607 hfsmp->hfs_log_per_phys = 1; 1608 } 1609 if (args) { 1610 hfsmp->hfs_encoding = 
args->hfs_encoding; 1611 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding; 1612 1613 /* establish the timezone */ 1614 gTimeZone = args->hfs_timezone; 1615 } 1616 1617 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode, 1618 &hfsmp->hfs_get_hfsname); 1619 if (retval) 1620 goto error_exit; 1621 1622 retval = hfs_MountHFSVolume(hfsmp, mdbp, p); 1623 if (retval) 1624 (void) hfs_relconverter(hfsmp->hfs_encoding); 1625#else 1626 /* On platforms where HFS Standard is not supported, deny the mount altogether */ 1627 retval = EINVAL; 1628 goto error_exit; 1629#endif 1630 1631 } else /* Mount an HFS Plus disk */ { 1632 HFSPlusVolumeHeader *vhp; 1633 off_t embeddedOffset; 1634 int jnl_disable = 0; 1635 1636 /* Get the embedded Volume Header */ 1637 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) { 1638 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize; 1639 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) * 1640 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); 1641 1642 /* 1643 * If the embedded volume doesn't start on a block 1644 * boundary, then switch the device to a 512-byte 1645 * block size so everything will line up on a block 1646 * boundary. 
1647 */ 1648 if ((embeddedOffset % log_blksize) != 0) { 1649 printf("hfs_mountfs: embedded volume offset not" 1650 " a multiple of physical block size (%d);" 1651 " switching to 512\n", log_blksize); 1652 log_blksize = 512; 1653 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, 1654 (caddr_t)&log_blksize, FWRITE, context)) { 1655 1656 if (HFS_MOUNT_DEBUG) { 1657 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n"); 1658 } 1659 retval = ENXIO; 1660 goto error_exit; 1661 } 1662 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, 1663 (caddr_t)&log_blkcnt, 0, context)) { 1664 if (HFS_MOUNT_DEBUG) { 1665 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n"); 1666 } 1667 retval = ENXIO; 1668 goto error_exit; 1669 } 1670 /* Note: relative block count adjustment */ 1671 hfsmp->hfs_logical_block_count *= 1672 hfsmp->hfs_logical_block_size / log_blksize; 1673 1674 /* Update logical /physical block size */ 1675 hfsmp->hfs_logical_block_size = log_blksize; 1676 hfsmp->hfs_physical_block_size = log_blksize; 1677 1678 phys_blksize = log_blksize; 1679 hfsmp->hfs_log_per_phys = 1; 1680 } 1681 1682 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) * 1683 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); 1684 1685 hfsmp->hfs_logical_block_count = disksize / log_blksize; 1686 1687 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; 1688 1689 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); 1690 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), 1691 phys_blksize, cred, &bp); 1692 if (retval) { 1693 if (HFS_MOUNT_DEBUG) { 1694 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval); 1695 } 1696 goto error_exit; 1697 } 1698 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512); 1699 buf_brelse(bp); 1700 bp = NULL; 1701 vhp = (HFSPlusVolumeHeader*) mdbp; 1702 1703 } else /* pure HFS+ */ { 1704 embeddedOffset = 0; 1705 vhp = 
(HFSPlusVolumeHeader*) mdbp; 1706 } 1707 1708 if (isroot) { 1709 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0); 1710 } 1711 1712 /* 1713 * On inconsistent disks, do not allow read-write mount 1714 * unless it is the boot volume being mounted. We also 1715 * always want to replay the journal if the journal_replay_only 1716 * flag is set because that will (most likely) get the 1717 * disk into a consistent state before fsck_hfs starts 1718 * looking at it. 1719 */ 1720 if ( !(vfs_flags(mp) & MNT_ROOTFS) 1721 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) 1722 && !journal_replay_only 1723 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) { 1724 1725 if (HFS_MOUNT_DEBUG) { 1726 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n"); 1727 } 1728 retval = EINVAL; 1729 goto error_exit; 1730 } 1731 1732 1733 // XXXdbg 1734 // 1735 hfsmp->jnl = NULL; 1736 hfsmp->jvp = NULL; 1737 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) && 1738 args->journal_disable) { 1739 jnl_disable = 1; 1740 } 1741 1742 // 1743 // We only initialize the journal here if the last person 1744 // to mount this volume was journaling aware. Otherwise 1745 // we delay journal initialization until later at the end 1746 // of hfs_MountHFSPlusVolume() because the last person who 1747 // mounted it could have messed things up behind our back 1748 // (so we need to go find the .journal file, make sure it's 1749 // the right size, re-sync up if it was moved, etc). 1750 // 1751 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion) 1752 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask) 1753 && !jnl_disable) { 1754 1755 // if we're able to init the journal, mark the mount 1756 // point as journaled. 
1757 // 1758 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) { 1759 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); 1760 } else { 1761 if (retval == EROFS) { 1762 // EROFS is a special error code that means the volume has an external 1763 // journal which we couldn't find. in that case we do not want to 1764 // rewrite the volume header - we'll just refuse to mount the volume. 1765 if (HFS_MOUNT_DEBUG) { 1766 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n"); 1767 } 1768 retval = EINVAL; 1769 goto error_exit; 1770 } 1771 1772 // if the journal failed to open, then set the lastMountedVersion 1773 // to be "FSK!" which fsck_hfs will see and force the fsck instead 1774 // of just bailing out because the volume is journaled. 1775 if (!ronly) { 1776 if (HFS_MOUNT_DEBUG) { 1777 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n"); 1778 } 1779 1780 HFSPlusVolumeHeader *jvhp; 1781 1782 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; 1783 1784 if (mdb_offset == 0) { 1785 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); 1786 } 1787 1788 bp = NULL; 1789 retval = (int)buf_meta_bread(devvp, 1790 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), 1791 phys_blksize, cred, &bp); 1792 if (retval == 0) { 1793 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); 1794 1795 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { 1796 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n"); 1797 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); 1798 buf_bwrite(bp); 1799 } else { 1800 buf_brelse(bp); 1801 } 1802 bp = NULL; 1803 } else if (bp) { 1804 buf_brelse(bp); 1805 // clear this so the error exit path won't try to use it 1806 bp = NULL; 1807 } 1808 } 1809 1810 // if this isn't the root device just bail out. 
1811 // If it is the root device we just continue on 1812 // in the hopes that fsck_hfs will be able to 1813 // fix any damage that exists on the volume. 1814 if ( !(vfs_flags(mp) & MNT_ROOTFS)) { 1815 if (HFS_MOUNT_DEBUG) { 1816 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n"); 1817 } 1818 retval = EINVAL; 1819 goto error_exit; 1820 } 1821 } 1822 } 1823 // XXXdbg 1824 1825 /* Either the journal is replayed successfully, or there 1826 * was nothing to replay, or no journal exists. In any case, 1827 * return success. 1828 */ 1829 if (journal_replay_only) { 1830 retval = 0; 1831 goto error_exit; 1832 } 1833 1834 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname); 1835 1836 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); 1837 /* 1838 * If the backend didn't like our physical blocksize 1839 * then retry with physical blocksize of 512. 1840 */ 1841 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) { 1842 printf("hfs_mountfs: could not use physical block size " 1843 "(%d) switching to 512\n", log_blksize); 1844 log_blksize = 512; 1845 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { 1846 if (HFS_MOUNT_DEBUG) { 1847 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n"); 1848 } 1849 retval = ENXIO; 1850 goto error_exit; 1851 } 1852 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { 1853 if (HFS_MOUNT_DEBUG) { 1854 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n"); 1855 } 1856 retval = ENXIO; 1857 goto error_exit; 1858 } 1859 devvp->v_specsize = log_blksize; 1860 /* Note: relative block count adjustment (in case this is an embedded volume). 
*/ 1861 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize; 1862 hfsmp->hfs_logical_block_size = log_blksize; 1863 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize; 1864 1865 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; 1866 1867 if (hfsmp->jnl && hfsmp->jvp == devvp) { 1868 // close and re-open this with the new block size 1869 journal_close(hfsmp->jnl); 1870 hfsmp->jnl = NULL; 1871 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { 1872 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); 1873 } else { 1874 // if the journal failed to open, then set the lastMountedVersion 1875 // to be "FSK!" which fsck_hfs will see and force the fsck instead 1876 // of just bailing out because the volume is journaled. 1877 if (!ronly) { 1878 if (HFS_MOUNT_DEBUG) { 1879 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n"); 1880 } 1881 HFSPlusVolumeHeader *jvhp; 1882 1883 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; 1884 1885 if (mdb_offset == 0) { 1886 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); 1887 } 1888 1889 bp = NULL; 1890 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), 1891 phys_blksize, cred, &bp); 1892 if (retval == 0) { 1893 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); 1894 1895 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { 1896 printf ("hfs(2): Journal replay fail. 
Writing lastMountVersion as FSK!\n"); 1897 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); 1898 buf_bwrite(bp); 1899 } else { 1900 buf_brelse(bp); 1901 } 1902 bp = NULL; 1903 } else if (bp) { 1904 buf_brelse(bp); 1905 // clear this so the error exit path won't try to use it 1906 bp = NULL; 1907 } 1908 } 1909 1910 // if this isn't the root device just bail out. 1911 // If it is the root device we just continue on 1912 // in the hopes that fsck_hfs will be able to 1913 // fix any damage that exists on the volume. 1914 if ( !(vfs_flags(mp) & MNT_ROOTFS)) { 1915 if (HFS_MOUNT_DEBUG) { 1916 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n"); 1917 } 1918 retval = EINVAL; 1919 goto error_exit; 1920 } 1921 } 1922 } 1923 1924 /* Try again with a smaller block size... */ 1925 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); 1926 if (retval && HFS_MOUNT_DEBUG) { 1927 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval); 1928 } 1929 } 1930 if (retval) 1931 (void) hfs_relconverter(0); 1932 } 1933 1934 // save off a snapshot of the mtime from the previous mount 1935 // (for matador). 1936 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime; 1937 1938 if ( retval ) { 1939 if (HFS_MOUNT_DEBUG) { 1940 printf("hfs_mountfs: encountered failure %d \n", retval); 1941 } 1942 goto error_exit; 1943 } 1944 1945 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; 1946 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); 1947 vfs_setmaxsymlen(mp, 0); 1948 1949 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR; 1950#if NAMEDSTREAMS 1951 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; 1952#endif 1953 if (!(hfsmp->hfs_flags & HFS_STANDARD)) { 1954 /* Tell VFS that we support directory hard links. */ 1955 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS; 1956 } else { 1957 /* HFS standard doesn't support extended readdir! 
*/ 1958 mount_set_noreaddirext (mp); 1959 } 1960 1961 if (args) { 1962 /* 1963 * Set the free space warning levels for a non-root volume: 1964 * 1965 * Set the "danger" limit to 1% of the volume size or 100MB, whichever 1966 * is less. Set the "warning" limit to 2% of the volume size or 150MB, 1967 * whichever is less. And last, set the "desired" freespace level to 1968 * to 3% of the volume size or 200MB, whichever is less. 1969 */ 1970 hfsmp->hfs_freespace_notify_dangerlimit = 1971 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, 1972 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION); 1973 hfsmp->hfs_freespace_notify_warninglimit = 1974 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, 1975 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION); 1976 hfsmp->hfs_freespace_notify_desiredlevel = 1977 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize, 1978 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION); 1979 } else { 1980 /* 1981 * Set the free space warning levels for the root volume: 1982 * 1983 * Set the "danger" limit to 5% of the volume size or 512MB, whichever 1984 * is less. Set the "warning" limit to 10% of the volume size or 1GB, 1985 * whichever is less. And last, set the "desired" freespace level to 1986 * to 11% of the volume size or 1.25GB, whichever is less. 
1987 */ 1988 hfsmp->hfs_freespace_notify_dangerlimit = 1989 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, 1990 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION); 1991 hfsmp->hfs_freespace_notify_warninglimit = 1992 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, 1993 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION); 1994 hfsmp->hfs_freespace_notify_desiredlevel = 1995 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize, 1996 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION); 1997 }; 1998 1999 /* Check if the file system exists on virtual device, like disk image */ 2000 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) { 2001 if (isvirtual) { 2002 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE; 2003 } 2004 } 2005 2006 /* do not allow ejectability checks on the root device */ 2007 if (isroot == 0) { 2008 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 && 2009 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) { 2010 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with. 2011 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp); 2012 if (hfsmp->hfs_syncer == NULL) { 2013 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n", 2014 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); 2015 } 2016 } 2017 } 2018 2019#if CONFIG_HFS_MOUNT_UNMAP 2020 /* Enable UNMAPs for embedded SSDs only for now */ 2021 /* 2022 * TODO: Should we enable this for CoreStorage volumes, too? 2023 */ 2024 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { 2025 if (hfsmp->hfs_flags & HFS_UNMAP) { 2026 hfs_unmap_blocks(hfsmp); 2027 } 2028 } 2029#endif 2030 2031 2032#if CONFIG_HFS_ALLOC_RBTREE 2033 /* 2034 * We spawn a thread to create the pair of red-black trees for this volume. 
2035 * However, in so doing, we must be careful to ensure that if this thread is still 2036 * running after mount has finished, it doesn't interfere with an unmount. Specifically, 2037 * we'll need to set a bit that indicates we're in progress building the trees here. 2038 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that 2039 * notifies the tree generation code that an unmount is waiting. Also mark the bit that 2040 * indicates the tree is live and operating. 2041 * 2042 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only). 2043 */ 2044 2045 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { 2046 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED); 2047 2048 /* Initialize EOF counter so that the thread can assume it started at initial values */ 2049 hfsmp->offset_block_end = 0; 2050 InitTree(hfsmp); 2051 2052 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread); 2053 thread_deallocate(allocator_thread); 2054 } 2055 2056#endif 2057 2058 /* 2059 * Start looking for free space to drop below this level and generate a 2060 * warning immediately if needed: 2061 */ 2062 hfsmp->hfs_notification_conditions = 0; 2063 hfs_generate_volume_notifications(hfsmp); 2064 2065 if (ronly == 0) { 2066 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 2067 } 2068 FREE(mdbp, M_TEMP); 2069 return (0); 2070 2071error_exit: 2072 if (bp) 2073 buf_brelse(bp); 2074 if (mdbp) 2075 FREE(mdbp, M_TEMP); 2076 2077 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { 2078 vnode_clearmountedon(hfsmp->jvp); 2079 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? 
FREAD : FREAD|FWRITE, vfs_context_kernel());
		/* NOTE(review): continuation of the hfs_mountfs error path begun on the
		 * previous line — the VNOP_CLOSE above releases the external journal
		 * vnode with the access mode it was opened with. */
		hfsmp->jvp = NULL;
	}
	if (hfsmp) {
		if (hfsmp->hfs_devvp) {
			vnode_rele(hfsmp->hfs_devvp);
		}
		hfs_delete_chash(hfsmp);

		FREE(hfsmp, M_HFSMNT);
		/* Detach the (now freed) hfsmount from the mount point so later
		 * VFS calls cannot reach stale private data. */
		vfs_setfsprivate(mp, NULL);
	}
	return (retval);
}


/*
 * Make a filesystem operational.
 * Nothing to do at the moment.
 */
/* ARGSUSED */
static int
hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
{
	return (0);
}


/*
 * unmount system call
 *
 * Flushes all dirty files and metadata, marks the volume cleanly unmounted
 * (unless runtime corruption was detected), closes the journal, and tears
 * down the hfsmount.  With MNT_FORCE, flush errors are ignored and the
 * unmount proceeds anyway.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;
	int force;
	int started_tr = 0;	/* nonzero once a transaction is open and must be ended */
	int rb_used = 0;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
		    started_tr = 1;
		} else if (!force) {
		    goto err_exit;
		}

		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Hotfiles B-tree, if present and still a system file. */
		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Flush failed: leave the volume marked dirty on disk. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 *	Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close the external journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                   hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                   vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 *	Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

  err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}


/*
 * Return the root of a filesystem.
 */
static int
hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
{
	/* Look up the root directory cnode (kHFSRootFolderID) and return its vnode. */
	return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
}


/*
 * Do operations associated with quotas
 */
#if !QUOTA
/* Quota support compiled out: every quota operation is unsupported. */
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
/*
 * Dispatch a quota subcommand to the corresponding hfs_quota* routine.
 * Non-privileged callers may only issue Q_SYNC, Q_QUOTASTAT, and
 * Q_GETQUOTA for their own uid; everything else requires superuser.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* ~0U means "the calling user's own uid". */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* NOTE(review): a busy mount returns 0 (success) without doing any
	 * work, so callers cannot distinguish "done" from "skipped" —
	 * long-standing behavior, preserved here. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */

/* Subtype is composite of bits */
#define HFS_SUBTYPE_JOURNALED		0x01
#define HFS_SUBTYPE_CASESENSITIVE	0x02
/* bits 2 - 6 reserved */
#define HFS_SUBTYPE_STANDARDHFS		0x80

/*
 * Get file system statistics.
 *
 * Fills in the caller-supplied vfsstatfs from the in-memory VCB:
 * block size/counts, free space, an upper bound on file counts,
 * and the f_fssubtype "flavor" bits defined above.
 */
int
hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
{
	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;
	u_int16_t subtype = 0;

	/* CNIDs remaining before the 32-bit ID space is exhausted. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;

	sbp->f_bsize = (u_int32_t)vcb->blockSize;
	sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
	sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
	sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
	sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
	sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2));  /* max files is constrained by total blocks */
	sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));

	/*
	 * Subtypes (flavors) for HFS
	 *   0:   Mac OS Extended
	 *   1:   Mac OS Extended (Journaled)
	 *   2:   Mac OS Extended (Case Sensitive)
	 *   3:   Mac OS Extended (Case Sensitive, Journaled)
	 *   4 - 127:   Reserved
	 *   128: Mac OS Standard
	 *
	 */
	if (hfsmp->hfs_flags & HFS_STANDARD) {
		subtype = HFS_SUBTYPE_STANDARDHFS;
	} else /* HFS Plus */ {
		if (hfsmp->jnl)
			subtype |= HFS_SUBTYPE_JOURNALED;
		if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
			subtype |= HFS_SUBTYPE_CASESENSITIVE;
	}
	sbp->f_fssubtype = subtype;

	return (0);
}


//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
/*
 * Journal callback: push the primary (and alternate) volume header
 * buffers to disk if they are sitting in the cache as delayed writes.
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t	bp;
	int  retval;
	daddr64_t priIDSector;
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO (device gone) is expected during eject; don't log it. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write only if the buffer is a plain delayed write (not locked). */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}


/* Per-iteration arguments/results for hfs_sync_callback. */
struct hfs_sync_cargs {
        kauth_cred_t cred;	/* caller's credential (captured, not read in the visible callback) */
        struct proc  *p;	/* calling process, passed to hfs_fsync */
        int    waitfor;		/* MNT_WAIT / MNT_NOWAIT style flag for hfs_fsync */
        int    error;		/* last error seen across all vnodes */
};


/*
 * vnode_iterate callback: fsync a vnode if its cnode is modified, has
 * pending timestamp updates, or the vnode has dirty buffers.  Vnodes
 * whose cnode lock cannot be taken are skipped.  Always returns
 * VNODE_RETURNED so iteration continues; errors accumulate in args->error.
 */
static int
hfs_sync_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_sync_cargs *args;
	int error;

	args = (struct hfs_sync_cargs *)cargs;

	if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
		return (VNODE_RETURNED);
	}
	cp = VTOC(vp);

	if ((cp->c_flag & C_MODIFIED) ||
	    (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
	    vnode_hasdirtyblks(vp)) {
		error = hfs_fsync(vp, args->waitfor, 0, args->p);

		if (error)
			args->error = error;
	}
	hfs_unlock(cp);
	return (VNODE_RETURNED);
}



/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
2572 */ 2573int 2574hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) 2575{ 2576 struct proc *p = vfs_context_proc(context); 2577 struct cnode *cp; 2578 struct hfsmount *hfsmp; 2579 ExtendedVCB *vcb; 2580 struct vnode *meta_vp[4]; 2581 int i; 2582 int error, allerror = 0; 2583 struct hfs_sync_cargs args; 2584 2585 hfsmp = VFSTOHFS(mp); 2586 2587 /* 2588 * hfs_changefs might be manipulating vnodes so back off 2589 */ 2590 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS) 2591 return (0); 2592 2593 if (hfsmp->hfs_flags & HFS_READ_ONLY) 2594 return (EROFS); 2595 2596 /* skip over frozen volumes */ 2597 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync)) 2598 return 0; 2599 2600 args.cred = kauth_cred_get(); 2601 args.waitfor = waitfor; 2602 args.p = p; 2603 args.error = 0; 2604 /* 2605 * hfs_sync_callback will be called for each vnode 2606 * hung off of this mount point... the vnode will be 2607 * properly referenced and unreferenced around the callback 2608 */ 2609 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args); 2610 2611 if (args.error) 2612 allerror = args.error; 2613 2614 vcb = HFSTOVCB(hfsmp); 2615 2616 meta_vp[0] = vcb->extentsRefNum; 2617 meta_vp[1] = vcb->catalogRefNum; 2618 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */ 2619 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */ 2620 2621 /* Now sync our three metadata files */ 2622 for (i = 0; i < 4; ++i) { 2623 struct vnode *btvp; 2624 2625 btvp = meta_vp[i];; 2626 if ((btvp==0) || (vnode_mount(btvp) != mp)) 2627 continue; 2628 2629 /* XXX use hfs_systemfile_lock instead ? 
*/ 2630 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK); 2631 cp = VTOC(btvp); 2632 2633 if (((cp->c_flag & C_MODIFIED) == 0) && 2634 (cp->c_touch_acctime == 0) && 2635 (cp->c_touch_chgtime == 0) && 2636 (cp->c_touch_modtime == 0) && 2637 vnode_hasdirtyblks(btvp) == 0) { 2638 hfs_unlock(VTOC(btvp)); 2639 continue; 2640 } 2641 error = vnode_get(btvp); 2642 if (error) { 2643 hfs_unlock(VTOC(btvp)); 2644 continue; 2645 } 2646 if ((error = hfs_fsync(btvp, waitfor, 0, p))) 2647 allerror = error; 2648 2649 hfs_unlock(cp); 2650 vnode_put(btvp); 2651 }; 2652 2653 /* 2654 * Force stale file system control information to be flushed. 2655 */ 2656 if (vcb->vcbSigWord == kHFSSigWord) { 2657 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) { 2658 allerror = error; 2659 } 2660 } 2661#if QUOTA 2662 hfs_qsync(mp); 2663#endif /* QUOTA */ 2664 2665 hfs_hotfilesync(hfsmp, vfs_context_kernel()); 2666 2667 /* 2668 * Write back modified superblock. 2669 */ 2670 if (IsVCBDirty(vcb)) { 2671 error = hfs_flushvolumeheader(hfsmp, waitfor, 0); 2672 if (error) 2673 allerror = error; 2674 } 2675 2676 if (hfsmp->jnl) { 2677 hfs_journal_flush(hfsmp, FALSE); 2678 } 2679 2680 { 2681 clock_sec_t secs; 2682 clock_usec_t usecs; 2683 uint64_t now; 2684 2685 clock_get_calendar_microtime(&secs, &usecs); 2686 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; 2687 hfsmp->hfs_last_sync_time = now; 2688 } 2689 2690 lck_rw_unlock_shared(&hfsmp->hfs_insync); 2691 return (allerror); 2692} 2693 2694 2695/* 2696 * File handle to vnode 2697 * 2698 * Have to be really careful about stale file handles: 2699 * - check that the cnode id is valid 2700 * - call hfs_vget() to get the locked cnode 2701 * - check for an unallocated cnode (i_mode == 0) 2702 * - check that the given client host has export rights and return 2703 * those rights via. 
 *   exflagsp and credanonp
 */
static int
hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
{
	struct hfsfid *hfsfhp;
	struct vnode *nvp;
	int result;

	*vpp = NULL;
	hfsfhp = (struct hfsfid *)fhp;

	/* Reject handles too small to contain an hfsfid. */
	if (fhlen < (int)sizeof(struct hfsfid))
		return (EINVAL);

	result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
	if (result) {
		/* A missing cnode means the handle refers to a deleted file. */
		if (result == ENOENT)
			result = ESTALE;
		return result;
	}

	/*
	 * We used to use the create time as the gen id of the file handle,
	 * but it is not static enough because it can change at any point
	 * via system calls.  We still don't have another volume ID or other
	 * unique identifier to use for a generation ID across reboots that
	 * persists until the file is removed.  Using only the CNID exposes
	 * us to the potential wrap-around case, but as of 2/2008, it would take
	 * over 2 months to wrap around if the machine did nothing but allocate
	 * CNIDs.  Using some kind of wrap counter would only be effective if
	 * each file had the wrap counter associated with it.  For now,
	 * we use only the CNID to identify the file as it's good enough.
	 */

	*vpp = nvp;

	/* hfs_vget returned the cnode locked; drop the lock before return. */
	hfs_unlock(VTOC(nvp));
	return (0);
}


/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
static int
hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
{
	struct cnode *cp;
	struct hfsfid *hfsfhp;

	if (ISHFS(VTOVCB(vp)))
		return (ENOTSUP);	/* hfs standard is not exportable */

	if (*fhlenp < (int)sizeof(struct hfsfid))
		return (EOVERFLOW);

	cp = VTOC(vp);
	hfsfhp = (struct hfsfid *)fhp;
	/* only the CNID is used to identify the file now */
	hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
	/* The gen field deliberately repeats the CNID (see comment in hfs_fhtovp). */
	hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
	*fhlenp = sizeof(struct hfsfid);

	return (0);
}


/*
 * Initial HFS filesystems, done only once.
 */
static int
hfs_init(__unused struct vfsconf *vfsp)
{
	/* Guard so the one-time setup below runs only on the first call. */
	static int done = 0;

	if (done)
		return (0);
	done = 1;
	hfs_chashinit();
	hfs_converterinit();

	BTReserveSetup();


	/* Allocate the lock attributes and groups used by all HFS mounts. */
	hfs_lock_attr    = lck_attr_alloc_init();
	hfs_group_attr   = lck_grp_attr_alloc_init();
	hfs_mutex_group  = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
	hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
	hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);

#if HFS_COMPRESSION
	decmpfs_init();
#endif

	return (0);
}

/*
 * Validate that vp is the root vnode of a mounted HFS Plus volume and
 * return its hfsmount.  (Function body continues past this span.)
 */
static int
hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
{
	struct hfsmount * hfsmp;
	char fstypename[MFSNAMELEN];

	if (vp == NULL)
		return (EINVAL);

	if (!vnode_isvroot(vp))
		return (EINVAL);

	vnode_vfsname(vp, fstypename);
	if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
		return (EINVAL);

	hfsmp = VTOHFS(vp);

	/* Plain (pre-HFS Plus) volumes are rejected. */
	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		return (EINVAL);

*hfsmpp = hfsmp; 2824 2825 return (0); 2826} 2827 2828// XXXdbg 2829#include <sys/filedesc.h> 2830 2831/* 2832 * HFS filesystem related variables. 2833 */ 2834int 2835hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, 2836 user_addr_t newp, size_t newlen, vfs_context_t context) 2837{ 2838 struct proc *p = vfs_context_proc(context); 2839 int error; 2840 struct hfsmount *hfsmp; 2841 2842 /* all sysctl names at this level are terminal */ 2843 2844 if (name[0] == HFS_ENCODINGBIAS) { 2845 int bias; 2846 2847 bias = hfs_getencodingbias(); 2848 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias); 2849 if (error == 0 && newp) 2850 hfs_setencodingbias(bias); 2851 return (error); 2852 2853 } else if (name[0] == HFS_EXTEND_FS) { 2854 u_int64_t newsize; 2855 vnode_t vp = vfs_context_cwd(context); 2856 2857 if (newp == USER_ADDR_NULL || vp == NULLVP) 2858 return (EINVAL); 2859 if ((error = hfs_getmountpoint(vp, &hfsmp))) 2860 return (error); 2861 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize); 2862 if (error) 2863 return (error); 2864 2865 error = hfs_extendfs(hfsmp, newsize, context); 2866 return (error); 2867 2868 } else if (name[0] == HFS_ENCODINGHINT) { 2869 size_t bufsize; 2870 size_t bytes; 2871 u_int32_t hint; 2872 u_int16_t *unicode_name = NULL; 2873 char *filename = NULL; 2874 2875 if ((newlen <= 0) || (newlen > MAXPATHLEN)) 2876 return (EINVAL); 2877 2878 bufsize = MAX(newlen * 3, MAXPATHLEN); 2879 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK); 2880 if (filename == NULL) { 2881 error = ENOMEM; 2882 goto encodinghint_exit; 2883 } 2884 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK); 2885 if (filename == NULL) { 2886 error = ENOMEM; 2887 goto encodinghint_exit; 2888 } 2889 2890 error = copyin(newp, (caddr_t)filename, newlen); 2891 if (error == 0) { 2892 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name, 2893 &bytes, bufsize, 0, UTF_DECOMPOSED); 2894 if (error == 0) { 2895 hint = 
hfs_pickencoding(unicode_name, bytes / 2); 2896 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint); 2897 } 2898 } 2899 2900encodinghint_exit: 2901 if (unicode_name) 2902 FREE(unicode_name, M_TEMP); 2903 if (filename) 2904 FREE(filename, M_TEMP); 2905 return (error); 2906 2907 } else if (name[0] == HFS_ENABLE_JOURNALING) { 2908 // make the file system journaled... 2909 vnode_t vp = vfs_context_cwd(context); 2910 vnode_t jvp; 2911 ExtendedVCB *vcb; 2912 struct cat_attr jnl_attr, jinfo_attr; 2913 struct cat_fork jnl_fork, jinfo_fork; 2914 void *jnl = NULL; 2915 int lockflags; 2916 2917 /* Only root can enable journaling */ 2918 if (!is_suser()) { 2919 return (EPERM); 2920 } 2921 if (vp == NULLVP) 2922 return EINVAL; 2923 2924 hfsmp = VTOHFS(vp); 2925 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2926 return EROFS; 2927 } 2928 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) { 2929 printf("hfs: can't make a plain hfs volume journaled.\n"); 2930 return EINVAL; 2931 } 2932 2933 if (hfsmp->jnl) { 2934 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp)); 2935 return EAGAIN; 2936 } 2937 2938 vcb = HFSTOVCB(hfsmp); 2939 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); 2940 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 || 2941 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) { 2942 2943 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n"); 2944 hfs_systemfile_unlock(hfsmp, lockflags); 2945 return EINVAL; 2946 } 2947 hfs_systemfile_unlock(hfsmp, lockflags); 2948 2949 // make sure these both exist! 
2950 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0 2951 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) { 2952 2953 return EINVAL; 2954 } 2955 2956 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context); 2957 2958 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", 2959 (off_t)name[2], (off_t)name[3]); 2960 2961 // 2962 // XXXdbg - note that currently (Sept, 08) hfs_util does not support 2963 // enabling the journal on a separate device so it is safe 2964 // to just copy hfs_devvp here. If hfs_util gets the ability 2965 // to dynamically enable the journal on a separate device then 2966 // we will have to do the same thing as hfs_early_journal_init() 2967 // to locate and open the journal device. 2968 // 2969 jvp = hfsmp->hfs_devvp; 2970 jnl = journal_create(jvp, 2971 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize 2972 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, 2973 (off_t)((unsigned)name[3]), 2974 hfsmp->hfs_devvp, 2975 hfsmp->hfs_logical_block_size, 2976 0, 2977 0, 2978 hfs_sync_metadata, hfsmp->hfs_mp); 2979 2980 /* 2981 * Set up the trim callback function so that we can add 2982 * recently freed extents to the free extent cache once 2983 * the transaction that freed them is written to the 2984 * journal on disk. 2985 */ 2986 if (jnl) 2987 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp); 2988 2989 if (jnl == NULL) { 2990 printf("hfs: FAILED to create the journal!\n"); 2991 if (jvp && jvp != hfsmp->hfs_devvp) { 2992 vnode_clearmountedon(jvp); 2993 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); 2994 } 2995 jvp = NULL; 2996 2997 return EINVAL; 2998 } 2999 3000 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 3001 3002 /* 3003 * Flush all dirty metadata buffers. 
3004 */ 3005 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl"); 3006 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl"); 3007 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl"); 3008 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl"); 3009 if (hfsmp->hfs_attribute_vp) 3010 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl"); 3011 3012 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1]; 3013 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask; 3014 hfsmp->jvp = jvp; 3015 hfsmp->jnl = jnl; 3016 3017 // save this off for the hack-y check in hfs_remove() 3018 hfsmp->jnl_start = (u_int32_t)name[2]; 3019 hfsmp->jnl_size = (off_t)((unsigned)name[3]); 3020 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid; 3021 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid; 3022 3023 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); 3024 3025 hfs_unlock_global (hfsmp); 3026 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); 3027 3028 { 3029 fsid_t fsid; 3030 3031 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; 3032 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); 3033 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); 3034 } 3035 return 0; 3036 } else if (name[0] == HFS_DISABLE_JOURNALING) { 3037 // clear the journaling bit 3038 vnode_t vp = vfs_context_cwd(context); 3039 3040 /* Only root can disable journaling */ 3041 if (!is_suser()) { 3042 return (EPERM); 3043 } 3044 if (vp == NULLVP) 3045 return EINVAL; 3046 3047 hfsmp = VTOHFS(vp); 3048 3049 /* 3050 * Disabling journaling is disallowed on volumes with directory hard links 3051 * because we have not tested the relevant code path. 
3052 */ 3053 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){ 3054 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n"); 3055 return EPERM; 3056 } 3057 3058 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp)); 3059 3060 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 3061 3062 // Lights out for you buddy! 3063 journal_close(hfsmp->jnl); 3064 hfsmp->jnl = NULL; 3065 3066 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { 3067 vnode_clearmountedon(hfsmp->jvp); 3068 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); 3069 vnode_put(hfsmp->jvp); 3070 } 3071 hfsmp->jvp = NULL; 3072 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); 3073 hfsmp->jnl_start = 0; 3074 hfsmp->hfs_jnlinfoblkid = 0; 3075 hfsmp->hfs_jnlfileid = 0; 3076 3077 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask; 3078 3079 hfs_unlock_global (hfsmp); 3080 3081 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); 3082 3083 { 3084 fsid_t fsid; 3085 3086 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; 3087 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); 3088 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); 3089 } 3090 return 0; 3091 } else if (name[0] == HFS_GET_JOURNAL_INFO) { 3092 vnode_t vp = vfs_context_cwd(context); 3093 off_t jnl_start, jnl_size; 3094 3095 if (vp == NULLVP) 3096 return EINVAL; 3097 3098 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! 
*/ 3099 if (proc_is64bit(current_proc())) 3100 return EINVAL; 3101 3102 hfsmp = VTOHFS(vp); 3103 if (hfsmp->jnl == NULL) { 3104 jnl_start = 0; 3105 jnl_size = 0; 3106 } else { 3107 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; 3108 jnl_size = (off_t)hfsmp->jnl_size; 3109 } 3110 3111 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) { 3112 return error; 3113 } 3114 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) { 3115 return error; 3116 } 3117 3118 return 0; 3119 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) { 3120 3121 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); 3122 3123 } else if (name[0] == VFS_CTL_QUERY) { 3124 struct sysctl_req *req; 3125 union union_vfsidctl vc; 3126 struct mount *mp; 3127 struct vfsquery vq; 3128 3129 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ 3130 3131 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? 
sizeof(vc.vc64):sizeof(vc.vc32)); 3132 if (error) return (error); 3133 3134 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ 3135 if (mp == NULL) return (ENOENT); 3136 3137 hfsmp = VFSTOHFS(mp); 3138 bzero(&vq, sizeof(vq)); 3139 vq.vq_flags = hfsmp->hfs_notification_conditions; 3140 return SYSCTL_OUT(req, &vq, sizeof(vq));; 3141 } else if (name[0] == HFS_REPLAY_JOURNAL) { 3142 vnode_t devvp = NULL; 3143 int device_fd; 3144 if (namelen != 2) { 3145 return (EINVAL); 3146 } 3147 device_fd = name[1]; 3148 error = file_vnode(device_fd, &devvp); 3149 if (error) { 3150 return error; 3151 } 3152 error = vnode_getwithref(devvp); 3153 if (error) { 3154 file_drop(device_fd); 3155 return error; 3156 } 3157 error = hfs_journal_replay(devvp, context); 3158 file_drop(device_fd); 3159 vnode_put(devvp); 3160 return error; 3161 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) { 3162 hfs_resize_debug = 1; 3163 printf ("hfs_sysctl: Enabled volume resize debugging.\n"); 3164 return 0; 3165 } 3166 3167 return (ENOTSUP); 3168} 3169 3170/* 3171 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support 3172 * the build_path ioctl. We use it to leverage the code below that updates 3173 * the origin list cache if necessary 3174 */ 3175 3176int 3177hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) 3178{ 3179 int error; 3180 int lockflags; 3181 struct hfsmount *hfsmp; 3182 3183 hfsmp = VFSTOHFS(mp); 3184 3185 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0); 3186 if (error) 3187 return (error); 3188 3189 /* 3190 * ADLs may need to have their origin state updated 3191 * since build_path needs a valid parent. The same is true 3192 * for hardlinked files as well. There isn't a race window here 3193 * in re-acquiring the cnode lock since we aren't pulling any data 3194 * out of the cnode; instead, we're going to the catalog. 
3195 */ 3196 if ((VTOC(*vpp)->c_flag & C_HARDLINK) && 3197 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { 3198 cnode_t *cp = VTOC(*vpp); 3199 struct cat_desc cdesc; 3200 3201 if (!hfs_haslinkorigin(cp)) { 3202 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 3203 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); 3204 hfs_systemfile_unlock(hfsmp, lockflags); 3205 if (error == 0) { 3206 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && 3207 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { 3208 hfs_savelinkorigin(cp, cdesc.cd_parentcnid); 3209 } 3210 cat_releasedesc(&cdesc); 3211 } 3212 } 3213 hfs_unlock(cp); 3214 } 3215 return (0); 3216} 3217 3218 3219/* 3220 * Look up an HFS object by ID. 3221 * 3222 * The object is returned with an iocount reference and the cnode locked. 3223 * 3224 * If the object is a file then it will represent the data fork. 3225 */ 3226int 3227hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted) 3228{ 3229 struct vnode *vp = NULLVP; 3230 struct cat_desc cndesc; 3231 struct cat_attr cnattr; 3232 struct cat_fork cnfork; 3233 u_int32_t linkref = 0; 3234 int error; 3235 3236 /* Check for cnids that should't be exported. */ 3237 if ((cnid < kHFSFirstUserCatalogNodeID) && 3238 (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) { 3239 return (ENOENT); 3240 } 3241 /* Don't export our private directories. 
*/ 3242 if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || 3243 cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { 3244 return (ENOENT); 3245 } 3246 /* 3247 * Check the hash first 3248 */ 3249 vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted); 3250 if (vp) { 3251 *vpp = vp; 3252 return(0); 3253 } 3254 3255 bzero(&cndesc, sizeof(cndesc)); 3256 bzero(&cnattr, sizeof(cnattr)); 3257 bzero(&cnfork, sizeof(cnfork)); 3258 3259 /* 3260 * Not in hash, lookup in catalog 3261 */ 3262 if (cnid == kHFSRootParentID) { 3263 static char hfs_rootname[] = "/"; 3264 3265 cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0]; 3266 cndesc.cd_namelen = 1; 3267 cndesc.cd_parentcnid = kHFSRootParentID; 3268 cndesc.cd_cnid = kHFSRootFolderID; 3269 cndesc.cd_flags = CD_ISDIR; 3270 3271 cnattr.ca_fileid = kHFSRootFolderID; 3272 cnattr.ca_linkcount = 1; 3273 cnattr.ca_entries = 1; 3274 cnattr.ca_dircount = 1; 3275 cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO); 3276 } else { 3277 int lockflags; 3278 cnid_t pid; 3279 const char *nameptr; 3280 3281 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 3282 error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork); 3283 hfs_systemfile_unlock(hfsmp, lockflags); 3284 3285 if (error) { 3286 *vpp = NULL; 3287 return (error); 3288 } 3289 3290 /* 3291 * Check for a raw hardlink inode and save its linkref. 
3292 */ 3293 pid = cndesc.cd_parentcnid; 3294 nameptr = (const char *)cndesc.cd_nameptr; 3295 3296 if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && 3297 (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) { 3298 linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10); 3299 3300 } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && 3301 (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) { 3302 linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10); 3303 3304 } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && 3305 (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) { 3306 *vpp = NULL; 3307 cat_releasedesc(&cndesc); 3308 return (ENOENT); /* open unlinked file */ 3309 } 3310 } 3311 3312 /* 3313 * Finish initializing cnode descriptor for hardlinks. 3314 * 3315 * We need a valid name and parent for reverse lookups. 3316 */ 3317 if (linkref) { 3318 cnid_t nextlinkid; 3319 cnid_t prevlinkid; 3320 struct cat_desc linkdesc; 3321 int lockflags; 3322 3323 cnattr.ca_linkref = linkref; 3324 3325 /* 3326 * Pick up the first link in the chain and get a descriptor for it. 3327 * This allows blind volfs paths to work for hardlinks. 
3328 */ 3329 if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) && 3330 (nextlinkid != 0)) { 3331 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 3332 error = cat_findname(hfsmp, nextlinkid, &linkdesc); 3333 hfs_systemfile_unlock(hfsmp, lockflags); 3334 if (error == 0) { 3335 cat_releasedesc(&cndesc); 3336 bcopy(&linkdesc, &cndesc, sizeof(linkdesc)); 3337 } 3338 } 3339 } 3340 3341 if (linkref) { 3342 int newvnode_flags = 0; 3343 3344 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, 3345 &cnfork, &vp, &newvnode_flags); 3346 if (error == 0) { 3347 VTOC(vp)->c_flag |= C_HARDLINK; 3348 vnode_setmultipath(vp); 3349 } 3350 } else { 3351 struct componentname cn; 3352 int newvnode_flags = 0; 3353 3354 /* Supply hfs_getnewvnode with a component name. */ 3355 MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); 3356 cn.cn_nameiop = LOOKUP; 3357 cn.cn_flags = ISLASTCN | HASBUF; 3358 cn.cn_context = NULL; 3359 cn.cn_pnlen = MAXPATHLEN; 3360 cn.cn_nameptr = cn.cn_pnbuf; 3361 cn.cn_namelen = cndesc.cd_namelen; 3362 cn.cn_hash = 0; 3363 cn.cn_consume = 0; 3364 bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1); 3365 3366 error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, 3367 &cnfork, &vp, &newvnode_flags); 3368 3369 if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) { 3370 hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); 3371 } 3372 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); 3373 } 3374 cat_releasedesc(&cndesc); 3375 3376 *vpp = vp; 3377 if (vp && skiplock) { 3378 hfs_unlock(VTOC(vp)); 3379 } 3380 return (error); 3381} 3382 3383 3384/* 3385 * Flush out all the files in a filesystem. 
3386 */ 3387static int 3388#if QUOTA 3389hfs_flushfiles(struct mount *mp, int flags, struct proc *p) 3390#else 3391hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) 3392#endif /* QUOTA */ 3393{ 3394 struct hfsmount *hfsmp; 3395 struct vnode *skipvp = NULLVP; 3396 int error; 3397#if QUOTA 3398 int quotafilecnt; 3399 int i; 3400#endif 3401 3402 hfsmp = VFSTOHFS(mp); 3403 3404#if QUOTA 3405 /* 3406 * The open quota files have an indirect reference on 3407 * the root directory vnode. We must account for this 3408 * extra reference when doing the intial vflush. 3409 */ 3410 quotafilecnt = 0; 3411 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { 3412 3413 /* Find out how many quota files we have open. */ 3414 for (i = 0; i < MAXQUOTAS; i++) { 3415 if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP) 3416 ++quotafilecnt; 3417 } 3418 3419 /* Obtain the root vnode so we can skip over it. */ 3420 skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0); 3421 } 3422#endif /* QUOTA */ 3423 3424 error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags); 3425 if (error != 0) 3426 return(error); 3427 3428 error = vflush(mp, skipvp, SKIPSYSTEM | flags); 3429 3430#if QUOTA 3431 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { 3432 if (skipvp) { 3433 /* 3434 * See if there are additional references on the 3435 * root vp besides the ones obtained from the open 3436 * quota files and the hfs_chash_getvnode call above. 
3437 */ 3438 if ((error == 0) && 3439 (vnode_isinuse(skipvp, quotafilecnt))) { 3440 error = EBUSY; /* root directory is still open */ 3441 } 3442 hfs_unlock(VTOC(skipvp)); 3443 vnode_put(skipvp); 3444 } 3445 if (error && (flags & FORCECLOSE) == 0) 3446 return (error); 3447 3448 for (i = 0; i < MAXQUOTAS; i++) { 3449 if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP) 3450 continue; 3451 hfs_quotaoff(p, mp, i); 3452 } 3453 error = vflush(mp, NULLVP, SKIPSYSTEM | flags); 3454 } 3455#endif /* QUOTA */ 3456 3457 return (error); 3458} 3459 3460/* 3461 * Update volume encoding bitmap (HFS Plus only) 3462 */ 3463__private_extern__ 3464void 3465hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) 3466{ 3467#define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */ 3468#define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */ 3469 3470 u_int32_t index; 3471 3472 switch (encoding) { 3473 case kTextEncodingMacUkrainian: 3474 index = kIndexMacUkrainian; 3475 break; 3476 case kTextEncodingMacFarsi: 3477 index = kIndexMacFarsi; 3478 break; 3479 default: 3480 index = encoding; 3481 break; 3482 } 3483 3484 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) { 3485 HFS_MOUNT_LOCK(hfsmp, TRUE) 3486 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index); 3487 MarkVCBDirty(hfsmp); 3488 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 3489 } 3490} 3491 3492/* 3493 * Update volume stats 3494 * 3495 * On journal volumes this will cause a volume header flush 3496 */ 3497int 3498hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) 3499{ 3500 struct timeval tv; 3501 3502 microtime(&tv); 3503 3504 lck_mtx_lock(&hfsmp->hfs_mutex); 3505 3506 MarkVCBDirty(hfsmp); 3507 hfsmp->hfs_mtime = tv.tv_sec; 3508 3509 switch (op) { 3510 case VOL_UPDATE: 3511 break; 3512 case VOL_MKDIR: 3513 if (hfsmp->hfs_dircount != 0xFFFFFFFF) 3514 ++hfsmp->hfs_dircount; 3515 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) 3516 ++hfsmp->vcbNmRtDirs; 3517 break; 3518 case VOL_RMDIR: 3519 if 
(hfsmp->hfs_dircount != 0) 3520 --hfsmp->hfs_dircount; 3521 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) 3522 --hfsmp->vcbNmRtDirs; 3523 break; 3524 case VOL_MKFILE: 3525 if (hfsmp->hfs_filecount != 0xFFFFFFFF) 3526 ++hfsmp->hfs_filecount; 3527 if (inroot && hfsmp->vcbNmFls != 0xFFFF) 3528 ++hfsmp->vcbNmFls; 3529 break; 3530 case VOL_RMFILE: 3531 if (hfsmp->hfs_filecount != 0) 3532 --hfsmp->hfs_filecount; 3533 if (inroot && hfsmp->vcbNmFls != 0xFFFF) 3534 --hfsmp->vcbNmFls; 3535 break; 3536 } 3537 3538 lck_mtx_unlock(&hfsmp->hfs_mutex); 3539 3540 if (hfsmp->jnl) { 3541 hfs_flushvolumeheader(hfsmp, 0, 0); 3542 } 3543 3544 return (0); 3545} 3546 3547 3548static int 3549hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) 3550{ 3551 ExtendedVCB *vcb = HFSTOVCB(hfsmp); 3552 struct filefork *fp; 3553 HFSMasterDirectoryBlock *mdb; 3554 struct buf *bp = NULL; 3555 int retval; 3556 int sector_size; 3557 ByteCount namelen; 3558 3559 sector_size = hfsmp->hfs_logical_block_size; 3560 retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp); 3561 if (retval) { 3562 if (bp) 3563 buf_brelse(bp); 3564 return retval; 3565 } 3566 3567 lck_mtx_lock(&hfsmp->hfs_mutex); 3568 3569 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size)); 3570 3571 mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime))); 3572 mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod))); 3573 mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb); 3574 mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls); 3575 mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation); 3576 mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz); 3577 mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID); 3578 mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks); 3579 3580 namelen = strlen((char *)vcb->vcbVN); 3581 retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN); 3582 /* Retry with MacRoman in case that's how it was exported. 
*/ 3583 if (retval) 3584 retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN); 3585 3586 mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp))); 3587 mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt); 3588 mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs); 3589 mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt); 3590 mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt); 3591 3592 bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo)); 3593 3594 fp = VTOF(vcb->extentsRefNum); 3595 mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock); 3596 mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount); 3597 mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock); 3598 mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount); 3599 mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock); 3600 mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); 3601 mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); 3602 mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize); 3603 FTOC(fp)->c_flag &= ~C_MODIFIED; 3604 3605 fp = VTOF(vcb->catalogRefNum); 3606 mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock); 3607 mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount); 3608 mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock); 3609 mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount); 3610 mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock); 3611 mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); 3612 mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); 3613 mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize); 3614 FTOC(fp)->c_flag &= ~C_MODIFIED; 3615 3616 MarkVCBClean( vcb ); 3617 3618 lck_mtx_unlock(&hfsmp->hfs_mutex); 3619 3620 /* If requested, flush out the alternate MDB */ 3621 if (altflush) { 3622 struct buf *alt_bp = NULL; 3623 3624 if 
(buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) { 3625 bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize); 3626 3627 (void) VNOP_BWRITE(alt_bp); 3628 } else if (alt_bp) 3629 buf_brelse(alt_bp); 3630 } 3631 3632 if (waitfor != MNT_WAIT) 3633 buf_bawrite(bp); 3634 else 3635 retval = VNOP_BWRITE(bp); 3636 3637 return (retval); 3638} 3639 3640/* 3641 * Flush any dirty in-memory mount data to the on-disk 3642 * volume header. 3643 * 3644 * Note: the on-disk volume signature is intentionally 3645 * not flushed since the on-disk "H+" and "HX" signatures 3646 * are always stored in-memory as "H+". 3647 */ 3648int 3649hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) 3650{ 3651 ExtendedVCB *vcb = HFSTOVCB(hfsmp); 3652 struct filefork *fp; 3653 HFSPlusVolumeHeader *volumeHeader, *altVH; 3654 int retval; 3655 struct buf *bp, *alt_bp; 3656 int i; 3657 daddr64_t priIDSector; 3658 int critical; 3659 u_int16_t signature; 3660 u_int16_t hfsversion; 3661 3662 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 3663 return(0); 3664 } 3665 if (hfsmp->hfs_flags & HFS_STANDARD) { 3666 return hfs_flushMDB(hfsmp, waitfor, altflush); 3667 } 3668 critical = altflush; 3669 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + 3670 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); 3671 3672 if (hfs_start_transaction(hfsmp) != 0) { 3673 return EINVAL; 3674 } 3675 3676 bp = NULL; 3677 alt_bp = NULL; 3678 3679 retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 3680 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), 3681 hfsmp->hfs_physical_block_size, NOCRED, &bp); 3682 if (retval) { 3683 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); 3684 goto err_exit; 3685 } 3686 3687 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + 3688 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); 3689 3690 /* 3691 * Sanity check what we just read. 
If it's bad, try the alternate 3692 * instead. 3693 */ 3694 signature = SWAP_BE16 (volumeHeader->signature); 3695 hfsversion = SWAP_BE16 (volumeHeader->version); 3696 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || 3697 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || 3698 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { 3699 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n", 3700 vcb->vcbVN, signature, hfsversion, 3701 SWAP_BE32 (volumeHeader->blockSize), 3702 hfsmp->hfs_alt_id_sector ? "; trying alternate" : ""); 3703 hfs_mark_volume_inconsistent(hfsmp); 3704 3705 if (hfsmp->hfs_alt_id_sector) { 3706 retval = buf_meta_bread(hfsmp->hfs_devvp, 3707 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), 3708 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); 3709 if (retval) { 3710 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); 3711 goto err_exit; 3712 } 3713 3714 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) + 3715 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)); 3716 signature = SWAP_BE16(altVH->signature); 3717 hfsversion = SWAP_BE16(altVH->version); 3718 3719 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || 3720 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) || 3721 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) { 3722 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", 3723 vcb->vcbVN, signature, hfsversion, 3724 SWAP_BE32(altVH->blockSize)); 3725 retval = EIO; 3726 goto err_exit; 3727 } 3728 3729 /* The alternate is plausible, so use it. */ 3730 bcopy(altVH, volumeHeader, kMDBSize); 3731 buf_brelse(alt_bp); 3732 alt_bp = NULL; 3733 } else { 3734 /* No alternate VH, nothing more we can do. 
*/ 3735 retval = EIO; 3736 goto err_exit; 3737 } 3738 } 3739 3740 if (hfsmp->jnl) { 3741 journal_modify_block_start(hfsmp->jnl, bp); 3742 } 3743 3744 /* 3745 * For embedded HFS+ volumes, update create date if it changed 3746 * (ie from a setattrlist call) 3747 */ 3748 if ((vcb->hfsPlusIOPosOffset != 0) && 3749 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) { 3750 struct buf *bp2; 3751 HFSMasterDirectoryBlock *mdb; 3752 3753 retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 3754 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys), 3755 hfsmp->hfs_physical_block_size, NOCRED, &bp2); 3756 if (retval) { 3757 if (bp2) 3758 buf_brelse(bp2); 3759 retval = 0; 3760 } else { 3761 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) + 3762 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); 3763 3764 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate ) 3765 { 3766 if (hfsmp->jnl) { 3767 journal_modify_block_start(hfsmp->jnl, bp2); 3768 } 3769 3770 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */ 3771 3772 if (hfsmp->jnl) { 3773 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL); 3774 } else { 3775 (void) VNOP_BWRITE(bp2); /* write out the changes */ 3776 } 3777 } 3778 else 3779 { 3780 buf_brelse(bp2); /* just release it */ 3781 } 3782 } 3783 } 3784 3785 lck_mtx_lock(&hfsmp->hfs_mutex); 3786 3787 /* Note: only update the lower 16 bits worth of attributes */ 3788 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb); 3789 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock); 3790 if (hfsmp->jnl) { 3791 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion); 3792 } else { 3793 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion); 3794 } 3795 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */ 3796 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod)); 3797 volumeHeader->backupDate = SWAP_BE32 
(to_hfs_time(vcb->vcbVolBkUp)); 3798 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt); 3799 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt); 3800 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks); 3801 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks); 3802 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation); 3803 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz); 3804 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz); 3805 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID); 3806 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt); 3807 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap); 3808 3809 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) { 3810 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)); 3811 critical = 1; 3812 } 3813 3814 /* 3815 * System files are only dirty when altflush is set. 3816 */ 3817 if (altflush == 0) { 3818 goto done; 3819 } 3820 3821 /* Sync Extents over-flow file meta data */ 3822 fp = VTOF(vcb->extentsRefNum); 3823 if (FTOC(fp)->c_flag & C_MODIFIED) { 3824 for (i = 0; i < kHFSPlusExtentDensity; i++) { 3825 volumeHeader->extentsFile.extents[i].startBlock = 3826 SWAP_BE32 (fp->ff_extents[i].startBlock); 3827 volumeHeader->extentsFile.extents[i].blockCount = 3828 SWAP_BE32 (fp->ff_extents[i].blockCount); 3829 } 3830 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size); 3831 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); 3832 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); 3833 FTOC(fp)->c_flag &= ~C_MODIFIED; 3834 } 3835 3836 /* Sync Catalog file meta data */ 3837 fp = VTOF(vcb->catalogRefNum); 3838 if (FTOC(fp)->c_flag & C_MODIFIED) { 3839 for (i = 0; i < kHFSPlusExtentDensity; i++) { 3840 volumeHeader->catalogFile.extents[i].startBlock = 3841 SWAP_BE32 (fp->ff_extents[i].startBlock); 3842 volumeHeader->catalogFile.extents[i].blockCount = 
3843 SWAP_BE32 (fp->ff_extents[i].blockCount); 3844 } 3845 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size); 3846 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); 3847 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); 3848 FTOC(fp)->c_flag &= ~C_MODIFIED; 3849 } 3850 3851 /* Sync Allocation file meta data */ 3852 fp = VTOF(vcb->allocationsRefNum); 3853 if (FTOC(fp)->c_flag & C_MODIFIED) { 3854 for (i = 0; i < kHFSPlusExtentDensity; i++) { 3855 volumeHeader->allocationFile.extents[i].startBlock = 3856 SWAP_BE32 (fp->ff_extents[i].startBlock); 3857 volumeHeader->allocationFile.extents[i].blockCount = 3858 SWAP_BE32 (fp->ff_extents[i].blockCount); 3859 } 3860 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size); 3861 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); 3862 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); 3863 FTOC(fp)->c_flag &= ~C_MODIFIED; 3864 } 3865 3866 /* Sync Attribute file meta data */ 3867 if (hfsmp->hfs_attribute_vp) { 3868 fp = VTOF(hfsmp->hfs_attribute_vp); 3869 for (i = 0; i < kHFSPlusExtentDensity; i++) { 3870 volumeHeader->attributesFile.extents[i].startBlock = 3871 SWAP_BE32 (fp->ff_extents[i].startBlock); 3872 volumeHeader->attributesFile.extents[i].blockCount = 3873 SWAP_BE32 (fp->ff_extents[i].blockCount); 3874 } 3875 FTOC(fp)->c_flag &= ~C_MODIFIED; 3876 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size); 3877 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); 3878 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); 3879 } 3880 3881 /* Sync Startup file meta data */ 3882 if (hfsmp->hfs_startup_vp) { 3883 fp = VTOF(hfsmp->hfs_startup_vp); 3884 if (FTOC(fp)->c_flag & C_MODIFIED) { 3885 for (i = 0; i < kHFSPlusExtentDensity; i++) { 3886 volumeHeader->startupFile.extents[i].startBlock = 3887 SWAP_BE32 (fp->ff_extents[i].startBlock); 3888 
volumeHeader->startupFile.extents[i].blockCount = 3889 SWAP_BE32 (fp->ff_extents[i].blockCount); 3890 } 3891 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size); 3892 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); 3893 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); 3894 FTOC(fp)->c_flag &= ~C_MODIFIED; 3895 } 3896 } 3897 3898done: 3899 MarkVCBClean(hfsmp); 3900 lck_mtx_unlock(&hfsmp->hfs_mutex); 3901 3902 /* If requested, flush out the alternate volume header */ 3903 if (altflush && hfsmp->hfs_alt_id_sector) { 3904 if (buf_meta_bread(hfsmp->hfs_devvp, 3905 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), 3906 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { 3907 if (hfsmp->jnl) { 3908 journal_modify_block_start(hfsmp->jnl, alt_bp); 3909 } 3910 3911 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + 3912 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), 3913 kMDBSize); 3914 3915 if (hfsmp->jnl) { 3916 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL); 3917 } else { 3918 (void) VNOP_BWRITE(alt_bp); 3919 } 3920 } else if (alt_bp) 3921 buf_brelse(alt_bp); 3922 } 3923 3924 if (hfsmp->jnl) { 3925 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); 3926 } else { 3927 if (waitfor != MNT_WAIT) 3928 buf_bawrite(bp); 3929 else { 3930 retval = VNOP_BWRITE(bp); 3931 /* When critical data changes, flush the device cache */ 3932 if (critical && (retval == 0)) { 3933 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, 3934 NULL, FWRITE, NULL); 3935 } 3936 } 3937 } 3938 hfs_end_transaction(hfsmp); 3939 3940 return (retval); 3941 3942err_exit: 3943 if (alt_bp) 3944 buf_brelse(alt_bp); 3945 if (bp) 3946 buf_brelse(bp); 3947 hfs_end_transaction(hfsmp); 3948 return retval; 3949} 3950 3951 3952/* 3953 * Extend a file system. 
3954 */ 3955int 3956hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) 3957{ 3958 struct proc *p = vfs_context_proc(context); 3959 kauth_cred_t cred = vfs_context_ucred(context); 3960 struct vnode *vp; 3961 struct vnode *devvp; 3962 struct buf *bp; 3963 struct filefork *fp = NULL; 3964 ExtendedVCB *vcb; 3965 struct cat_fork forkdata; 3966 u_int64_t oldsize; 3967 u_int64_t newblkcnt; 3968 u_int64_t prev_phys_block_count; 3969 u_int32_t addblks; 3970 u_int64_t sector_count; 3971 u_int32_t sector_size; 3972 u_int32_t phys_sector_size; 3973 u_int32_t overage_blocks; 3974 daddr64_t prev_alt_sector; 3975 daddr_t bitmapblks; 3976 int lockflags = 0; 3977 int error; 3978 int64_t oldBitmapSize; 3979 Boolean usedExtendFileC = false; 3980 int transaction_begun = 0; 3981 3982 devvp = hfsmp->hfs_devvp; 3983 vcb = HFSTOVCB(hfsmp); 3984 3985 /* 3986 * - HFS Plus file systems only. 3987 * - Journaling must be enabled. 3988 * - No embedded volumes. 3989 */ 3990 if ((vcb->vcbSigWord == kHFSSigWord) || 3991 (hfsmp->jnl == NULL) || 3992 (vcb->hfsPlusIOPosOffset != 0)) { 3993 return (EPERM); 3994 } 3995 /* 3996 * If extending file system by non-root, then verify 3997 * ownership and check permissions. 
3998 */ 3999 if (suser(cred, NULL)) { 4000 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0); 4001 4002 if (error) 4003 return (error); 4004 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0); 4005 if (error == 0) { 4006 error = hfs_write_access(vp, cred, p, false); 4007 } 4008 hfs_unlock(VTOC(vp)); 4009 vnode_put(vp); 4010 if (error) 4011 return (error); 4012 4013 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context); 4014 if (error) 4015 return (error); 4016 } 4017 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)§or_size, 0, context)) { 4018 return (ENXIO); 4019 } 4020 if (sector_size != hfsmp->hfs_logical_block_size) { 4021 return (ENXIO); 4022 } 4023 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)§or_count, 0, context)) { 4024 return (ENXIO); 4025 } 4026 if ((sector_size * sector_count) < newsize) { 4027 printf("hfs_extendfs: not enough space on device\n"); 4028 return (ENOSPC); 4029 } 4030 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context); 4031 if (error) { 4032 if ((error != ENOTSUP) && (error != ENOTTY)) { 4033 return (ENXIO); 4034 } 4035 /* If ioctl is not supported, force physical and logical sector size to be same */ 4036 phys_sector_size = sector_size; 4037 } 4038 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; 4039 4040 /* 4041 * Validate new size. 
4042 */ 4043 if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) { 4044 printf("hfs_extendfs: invalid size\n"); 4045 return (EINVAL); 4046 } 4047 newblkcnt = newsize / vcb->blockSize; 4048 if (newblkcnt > (u_int64_t)0xFFFFFFFF) 4049 return (EOVERFLOW); 4050 4051 addblks = newblkcnt - vcb->totalBlocks; 4052 4053 if (hfs_resize_debug) { 4054 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks); 4055 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks); 4056 } 4057 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks); 4058 4059 HFS_MOUNT_LOCK(hfsmp, TRUE); 4060 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { 4061 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 4062 error = EALREADY; 4063 goto out; 4064 } 4065 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; 4066 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 4067 4068 /* Start with a clean journal. */ 4069 hfs_journal_flush(hfsmp, TRUE); 4070 4071 /* 4072 * Enclose changes inside a transaction. 4073 */ 4074 if (hfs_start_transaction(hfsmp) != 0) { 4075 error = EINVAL; 4076 goto out; 4077 } 4078 transaction_begun = 1; 4079 4080 4081 /* Update the hfsmp fields for the physical information about the device */ 4082 prev_phys_block_count = hfsmp->hfs_logical_block_count; 4083 prev_alt_sector = hfsmp->hfs_alt_id_sector; 4084 4085 hfsmp->hfs_logical_block_count = sector_count; 4086 /* 4087 * Note that the new AltVH location must be based on the device's EOF rather than the new 4088 * filesystem's EOF, so we use logical_block_count here rather than newsize. 4089 */ 4090 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + 4091 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count); 4092 hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size; 4093 4094 4095 /* 4096 * Note: we take the attributes lock in case we have an attribute data vnode 4097 * which needs to change size. 
4098 */ 4099 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); 4100 vp = vcb->allocationsRefNum; 4101 fp = VTOF(vp); 4102 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); 4103 4104 /* 4105 * Calculate additional space required (if any) by allocation bitmap. 4106 */ 4107 oldBitmapSize = fp->ff_size; 4108 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize; 4109 if (bitmapblks > (daddr_t)fp->ff_blocks) 4110 bitmapblks -= fp->ff_blocks; 4111 else 4112 bitmapblks = 0; 4113 4114 /* 4115 * The allocation bitmap can contain unused bits that are beyond end of 4116 * current volume's allocation blocks. Usually they are supposed to be 4117 * zero'ed out but there can be cases where they might be marked as used. 4118 * After extending the file system, those bits can represent valid 4119 * allocation blocks, so we mark all the bits from the end of current 4120 * volume to end of allocation bitmap as "free". 4121 * 4122 * Figure out the number of overage blocks before proceeding though, 4123 * so we don't add more bytes to our I/O than necessary. 4124 * First figure out the total number of blocks representable by the 4125 * end of the bitmap file vs. the total number of blocks in the new FS. 4126 * Then subtract away the number of blocks in the current FS. This is how much 4127 * we can mark as free right now without having to grow the bitmap file. 4128 */ 4129 overage_blocks = fp->ff_blocks * vcb->blockSize * 8; 4130 overage_blocks = MIN (overage_blocks, newblkcnt); 4131 overage_blocks -= vcb->totalBlocks; 4132 4133 BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks); 4134 4135 if (bitmapblks > 0) { 4136 daddr64_t blkno; 4137 daddr_t blkcnt; 4138 off_t bytesAdded; 4139 4140 /* 4141 * Get the bitmap's current size (in allocation blocks) so we know 4142 * where to start zero filling once the new space is added. We've 4143 * got to do this before the bitmap is grown. 
4144 */ 4145 blkno = (daddr64_t)fp->ff_blocks; 4146 4147 /* 4148 * Try to grow the allocation file in the normal way, using allocation 4149 * blocks already existing in the file system. This way, we might be 4150 * able to grow the bitmap contiguously, or at least in the metadata 4151 * zone. 4152 */ 4153 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0, 4154 kEFAllMask | kEFNoClumpMask | kEFReserveMask 4155 | kEFMetadataMask | kEFContigMask, &bytesAdded); 4156 4157 if (error == 0) { 4158 usedExtendFileC = true; 4159 } else { 4160 /* 4161 * If the above allocation failed, fall back to allocating the new 4162 * extent of the bitmap from the space we're going to add. Since those 4163 * blocks don't yet belong to the file system, we have to update the 4164 * extent list directly, and manually adjust the file size. 4165 */ 4166 bytesAdded = 0; 4167 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); 4168 if (error) { 4169 printf("hfs_extendfs: error %d adding extents\n", error); 4170 goto out; 4171 } 4172 fp->ff_blocks += bitmapblks; 4173 VTOC(vp)->c_blocks = fp->ff_blocks; 4174 VTOC(vp)->c_flag |= C_MODIFIED; 4175 } 4176 4177 /* 4178 * Update the allocation file's size to include the newly allocated 4179 * blocks. Note that ExtendFileC doesn't do this, which is why this 4180 * statement is outside the above "if" statement. 4181 */ 4182 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; 4183 4184 /* 4185 * Zero out the new bitmap blocks. 
4186 */ 4187 { 4188 4189 bp = NULL; 4190 blkcnt = bitmapblks; 4191 while (blkcnt > 0) { 4192 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); 4193 if (error) { 4194 if (bp) { 4195 buf_brelse(bp); 4196 } 4197 break; 4198 } 4199 bzero((char *)buf_dataptr(bp), vcb->blockSize); 4200 buf_markaged(bp); 4201 error = (int)buf_bwrite(bp); 4202 if (error) 4203 break; 4204 --blkcnt; 4205 ++blkno; 4206 } 4207 } 4208 if (error) { 4209 printf("hfs_extendfs: error %d clearing blocks\n", error); 4210 goto out; 4211 } 4212 /* 4213 * Mark the new bitmap space as allocated. 4214 * 4215 * Note that ExtendFileC will have marked any blocks it allocated, so 4216 * this is only needed if we used AddFileExtent. Also note that this 4217 * has to come *after* the zero filling of new blocks in the case where 4218 * we used AddFileExtent (since the part of the bitmap we're touching 4219 * is in those newly allocated blocks). 4220 */ 4221 if (!usedExtendFileC) { 4222 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); 4223 if (error) { 4224 printf("hfs_extendfs: error %d setting bitmap\n", error); 4225 goto out; 4226 } 4227 vcb->freeBlocks -= bitmapblks; 4228 } 4229 } 4230 /* 4231 * Mark the new alternate VH as allocated. 4232 */ 4233 if (vcb->blockSize == 512) 4234 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2); 4235 else 4236 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1); 4237 if (error) { 4238 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error); 4239 goto out; 4240 } 4241 /* 4242 * Mark the old alternate VH as free. 4243 */ 4244 if (vcb->blockSize == 512) 4245 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2); 4246 else 4247 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1); 4248 /* 4249 * Adjust file system variables for new space. 
4250 */ 4251 vcb->totalBlocks += addblks; 4252 vcb->freeBlocks += addblks; 4253 MarkVCBDirty(vcb); 4254 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); 4255 if (error) { 4256 printf("hfs_extendfs: couldn't flush volume headers (%d)", error); 4257 /* 4258 * Restore to old state. 4259 */ 4260 if (usedExtendFileC) { 4261 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp), 4262 FTOC(fp)->c_fileid, false); 4263 } else { 4264 fp->ff_blocks -= bitmapblks; 4265 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; 4266 /* 4267 * No need to mark the excess blocks free since those bitmap blocks 4268 * are no longer part of the bitmap. But we do need to undo the 4269 * effect of the "vcb->freeBlocks -= bitmapblks" above. 4270 */ 4271 vcb->freeBlocks += bitmapblks; 4272 } 4273 vcb->totalBlocks -= addblks; 4274 vcb->freeBlocks -= addblks; 4275 hfsmp->hfs_logical_block_count = prev_phys_block_count; 4276 hfsmp->hfs_alt_id_sector = prev_alt_sector; 4277 MarkVCBDirty(vcb); 4278 if (vcb->blockSize == 512) { 4279 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) { 4280 hfs_mark_volume_inconsistent(hfsmp); 4281 } 4282 } else { 4283 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) { 4284 hfs_mark_volume_inconsistent(hfsmp); 4285 } 4286 } 4287 goto out; 4288 } 4289 /* 4290 * Invalidate the old alternate volume header. 
4291 */ 4292 bp = NULL; 4293 if (prev_alt_sector) { 4294 if (buf_meta_bread(hfsmp->hfs_devvp, 4295 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys), 4296 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { 4297 journal_modify_block_start(hfsmp->jnl, bp); 4298 4299 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize); 4300 4301 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); 4302 } else if (bp) { 4303 buf_brelse(bp); 4304 } 4305 } 4306 4307 /* 4308 * Update the metadata zone size based on current volume size 4309 */ 4310 hfs_metadatazone_init(hfsmp, false); 4311 4312 /* 4313 * Adjust the size of hfsmp->hfs_attrdata_vp 4314 */ 4315 if (hfsmp->hfs_attrdata_vp) { 4316 struct cnode *attr_cp; 4317 struct filefork *attr_fp; 4318 4319 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { 4320 attr_cp = VTOC(hfsmp->hfs_attrdata_vp); 4321 attr_fp = VTOF(hfsmp->hfs_attrdata_vp); 4322 4323 attr_cp->c_blocks = newblkcnt; 4324 attr_fp->ff_blocks = newblkcnt; 4325 attr_fp->ff_extents[0].blockCount = newblkcnt; 4326 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; 4327 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size); 4328 vnode_put(hfsmp->hfs_attrdata_vp); 4329 } 4330 } 4331 4332 /* 4333 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile 4334 * locks in the middle of these operations like we do in the truncate case 4335 * where we have to relocate files, we can only update the red-black tree 4336 * if there were actual changes made to the bitmap. Also, we can't really scan the 4337 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated 4338 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is 4339 * not currently controlled by the tree. 4340 * 4341 * We only update hfsmp->allocLimit if totalBlocks actually increased. 
4342 */ 4343 if (error == 0) { 4344 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks); 4345 } 4346 4347 /* Release all locks and sync up journal content before 4348 * checking and extending, if required, the journal 4349 */ 4350 if (lockflags) { 4351 hfs_systemfile_unlock(hfsmp, lockflags); 4352 lockflags = 0; 4353 } 4354 if (transaction_begun) { 4355 hfs_end_transaction(hfsmp); 4356 hfs_journal_flush(hfsmp, TRUE); 4357 transaction_begun = 0; 4358 } 4359 4360 /* Increase the journal size, if required. */ 4361 error = hfs_extend_journal(hfsmp, sector_size, sector_count, context); 4362 if (error) { 4363 printf ("hfs_extendfs: Could not extend journal size\n"); 4364 goto out_noalloc; 4365 } 4366 4367 /* Log successful extending */ 4368 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n", 4369 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize)); 4370 4371out: 4372 if (error && fp) { 4373 /* Restore allocation fork. */ 4374 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata)); 4375 VTOC(vp)->c_blocks = fp->ff_blocks; 4376 4377 } 4378 4379out_noalloc: 4380 HFS_MOUNT_LOCK(hfsmp, TRUE); 4381 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; 4382 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 4383 if (lockflags) { 4384 hfs_systemfile_unlock(hfsmp, lockflags); 4385 } 4386 if (transaction_begun) { 4387 hfs_end_transaction(hfsmp); 4388 hfs_journal_flush(hfsmp, FALSE); 4389 /* Just to be sure, sync all data to the disk */ 4390 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); 4391 } 4392 4393 return MacToVFSError(error); 4394} 4395 4396#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL) 4397 4398/* 4399 * Truncate a file system (while still mounted). 
 *
 * Shrinks a mounted, journaled HFS Plus volume to 'newsize' bytes.
 * Relocates any allocated blocks that lie beyond the new end of volume
 * (via hfs_reclaimspace), moves the alternate volume header, and flushes
 * the updated volume header.
 *
 *	hfsmp   - mount point of the volume to truncate
 *	newsize - desired file system size, in bytes; must be at least
 *	          HFS_MIN_SIZE, smaller than the current size, and a
 *	          multiple of the logical and physical sector sizes
 *	context - caller's VFS context (used for the final device-cache sync)
 *
 * Returns 0 on success or an errno (EPERM, EINVAL, ENOSPC, EAGAIN,
 * EALREADY, ...) via MacToVFSError().
 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Only one resize may be in flight per volume; fail fast otherwise. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks to reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 * 	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 			Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size;
	 * if the new size is too small, the metadata zone might be disabled.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;  /* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;

	/*
	 * Note that although the logical block size is updated here, it is only done for
	 * the benefit of the partition management software.  The logical block count change
	 * has not yet actually been propagated to the disk device yet.
	 */

	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Undo the earlier "freeBlocks -= reclaimblks" if the resize failed. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}


/*
 * Invalidate the physical block numbers associated with buffer cache blocks
 * in the given extent of the given vnode.
 */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the extent */
	daddr64_t sectorCount;	/* number of device sectors in the extent */
};
/*
 * buf_iterate() callback: if the buffer's device block number falls inside
 * the [sectorStart, sectorStart+sectorCount) range, reset it back to the
 * buffer's logical block number so the physical location is recomputed on
 * the next I/O.
 */
static int
hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
{
	daddr64_t blkno;
	struct hfs_inval_blk_no *args;

	blkno = buf_blkno(bp);
	args = args_in;

	if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
		buf_setblkno(bp, buf_lblkno(bp));

	return BUF_RETURNED;
}
/*
 * Walk all dirty and clean buffers of 'vp' and invalidate the cached
 * physical block numbers that fall within the given sector range.
 */
static void
hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
{
	struct hfs_inval_blk_no args;
	args.sectorStart = sectorStart;
	args.sectorCount = sectorCount;

	buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
}


/*
 * Copy the contents of an extent to a new location.
 * Also invalidates the
 * physical block number of any buffer cache block in the copied extent
 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
 * determine the new physical block number).
 *
 * At this point, for regular files, we hold the truncate lock exclusive
 * and the cnode lock exclusive.
 *
 * Returns 0 on success, or an errno-style error:
 *   ENOMEM if the transfer buffer cannot be allocated, an I/O error from
 *   VNOP_STRATEGY/buf_biowait, or (with CONFIG_PROTECT) an error from
 *   cp_handle_relocate when the file's keys are unavailable.
 * On success the source sectors are invalidated in the buffer cache via
 * hfs_invalidate_sectors().  The blocks are copied by issuing device-level
 * reads and writes directly against hfs_devvp; no file-level offsets are
 * involved.
 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/*
	 * Prepare the CP blob and get it ready for use, if necessary.
	 *
	 * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
	 * because they are implicitly protected via the media key on iOS.  As such, they
	 * must not be relocated except with the media key.  So it is OK to not pass down
	 * a special cpentry to the IOMedia/LwVM code for handling.
	 */
	if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		/*
		 * Ideally, the file whose extents we are about to manipulate is using the
		 * newer offset-based IVs so that we can manipulate it regardless of the
		 * current lock state.  However, we must maintain support for older-style
		 * EAs.
		 *
		 * For the older EA case, the IV was tied to the device LBA for file content.
		 * This means that encrypted data cannot be moved from one location to another
		 * in the filesystem without garbling the IV data.  As a result, we need to
		 * access the file's plaintext because we cannot do our AES-symmetry trick
		 * here.  This requires that we attempt a key-unwrap here (via cp_handle_relocate)
		 * to make forward progress.  If the keys are unavailable then we will
		 * simply stop the resize in its tracks here since we cannot move
		 * this extent at this time.
		 */
		if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
			cp_err = cp_handle_relocate(cp, hfsmp);
		}

		if (cp_err) {
			printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
			return cp_err;
		}

		cpenabled = 1;
	}
#endif


	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block units to device logical-block (sector) units. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;

	/* Copy loop: one bounded read followed by one bounded write per pass. */
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/*
		 * Note that because this is an I/O to the device vp
		 * it is correct to have lblkno and blkno both point to the
		 * start sector being read from.  If it were being issued against the
		 * underlying file then that would be different.
		 */

		/* Attach the new CP blob to the buffer if needed */
#if CONFIG_PROTECT
		if (cpenabled) {
			if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
				/* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
				cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
				buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
			}
			else {
				/*
				 * Use the cnode's cp key.  This file is tied to the
				 * LBAs of the physical blocks that it occupies.
				 */
				buf_setcpaddr (bp, cp->c_cpentry);
			}

			/* Initialize the content protection file offset to start at 0 */
			buf_setcpoff (bp, 0);
		}
#endif

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
#if CONFIG_PROTECT
			/* Turn the flag off in error cases. */
			if (cpenabled) {
				cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
			}
#endif
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* System-file relocation on FUA-capable journals forces the write to media. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer if needed */
		if (cpenabled) {
			if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
				buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
			}
			else {
				/*
				 * Use the cnode's CP key.  This file is still tied
				 * to the LBAs of the physical blocks that it occupies.
				 */
				buf_setcpaddr (bp, cp->c_cpentry);
			}
			/*
			 * The last STRATEGY call may have updated the cp file offset behind our
			 * back, so we cannot trust it.  Re-initialize the content protection
			 * file offset back to 0 before initiating the write portion of this I/O.
			 */
			buf_setcpoff (bp, 0);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err) {
			err = buf_biowait(bp);
		}
#if CONFIG_PROTECT
		/* Turn the flag off regardless once the strategy call finishes. */
		if (cpenabled) {
			cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
		}
#endif
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/*
	 * Invalidate any cached blocks for the old location so that future
	 * writes re-map through VNOP_BLOCKMAP (see header comment).
	 */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}


/* Structure to store state of reclaiming extents from a
 * given file.  hfs_reclaim_file()/hfs_reclaim_xattr()
 * initializes the values in this structure which are then
 * used by code that reclaims and splits the extents.
5017 */ 5018struct hfs_reclaim_extent_info { 5019 struct vnode *vp; 5020 u_int32_t fileID; 5021 u_int8_t forkType; 5022 u_int8_t is_dirlink; /* Extent belongs to directory hard link */ 5023 u_int8_t is_sysfile; /* Extent belongs to system file */ 5024 u_int8_t is_xattr; /* Extent belongs to extent-based xattr */ 5025 u_int8_t extent_index; 5026 int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */ 5027 u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */ 5028 u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */ 5029 u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */ 5030 struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */ 5031 union record { 5032 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */ 5033 HFSPlusAttrRecord xattr; /* Attribute record for large EAs */ 5034 } record; 5035 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed. 5036 * For catalog extent record, points to the correct 5037 * extent information in filefork. 
For overflow extent 5038 * record, or xattr record, points to extent record 5039 * in the structure above 5040 */ 5041 struct cat_desc *dirlink_desc; 5042 struct cat_attr *dirlink_attr; 5043 struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */ 5044 struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr() 5045 * use it for reading and hfs_reclaim_extent()/hfs_split_extent() 5046 * use it for writing updated extent record 5047 */ 5048 struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */ 5049 u_int16_t recordlen; 5050 int overflow_count; /* For debugging, counter for overflow extent record */ 5051 FCB *fcb; /* Pointer to the current btree being traversed */ 5052}; 5053 5054/* 5055 * Split the current extent into two extents, with first extent 5056 * to contain given number of allocation blocks. Splitting of 5057 * extent creates one new extent entry which can result in 5058 * shifting of many entries through all the extent records of a 5059 * file, and/or creating a new extent record in the overflow 5060 * extent btree. 5061 * 5062 * Example: 5063 * The diagram below represents two consecutive extent records, 5064 * for simplicity, lets call them record X and X+1 respectively. 5065 * Interesting extent entries have been denoted by letters. 5066 * If the letter is unchanged before and after split, it means 5067 * that the extent entry was not modified during the split. 5068 * A '.' means that the entry remains unchanged after the split 5069 * and is not relevant for our example. A '0' means that the 5070 * extent entry is empty. 5071 * 5072 * If there isn't sufficient contiguous free space to relocate 5073 * an extent (extent "C" below), we will have to break the one 5074 * extent into multiple smaller extents, and relocate each of 5075 * the smaller extents individually. 
The way we do this is by 5076 * finding the largest contiguous free space that is currently 5077 * available (N allocation blocks), and then convert extent "C" 5078 * into two extents, C1 and C2, that occupy exactly the same 5079 * allocation blocks as extent C. Extent C1 is the first 5080 * N allocation blocks of extent C, and extent C2 is the remainder 5081 * of extent C. Then we can relocate extent C1 since we know 5082 * we have enough contiguous free space to relocate it in its 5083 * entirety. We then repeat the process starting with extent C2. 5084 * 5085 * In record X, only the entries following entry C are shifted, and 5086 * the original entry C is replaced with two entries C1 and C2 which 5087 * are actually two extent entries for contiguous allocation blocks. 5088 * 5089 * Note that the entry E from record X is shifted into record X+1 as 5090 * the new first entry. Since the first entry of record X+1 is updated, 5091 * the FABN will also get updated with the blockCount of entry E. 5092 * This also results in shifting of all extent entries in record X+1. 5093 * Note that the number of empty entries after the split has been 5094 * changed from 3 to 2. 5095 * 5096 * Before: 5097 * record X record X+1 5098 * ---------------------===--------- --------------------------------- 5099 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 | 5100 * ---------------------===--------- --------------------------------- 5101 * 5102 * After: 5103 * ---------------------=======----- --------------------------------- 5104 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . 
| G | 0 | 0 | 5105 * ---------------------=======----- --------------------------------- 5106 * 5107 * C1.startBlock = C.startBlock 5108 * C1.blockCount = N 5109 * 5110 * C2.startBlock = C.startBlock + N 5111 * C2.blockCount = C.blockCount - N 5112 * 5113 * FABN = old FABN - E.blockCount 5114 * 5115 * Inputs: 5116 * extent_info - This is the structure that contains state about 5117 * the current file, extent, and extent record that 5118 * is being relocated. This structure is shared 5119 * among code that traverses through all the extents 5120 * of the file, code that relocates extents, and 5121 * code that splits the extent. 5122 * newBlockCount - The blockCount of the extent to be split after 5123 * successfully split operation. 5124 * Output: 5125 * Zero on success, non-zero on failure. 5126 */ 5127static int 5128hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount) 5129{ 5130 int error = 0; 5131 int index = extent_info->extent_index; 5132 int i; 5133 HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */ 5134 HFSPlusExtentDescriptor last_extent; 5135 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */ 5136 HFSPlusExtentRecord *extents_rec = NULL; 5137 HFSPlusExtentKey *extents_key = NULL; 5138 HFSPlusAttrRecord *xattr_rec = NULL; 5139 HFSPlusAttrKey *xattr_key = NULL; 5140 struct BTreeIterator iterator; 5141 struct FSBufferDescriptor btdata; 5142 uint16_t reclen; 5143 uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */ 5144 uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */ 5145 Boolean create_record = false; 5146 Boolean is_xattr; 5147 struct cnode *cp; 5148 5149 is_xattr = extent_info->is_xattr; 5150 extents = extent_info->extents; 5151 cp = VTOC(extent_info->vp); 5152 5153 if (newBlockCount == 0) { 5154 if (hfs_resize_debug) { 5155 printf 
("hfs_split_extent: No splitting required for newBlockCount=0\n"); 5156 } 5157 return error; 5158 } 5159 5160 if (hfs_resize_debug) { 5161 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount); 5162 } 5163 5164 /* Extents overflow btree can not have more than 8 extents. 5165 * No split allowed if the 8th extent is already used. 5166 */ 5167 if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) { 5168 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n"); 5169 error = ENOSPC; 5170 goto out; 5171 } 5172 5173 /* Determine the starting allocation block number for the following 5174 * overflow extent record, if any, before the current record 5175 * gets modified. 5176 */ 5177 read_recStartBlock = extent_info->recStartBlock; 5178 for (i = 0; i < kHFSPlusExtentDensity; i++) { 5179 if (extents[i].blockCount == 0) { 5180 break; 5181 } 5182 read_recStartBlock += extents[i].blockCount; 5183 } 5184 5185 /* Shift and split */ 5186 if (index == kHFSPlusExtentDensity-1) { 5187 /* The new extent created after split will go into following overflow extent record */ 5188 shift_extent.startBlock = extents[index].startBlock + newBlockCount; 5189 shift_extent.blockCount = extents[index].blockCount - newBlockCount; 5190 5191 /* Last extent in the record will be split, so nothing to shift */ 5192 } else { 5193 /* Splitting of extents can result in at most of one 5194 * extent entry to be shifted into following overflow extent 5195 * record. So, store the last extent entry for later. 
5196 */ 5197 shift_extent = extents[kHFSPlusExtentDensity-1]; 5198 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) { 5199 printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount); 5200 } 5201 5202 /* Start shifting extent information from the end of the extent 5203 * record to the index where we want to insert the new extent. 5204 * Note that kHFSPlusExtentDensity-1 is already saved above, and 5205 * does not need to be shifted. The extent entry that is being 5206 * split does not get shifted. 5207 */ 5208 for (i = kHFSPlusExtentDensity-2; i > index; i--) { 5209 if (hfs_resize_debug) { 5210 if (extents[i].blockCount) { 5211 printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount); 5212 } 5213 } 5214 extents[i+1] = extents[i]; 5215 } 5216 } 5217 5218 if (index == kHFSPlusExtentDensity-1) { 5219 /* The second half of the extent being split will be the overflow 5220 * entry that will go into following overflow extent record. The 5221 * value has been stored in 'shift_extent' above, so there is 5222 * nothing to be done here. 5223 */ 5224 } else { 5225 /* Update the values in the second half of the extent being split 5226 * before updating the first half of the split. Note that the 5227 * extent to split or first half of the split is at index 'index' 5228 * and a new extent or second half of the split will be inserted at 5229 * 'index+1' or into following overflow extent record. 
5230 */ 5231 extents[index+1].startBlock = extents[index].startBlock + newBlockCount; 5232 extents[index+1].blockCount = extents[index].blockCount - newBlockCount; 5233 } 5234 /* Update the extent being split, only the block count will change */ 5235 extents[index].blockCount = newBlockCount; 5236 5237 if (hfs_resize_debug) { 5238 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount); 5239 if (index != kHFSPlusExtentDensity-1) { 5240 printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount); 5241 } else { 5242 printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount); 5243 } 5244 } 5245 5246 /* Write out information about the newly split extent to the disk */ 5247 if (extent_info->catalog_fp) { 5248 /* (extent_info->catalog_fp != NULL) means the newly split 5249 * extent exists in the catalog record. This means that 5250 * the cnode was updated. Therefore, to write out the changes, 5251 * mark the cnode as modified. We cannot call hfs_update() 5252 * in this function because the caller hfs_reclaim_extent() 5253 * is holding the catalog lock currently. 
5254 */ 5255 cp->c_flag |= C_MODIFIED; 5256 } else { 5257 /* The newly split extent is for large EAs or is in overflow 5258 * extent record, so update it directly in the btree using the 5259 * iterator information from the shared extent_info structure 5260 */ 5261 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, 5262 &(extent_info->btdata), extent_info->recordlen); 5263 if (error) { 5264 printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error); 5265 goto out; 5266 } 5267 } 5268 5269 /* No extent entry to be shifted into another extent overflow record */ 5270 if (shift_extent.blockCount == 0) { 5271 if (hfs_resize_debug) { 5272 printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n"); 5273 } 5274 error = 0; 5275 goto out; 5276 } 5277 5278 /* The overflow extent entry has to be shifted into an extent 5279 * overflow record. This means that we might have to shift 5280 * extent entries from all subsequent overflow records by one. 5281 * We start iteration from the first record to the last record, 5282 * and shift the extent entry from one record to another. 5283 * We might have to create a new extent record for the last 5284 * extent entry for the file. 5285 */ 5286 5287 /* Initialize iterator to search the next record */ 5288 bzero(&iterator, sizeof(iterator)); 5289 if (is_xattr) { 5290 /* Copy the key from the iterator that was used to update the modified attribute record. 
*/ 5291 xattr_key = (HFSPlusAttrKey *)&(iterator.key); 5292 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey)); 5293 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */ 5294 5295 MALLOC(xattr_rec, HFSPlusAttrRecord *, 5296 sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK); 5297 if (xattr_rec == NULL) { 5298 error = ENOMEM; 5299 goto out; 5300 } 5301 btdata.bufferAddress = xattr_rec; 5302 btdata.itemSize = sizeof(HFSPlusAttrRecord); 5303 btdata.itemCount = 1; 5304 extents = xattr_rec->overflowExtents.extents; 5305 } else { 5306 /* Initialize the extent key for the current file */ 5307 extents_key = (HFSPlusExtentKey *) &(iterator.key); 5308 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; 5309 extents_key->forkType = extent_info->forkType; 5310 extents_key->fileID = extent_info->fileID; 5311 /* Note: extents_key->startBlock will be initialized later in the iteration loop */ 5312 5313 MALLOC(extents_rec, HFSPlusExtentRecord *, 5314 sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK); 5315 if (extents_rec == NULL) { 5316 error = ENOMEM; 5317 goto out; 5318 } 5319 btdata.bufferAddress = extents_rec; 5320 btdata.itemSize = sizeof(HFSPlusExtentRecord); 5321 btdata.itemCount = 1; 5322 extents = extents_rec[0]; 5323 } 5324 5325 /* The overflow extent entry has to be shifted into an extent 5326 * overflow record. This means that we might have to shift 5327 * extent entries from all subsequent overflow records by one. 5328 * We start iteration from the first record to the last record, 5329 * examine one extent record in each iteration and shift one 5330 * extent entry from one record to another. We might have to 5331 * create a new extent record for the last extent entry for the 5332 * file. 5333 * 5334 * If shift_extent.blockCount is non-zero, it means that there is 5335 * an extent entry that needs to be shifted into the next 5336 * overflow extent record. 
We keep on going till there are no such 5337 * entries left to be shifted. This will also change the starting 5338 * allocation block number of the extent record which is part of 5339 * the key for the extent record in each iteration. Note that 5340 * because the extent record key is changing while we are searching, 5341 * the record can not be updated directly, instead it has to be 5342 * deleted and inserted again. 5343 */ 5344 while (shift_extent.blockCount) { 5345 if (hfs_resize_debug) { 5346 printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock); 5347 } 5348 5349 /* Search if there is any existing overflow extent record 5350 * that matches the current file and the logical start block 5351 * number. 5352 * 5353 * For this, the logical start block number in the key is 5354 * the value calculated based on the logical start block 5355 * number of the current extent record and the total number 5356 * of blocks existing in the current extent record. 5357 */ 5358 if (is_xattr) { 5359 xattr_key->startBlock = read_recStartBlock; 5360 } else { 5361 extents_key->startBlock = read_recStartBlock; 5362 } 5363 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator); 5364 if (error) { 5365 if (error != btNotFound) { 5366 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); 5367 goto out; 5368 } 5369 /* No matching record was found, so create a new extent record. 5370 * Note: Since no record was found, we can't rely on the 5371 * btree key in the iterator any longer. This will be initialized 5372 * later before we insert the record. 5373 */ 5374 create_record = true; 5375 } 5376 5377 /* The extra extent entry from the previous record is being inserted 5378 * as the first entry in the current extent record. 
This will change 5379 * the file allocation block number (FABN) of the current extent 5380 * record, which is the startBlock value from the extent record key. 5381 * Since one extra entry is being inserted in the record, the new 5382 * FABN for the record will less than old FABN by the number of blocks 5383 * in the new extent entry being inserted at the start. We have to 5384 * do this before we update read_recStartBlock to point at the 5385 * startBlock of the following record. 5386 */ 5387 write_recStartBlock = read_recStartBlock - shift_extent.blockCount; 5388 if (hfs_resize_debug) { 5389 if (create_record) { 5390 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock); 5391 } 5392 } 5393 5394 /* Now update the read_recStartBlock to account for total number 5395 * of blocks in this extent record. It will now point to the 5396 * starting allocation block number for the next extent record. 5397 */ 5398 for (i = 0; i < kHFSPlusExtentDensity; i++) { 5399 if (extents[i].blockCount == 0) { 5400 break; 5401 } 5402 read_recStartBlock += extents[i].blockCount; 5403 } 5404 5405 if (create_record == true) { 5406 /* Initialize new record content with only one extent entry */ 5407 bzero(extents, sizeof(HFSPlusExtentRecord)); 5408 /* The new record will contain only one extent entry */ 5409 extents[0] = shift_extent; 5410 /* There are no more overflow extents to be shifted */ 5411 shift_extent.startBlock = shift_extent.blockCount = 0; 5412 5413 if (is_xattr) { 5414 /* BTSearchRecord above returned btNotFound, 5415 * but since the attribute btree is never empty 5416 * if we are trying to insert new overflow 5417 * record for the xattrs, the extents_key will 5418 * contain correct data. So we don't need to 5419 * re-initialize it again like below. 
5420 */ 5421 5422 /* Initialize the new xattr record */ 5423 xattr_rec->recordType = kHFSPlusAttrExtents; 5424 xattr_rec->overflowExtents.reserved = 0; 5425 reclen = sizeof(HFSPlusAttrExtents); 5426 } else { 5427 /* BTSearchRecord above returned btNotFound, 5428 * which means that extents_key content might 5429 * not correspond to the record that we are 5430 * trying to create, especially when the extents 5431 * overflow btree is empty. So we reinitialize 5432 * the extents_key again always. 5433 */ 5434 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; 5435 extents_key->forkType = extent_info->forkType; 5436 extents_key->fileID = extent_info->fileID; 5437 5438 /* Initialize the new extent record */ 5439 reclen = sizeof(HFSPlusExtentRecord); 5440 } 5441 } else { 5442 /* The overflow extent entry from previous record will be 5443 * the first entry in this extent record. If the last 5444 * extent entry in this record is valid, it will be shifted 5445 * into the following extent record as its first entry. So 5446 * save the last entry before shifting entries in current 5447 * record. 5448 */ 5449 last_extent = extents[kHFSPlusExtentDensity-1]; 5450 5451 /* Shift all entries by one index towards the end */ 5452 for (i = kHFSPlusExtentDensity-2; i >= 0; i--) { 5453 extents[i+1] = extents[i]; 5454 } 5455 5456 /* Overflow extent entry saved from previous record 5457 * is now the first entry in the current record. 5458 */ 5459 extents[0] = shift_extent; 5460 5461 if (hfs_resize_debug) { 5462 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock); 5463 } 5464 5465 /* The last entry from current record will be the 5466 * overflow entry which will be the first entry for 5467 * the following extent record. 
5468 */ 5469 shift_extent = last_extent; 5470 5471 /* Since the key->startBlock is being changed for this record, 5472 * it should be deleted and inserted with the new key. 5473 */ 5474 error = BTDeleteRecord(extent_info->fcb, &iterator); 5475 if (error) { 5476 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); 5477 goto out; 5478 } 5479 if (hfs_resize_debug) { 5480 printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock)); 5481 } 5482 } 5483 5484 /* Insert the newly created or modified extent record */ 5485 bzero(&iterator.hint, sizeof(iterator.hint)); 5486 if (is_xattr) { 5487 xattr_key->startBlock = write_recStartBlock; 5488 } else { 5489 extents_key->startBlock = write_recStartBlock; 5490 } 5491 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen); 5492 if (error) { 5493 printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error); 5494 goto out; 5495 } 5496 if (hfs_resize_debug) { 5497 printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock); 5498 } 5499 } 5500 5501out: 5502 /* 5503 * Extents overflow btree or attributes btree headers might have 5504 * been modified during the split/shift operation, so flush the 5505 * changes to the disk while we are inside journal transaction. 5506 * We should only be able to generate I/O that modifies the B-Tree 5507 * header nodes while we're in the middle of a journal transaction. 5508 * Otherwise it might result in panic during unmount. 5509 */ 5510 BTFlushPath(extent_info->fcb); 5511 5512 if (extents_rec) { 5513 FREE (extents_rec, M_TEMP); 5514 } 5515 if (xattr_rec) { 5516 FREE (xattr_rec, M_TEMP); 5517 } 5518 return error; 5519} 5520 5521 5522/* 5523 * Relocate an extent if it lies beyond the expected end of volume. 
5524 * 5525 * This function is called for every extent of the file being relocated. 5526 * It allocates space for relocation, copies the data, deallocates 5527 * the old extent, and update corresponding on-disk extent. If the function 5528 * does not find contiguous space to relocate an extent, it splits the 5529 * extent in smaller size to be able to relocate it out of the area of 5530 * disk being reclaimed. As an optimization, if an extent lies partially 5531 * in the area of the disk being reclaimed, it is split so that we only 5532 * have to relocate the area that was overlapping with the area of disk 5533 * being reclaimed. 5534 * 5535 * Note that every extent is relocated in its own transaction so that 5536 * they do not overwhelm the journal. This function handles the extent 5537 * record that exists in the catalog record, extent record from overflow 5538 * extents btree, and extents for large EAs. 5539 * 5540 * Inputs: 5541 * extent_info - This is the structure that contains state about 5542 * the current file, extent, and extent record that 5543 * is being relocated. This structure is shared 5544 * among code that traverses through all the extents 5545 * of the file, code that relocates extents, and 5546 * code that splits the extent. 
5547 */ 5548static int 5549hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context) 5550{ 5551 int error = 0; 5552 int index; 5553 struct cnode *cp; 5554 u_int32_t oldStartBlock; 5555 u_int32_t oldBlockCount; 5556 u_int32_t newStartBlock; 5557 u_int32_t newBlockCount; 5558 u_int32_t roundedBlockCount; 5559 uint16_t node_size; 5560 uint32_t remainder_blocks; 5561 u_int32_t alloc_flags; 5562 int blocks_allocated = false; 5563 5564 index = extent_info->extent_index; 5565 cp = VTOC(extent_info->vp); 5566 5567 oldStartBlock = extent_info->extents[index].startBlock; 5568 oldBlockCount = extent_info->extents[index].blockCount; 5569 5570 if (0 && hfs_resize_debug) { 5571 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount); 5572 } 5573 5574 /* If the current extent lies completely within allocLimit, 5575 * it does not require any relocation. 5576 */ 5577 if ((oldStartBlock + oldBlockCount) <= allocLimit) { 5578 extent_info->cur_blockCount += oldBlockCount; 5579 return error; 5580 } 5581 5582 /* Every extent should be relocated in its own transaction 5583 * to make sure that we don't overflow the journal buffer. 5584 */ 5585 error = hfs_start_transaction(hfsmp); 5586 if (error) { 5587 return error; 5588 } 5589 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK); 5590 5591 /* Check if the extent lies partially in the area to reclaim, 5592 * i.e. it starts before allocLimit and ends beyond allocLimit. 5593 * We have already skipped extents that lie completely within 5594 * allocLimit in the check above, so we only check for the 5595 * startBlock. If it lies partially, split it so that we 5596 * only relocate part of the extent. 
5597 */ 5598 if (oldStartBlock < allocLimit) { 5599 newBlockCount = allocLimit - oldStartBlock; 5600 5601 if (hfs_resize_debug) { 5602 int idx = extent_info->extent_index; 5603 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); 5604 } 5605 5606 /* If the extent belongs to a btree, check and trim 5607 * it to be multiple of the node size. 5608 */ 5609 if (extent_info->is_sysfile) { 5610 node_size = get_btree_nodesize(extent_info->vp); 5611 /* If the btree node size is less than the block size, 5612 * splitting this extent will not split a node across 5613 * different extents. So we only check and trim if 5614 * node size is more than the allocation block size. 5615 */ 5616 if (node_size > hfsmp->blockSize) { 5617 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); 5618 if (remainder_blocks) { 5619 newBlockCount -= remainder_blocks; 5620 if (hfs_resize_debug) { 5621 printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); 5622 } 5623 } 5624 } 5625 /* The newBlockCount is zero because of rounding-down so that 5626 * btree nodes are not split across extents. Therefore this 5627 * straddling extent across resize-boundary does not require 5628 * splitting. Skip over to relocating of complete extent. 5629 */ 5630 if (newBlockCount == 0) { 5631 if (hfs_resize_debug) { 5632 printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n"); 5633 } 5634 goto relocate_full_extent; 5635 } 5636 } 5637 5638 /* Split the extents into two parts --- the first extent lies 5639 * completely within allocLimit and therefore does not require 5640 * relocation. 
The second extent will require relocation which 5641 * will be handled when the caller calls this function again 5642 * for the next extent. 5643 */ 5644 error = hfs_split_extent(extent_info, newBlockCount); 5645 if (error == 0) { 5646 /* Split success, no relocation required */ 5647 goto out; 5648 } 5649 /* Split failed, so try to relocate entire extent */ 5650 if (hfs_resize_debug) { 5651 int idx = extent_info->extent_index; 5652 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); 5653 } 5654 } 5655 5656relocate_full_extent: 5657 /* At this point, the current extent requires relocation. 5658 * We will try to allocate space equal to the size of the extent 5659 * being relocated first to try to relocate it without splitting. 5660 * If the allocation fails, we will try to allocate contiguous 5661 * blocks out of metadata zone. If that allocation also fails, 5662 * then we will take a whatever contiguous block run is returned 5663 * by the allocation, split the extent into two parts, and then 5664 * relocate the first splitted extent. 5665 */ 5666 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; 5667 if (extent_info->is_sysfile) { 5668 alloc_flags |= HFS_ALLOC_METAZONE; 5669 } 5670 5671 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, 5672 &newStartBlock, &newBlockCount); 5673 if ((extent_info->is_sysfile == false) && 5674 ((error == dskFulErr) || (error == ENOSPC))) { 5675 /* For non-system files, try reallocating space in metadata zone */ 5676 alloc_flags |= HFS_ALLOC_METAZONE; 5677 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, 5678 alloc_flags, &newStartBlock, &newBlockCount); 5679 } 5680 if ((error == dskFulErr) || (error == ENOSPC)) { 5681 /* We did not find desired contiguous space for this extent. 
5682 * So try to allocate the maximum contiguous space available. 5683 */ 5684 alloc_flags &= ~HFS_ALLOC_FORCECONTIG; 5685 5686 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, 5687 alloc_flags, &newStartBlock, &newBlockCount); 5688 if (error) { 5689 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); 5690 goto out; 5691 } 5692 blocks_allocated = true; 5693 5694 /* The number of blocks allocated is less than the requested 5695 * number of blocks. For btree extents, check and trim the 5696 * extent to be multiple of the node size. 5697 */ 5698 if (extent_info->is_sysfile) { 5699 node_size = get_btree_nodesize(extent_info->vp); 5700 if (node_size > hfsmp->blockSize) { 5701 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); 5702 if (remainder_blocks) { 5703 roundedBlockCount = newBlockCount - remainder_blocks; 5704 /* Free tail-end blocks of the newly allocated extent */ 5705 BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount, 5706 newBlockCount - roundedBlockCount, 5707 HFS_ALLOC_SKIPFREEBLKS); 5708 newBlockCount = roundedBlockCount; 5709 if (hfs_resize_debug) { 5710 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); 5711 } 5712 if (newBlockCount == 0) { 5713 printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID); 5714 error = ENOSPC; 5715 goto out; 5716 } 5717 } 5718 } 5719 } 5720 5721 /* The number of blocks allocated is less than the number of 5722 * blocks requested, so split this extent --- the first extent 5723 * will be relocated as part of this function call and the caller 5724 * will handle relocating the second extent by calling this 5725 * function again for the second extent. 
5726 */ 5727 error = hfs_split_extent(extent_info, newBlockCount); 5728 if (error) { 5729 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); 5730 goto out; 5731 } 5732 oldBlockCount = newBlockCount; 5733 } 5734 if (error) { 5735 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); 5736 goto out; 5737 } 5738 blocks_allocated = true; 5739 5740 /* Copy data from old location to new location */ 5741 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock, 5742 newStartBlock, newBlockCount, context); 5743 if (error) { 5744 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error); 5745 goto out; 5746 } 5747 5748 /* Update the extent record with the new start block information */ 5749 extent_info->extents[index].startBlock = newStartBlock; 5750 5751 /* Sync the content back to the disk */ 5752 if (extent_info->catalog_fp) { 5753 /* Update the extents in catalog record */ 5754 if (extent_info->is_dirlink) { 5755 error = cat_update_dirlink(hfsmp, extent_info->forkType, 5756 extent_info->dirlink_desc, extent_info->dirlink_attr, 5757 &(extent_info->dirlink_fork->ff_data)); 5758 } else { 5759 cp->c_flag |= C_MODIFIED; 5760 /* If this is a system file, sync volume headers on disk */ 5761 if (extent_info->is_sysfile) { 5762 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); 5763 } 5764 } 5765 } else { 5766 /* Replace record for extents overflow or extents-based xattrs */ 5767 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, 5768 &(extent_info->btdata), extent_info->recordlen); 5769 } 5770 if (error) { 5771 printf 
("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error); 5772 goto out; 5773 } 5774 5775 /* Deallocate the old extent */ 5776 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); 5777 if (error) { 5778 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); 5779 goto out; 5780 } 5781 extent_info->blocks_relocated += newBlockCount; 5782 5783 if (hfs_resize_debug) { 5784 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); 5785 } 5786 5787out: 5788 if (error != 0) { 5789 if (blocks_allocated == true) { 5790 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); 5791 } 5792 } else { 5793 /* On success, increment the total allocation blocks processed */ 5794 extent_info->cur_blockCount += newBlockCount; 5795 } 5796 5797 hfs_systemfile_unlock(hfsmp, extent_info->lockflags); 5798 5799 /* For a non-system file, if an extent entry from catalog record 5800 * was modified, sync the in-memory changes to the catalog record 5801 * on disk before ending the transaction. 
5802 */ 5803 if ((extent_info->catalog_fp) && 5804 (extent_info->is_sysfile == false)) { 5805 (void) hfs_update(extent_info->vp, MNT_WAIT); 5806 } 5807 5808 hfs_end_transaction(hfsmp); 5809 5810 return error; 5811} 5812 5813/* Report intermediate progress during volume resize */ 5814static void 5815hfs_truncatefs_progress(struct hfsmount *hfsmp) 5816{ 5817 u_int32_t cur_progress = 0; 5818 5819 hfs_resize_progress(hfsmp, &cur_progress); 5820 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) { 5821 printf("hfs_truncatefs: %d%% done...\n", cur_progress); 5822 hfsmp->hfs_resize_progress = cur_progress; 5823 } 5824 return; 5825} 5826 5827/* 5828 * Reclaim space at the end of a volume for given file and forktype. 5829 * 5830 * This routine attempts to move any extent which contains allocation blocks 5831 * at or after "allocLimit." A separate transaction is used for every extent 5832 * that needs to be moved. If there is not contiguous space available for 5833 * moving an extent, it can be split into smaller extents. The contents of 5834 * any moved extents are read and written via the volume's device vnode -- 5835 * NOT via "vp." During the move, moved blocks which are part of a transaction 5836 * have their physical block numbers invalidated so they will eventually be 5837 * written to their new locations. 5838 * 5839 * This function is also called for directory hard links. Directory hard links 5840 * are regular files with no data fork and resource fork that contains alias 5841 * information for backward compatibility with pre-Leopard systems. However 5842 * non-Mac OS X implementation can add/modify data fork or resource fork 5843 * information to directory hard links, so we check, and if required, relocate 5844 * both data fork and resource fork. 5845 * 5846 * Inputs: 5847 * hfsmp The volume being resized. 5848 * vp The vnode for the system file. 
 * fileID	ID of the catalog record that needs to be relocated
 * forktype	The type of fork that needs relocated,
 *		kHFSResourceForkType for resource fork,
 *		kHFSDataForkType for data fork
 * allocLimit	Allocation limit for the new volume size,
 *		do not use this block or beyond.  All extents
 *		that use this block or any blocks beyond this limit
 *		will be relocated.
 *
 * Side Effects:
 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
 * blocks that were relocated.
 */
static int
hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
		u_int8_t forktype, u_long allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	int lockflags = 0;
	struct cnode *cp;
	struct filefork *fp;
	int took_truncate_lock = false;
	int release_desc = false;
	HFSPlusExtentKey *key;

	/* If there is no vnode for this file, then there's nothing to do. */
	if (vp == NULL) {
		return 0;
	}

	cp = VTOC(vp);

	if (hfs_resize_debug) {
		const char *filename = (const char *) cp->c_desc.cd_nameptr;
		int namelen = cp->c_desc.cd_namelen;

		/* cd_nameptr can legitimately be NULL (e.g. no cached name); print empty. */
		if (filename == NULL) {
			filename = "";
			namelen = 0;
		}
		printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
	}

	/* NOTE(review): with M_WAITOK this MALLOC should not return NULL;
	 * the check below is defensive.
	 */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->forkType = forktype;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* A directory vnode with C_HARDLINK set is a directory hard link;
	 * its forks are accessed through a looked-up (fake) filefork below.
	 */
	if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
		extent_info->is_dirlink = true;
	}
	/* We always need allocation bitmap and extent btree lock */
	lockflags = SFL_BITMAP | SFL_EXTENTS;
	if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
		lockflags |= SFL_CATALOG;
	} else if (fileID == kHFSAttributesFileID) {
		lockflags |= SFL_ATTRIBUTE;
	} else if (fileID == kHFSStartupFileID) {
		lockflags |= SFL_STARTUP;
	}
	extent_info->lockflags = lockflags;
	extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Flush data associated with current file on disk.
	 *
	 * If the current vnode is directory hard link, no flushing of
	 * journal or vnode is required.  The current kernel does not
	 * modify data/resource fork of directory hard links, so nothing
	 * will be in the cache.  If a directory hard link is newly created,
	 * the resource fork data is written directly using devvp and
	 * the code that actually relocates data (hfs_copy_extent()) also
	 * uses devvp for its I/O --- so they will see a consistent copy.
	 */
	if (extent_info->is_sysfile) {
		/* If the current vnode is system vnode, flush journal
		 * to make sure that all data is written to the disk.
		 */
		error = hfs_journal_flush(hfsmp, TRUE);
		if (error) {
			printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
			goto out;
		}
	} else if (extent_info->is_dirlink == false) {
		/* Flush all blocks associated with this regular file vnode.
		 * Normally there should not be buffer cache blocks for regular
		 * files, but for objects like symlinks, we can have buffer cache
		 * blocks associated with the vnode.  Therefore we call
		 * buf_flushdirtyblks() also.
		 */
		buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");

		/* Drop the cnode lock before taking the truncate lock to keep
		 * the lock ordering used elsewhere in this file.
		 */
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
		took_truncate_lock = true;
		(void) cluster_push(vp, 0);
		error = hfs_lock(cp, HFS_FORCE_LOCK);
		if (error) {
			goto out;
		}

		/* If the file no longer exists, nothing left to do */
		if (cp->c_flag & C_NOEXISTS) {
			error = 0;
			goto out;
		}

		/* Wait for any in-progress writes to this vnode to complete, so that we'll
		 * be copying consistent bits.  (Otherwise, it's possible that an async
		 * write will complete to the old extent after we read from it.  That
		 * could lead to corruption.)
		 */
		error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
		if (error) {
			goto out;
		}
	}

	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
	}

	if (extent_info->is_dirlink) {
		MALLOC(extent_info->dirlink_desc, struct cat_desc *,
				sizeof(struct cat_desc), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_attr, struct cat_attr *,
				sizeof(struct cat_attr), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_fork, struct filefork *,
				sizeof(struct filefork), M_TEMP, M_WAITOK);
		if ((extent_info->dirlink_desc == NULL) ||
		    (extent_info->dirlink_attr == NULL) ||
		    (extent_info->dirlink_fork == NULL)) {
			error = ENOMEM;
			goto out;
		}

		/* Lookup catalog record for directory hard link and
		 * create a fake filefork for the value looked up from
		 * the disk.
		 */
		fp = extent_info->dirlink_fork;
		bzero(extent_info->dirlink_fork, sizeof(struct filefork));
		extent_info->dirlink_fork->ff_cp = cp;
		/* hfs_systemfile_lock returns the flags actually taken; reuse them
		 * for the matching unlock.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = cat_lookup_dirlink(hfsmp, fileID, forktype,
				extent_info->dirlink_desc, extent_info->dirlink_attr,
				&(extent_info->dirlink_fork->ff_data));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
			goto out;
		}
		release_desc = true;
	} else {
		fp = VTOF(vp);
	}

	extent_info->catalog_fp = fp;
	extent_info->recStartBlock = 0;
	extent_info->extents = extent_info->catalog_fp->ff_extents;
	/* Relocate extents from the catalog record */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (fp->ff_extents[i].blockCount == 0) {
			break;
		}
		extent_info->extent_index = i;
		error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
		if (error) {
			printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
			goto out;
		}
	}

	/* If the number of allocation blocks processed for reclaiming
	 * are less than total number of blocks for the file, continuing
	 * working on overflow extents record.
	 */
	if (fp->ff_blocks <= extent_info->cur_blockCount) {
		/* NOTE(review): '0 &&' permanently disables this debug printf ---
		 * presumably intentional; left as-is.
		 */
		if (0 && hfs_resize_debug) {
			printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
		}
		goto out;
	}

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
	}

	MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
	key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
	key->keyLength = kHFSPlusExtentKeyMaximumLength;
	key->forkType = forktype;
	key->fileID = fileID;
	key->startBlock = extent_info->cur_blockCount;

	extent_info->btdata.bufferAddress = extent_info->record.overflow;
	extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
	extent_info->btdata.itemCount = 1;

	/* Switch from catalog extents to overflow-record extents. */
	extent_info->catalog_fp = NULL;

	/* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, lockflags);
	while (error == 0) {
		extent_info->overflow_count++;
		extent_info->recStartBlock = key->startBlock;
		extent_info->extents = extent_info->record.overflow;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extent_info->record.overflow[i].blockCount == 0) {
				goto out;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
				goto out;
			}
		}

		/* Look for more overflow records */
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			break;
		}
		/* Stop when we encounter a different file or fork. */
		if ((key->fileID != fileID) || (key->forkType != forktype)) {
			break;
		}
	}
	/* Running out of records is the normal termination condition. */
	if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
		error = 0;
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
		if (fileID < kHFSFirstUserCatalogNodeID) {
			printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
					extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
		}
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (release_desc == true) {
		cat_releasedesc(extent_info->dirlink_desc);
	}
	if (extent_info->dirlink_desc) {
		FREE(extent_info->dirlink_desc, M_TEMP);
	}
	if (extent_info->dirlink_attr) {
		FREE(extent_info->dirlink_attr, M_TEMP);
	}
	if (extent_info->dirlink_fork) {
		FREE(extent_info->dirlink_fork, M_TEMP);
	}
	/* For system files the volume header flush already happened inside
	 * hfs_reclaim_extent(); only non-system files need hfs_update() here.
	 */
	if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
	}

	return error;
}
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is moving */
	vfs_context_t context;		/* caller's VFS context (supplies credentials) */
	u_int32_t newStartBlock;	/* new journal start, in allocation blocks */
	u_int32_t newBlockCount;	/* new journal length, in allocation blocks */
};

static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/* Read the current journal info block directly from the device. */
	error = buf_meta_bread(hfsmp->hfs_devvp,
		hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
		hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
		if (bp) {
			buf_brelse(bp);
		}
		return error;
	}
	/* Rewrite the JIB to point at the new journal extent; on-disk fields
	 * are big-endian, hence the SWAP_BE64.
	 */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	/* Synchronous write, deliberately outside any transaction (see comment
	 * above).  NOTE(review): assumes buf_bwrite() releases bp on both
	 * success and failure --- confirm against the buffer cache KPI.
	 */
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
		return error;
	}
	/* Without FUA, explicitly flush the drive cache so the new JIB reaches
	 * the media before journaled blocks are written to their new homes.
	 */
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0;	/* Don't fail the operation. */
		}
	}

	return error;
}


/* Type of resize operation in progress */
#define HFS_RESIZE_TRUNCATE	1
#define HFS_RESIZE_EXTEND	2

/*
 * Core function to relocate the journal file.  This function takes the
 * journal size of the newly relocated journal --- the caller can
 * provide a new journal size if they want to change the size of
 * the journal.
The function takes care of updating the journal info 6212 * block and all other data structures correctly. 6213 * 6214 * Note: This function starts a transaction and grabs the btree locks. 6215 */ 6216static int 6217hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context) 6218{ 6219 int error; 6220 int journal_err; 6221 int lockflags; 6222 u_int32_t oldStartBlock; 6223 u_int32_t newStartBlock; 6224 u_int32_t oldBlockCount; 6225 u_int32_t newBlockCount; 6226 u_int32_t jnlBlockCount; 6227 u_int32_t alloc_skipfreeblks; 6228 struct cat_desc journal_desc; 6229 struct cat_attr journal_attr; 6230 struct cat_fork journal_fork; 6231 struct hfs_journal_relocate_args callback_args; 6232 6233 /* Calculate the number of allocation blocks required for the journal */ 6234 jnlBlockCount = howmany(jnl_size, hfsmp->blockSize); 6235 6236 /* 6237 * During truncatefs(), the volume free block count is updated 6238 * before relocating data and reflects the total number of free 6239 * blocks that will exist on volume after the resize is successful. 6240 * This means that the allocation blocks required for relocation 6241 * have already been reserved and accounted for in the free block 6242 * count. Therefore, block allocation and deallocation routines 6243 * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS 6244 * flag. 6245 * 6246 * This special handling is not required when the file system 6247 * is being extended as we want all the allocated and deallocated 6248 * blocks to be accounted for correctly. 
6249 */ 6250 if (resize_type == HFS_RESIZE_TRUNCATE) { 6251 alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS; 6252 } else { 6253 alloc_skipfreeblks = 0; 6254 } 6255 6256 error = hfs_start_transaction(hfsmp); 6257 if (error) { 6258 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error); 6259 return error; 6260 } 6261 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); 6262 6263 error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, 6264 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | alloc_skipfreeblks, 6265 &newStartBlock, &newBlockCount); 6266 if (error) { 6267 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error); 6268 goto fail; 6269 } 6270 if (newBlockCount != jnlBlockCount) { 6271 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount); 6272 goto free_fail; 6273 } 6274 6275 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork); 6276 if (error) { 6277 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error); 6278 goto free_fail; 6279 } 6280 6281 oldStartBlock = journal_fork.cf_extents[0].startBlock; 6282 oldBlockCount = journal_fork.cf_extents[0].blockCount; 6283 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks); 6284 if (error) { 6285 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); 6286 goto free_fail; 6287 } 6288 6289 /* Update the catalog record for .journal */ 6290 journal_fork.cf_size = newBlockCount * hfsmp->blockSize; 6291 journal_fork.cf_extents[0].startBlock = newStartBlock; 6292 journal_fork.cf_extents[0].blockCount = newBlockCount; 6293 journal_fork.cf_blocks = newBlockCount; 6294 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL); 6295 cat_releasedesc(&journal_desc); /* all done with cat descriptor */ 6296 if (error) { 6297 printf("hfs_relocate_journal_file: cat_update returned 
%d\n", error); 6298 goto free_fail; 6299 } 6300 6301 /* 6302 * If the journal is part of the file system, then tell the journal 6303 * code about the new location. If the journal is on an external 6304 * device, then just keep using it as-is. 6305 */ 6306 if (hfsmp->jvp == hfsmp->hfs_devvp) { 6307 callback_args.hfsmp = hfsmp; 6308 callback_args.context = context; 6309 callback_args.newStartBlock = newStartBlock; 6310 callback_args.newBlockCount = newBlockCount; 6311 6312 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize, 6313 (off_t)newBlockCount*hfsmp->blockSize, 0, 6314 hfs_journal_relocate_callback, &callback_args); 6315 if (error) { 6316 /* NOTE: journal_relocate will mark the journal invalid. */ 6317 printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error); 6318 goto fail; 6319 } 6320 if (hfs_resize_debug) { 6321 printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); 6322 } 6323 hfsmp->jnl_start = newStartBlock; 6324 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize; 6325 } 6326 6327 hfs_systemfile_unlock(hfsmp, lockflags); 6328 error = hfs_end_transaction(hfsmp); 6329 if (error) { 6330 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error); 6331 } 6332 6333 return error; 6334 6335free_fail: 6336 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); 6337 if (journal_err) { 6338 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); 6339 hfs_mark_volume_inconsistent(hfsmp); 6340 } 6341fail: 6342 hfs_systemfile_unlock(hfsmp, lockflags); 6343 (void) hfs_end_transaction(hfsmp); 6344 if (hfs_resize_debug) { 6345 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error); 6346 } 6347 return error; 6348} 6349 6350 6351/* 6352 * Relocate the journal file when the file system is being truncated. 
6353 * We do not down-size the journal when the file system size is 6354 * reduced, so we always provide the current journal size to the 6355 * relocate code. 6356 */ 6357static int 6358hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) 6359{ 6360 int error = 0; 6361 u_int32_t startBlock; 6362 u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize; 6363 6364 /* 6365 * Figure out the location of the .journal file. When the journal 6366 * is on an external device, we need to look up the .journal file. 6367 */ 6368 if (hfsmp->jvp == hfsmp->hfs_devvp) { 6369 startBlock = hfsmp->jnl_start; 6370 blockCount = hfsmp->jnl_size / hfsmp->blockSize; 6371 } else { 6372 u_int32_t fileid; 6373 u_int32_t old_jnlfileid; 6374 struct cat_attr attr; 6375 struct cat_fork fork; 6376 6377 /* 6378 * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid 6379 * is set, and it is trying to hide the .journal file. So temporarily 6380 * unset the field while calling GetFileInfo. 
6381 */ 6382 old_jnlfileid = hfsmp->hfs_jnlfileid; 6383 hfsmp->hfs_jnlfileid = 0; 6384 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork); 6385 hfsmp->hfs_jnlfileid = old_jnlfileid; 6386 if (fileid != old_jnlfileid) { 6387 printf("hfs_reclaim_journal_file: cannot find .journal file!\n"); 6388 return EIO; 6389 } 6390 6391 startBlock = fork.cf_extents[0].startBlock; 6392 blockCount = fork.cf_extents[0].blockCount; 6393 } 6394 6395 if (startBlock + blockCount <= allocLimit) { 6396 /* The journal file does not require relocation */ 6397 return 0; 6398 } 6399 6400 error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context); 6401 if (error == 0) { 6402 hfsmp->hfs_resize_blocksmoved += blockCount; 6403 hfs_truncatefs_progress(hfsmp); 6404 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", 6405 blockCount, hfsmp->vcbVN); 6406 } 6407 6408 return error; 6409} 6410 6411 6412/* 6413 * Move the journal info block to a new location. We have to make sure the 6414 * new copy of the journal info block gets to the media first, then change 6415 * the field in the volume header and the catalog record. 
6416 */ 6417static int 6418hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) 6419{ 6420 int error; 6421 int journal_err; 6422 int lockflags; 6423 u_int32_t oldBlock; 6424 u_int32_t newBlock; 6425 u_int32_t blockCount; 6426 struct cat_desc jib_desc; 6427 struct cat_attr jib_attr; 6428 struct cat_fork jib_fork; 6429 buf_t old_bp, new_bp; 6430 6431 if (hfsmp->vcbJinfoBlock <= allocLimit) { 6432 /* The journal info block does not require relocation */ 6433 return 0; 6434 } 6435 6436 error = hfs_start_transaction(hfsmp); 6437 if (error) { 6438 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error); 6439 return error; 6440 } 6441 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); 6442 6443 error = BlockAllocate(hfsmp, 1, 1, 1, 6444 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, 6445 &newBlock, &blockCount); 6446 if (error) { 6447 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); 6448 goto fail; 6449 } 6450 if (blockCount != 1) { 6451 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); 6452 goto free_fail; 6453 } 6454 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); 6455 if (error) { 6456 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); 6457 goto free_fail; 6458 } 6459 6460 /* Copy the old journal info block content to the new location */ 6461 error = buf_meta_bread(hfsmp->hfs_devvp, 6462 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), 6463 hfsmp->blockSize, vfs_context_ucred(context), &old_bp); 6464 if (error) { 6465 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); 6466 if (old_bp) { 6467 buf_brelse(old_bp); 6468 } 6469 goto free_fail; 6470 } 6471 new_bp = buf_getblk(hfsmp->hfs_devvp, 6472 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), 6473 
hfsmp->blockSize, 0, 0, BLK_META); 6474 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); 6475 buf_brelse(old_bp); 6476 if (journal_uses_fua(hfsmp->jnl)) 6477 buf_markfua(new_bp); 6478 error = buf_bwrite(new_bp); 6479 if (error) { 6480 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error); 6481 goto free_fail; 6482 } 6483 if (!journal_uses_fua(hfsmp->jnl)) { 6484 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); 6485 if (error) { 6486 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); 6487 /* Don't fail the operation. */ 6488 } 6489 } 6490 6491 /* Update the catalog record for .journal_info_block */ 6492 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork); 6493 if (error) { 6494 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error); 6495 goto fail; 6496 } 6497 oldBlock = jib_fork.cf_extents[0].startBlock; 6498 jib_fork.cf_size = hfsmp->blockSize; 6499 jib_fork.cf_extents[0].startBlock = newBlock; 6500 jib_fork.cf_extents[0].blockCount = 1; 6501 jib_fork.cf_blocks = 1; 6502 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL); 6503 cat_releasedesc(&jib_desc); /* all done with cat descriptor */ 6504 if (error) { 6505 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error); 6506 goto fail; 6507 } 6508 6509 /* Update the pointer to the journal info block in the volume header. 
*/ 6510 hfsmp->vcbJinfoBlock = newBlock; 6511 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); 6512 if (error) { 6513 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); 6514 goto fail; 6515 } 6516 hfs_systemfile_unlock(hfsmp, lockflags); 6517 error = hfs_end_transaction(hfsmp); 6518 if (error) { 6519 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); 6520 } 6521 error = hfs_journal_flush(hfsmp, FALSE); 6522 if (error) { 6523 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); 6524 } 6525 6526 /* Account for the block relocated and print progress */ 6527 hfsmp->hfs_resize_blocksmoved += 1; 6528 hfs_truncatefs_progress(hfsmp); 6529 if (!error) { 6530 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n", 6531 hfsmp->vcbVN); 6532 if (hfs_resize_debug) { 6533 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); 6534 } 6535 } 6536 return error; 6537 6538free_fail: 6539 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); 6540 if (journal_err) { 6541 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); 6542 hfs_mark_volume_inconsistent(hfsmp); 6543 } 6544 6545fail: 6546 hfs_systemfile_unlock(hfsmp, lockflags); 6547 (void) hfs_end_transaction(hfsmp); 6548 if (hfs_resize_debug) { 6549 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); 6550 } 6551 return error; 6552} 6553 6554 6555static u_int64_t 6556calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count) 6557{ 6558 u_int64_t journal_size; 6559 u_int32_t journal_scale; 6560 6561#define DEFAULT_JOURNAL_SIZE (8*1024*1024) 6562#define MAX_JOURNAL_SIZE (512*1024*1024) 6563 6564 /* Calculate the journal size for this volume. 
We want 6565 * at least 8 MB of journal for each 100 GB of disk space. 6566 * We cap the size at 512 MB, unless the allocation block 6567 * size is larger, in which case, we use one allocation 6568 * block. 6569 */ 6570 journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024); 6571 journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1); 6572 if (journal_size > MAX_JOURNAL_SIZE) { 6573 journal_size = MAX_JOURNAL_SIZE; 6574 } 6575 if (journal_size < hfsmp->blockSize) { 6576 journal_size = hfsmp->blockSize; 6577 } 6578 return journal_size; 6579} 6580 6581 6582/* 6583 * Calculate the expected journal size based on current partition size. 6584 * If the size of the current journal is less than the calculated size, 6585 * force journal relocation with the new journal size. 6586 */ 6587static int 6588hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context) 6589{ 6590 int error = 0; 6591 u_int64_t calc_journal_size; 6592 6593 if (hfsmp->jvp != hfsmp->hfs_devvp) { 6594 if (hfs_resize_debug) { 6595 printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n"); 6596 } 6597 return 0; 6598 } 6599 6600 calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count); 6601 if (calc_journal_size <= hfsmp->jnl_size) { 6602 /* The journal size requires no modification */ 6603 goto out; 6604 } 6605 6606 if (hfs_resize_debug) { 6607 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size); 6608 } 6609 6610 /* Extend the journal to the new calculated size */ 6611 error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context); 6612 if (error == 0) { 6613 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n", 6614 hfsmp->jnl_size, hfsmp->vcbVN); 6615 } 6616out: 6617 return error; 6618} 6619 6620 6621/* 6622 * This function traverses through all extended attribute records for a 
given 6623 * fileID, and calls function that reclaims data blocks that exist in the 6624 * area of the disk being reclaimed which in turn is responsible for allocating 6625 * new space, copying extent data, deallocating new space, and if required, 6626 * splitting the extent. 6627 * 6628 * Note: The caller has already acquired the cnode lock on the file. Therefore 6629 * we are assured that no other thread would be creating/deleting/modifying 6630 * extended attributes for this file. 6631 * 6632 * Side Effects: 6633 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation 6634 * blocks that were relocated. 6635 * 6636 * Returns: 6637 * 0 on success, non-zero on failure. 6638 */ 6639static int 6640hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context) 6641{ 6642 int error = 0; 6643 struct hfs_reclaim_extent_info *extent_info; 6644 int i; 6645 HFSPlusAttrKey *key; 6646 int *lockflags; 6647 6648 if (hfs_resize_debug) { 6649 printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID); 6650 } 6651 6652 MALLOC(extent_info, struct hfs_reclaim_extent_info *, 6653 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK); 6654 if (extent_info == NULL) { 6655 return ENOMEM; 6656 } 6657 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info)); 6658 extent_info->vp = vp; 6659 extent_info->fileID = fileID; 6660 extent_info->is_xattr = true; 6661 extent_info->is_sysfile = vnode_issystem(vp); 6662 extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp); 6663 lockflags = &(extent_info->lockflags); 6664 *lockflags = SFL_ATTRIBUTE | SFL_BITMAP; 6665 6666 /* Initialize iterator from the extent_info structure */ 6667 MALLOC(extent_info->iterator, struct BTreeIterator *, 6668 sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); 6669 if (extent_info->iterator == NULL) { 6670 error = ENOMEM; 6671 goto out; 6672 } 6673 bzero(extent_info->iterator, sizeof(struct BTreeIterator)); 6674 6675 /* 
Build attribute key */ 6676 key = (HFSPlusAttrKey *)&(extent_info->iterator->key); 6677 error = hfs_buildattrkey(fileID, NULL, key); 6678 if (error) { 6679 goto out; 6680 } 6681 6682 /* Initialize btdata from extent_info structure. Note that the 6683 * buffer pointer actually points to the xattr record from the 6684 * extent_info structure itself. 6685 */ 6686 extent_info->btdata.bufferAddress = &(extent_info->record.xattr); 6687 extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord); 6688 extent_info->btdata.itemCount = 1; 6689 6690 /* 6691 * Sync all extent-based attribute data to the disk. 6692 * 6693 * All extent-based attribute data I/O is performed via cluster 6694 * I/O using a virtual file that spans across entire file system 6695 * space. 6696 */ 6697 hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK); 6698 (void)cluster_push(hfsmp->hfs_attrdata_vp, 0); 6699 error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr"); 6700 hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0); 6701 if (error) { 6702 goto out; 6703 } 6704 6705 /* Search for extended attribute for current file. This 6706 * will place the iterator before the first matching record. 
6707 */ 6708 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); 6709 error = BTSearchRecord(extent_info->fcb, extent_info->iterator, 6710 &(extent_info->btdata), &(extent_info->recordlen), 6711 extent_info->iterator); 6712 hfs_systemfile_unlock(hfsmp, *lockflags); 6713 if (error) { 6714 if (error != btNotFound) { 6715 goto out; 6716 } 6717 /* btNotFound is expected here, so just mask it */ 6718 error = 0; 6719 } 6720 6721 while (1) { 6722 /* Iterate to the next record */ 6723 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); 6724 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, 6725 extent_info->iterator, &(extent_info->btdata), 6726 &(extent_info->recordlen)); 6727 hfs_systemfile_unlock(hfsmp, *lockflags); 6728 6729 /* Stop the iteration if we encounter end of btree or xattr with different fileID */ 6730 if (error || key->fileID != fileID) { 6731 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { 6732 error = 0; 6733 } 6734 break; 6735 } 6736 6737 /* We only care about extent-based EAs */ 6738 if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) && 6739 (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) { 6740 continue; 6741 } 6742 6743 if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) { 6744 extent_info->overflow_count = 0; 6745 extent_info->extents = extent_info->record.xattr.forkData.theFork.extents; 6746 } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) { 6747 extent_info->overflow_count++; 6748 extent_info->extents = extent_info->record.xattr.overflowExtents.extents; 6749 } 6750 6751 extent_info->recStartBlock = key->startBlock; 6752 for (i = 0; i < kHFSPlusExtentDensity; i++) { 6753 if (extent_info->extents[i].blockCount == 0) { 6754 break; 6755 } 6756 extent_info->extent_index = i; 6757 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); 6758 if (error) { 6759 printf ("hfs_reclaim_xattr: fileID=%u 
hfs_reclaim_extent error=%d\n", fileID, error); 6760 goto out; 6761 } 6762 } 6763 } 6764 6765out: 6766 /* If any blocks were relocated, account them and report progress */ 6767 if (extent_info->blocks_relocated) { 6768 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; 6769 hfs_truncatefs_progress(hfsmp); 6770 } 6771 if (extent_info->iterator) { 6772 FREE(extent_info->iterator, M_TEMP); 6773 } 6774 if (extent_info) { 6775 FREE(extent_info, M_TEMP); 6776 } 6777 if (hfs_resize_debug) { 6778 printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error); 6779 } 6780 return error; 6781} 6782 6783/* 6784 * Reclaim any extent-based extended attributes allocation blocks from 6785 * the area of the disk that is being truncated. 6786 * 6787 * The function traverses the attribute btree to find out the fileIDs 6788 * of the extended attributes that need to be relocated. For every 6789 * file whose large EA requires relocation, it looks up the cnode and 6790 * calls hfs_reclaim_xattr() to do all the work for allocating 6791 * new space, copying data, deallocating old space, and if required, 6792 * splitting the extents. 6793 * 6794 * Inputs: 6795 * allocLimit - starting block of the area being reclaimed 6796 * 6797 * Returns: 6798 * returns 0 on success, non-zero on failure. 
6799 */ 6800static int 6801hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) 6802{ 6803 int error = 0; 6804 FCB *fcb; 6805 struct BTreeIterator *iterator = NULL; 6806 struct FSBufferDescriptor btdata; 6807 HFSPlusAttrKey *key; 6808 HFSPlusAttrRecord rec; 6809 int lockflags = 0; 6810 cnid_t prev_fileid = 0; 6811 struct vnode *vp; 6812 int need_relocate; 6813 int btree_operation; 6814 u_int32_t files_moved = 0; 6815 u_int32_t prev_blocksmoved; 6816 int i; 6817 6818 fcb = VTOF(hfsmp->hfs_attribute_vp); 6819 /* Store the value to print total blocks moved by this function in end */ 6820 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; 6821 6822 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { 6823 return ENOMEM; 6824 } 6825 bzero(iterator, sizeof(*iterator)); 6826 key = (HFSPlusAttrKey *)&iterator->key; 6827 btdata.bufferAddress = &rec; 6828 btdata.itemSize = sizeof(rec); 6829 btdata.itemCount = 1; 6830 6831 need_relocate = false; 6832 btree_operation = kBTreeFirstRecord; 6833 /* Traverse the attribute btree to find extent-based EAs to reclaim */ 6834 while (1) { 6835 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); 6836 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); 6837 hfs_systemfile_unlock(hfsmp, lockflags); 6838 if (error) { 6839 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { 6840 error = 0; 6841 } 6842 break; 6843 } 6844 btree_operation = kBTreeNextRecord; 6845 6846 /* If the extents of current fileID were already relocated, skip it */ 6847 if (prev_fileid == key->fileID) { 6848 continue; 6849 } 6850 6851 /* Check if any of the extents in the current record need to be relocated */ 6852 need_relocate = false; 6853 switch(rec.recordType) { 6854 case kHFSPlusAttrForkData: 6855 for (i = 0; i < kHFSPlusExtentDensity; i++) { 6856 if (rec.forkData.theFork.extents[i].blockCount == 0) { 6857 break; 6858 } 6859 if 
((rec.forkData.theFork.extents[i].startBlock + 6860 rec.forkData.theFork.extents[i].blockCount) > allocLimit) { 6861 need_relocate = true; 6862 break; 6863 } 6864 } 6865 break; 6866 6867 case kHFSPlusAttrExtents: 6868 for (i = 0; i < kHFSPlusExtentDensity; i++) { 6869 if (rec.overflowExtents.extents[i].blockCount == 0) { 6870 break; 6871 } 6872 if ((rec.overflowExtents.extents[i].startBlock + 6873 rec.overflowExtents.extents[i].blockCount) > allocLimit) { 6874 need_relocate = true; 6875 break; 6876 } 6877 } 6878 break; 6879 }; 6880 6881 /* Continue iterating to next attribute record */ 6882 if (need_relocate == false) { 6883 continue; 6884 } 6885 6886 /* Look up the vnode for corresponding file. The cnode 6887 * will be locked which will ensure that no one modifies 6888 * the xattrs when we are relocating them. 6889 * 6890 * We want to allow open-unlinked files to be moved, 6891 * so provide allow_deleted == 1 for hfs_vget(). 6892 */ 6893 if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) { 6894 continue; 6895 } 6896 6897 error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context); 6898 hfs_unlock(VTOC(vp)); 6899 vnode_put(vp); 6900 if (error) { 6901 printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error); 6902 break; 6903 } 6904 prev_fileid = key->fileID; 6905 files_moved++; 6906 } 6907 6908 if (files_moved) { 6909 printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n", 6910 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), 6911 files_moved, hfsmp->vcbVN); 6912 } 6913 6914 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); 6915 return error; 6916} 6917 6918/* 6919 * Reclaim blocks from regular files. 6920 * 6921 * This function iterates over all the record in catalog btree looking 6922 * for files with extents that overlap into the space we're trying to 6923 * free up. 
If a file extent requires relocation, it looks up the vnode 6924 * and calls function to relocate the data. 6925 * 6926 * Returns: 6927 * Zero on success, non-zero on failure. 6928 */ 6929static int 6930hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) 6931{ 6932 int error; 6933 FCB *fcb; 6934 struct BTreeIterator *iterator = NULL; 6935 struct FSBufferDescriptor btdata; 6936 int btree_operation; 6937 int lockflags; 6938 struct HFSPlusCatalogFile filerec; 6939 struct vnode *vp; 6940 struct vnode *rvp; 6941 struct filefork *datafork; 6942 u_int32_t files_moved = 0; 6943 u_int32_t prev_blocksmoved; 6944 6945 fcb = VTOF(hfsmp->hfs_catalog_vp); 6946 /* Store the value to print total blocks moved by this function at the end */ 6947 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; 6948 6949 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { 6950 error = ENOMEM; 6951 goto reclaim_filespace_done; 6952 } 6953 6954#if CONFIG_PROTECT 6955 int keys_generated = 0; 6956 /* 6957 * For content-protected filesystems, we may need to relocate files that 6958 * are encrypted. If they use the new-style offset-based IVs, then 6959 * we can move them regardless of the lock state. We create a temporary 6960 * key here that we use to read/write the data, then we discard it at the 6961 * end of the function. 
6962 */ 6963 if (cp_fs_protected (hfsmp->hfs_mp)) { 6964 error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); 6965 if (error) { 6966 printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); 6967 goto reclaim_filespace_done; 6968 } 6969 } 6970#endif 6971 6972 bzero(iterator, sizeof(*iterator)); 6973 6974 btdata.bufferAddress = &filerec; 6975 btdata.itemSize = sizeof(filerec); 6976 btdata.itemCount = 1; 6977 6978 btree_operation = kBTreeFirstRecord; 6979 while (1) { 6980 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 6981 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); 6982 hfs_systemfile_unlock(hfsmp, lockflags); 6983 if (error) { 6984 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { 6985 error = 0; 6986 } 6987 break; 6988 } 6989 btree_operation = kBTreeNextRecord; 6990 6991 if (filerec.recordType != kHFSPlusFileRecord) { 6992 continue; 6993 } 6994 6995 /* Check if any of the extents require relocation */ 6996 if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) { 6997 continue; 6998 } 6999 7000 /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */ 7001 if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) { 7002 if (hfs_resize_debug) { 7003 printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID); 7004 } 7005 continue; 7006 } 7007 7008 /* If data fork exists or item is a directory hard link, relocate blocks */ 7009 datafork = VTOF(vp); 7010 if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) { 7011 error = hfs_reclaim_file(hfsmp, vp, filerec.fileID, 7012 kHFSDataForkType, allocLimit, context); 7013 if (error) { 7014 printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); 7015 hfs_unlock(VTOC(vp)); 7016 vnode_put(vp); 7017 break; 7018 } 7019 } 7020 7021 /* If resource fork exists or item is a directory hard link, relocate blocks */ 7022 if 
(((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) { 7023 if (vnode_isdir(vp)) { 7024 /* Resource fork vnode lookup is invalid for directory hard link. 7025 * So we fake data fork vnode as resource fork vnode. 7026 */ 7027 rvp = vp; 7028 } else { 7029 error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); 7030 if (error) { 7031 printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error); 7032 hfs_unlock(VTOC(vp)); 7033 vnode_put(vp); 7034 break; 7035 } 7036 VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; 7037 } 7038 7039 error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID, 7040 kHFSResourceForkType, allocLimit, context); 7041 if (error) { 7042 printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); 7043 hfs_unlock(VTOC(vp)); 7044 vnode_put(vp); 7045 break; 7046 } 7047 } 7048 7049 /* The file forks were relocated successfully, now drop the 7050 * cnode lock and vnode reference, and continue iterating to 7051 * next catalog record. 7052 */ 7053 hfs_unlock(VTOC(vp)); 7054 vnode_put(vp); 7055 files_moved++; 7056 } 7057 7058 if (files_moved) { 7059 printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n", 7060 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), 7061 files_moved, hfsmp->vcbVN); 7062 } 7063 7064reclaim_filespace_done: 7065 if (iterator) { 7066 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); 7067 } 7068 7069#if CONFIG_PROTECT 7070 if (keys_generated) { 7071 cp_entry_destroy(&hfsmp->hfs_resize_cpentry); 7072 } 7073#endif 7074 return error; 7075} 7076 7077/* 7078 * Reclaim space at the end of a file system. 
7079 * 7080 * Inputs - 7081 * allocLimit - start block of the space being reclaimed 7082 * reclaimblks - number of allocation blocks to reclaim 7083 */ 7084static int 7085hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context) 7086{ 7087 int error = 0; 7088 7089 /* 7090 * Preflight the bitmap to find out total number of blocks that need 7091 * relocation. 7092 * 7093 * Note: Since allocLimit is set to the location of new alternate volume 7094 * header, the check below does not account for blocks allocated for old 7095 * alternate volume header. 7096 */ 7097 error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks)); 7098 if (error) { 7099 printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error); 7100 return error; 7101 } 7102 if (hfs_resize_debug) { 7103 printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks); 7104 } 7105 7106 /* Just to be safe, sync the content of the journal to the disk before we proceed */ 7107 hfs_journal_flush(hfsmp, TRUE); 7108 7109 /* First, relocate journal file blocks if they're in the way. 7110 * Doing this first will make sure that journal relocate code 7111 * gets access to contiguous blocks on disk first. The journal 7112 * file has to be contiguous on the disk, otherwise resize will 7113 * fail. 7114 */ 7115 error = hfs_reclaim_journal_file(hfsmp, allocLimit, context); 7116 if (error) { 7117 printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error); 7118 return error; 7119 } 7120 7121 /* Relocate journal info block blocks if they're in the way. */ 7122 error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context); 7123 if (error) { 7124 printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error); 7125 return error; 7126 } 7127 7128 /* Relocate extents of the Extents B-tree if they're in the way. 
7129 * Relocating extents btree before other btrees is important as 7130 * this will provide access to largest contiguous block range on 7131 * the disk for relocating extents btree. Note that extents btree 7132 * can only have maximum of 8 extents. 7133 */ 7134 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID, 7135 kHFSDataForkType, allocLimit, context); 7136 if (error) { 7137 printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); 7138 return error; 7139 } 7140 7141 /* Relocate extents of the Allocation file if they're in the way. */ 7142 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID, 7143 kHFSDataForkType, allocLimit, context); 7144 if (error) { 7145 printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); 7146 return error; 7147 } 7148 7149 /* Relocate extents of the Catalog B-tree if they're in the way. */ 7150 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID, 7151 kHFSDataForkType, allocLimit, context); 7152 if (error) { 7153 printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); 7154 return error; 7155 } 7156 7157 /* Relocate extents of the Attributes B-tree if they're in the way. */ 7158 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID, 7159 kHFSDataForkType, allocLimit, context); 7160 if (error) { 7161 printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); 7162 return error; 7163 } 7164 7165 /* Relocate extents of the Startup File if there is one and they're in the way. */ 7166 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID, 7167 kHFSDataForkType, allocLimit, context); 7168 if (error) { 7169 printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); 7170 return error; 7171 } 7172 7173 /* 7174 * We need to make sure the alternate volume header gets flushed if we moved 7175 * any extents in the volume header. 
But we need to do that before 7176 * shrinking the size of the volume, or else the journal code will panic 7177 * with an invalid (too large) block number. 7178 * 7179 * Note that blks_moved will be set if ANY extent was moved, even 7180 * if it was just an overflow extent. In this case, the journal_flush isn't 7181 * strictly required, but shouldn't hurt. 7182 */ 7183 if (hfsmp->hfs_resize_blocksmoved) { 7184 hfs_journal_flush(hfsmp, TRUE); 7185 } 7186 7187 /* Reclaim extents from catalog file records */ 7188 error = hfs_reclaim_filespace(hfsmp, allocLimit, context); 7189 if (error) { 7190 printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error); 7191 return error; 7192 } 7193 7194 /* Reclaim extents from extent-based extended attributes, if any */ 7195 error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context); 7196 if (error) { 7197 printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error); 7198 return error; 7199 } 7200 7201 return error; 7202} 7203 7204 7205/* 7206 * Check if there are any extents (including overflow extents) that overlap 7207 * into the disk space that is being reclaimed. 
7208 * 7209 * Output - 7210 * true - One of the extents need to be relocated 7211 * false - No overflow extents need to be relocated, or there was an error 7212 */ 7213static int 7214hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec) 7215{ 7216 struct BTreeIterator * iterator = NULL; 7217 struct FSBufferDescriptor btdata; 7218 HFSPlusExtentRecord extrec; 7219 HFSPlusExtentKey *extkeyptr; 7220 FCB *fcb; 7221 int overlapped = false; 7222 int i, j; 7223 int error; 7224 int lockflags = 0; 7225 u_int32_t endblock; 7226 7227 /* Check if data fork overlaps the target space */ 7228 for (i = 0; i < kHFSPlusExtentDensity; ++i) { 7229 if (filerec->dataFork.extents[i].blockCount == 0) { 7230 break; 7231 } 7232 endblock = filerec->dataFork.extents[i].startBlock + 7233 filerec->dataFork.extents[i].blockCount; 7234 if (endblock > allocLimit) { 7235 overlapped = true; 7236 goto out; 7237 } 7238 } 7239 7240 /* Check if resource fork overlaps the target space */ 7241 for (j = 0; j < kHFSPlusExtentDensity; ++j) { 7242 if (filerec->resourceFork.extents[j].blockCount == 0) { 7243 break; 7244 } 7245 endblock = filerec->resourceFork.extents[j].startBlock + 7246 filerec->resourceFork.extents[j].blockCount; 7247 if (endblock > allocLimit) { 7248 overlapped = true; 7249 goto out; 7250 } 7251 } 7252 7253 /* Return back if there are no overflow extents for this file */ 7254 if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) { 7255 goto out; 7256 } 7257 7258 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { 7259 return 0; 7260 } 7261 bzero(iterator, sizeof(*iterator)); 7262 extkeyptr = (HFSPlusExtentKey *)&iterator->key; 7263 extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; 7264 extkeyptr->forkType = 0; 7265 extkeyptr->fileID = filerec->fileID; 7266 extkeyptr->startBlock = 0; 7267 7268 btdata.bufferAddress = &extrec; 7269 btdata.itemSize = sizeof(extrec); 7270 btdata.itemCount = 1; 7271 7272 fcb 
= VTOF(hfsmp->hfs_extents_vp); 7273 7274 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); 7275 7276 /* This will position the iterator just before the first overflow 7277 * extent record for given fileID. It will always return btNotFound, 7278 * so we special case the error code. 7279 */ 7280 error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); 7281 if (error && (error != btNotFound)) { 7282 goto out; 7283 } 7284 7285 /* BTIterateRecord() might return error if the btree is empty, and 7286 * therefore we return that the extent does not overflow to the caller 7287 */ 7288 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); 7289 while (error == 0) { 7290 /* Stop when we encounter a different file. */ 7291 if (extkeyptr->fileID != filerec->fileID) { 7292 break; 7293 } 7294 /* Check if any of the forks exist in the target space. */ 7295 for (i = 0; i < kHFSPlusExtentDensity; ++i) { 7296 if (extrec[i].blockCount == 0) { 7297 break; 7298 } 7299 endblock = extrec[i].startBlock + extrec[i].blockCount; 7300 if (endblock > allocLimit) { 7301 overlapped = true; 7302 goto out; 7303 } 7304 } 7305 /* Look for more records. */ 7306 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); 7307 } 7308 7309out: 7310 if (lockflags) { 7311 hfs_systemfile_unlock(hfsmp, lockflags); 7312 } 7313 if (iterator) { 7314 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); 7315 } 7316 return overlapped; 7317} 7318 7319 7320/* 7321 * Calculate the progress of a file system resize operation. 
7322 */ 7323__private_extern__ 7324int 7325hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) 7326{ 7327 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { 7328 return (ENXIO); 7329 } 7330 7331 if (hfsmp->hfs_resize_totalblocks > 0) { 7332 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks); 7333 } else { 7334 *progress = 0; 7335 } 7336 7337 return (0); 7338} 7339 7340 7341/* 7342 * Creates a UUID from a unique "name" in the HFS UUID Name space. 7343 * See version 3 UUID. 7344 */ 7345static void 7346hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) 7347{ 7348 MD5_CTX md5c; 7349 uint8_t rawUUID[8]; 7350 7351 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; 7352 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; 7353 7354 MD5Init( &md5c ); 7355 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); 7356 MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); 7357 MD5Final( result, &md5c ); 7358 7359 result[6] = 0x30 | ( result[6] & 0x0F ); 7360 result[8] = 0x80 | ( result[8] & 0x3F ); 7361} 7362 7363/* 7364 * Get file system attributes. 
7365 */ 7366static int 7367hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) 7368{ 7369#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST)) 7370#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST)) 7371#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME)) 7372 7373 ExtendedVCB *vcb = VFSTOVCB(mp); 7374 struct hfsmount *hfsmp = VFSTOHFS(mp); 7375 u_int32_t freeCNIDs; 7376 7377 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; 7378 7379 VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); 7380 VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); 7381 VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt); 7382 VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF); 7383 VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0)); 7384 VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks); 7385 VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0)); 7386 VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1)); 7387 VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize); 7388 /* XXX needs clarification */ 7389 VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); 7390 /* Maximum files is constrained by total blocks. 
*/ 7391 VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2)); 7392 VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1))); 7393 7394 fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; 7395 fsap->f_fsid.val[1] = vfs_typenum(mp); 7396 VFSATTR_SET_SUPPORTED(fsap, f_fsid); 7397 7398 VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord); 7399 VFSATTR_RETURN(fsap, f_carbon_fsid, 0); 7400 7401 if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { 7402 vol_capabilities_attr_t *cap; 7403 7404 cap = &fsap->f_capabilities; 7405 7406 if (hfsmp->hfs_flags & HFS_STANDARD) { 7407 cap->capabilities[VOL_CAPABILITIES_FORMAT] = 7408 VOL_CAP_FMT_PERSISTENTOBJECTIDS | 7409 VOL_CAP_FMT_CASE_PRESERVING | 7410 VOL_CAP_FMT_FAST_STATFS | 7411 VOL_CAP_FMT_HIDDEN_FILES | 7412 VOL_CAP_FMT_PATH_FROM_ID; 7413 } else { 7414 cap->capabilities[VOL_CAPABILITIES_FORMAT] = 7415 VOL_CAP_FMT_PERSISTENTOBJECTIDS | 7416 VOL_CAP_FMT_SYMBOLICLINKS | 7417 VOL_CAP_FMT_HARDLINKS | 7418 VOL_CAP_FMT_JOURNAL | 7419 VOL_CAP_FMT_ZERO_RUNS | 7420 (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) | 7421 (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? 
VOL_CAP_FMT_CASE_SENSITIVE : 0) | 7422 VOL_CAP_FMT_CASE_PRESERVING | 7423 VOL_CAP_FMT_FAST_STATFS | 7424 VOL_CAP_FMT_2TB_FILESIZE | 7425 VOL_CAP_FMT_HIDDEN_FILES | 7426#if HFS_COMPRESSION 7427 VOL_CAP_FMT_PATH_FROM_ID | 7428 VOL_CAP_FMT_DECMPFS_COMPRESSION; 7429#else 7430 VOL_CAP_FMT_PATH_FROM_ID; 7431#endif 7432 } 7433 cap->capabilities[VOL_CAPABILITIES_INTERFACES] = 7434 VOL_CAP_INT_SEARCHFS | 7435 VOL_CAP_INT_ATTRLIST | 7436 VOL_CAP_INT_NFSEXPORT | 7437 VOL_CAP_INT_READDIRATTR | 7438 VOL_CAP_INT_EXCHANGEDATA | 7439 VOL_CAP_INT_ALLOCATE | 7440 VOL_CAP_INT_VOL_RENAME | 7441 VOL_CAP_INT_ADVLOCK | 7442 VOL_CAP_INT_FLOCK | 7443#if NAMEDSTREAMS 7444 VOL_CAP_INT_EXTENDED_ATTR | 7445 VOL_CAP_INT_NAMEDSTREAMS; 7446#else 7447 VOL_CAP_INT_EXTENDED_ATTR; 7448#endif 7449 cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0; 7450 cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0; 7451 7452 cap->valid[VOL_CAPABILITIES_FORMAT] = 7453 VOL_CAP_FMT_PERSISTENTOBJECTIDS | 7454 VOL_CAP_FMT_SYMBOLICLINKS | 7455 VOL_CAP_FMT_HARDLINKS | 7456 VOL_CAP_FMT_JOURNAL | 7457 VOL_CAP_FMT_JOURNAL_ACTIVE | 7458 VOL_CAP_FMT_NO_ROOT_TIMES | 7459 VOL_CAP_FMT_SPARSE_FILES | 7460 VOL_CAP_FMT_ZERO_RUNS | 7461 VOL_CAP_FMT_CASE_SENSITIVE | 7462 VOL_CAP_FMT_CASE_PRESERVING | 7463 VOL_CAP_FMT_FAST_STATFS | 7464 VOL_CAP_FMT_2TB_FILESIZE | 7465 VOL_CAP_FMT_OPENDENYMODES | 7466 VOL_CAP_FMT_HIDDEN_FILES | 7467#if HFS_COMPRESSION 7468 VOL_CAP_FMT_PATH_FROM_ID | 7469 VOL_CAP_FMT_DECMPFS_COMPRESSION; 7470#else 7471 VOL_CAP_FMT_PATH_FROM_ID; 7472#endif 7473 cap->valid[VOL_CAPABILITIES_INTERFACES] = 7474 VOL_CAP_INT_SEARCHFS | 7475 VOL_CAP_INT_ATTRLIST | 7476 VOL_CAP_INT_NFSEXPORT | 7477 VOL_CAP_INT_READDIRATTR | 7478 VOL_CAP_INT_EXCHANGEDATA | 7479 VOL_CAP_INT_COPYFILE | 7480 VOL_CAP_INT_ALLOCATE | 7481 VOL_CAP_INT_VOL_RENAME | 7482 VOL_CAP_INT_ADVLOCK | 7483 VOL_CAP_INT_FLOCK | 7484 VOL_CAP_INT_MANLOCK | 7485#if NAMEDSTREAMS 7486 VOL_CAP_INT_EXTENDED_ATTR | 7487 VOL_CAP_INT_NAMEDSTREAMS; 7488#else 7489 
VOL_CAP_INT_EXTENDED_ATTR; 7490#endif 7491 cap->valid[VOL_CAPABILITIES_RESERVED1] = 0; 7492 cap->valid[VOL_CAPABILITIES_RESERVED2] = 0; 7493 VFSATTR_SET_SUPPORTED(fsap, f_capabilities); 7494 } 7495 if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { 7496 vol_attributes_attr_t *attrp = &fsap->f_attributes; 7497 7498 attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK; 7499 attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; 7500 attrp->validattr.dirattr = ATTR_DIR_VALIDMASK; 7501 attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK; 7502 attrp->validattr.forkattr = 0; 7503 7504 attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK; 7505 attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; 7506 attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK; 7507 attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK; 7508 attrp->nativeattr.forkattr = 0; 7509 VFSATTR_SET_SUPPORTED(fsap, f_attributes); 7510 } 7511 fsap->f_create_time.tv_sec = hfsmp->hfs_itime; 7512 fsap->f_create_time.tv_nsec = 0; 7513 VFSATTR_SET_SUPPORTED(fsap, f_create_time); 7514 fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod; 7515 fsap->f_modify_time.tv_nsec = 0; 7516 VFSATTR_SET_SUPPORTED(fsap, f_modify_time); 7517 7518 fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp; 7519 fsap->f_backup_time.tv_nsec = 0; 7520 VFSATTR_SET_SUPPORTED(fsap, f_backup_time); 7521 if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) { 7522 u_int16_t subtype = 0; 7523 7524 /* 7525 * Subtypes (flavors) for HFS 7526 * 0: Mac OS Extended 7527 * 1: Mac OS Extended (Journaled) 7528 * 2: Mac OS Extended (Case Sensitive) 7529 * 3: Mac OS Extended (Case Sensitive, Journaled) 7530 * 4 - 127: Reserved 7531 * 128: Mac OS Standard 7532 * 7533 */ 7534 if (hfsmp->hfs_flags & HFS_STANDARD) { 7535 subtype = HFS_SUBTYPE_STANDARDHFS; 7536 } else /* HFS Plus */ { 7537 if (hfsmp->jnl) 7538 subtype |= HFS_SUBTYPE_JOURNALED; 7539 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) 7540 subtype |= HFS_SUBTYPE_CASESENSITIVE; 7541 } 7542 fsap->f_fssubtype = 
		subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	/* Volume name: hand back the cached copy kept in the VCB. */
	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}

/*
 * Perform a volume rename.  Requires the FS' root vp.
 *
 * Renames the root-folder record in the catalog to 'name', updates the
 * in-core VCB copy of the volume name, and (best effort) pushes the new
 * name down to the underlying device via DKIOCCSSETLVNAME.  Returns 0 on
 * success or an errno-style error from the lock/transaction/catalog layers.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory: the (virtual) parent of the root folder. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* New descriptor for the root folder carrying the new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/*
	 * Lock order: cnode lock, then transaction, then catalog preflight,
	 * then the catalog B-tree lock.  Each level unwinds in reverse on
	 * its own error, so the nesting below must stay exactly as-is.
	 */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					/* strlcpy guarantees NUL-termination of vcbVN */
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/*
					 * The CoreStorage notification is best effort:
					 * normalize/ioctl failures are deliberately
					 * swallowed so they cannot fail the rename.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/*
				 * NOTE(review): the VCB is marked dirty only on
				 * error, while the header flush below always runs
				 * — presumably so a failed rename still forces the
				 * (unchanged) header back out; confirm intent.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}

/*
 * Set file system attributes.
7653 */ 7654static int 7655hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) 7656{ 7657 kauth_cred_t cred = vfs_context_ucred(context); 7658 int error = 0; 7659 7660 /* 7661 * Must be superuser or owner of filesystem to change volume attributes 7662 */ 7663 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner)) 7664 return(EACCES); 7665 7666 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { 7667 vnode_t root_vp; 7668 7669 error = hfs_vfs_root(mp, &root_vp, context); 7670 if (error) 7671 goto out; 7672 7673 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context)); 7674 (void) vnode_put(root_vp); 7675 if (error) 7676 goto out; 7677 7678 VFSATTR_SET_SUPPORTED(fsap, f_vol_name); 7679 } 7680 7681out: 7682 return error; 7683} 7684 7685/* If a runtime corruption is detected, set the volume inconsistent 7686 * bit in the volume attributes. The volume inconsistent bit is a persistent 7687 * bit which represents that the volume is corrupt and needs repair. 7688 * The volume inconsistent bit can be set from the kernel when it detects 7689 * runtime corruption or from file system repair utilities like fsck_hfs when 7690 * a repair operation fails. The bit should be cleared only from file system 7691 * verify/repair utility like fsck_hfs when a verify/repair succeeds. 7692 */ 7693void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) 7694{ 7695 HFS_MOUNT_LOCK(hfsmp, TRUE); 7696 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) { 7697 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; 7698 MarkVCBDirty(hfsmp); 7699 } 7700 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { 7701 /* Log information to ASL log */ 7702 fslog_fs_corrupt(hfsmp->hfs_mp); 7703 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); 7704 } 7705 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 7706} 7707 7708/* Replay the journal on the device node provided. 
 * Returns zero if
 * journal replay succeeded or no journal was supposed to be replayed.
 */
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
{
	int retval = 0;
	int error = 0;
	struct mount *mp = NULL;
	struct hfs_mount_args *args = NULL;

	/* Replay allowed only on raw devices */
	if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
		retval = EINVAL;
		goto out;
	}

	/*
	 * Create dummy mount structures: hfs_mountfs() requires a mount
	 * point and an argument block even for a replay-only call, so
	 * zeroed throwaway copies are allocated here.
	 */
	MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
	if (mp == NULL) {
		/* Defensive: M_WAITOK allocations normally don't return NULL. */
		retval = ENOMEM;
		goto out;
	}
	bzero(mp, sizeof(struct mount));
	mount_lock_init(mp);

	MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
	if (args == NULL) {
		retval = ENOMEM;
		goto out;
	}
	bzero(args, sizeof(struct hfs_mount_args));

	/*
	 * The literal 1 presumably selects journal-replay-only mode in
	 * hfs_mountfs() — TODO confirm against its declaration.
	 */
	retval = hfs_mountfs(devvp, mp, args, 1, context);
	buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");

	/* FSYNC the devnode to be sure all data has been flushed */
	error = VNOP_FSYNC(devvp, MNT_WAIT, context);
	if (error) {
		/* NOTE(review): an fsync failure overrides any replay error. */
		retval = error;
	}

out:
	/* mount_lock_init() ran iff mp was allocated, so destroy here is safe. */
	if (mp) {
		mount_lock_destroy(mp);
		FREE(mp, M_TEMP);
	}
	if (args) {
		FREE(args, M_TEMP);
	}
	return retval;
}

/*
 * hfs vfs operations.
 * Entries are positional and must follow the struct vfsops slot order.
 */
struct vfsops hfs_vfsops = {
	hfs_mount,
	hfs_start,
	hfs_unmount,
	hfs_vfs_root,
	hfs_quotactl,
	hfs_vfs_getattr, 	/* was hfs_statfs */
	hfs_sync,
	hfs_vfs_vget,
	hfs_fhtovp,
	hfs_vptofh,
	hfs_init,
	hfs_sysctl,
	hfs_vfs_setattr,
	{NULL}
};