1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*	@(#)hfs_vfsutils.c	4.0
29*
30*	(c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
31*
32*	hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33*
34*/
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/malloc.h>
39#include <sys/stat.h>
40#include <sys/mount.h>
41#include <sys/mount_internal.h>
42#include <sys/buf.h>
43#include <sys/buf_internal.h>
44#include <sys/ubc.h>
45#include <sys/unistd.h>
46#include <sys/utfconv.h>
47#include <sys/kauth.h>
48#include <sys/fcntl.h>
49#include <sys/fsctl.h>
50#include <sys/vnode_internal.h>
51#include <kern/clock.h>
52
53#include <libkern/OSAtomic.h>
54
55#include "hfs.h"
56#include "hfs_catalog.h"
57#include "hfs_dbg.h"
58#include "hfs_mount.h"
59#include "hfs_endian.h"
60#include "hfs_cnode.h"
61#include "hfs_fsctl.h"
62
63#include "hfscommon/headers/FileMgrInternal.h"
64#include "hfscommon/headers/BTreesInternal.h"
65#include "hfscommon/headers/HFSUnicodeWrappers.h"
66
/* Forward declarations for static helpers used by this file. */
static void ReleaseMetaFileVNode(struct vnode *vp);
static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);

/*
 * When non-zero, the mount routines in this file printf diagnostic
 * messages on many of their failure paths.
 */
#define HFS_MOUNT_DEBUG 1
73
74
75//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
//       hence is not in the right byte order on little endian machines. It is
//       the responsibility of the Finder and other clients to swap the data.
79//*******************************************************************************
80
81//*******************************************************************************
82//	Routine:	hfs_MountHFSVolume
83//
84//
85//*******************************************************************************
/*
 * Names given to the metadata (system) file vnodes.  The mount routines
 * in this file point cndesc.cd_nameptr at these strings when they create
 * the corresponding system-file vnode.
 */
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";
91
92
93OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
94		__unused struct proc *p)
95{
96	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
97	int error;
98	ByteCount utf8chars;
99	struct cat_desc cndesc;
100	struct cat_attr cnattr;
101	struct cat_fork fork;
102	int newvnode_flags = 0;
103
104	/* Block size must be a multiple of 512 */
105	if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
106	    (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
107		return (EINVAL);
108
109	/* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
110	if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
111	    ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
112		return (EINVAL);
113	}
114	hfsmp->hfs_flags |= HFS_STANDARD;
115	/*
116	 * The MDB seems OK: transfer info from it into VCB
117	 * Note - the VCB starts out clear (all zeros)
118	 *
119	 */
120	vcb->vcbSigWord		= SWAP_BE16 (mdb->drSigWord);
121	vcb->hfs_itime		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
122	vcb->localCreateDate	= SWAP_BE32 (mdb->drCrDate);
123	vcb->vcbLsMod		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
124	vcb->vcbAtrb		= SWAP_BE16 (mdb->drAtrb);
125	vcb->vcbNmFls		= SWAP_BE16 (mdb->drNmFls);
126	vcb->vcbVBMSt		= SWAP_BE16 (mdb->drVBMSt);
127	vcb->nextAllocation	= SWAP_BE16 (mdb->drAllocPtr);
128	vcb->totalBlocks	= SWAP_BE16 (mdb->drNmAlBlks);
129	vcb->allocLimit		= vcb->totalBlocks;
130	vcb->blockSize		= SWAP_BE32 (mdb->drAlBlkSiz);
131	vcb->vcbClpSiz		= SWAP_BE32 (mdb->drClpSiz);
132	vcb->vcbAlBlSt		= SWAP_BE16 (mdb->drAlBlSt);
133	vcb->vcbNxtCNID		= SWAP_BE32 (mdb->drNxtCNID);
134	vcb->freeBlocks		= SWAP_BE16 (mdb->drFreeBks);
135	vcb->vcbVolBkUp		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
136	vcb->vcbWrCnt		= SWAP_BE32 (mdb->drWrCnt);
137	vcb->vcbNmRtDirs	= SWAP_BE16 (mdb->drNmRtDirs);
138	vcb->vcbFilCnt		= SWAP_BE32 (mdb->drFilCnt);
139	vcb->vcbDirCnt		= SWAP_BE32 (mdb->drDirCnt);
140	bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
141	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
142		vcb->vcbWrCnt++;	/* Compensate for write of MDB on last flush */
143
144	/* convert hfs encoded name into UTF-8 string */
145	error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
146	/*
147	 * When an HFS name cannot be encoded with the current
148	 * volume encoding we use MacRoman as a fallback.
149	 */
150	if (error || (utf8chars == 0)) {
151		error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
152		/* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
153		if (error) {
154			goto MtVolErr;
155		}
156	}
157
158	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
159	vcb->vcbVBMIOSize = kHFSBlockSize;
160
161	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
162	                                          hfsmp->hfs_logical_block_count);
163
164	bzero(&cndesc, sizeof(cndesc));
165	cndesc.cd_parentcnid = kHFSRootParentID;
166	cndesc.cd_flags |= CD_ISMETA;
167	bzero(&cnattr, sizeof(cnattr));
168	cnattr.ca_linkcount = 1;
169	cnattr.ca_mode = S_IFREG;
170	bzero(&fork, sizeof(fork));
171
172	/*
173	 * Set up Extents B-tree vnode
174	 */
175	cndesc.cd_nameptr = hfs_extname;
176	cndesc.cd_namelen = strlen((char *)hfs_extname);
177	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
178	fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
179	fork.cf_blocks = fork.cf_size / vcb->blockSize;
180	fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
181	fork.cf_vblocks = 0;
182	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
183	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
184	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
185	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
186	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
187	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
188	cnattr.ca_blocks = fork.cf_blocks;
189
190	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
191	                        &hfsmp->hfs_extents_vp, &newvnode_flags);
192	if (error) {
193		if (HFS_MOUNT_DEBUG) {
194			printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
195		}
196		goto MtVolErr;
197	}
198	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
199	                                 (KeyCompareProcPtr)CompareExtentKeys));
200	if (error) {
201		if (HFS_MOUNT_DEBUG) {
202			printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
203		}
204		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
205		goto MtVolErr;
206	}
207	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
208
209	/*
210	 * Set up Catalog B-tree vnode...
211	 */
212	cndesc.cd_nameptr = hfs_catname;
213	cndesc.cd_namelen = strlen((char *)hfs_catname);
214	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
215	fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
216	fork.cf_blocks = fork.cf_size / vcb->blockSize;
217	fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
218	fork.cf_vblocks = 0;
219	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
220	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
221	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
222	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
223	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
224	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
225	cnattr.ca_blocks = fork.cf_blocks;
226
227	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
228	                        &hfsmp->hfs_catalog_vp, &newvnode_flags);
229	if (error) {
230		if (HFS_MOUNT_DEBUG) {
231			printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
232		}
233		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
234		goto MtVolErr;
235	}
236	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
237	                                 (KeyCompareProcPtr)CompareCatalogKeys));
238	if (error) {
239		if (HFS_MOUNT_DEBUG) {
240			printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
241		}
242		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
243		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
244		goto MtVolErr;
245	}
246	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
247
248	/*
249	 * Set up dummy Allocation file vnode (used only for locking bitmap)
250	 */
251	cndesc.cd_nameptr = hfs_vbmname;
252	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
253	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
254	bzero(&fork, sizeof(fork));
255	cnattr.ca_blocks = 0;
256
257	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
258	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
259	if (error) {
260		if (HFS_MOUNT_DEBUG) {
261			printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
262		}
263		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
264		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
265		goto MtVolErr;
266	}
267	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
268
269      	/* mark the volume dirty (clear clean unmount bit) */
270	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
271
272    if (error == noErr) {
273		error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
274		if (HFS_MOUNT_DEBUG) {
275			printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
276		}
277	}
278
279    if (error == noErr) {
280		/* If the disk isn't write protected.. */
281        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
282            MarkVCBDirty (vcb); //	mark VCB dirty so it will be written
283		}
284	}
285
286	/*
287	 * all done with system files so we can unlock now...
288	 */
289	hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
290	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
291	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
292
293	if (error == noErr) {
294		/* If successful, then we can just return once we've unlocked the cnodes */
295		return error;
296	}
297
298    //--	Release any resources allocated so far before exiting with an error:
299MtVolErr:
300	hfsUnmount(hfsmp, NULL);
301
302    return (error);
303}
304
305//*******************************************************************************
306//	Routine:	hfs_MountHFSPlusVolume
307//
308//
309//*******************************************************************************
310
311OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
312	off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
313{
314	register ExtendedVCB *vcb;
315	struct cat_desc cndesc;
316	struct cat_attr cnattr;
317	struct cat_fork cfork;
318	u_int32_t blockSize;
319	daddr64_t spare_sectors;
320	struct BTreeInfoRec btinfo;
321	u_int16_t  signature;
322	u_int16_t  hfs_version;
323	int newvnode_flags = 0;
324	int  i;
325	OSErr retval;
326	char converted_volname[256];
327	size_t volname_length = 0;
328	size_t conv_volname_length = 0;
329
330	signature = SWAP_BE16(vhp->signature);
331	hfs_version = SWAP_BE16(vhp->version);
332
333	if (signature == kHFSPlusSigWord) {
334		if (hfs_version != kHFSPlusVersion) {
335			printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
336			return (EINVAL);
337		}
338	} else if (signature == kHFSXSigWord) {
339		if (hfs_version != kHFSXVersion) {
340			printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
341			return (EINVAL);
342		}
343		/* The in-memory signature is always 'H+'. */
344		signature = kHFSPlusSigWord;
345		hfsmp->hfs_flags |= HFS_X;
346	} else {
347		/* Removed printf for invalid HFS+ signature because it gives
348		 * false error for UFS root volume
349		 */
350		if (HFS_MOUNT_DEBUG) {
351			printf("hfs_mounthfsplus: unknown Volume Signature\n");
352		}
353		return (EINVAL);
354	}
355
356	/* Block size must be at least 512 and a power of 2 */
357	blockSize = SWAP_BE32(vhp->blockSize);
358	if (blockSize < 512 || !powerof2(blockSize)) {
359		if (HFS_MOUNT_DEBUG) {
360			printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
361		}
362		return (EINVAL);
363	}
364
365	/* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
366	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
367	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
368		if (HFS_MOUNT_DEBUG) {
369			printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
370		}
371		return (EINVAL);
372	}
373
374	/* Make sure we can live with the physical block size. */
375	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
376	    (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
377	    (blockSize < hfsmp->hfs_logical_block_size)) {
378		if (HFS_MOUNT_DEBUG) {
379			printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
380					blockSize, hfsmp->hfs_logical_block_size);
381		}
382		return (ENXIO);
383	}
384
385	/* If allocation block size is less than the physical
386	 * block size, we assume that the physical block size
387	 * is same as logical block size.  The physical block
388	 * size value is used to round down the offsets for
389	 * reading and writing the primary and alternate volume
390	 * headers at physical block boundary and will cause
391	 * problems if it is less than the block size.
392	 */
393	if (blockSize < hfsmp->hfs_physical_block_size) {
394		hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
395		hfsmp->hfs_log_per_phys = 1;
396	}
397
398	/*
399	 * The VolumeHeader seems OK: transfer info from it into VCB
400	 * Note - the VCB starts out clear (all zeros)
401	 */
402	vcb = HFSTOVCB(hfsmp);
403
404	vcb->vcbSigWord	= signature;
405	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
406	vcb->vcbLsMod	= to_bsd_time(SWAP_BE32(vhp->modifyDate));
407	vcb->vcbAtrb	= SWAP_BE32(vhp->attributes);
408	vcb->vcbClpSiz	= SWAP_BE32(vhp->rsrcClumpSize);
409	vcb->vcbNxtCNID	= SWAP_BE32(vhp->nextCatalogID);
410	vcb->vcbVolBkUp	= to_bsd_time(SWAP_BE32(vhp->backupDate));
411	vcb->vcbWrCnt	= SWAP_BE32(vhp->writeCount);
412	vcb->vcbFilCnt	= SWAP_BE32(vhp->fileCount);
413	vcb->vcbDirCnt	= SWAP_BE32(vhp->folderCount);
414
415	/* copy 32 bytes of Finder info */
416	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
417
418	vcb->vcbAlBlSt = 0;		/* hfs+ allocation blocks start at first block of volume */
419	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
420		vcb->vcbWrCnt++;	/* compensate for write of Volume Header on last flush */
421
422	/* Now fill in the Extended VCB info */
423	vcb->nextAllocation	= SWAP_BE32(vhp->nextAllocation);
424	vcb->totalBlocks	= SWAP_BE32(vhp->totalBlocks);
425	vcb->allocLimit		= vcb->totalBlocks;
426	vcb->freeBlocks		= SWAP_BE32(vhp->freeBlocks);
427	vcb->blockSize		= blockSize;
428	vcb->encodingsBitmap	= SWAP_BE64(vhp->encodingsBitmap);
429	vcb->localCreateDate	= SWAP_BE32(vhp->createDate);
430
431	vcb->hfsPlusIOPosOffset	= embeddedOffset;
432
433	/* Default to no free block reserve */
434	vcb->reserveBlocks = 0;
435
436	/*
437	 * Update the logical block size in the mount struct
438	 * (currently set up from the wrapper MDB) using the
439	 * new blocksize value:
440	 */
441	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
442	vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
443
444	/*
445	 * Validate and initialize the location of the alternate volume header.
446	 */
447	spare_sectors = hfsmp->hfs_logical_block_count -
448	                (((daddr64_t)vcb->totalBlocks * blockSize) /
449	                   hfsmp->hfs_logical_block_size);
450
451	if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
452		hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
453	} else {
454		hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
455					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
456							  hfsmp->hfs_logical_block_count);
457	}
458
459	bzero(&cndesc, sizeof(cndesc));
460	cndesc.cd_parentcnid = kHFSRootParentID;
461	cndesc.cd_flags |= CD_ISMETA;
462	bzero(&cnattr, sizeof(cnattr));
463	cnattr.ca_linkcount = 1;
464	cnattr.ca_mode = S_IFREG;
465
466	/*
467	 * Set up Extents B-tree vnode
468	 */
469	cndesc.cd_nameptr = hfs_extname;
470	cndesc.cd_namelen = strlen((char *)hfs_extname);
471	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
472
473	cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
474	cfork.cf_new_size= 0;
475	cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
476	cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
477	cfork.cf_vblocks = 0;
478	cnattr.ca_blocks = cfork.cf_blocks;
479	for (i = 0; i < kHFSPlusExtentDensity; i++) {
480		cfork.cf_extents[i].startBlock =
481				SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
482		cfork.cf_extents[i].blockCount =
483				SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
484	}
485	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
486	                         &hfsmp->hfs_extents_vp, &newvnode_flags);
487	if (retval)
488	{
489		if (HFS_MOUNT_DEBUG) {
490			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
491		}
492		goto ErrorExit;
493	}
494	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
495	hfs_unlock(hfsmp->hfs_extents_cp);
496
497	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
498	                                  (KeyCompareProcPtr) CompareExtentKeysPlus));
499	if (retval)
500	{
501		if (HFS_MOUNT_DEBUG) {
502			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
503		}
504		goto ErrorExit;
505	}
506	/*
507	 * Set up Catalog B-tree vnode
508	 */
509	cndesc.cd_nameptr = hfs_catname;
510	cndesc.cd_namelen = strlen((char *)hfs_catname);
511	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
512
513	cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
514	cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
515	cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
516	cfork.cf_vblocks = 0;
517	cnattr.ca_blocks = cfork.cf_blocks;
518	for (i = 0; i < kHFSPlusExtentDensity; i++) {
519		cfork.cf_extents[i].startBlock =
520				SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
521		cfork.cf_extents[i].blockCount =
522				SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
523	}
524	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
525	                         &hfsmp->hfs_catalog_vp, &newvnode_flags);
526	if (retval) {
527		if (HFS_MOUNT_DEBUG) {
528			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
529		}
530		goto ErrorExit;
531	}
532	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
533	hfs_unlock(hfsmp->hfs_catalog_cp);
534
535	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
536	                                  (KeyCompareProcPtr) CompareExtendedCatalogKeys));
537	if (retval) {
538		if (HFS_MOUNT_DEBUG) {
539			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
540		}
541		goto ErrorExit;
542	}
543	if ((hfsmp->hfs_flags & HFS_X) &&
544	    BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
545		if (btinfo.keyCompareType == kHFSBinaryCompare) {
546			hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
547			/* Install a case-sensitive key compare */
548			(void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
549			                  (KeyCompareProcPtr)cat_binarykeycompare);
550		}
551	}
552
553	/*
554	 * Set up Allocation file vnode
555	 */
556	cndesc.cd_nameptr = hfs_vbmname;
557	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
558	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
559
560	cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
561	cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
562	cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
563	cfork.cf_vblocks = 0;
564	cnattr.ca_blocks = cfork.cf_blocks;
565	for (i = 0; i < kHFSPlusExtentDensity; i++) {
566		cfork.cf_extents[i].startBlock =
567				SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
568		cfork.cf_extents[i].blockCount =
569				SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
570	}
571	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
572	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
573	if (retval) {
574		if (HFS_MOUNT_DEBUG) {
575			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
576		}
577		goto ErrorExit;
578	}
579	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
580	hfs_unlock(hfsmp->hfs_allocation_cp);
581
582	/*
583	 * Set up Attribute B-tree vnode
584	 */
585	if (vhp->attributesFile.totalBlocks != 0) {
586		cndesc.cd_nameptr = hfs_attrname;
587		cndesc.cd_namelen = strlen((char *)hfs_attrname);
588		cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
589
590		cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
591		cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
592		cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
593		cfork.cf_vblocks = 0;
594		cnattr.ca_blocks = cfork.cf_blocks;
595		for (i = 0; i < kHFSPlusExtentDensity; i++) {
596			cfork.cf_extents[i].startBlock =
597					SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
598			cfork.cf_extents[i].blockCount =
599					SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
600		}
601		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
602					 &hfsmp->hfs_attribute_vp, &newvnode_flags);
603		if (retval) {
604			if (HFS_MOUNT_DEBUG) {
605				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
606			}
607			goto ErrorExit;
608		}
609		hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
610		hfs_unlock(hfsmp->hfs_attribute_cp);
611		retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
612						  (KeyCompareProcPtr) hfs_attrkeycompare));
613		if (retval) {
614			if (HFS_MOUNT_DEBUG) {
615				printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
616			}
617			goto ErrorExit;
618		}
619
620		/* Initialize vnode for virtual attribute data file that spans the
621		 * entire file system space for performing I/O to attribute btree
622		 * We hold iocount on the attrdata vnode for the entire duration
623		 * of mount (similar to btree vnodes)
624		 */
625		retval = init_attrdata_vnode(hfsmp);
626		if (retval) {
627			if (HFS_MOUNT_DEBUG) {
628				printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
629			}
630			goto ErrorExit;
631		}
632	}
633
634	/*
635	 * Set up Startup file vnode
636	 */
637	if (vhp->startupFile.totalBlocks != 0) {
638		cndesc.cd_nameptr = hfs_startupname;
639		cndesc.cd_namelen = strlen((char *)hfs_startupname);
640		cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
641
642		cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
643		cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
644		cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
645		cfork.cf_vblocks = 0;
646		cnattr.ca_blocks = cfork.cf_blocks;
647		for (i = 0; i < kHFSPlusExtentDensity; i++) {
648			cfork.cf_extents[i].startBlock =
649					SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
650			cfork.cf_extents[i].blockCount =
651					SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
652		}
653		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
654					 &hfsmp->hfs_startup_vp, &newvnode_flags);
655		if (retval) {
656			if (HFS_MOUNT_DEBUG) {
657				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
658			}
659			goto ErrorExit;
660		}
661		hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
662		hfs_unlock(hfsmp->hfs_startup_cp);
663	}
664
665	/* Pick up volume name and create date */
666	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
667	if (retval) {
668		if (HFS_MOUNT_DEBUG) {
669			printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
670		}
671		goto ErrorExit;
672	}
673	vcb->hfs_itime = cnattr.ca_itime;
674	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
675	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
676	volname_length = strlen ((const char*)vcb->vcbVN);
677	cat_releasedesc(&cndesc);
678
679#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
680
681
682	/* Send the volume name down to CoreStorage if necessary */
683	retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
684	if (retval == 0) {
685		(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
686	}
687
688	/* reset retval == 0. we don't care about errors in volname conversion */
689	retval = 0;
690
691	/* mark the volume dirty (clear clean unmount bit) */
692	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
693	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
694		hfs_flushvolumeheader(hfsmp, TRUE, 0);
695	}
696
697	/* kHFSHasFolderCount is only supported/updated on HFSX volumes */
698	if ((hfsmp->hfs_flags & HFS_X) != 0) {
699		hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
700	}
701
702	//
703	// Check if we need to do late journal initialization.  This only
704	// happens if a previous version of MacOS X (or 9) touched the disk.
705	// In that case hfs_late_journal_init() will go re-locate the journal
706	// and journal_info_block files and validate that they're still kosher.
707	//
708	if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
709		&& (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
710		&& (hfsmp->jnl == NULL)) {
711
712		retval = hfs_late_journal_init(hfsmp, vhp, args);
713		if (retval != 0) {
714			if (retval == EROFS) {
715				// EROFS is a special error code that means the volume has an external
716				// journal which we couldn't find.  in that case we do not want to
717				// rewrite the volume header - we'll just refuse to mount the volume.
718				if (HFS_MOUNT_DEBUG) {
719					printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
720				}
721				retval = EINVAL;
722				goto ErrorExit;
723			}
724
725			hfsmp->jnl = NULL;
726
727			// if the journal failed to open, then set the lastMountedVersion
728			// to be "FSK!" which fsck_hfs will see and force the fsck instead
729			// of just bailing out because the volume is journaled.
730			if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
731				HFSPlusVolumeHeader *jvhp;
732				daddr64_t mdb_offset;
733				struct buf *bp = NULL;
734
735				hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
736
737				mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
738
739				bp = NULL;
740				retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
741						HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
742						hfsmp->hfs_physical_block_size, cred, &bp);
743				if (retval == 0) {
744					jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
745
746					if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
747						printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
748						jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
749					   	buf_bwrite(bp);
750					} else {
751						buf_brelse(bp);
752					}
753					bp = NULL;
754				} else if (bp) {
755					buf_brelse(bp);
756					// clear this so the error exit path won't try to use it
757					bp = NULL;
758			    }
759			}
760
761			if (HFS_MOUNT_DEBUG) {
762				printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
763			}
764			retval = EINVAL;
765			goto ErrorExit;
766		} else if (hfsmp->jnl) {
767			vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
768		}
769	} else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
770		struct cat_attr jinfo_attr, jnl_attr;
771
772		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
773		    vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
774		}
775
776		// if we're here we need to fill in the fileid's for the
777		// journal and journal_info_block.
778		hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
779		hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
780		if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
781			printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
782			printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
783		}
784
785		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
786		    vcb->vcbAtrb |= kHFSVolumeJournaledMask;
787		}
788
789		if (hfsmp->jnl == NULL) {
790		    vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
791		}
792	}
793
794	/*
795	 * Establish a metadata allocation zone.
796	 */
797	hfs_metadatazone_init(hfsmp, false);
798
799	/*
800	 * Make any metadata zone adjustments.
801	 */
802	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
803		/* Keep the roving allocator out of the metadata zone. */
804		if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
805		    vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
806			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
807		}
808	} else {
809		if (vcb->nextAllocation <= 1) {
810			vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
811		}
812	}
813	vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
814
815	/* Setup private/hidden directories for hardlinks. */
816	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
817	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
818
819	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
820		hfs_remove_orphans(hfsmp);
821
822	/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
823	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
824	{
825		retval = hfs_erase_unused_nodes(hfsmp);
826		if (retval) {
827			if (HFS_MOUNT_DEBUG) {
828				printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
829			}
830
831			goto ErrorExit;
832		}
833	}
834
835	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
836	{
837		MarkVCBDirty( vcb );	// mark VCB dirty so it will be written
838	}
839
840	/*
841	 * Allow hot file clustering if conditions allow.
842	 */
843	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
844	    ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
845		(void) hfs_recording_init(hfsmp);
846	}
847
848	/* Force ACLs on HFS+ file systems. */
849	vfs_setextendedsecurity(HFSTOVFS(hfsmp));
850
851	/* Enable extent-based extended attributes by default */
852	hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
853
854	/* See if this volume should have per-file content protection enabled */
855	if (vcb->vcbAtrb & kHFSContentProtectionMask) {
856		vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
857	}
858
859	return (0);
860
861ErrorExit:
862	/*
863	 * A fatal error occurred and the volume cannot be mounted, so
864	 * release any resources that we acquired...
865	 */
866	hfsUnmount(hfsmp, NULL);
867
868	if (HFS_MOUNT_DEBUG) {
869		printf("hfs_mounthfsplus: encountered errorr (%d)\n", retval);
870	}
871	return (retval);
872}
873
874
875/*
876 * ReleaseMetaFileVNode
877 *
878 * vp	L - -
879 */
880static void ReleaseMetaFileVNode(struct vnode *vp)
881{
882	struct filefork *fp;
883
884	if (vp && (fp = VTOF(vp))) {
885		if (fp->fcbBTCBPtr != NULL) {
886			(void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
887			(void) BTClosePath(fp);
888			hfs_unlock(VTOC(vp));
889		}
890
891		/* release the node even if BTClosePath fails */
892		vnode_recycle(vp);
893		vnode_put(vp);
894	}
895}
896
897
898/*************************************************************
899*
900* Unmounts a hfs volume.
901*	At this point vflush() has been called (to dump all non-metadata files)
902*
903*************************************************************/
904
905int
906hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
907{
908	/* Get rid of our attribute data vnode (if any).  This is done
909	 * after the vflush() during mount, so we don't need to worry
910	 * about any locks.
911	 */
912	if (hfsmp->hfs_attrdata_vp) {
913		ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
914		hfsmp->hfs_attrdata_vp = NULLVP;
915	}
916
917	if (hfsmp->hfs_startup_vp) {
918		ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
919		hfsmp->hfs_startup_cp = NULL;
920		hfsmp->hfs_startup_vp = NULL;
921	}
922
923	if (hfsmp->hfs_attribute_vp) {
924		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
925		hfsmp->hfs_attribute_cp = NULL;
926		hfsmp->hfs_attribute_vp = NULL;
927	}
928
929	if (hfsmp->hfs_catalog_vp) {
930		ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
931		hfsmp->hfs_catalog_cp = NULL;
932		hfsmp->hfs_catalog_vp = NULL;
933	}
934
935	if (hfsmp->hfs_extents_vp) {
936		ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
937		hfsmp->hfs_extents_cp = NULL;
938		hfsmp->hfs_extents_vp = NULL;
939	}
940
941	if (hfsmp->hfs_allocation_vp) {
942		ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
943		hfsmp->hfs_allocation_cp = NULL;
944		hfsmp->hfs_allocation_vp = NULL;
945	}
946
947	return (0);
948}
949
950
951/*
952 * Test if fork has overflow extents.
953 */
954__private_extern__
955int
956overflow_extents(struct filefork *fp)
957{
958	u_int32_t blocks;
959
960	//
961	// If the vnode pointer is NULL then we're being called
962	// from hfs_remove_orphans() with a faked-up filefork
963	// and therefore it has to be an HFS+ volume.  Otherwise
964	// we check through the volume header to see what type
965	// of volume we're on.
966        //
967	if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
968		if (fp->ff_extents[7].blockCount == 0)
969			return (0);
970
971		blocks = fp->ff_extents[0].blockCount +
972		         fp->ff_extents[1].blockCount +
973		         fp->ff_extents[2].blockCount +
974		         fp->ff_extents[3].blockCount +
975		         fp->ff_extents[4].blockCount +
976		         fp->ff_extents[5].blockCount +
977		         fp->ff_extents[6].blockCount +
978		         fp->ff_extents[7].blockCount;
979	} else {
980		if (fp->ff_extents[2].blockCount == 0)
981			return false;
982
983		blocks = fp->ff_extents[0].blockCount +
984		         fp->ff_extents[1].blockCount +
985		         fp->ff_extents[2].blockCount;
986	  }
987
988	return (fp->ff_blocks > blocks);
989}
990
991/*
992 * Lock the HFS global journal lock
993 */
994int
995hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) {
996
997	void *thread = current_thread();
998
999	if (hfsmp->hfs_global_lockowner == thread) {
1000		panic ("hfs_lock_global: locking against myself!");
1001	}
1002
1003    /* HFS_SHARED_LOCK */
1004	if (locktype == HFS_SHARED_LOCK) {
1005		lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1006		hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1007	}
1008    /* HFS_EXCLUSIVE_LOCK */
1009	else {
1010		lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1011		hfsmp->hfs_global_lockowner = thread;
1012	}
1013
1014	return 0;
1015}
1016
1017
1018/*
1019 * Unlock the HFS global journal lock
1020 */
1021void
1022hfs_unlock_global (struct hfsmount *hfsmp) {
1023
1024	void *thread = current_thread();
1025
1026    /* HFS_LOCK_EXCLUSIVE */
1027	if (hfsmp->hfs_global_lockowner == thread) {
1028		hfsmp->hfs_global_lockowner = NULL;
1029		lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1030	}
1031    /* HFS_LOCK_SHARED */
1032	else {
1033		lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1034	}
1035}
1036
1037
1038/*
1039 * Lock HFS system file(s).
1040 */
1041int
1042hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
1043{
1044	/*
1045	 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1046	 */
1047	if (flags & SFL_CATALOG) {
1048
1049#ifdef HFS_CHECK_LOCK_ORDER
1050		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1051			panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1052		}
1053		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1054			panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1055		}
1056		if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1057			panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1058		}
1059#endif /* HFS_CHECK_LOCK_ORDER */
1060
1061		if (hfsmp->hfs_catalog_cp) {
1062			(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
1063		} else {
1064			flags &= ~SFL_CATALOG;
1065		}
1066
1067		/*
1068		 * When the catalog file has overflow extents then
1069		 * also acquire the extents b-tree lock if its not
1070		 * already requested.
1071		 */
1072		if ((flags & SFL_EXTENTS) == 0 &&
1073		    overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
1074			flags |= SFL_EXTENTS;
1075		}
1076	}
1077	if (flags & SFL_ATTRIBUTE) {
1078
1079#ifdef HFS_CHECK_LOCK_ORDER
1080		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1081			panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1082		}
1083		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1084			panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1085		}
1086#endif /* HFS_CHECK_LOCK_ORDER */
1087
1088		if (hfsmp->hfs_attribute_cp) {
1089			(void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
1090			/*
1091			 * When the attribute file has overflow extents then
1092			 * also acquire the extents b-tree lock if its not
1093			 * already requested.
1094			 */
1095			if ((flags & SFL_EXTENTS) == 0 &&
1096			    overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
1097				flags |= SFL_EXTENTS;
1098			}
1099		} else {
1100			flags &= ~SFL_ATTRIBUTE;
1101		}
1102	}
1103	if (flags & SFL_STARTUP) {
1104#ifdef HFS_CHECK_LOCK_ORDER
1105		if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1106			panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1107		}
1108#endif /* HFS_CHECK_LOCK_ORDER */
1109
1110		if (hfsmp->hfs_startup_cp) {
1111			(void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
1112		} else {
1113			flags &= ~SFL_STARTUP;
1114		}
1115
1116		/*
1117		 * When the startup file has overflow extents then
1118		 * also acquire the extents b-tree lock if its not
1119		 * already requested.
1120		 */
1121		if ((flags & SFL_EXTENTS) == 0 &&
1122		    overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
1123			flags |= SFL_EXTENTS;
1124		}
1125	}
1126	/*
1127	 * To prevent locks being taken in the wrong order, the extent lock
1128	 * gets a bitmap lock as well.
1129	 */
1130	if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1131		/*
1132		 * If there's no bitmap cnode, ignore the bitmap lock.
1133		 */
1134		if (hfsmp->hfs_allocation_cp == NULL) {
1135			flags &= ~SFL_BITMAP;
1136		} else {
1137			(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
1138			/*
1139			 * The bitmap lock is also grabbed when only extent lock
1140			 * was requested. Set the bitmap lock bit in the lock
1141			 * flags which callers will use during unlock.
1142			 */
1143			flags |= SFL_BITMAP;
1144		}
1145	}
1146	if (flags & SFL_EXTENTS) {
1147		/*
1148		 * Since the extents btree lock is recursive we always
1149		 * need exclusive access.
1150		 */
1151		if (hfsmp->hfs_extents_cp) {
1152			(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
1153		} else {
1154			flags &= ~SFL_EXTENTS;
1155		}
1156	}
1157	return (flags);
1158}
1159
1160/*
1161 * unlock HFS system file(s).
1162 */
1163void
1164hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1165{
1166	struct timeval tv;
1167	u_int32_t lastfsync;
1168	int numOfLockedBuffs;
1169
1170	if (hfsmp->jnl == NULL) {
1171		microuptime(&tv);
1172		lastfsync = tv.tv_sec;
1173	}
1174	if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1175		hfs_unlock(hfsmp->hfs_startup_cp);
1176	}
1177	if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1178		if (hfsmp->jnl == NULL) {
1179			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1180			numOfLockedBuffs = count_lock_queue();
1181			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1182			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1183			      kMaxSecsForFsync))) {
1184				hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1185			}
1186		}
1187		hfs_unlock(hfsmp->hfs_attribute_cp);
1188	}
1189	if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1190		if (hfsmp->jnl == NULL) {
1191			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1192			numOfLockedBuffs = count_lock_queue();
1193			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1194			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1195			      kMaxSecsForFsync))) {
1196				hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1197			}
1198		}
1199		hfs_unlock(hfsmp->hfs_catalog_cp);
1200	}
1201	if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1202		hfs_unlock(hfsmp->hfs_allocation_cp);
1203	}
1204	if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1205		if (hfsmp->jnl == NULL) {
1206			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1207			numOfLockedBuffs = count_lock_queue();
1208			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1209			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1210			      kMaxSecsForFsync))) {
1211				hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1212			}
1213		}
1214		hfs_unlock(hfsmp->hfs_extents_cp);
1215	}
1216}
1217
1218
/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context
 * This is to be used for debugging purposes only!!
 *
 * vp        - vnode of the system file being checked
 * shareable - non-zero when a lock not owned by the calling thread is
 *             acceptable; forced to zero for the extents B-tree and the
 *             allocation bitmap
 *
 * Panics when the cnode's recorded lock owner is not the calling thread
 * and the lock is not allowed to be shared.  File ids other than the
 * ones listed in the switch are ignored.
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
	struct cnode *cp = VTOC(vp);
	int locked;

	/* The extents btree and allocation bitmap are always exclusive. */
	if (cp->c_fileid == kHFSExtentsFileID ||
	    cp->c_fileid == kHFSAllocationFileID) {
		shareable = 0;
	}

	locked = cp->c_lockowner == (void *)current_thread();

	if (!locked && !shareable) {
		/* %p is used so the vnode address is not truncated to 32 bits. */
		switch (cp->c_fileid) {
		case kHFSExtentsFileID:
			panic("hfs: extents btree not locked! v: %p\n #\n", vp);
			break;
		case kHFSCatalogFileID:
			panic("hfs: catalog btree not locked! v: %p\n #\n", vp);
			break;
		case kHFSAllocationFileID:
			/* The allocation file can hide behind the journal lock. */
			if (VTOHFS(vp)->jnl == NULL)
				panic("hfs: allocation file not locked! v: %p\n #\n", vp);
			break;
		case kHFSStartupFileID:
			panic("hfs: startup file not locked! v: %p\n #\n", vp);
			break;
		case kHFSAttributesFileID:
			panic("hfs: attributes btree not locked! v: %p\n #\n", vp);
			break;
		}
	}
}
#endif
1260
1261
1262/*
1263 * There are three ways to qualify for ownership rights on an object:
1264 *
1265 * 1. (a) Your UID matches the cnode's UID.
1266 *    (b) The object in question is owned by "unknown"
1267 * 2. (a) Permissions on the filesystem are being ignored and
1268 *        your UID matches the replacement UID.
1269 *    (b) Permissions on the filesystem are being ignored and
1270 *        the replacement UID is "unknown".
1271 * 3. You are root.
1272 *
1273 */
1274int
1275hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1276		__unused struct proc *p, int invokesuperuserstatus)
1277{
1278	if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1279	    (cnode_uid == UNKNOWNUID) ||  									  /* [1b] */
1280	    ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1281	      ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1282	        (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1283	    (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1284		return (0);
1285	} else {
1286		return (EPERM);
1287	}
1288}
1289
1290
/*
 * BestBlockSizeFit
 *
 * Compute the optimal logical block size for a volume: a size that
 * evenly divides allocationBlockSize, is no larger than blockSizeLimit,
 * and is a multiple of baseMultiple.
 *
 * allocationBlockSize - size of an allocation block, in bytes
 * blockSizeLimit      - upper bound for the result, in bytes
 * baseMultiple        - granularity the result must be a multiple of
 *
 * Returns PAGE_SIZE when PAGE_SIZE itself satisfies all three
 * constraints, otherwise the largest size that does.  Returns 512
 * ("hope for the best") when no such size exists: baseMultiple is zero,
 * allocationBlockSize is not a multiple of baseMultiple, or every
 * candidate exceeds blockSizeLimit.
 */
u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
                               u_int32_t blockSizeLimit,
                               u_int32_t baseMultiple) {
    u_int32_t blockCount;

    /*
       A zero base would divide by zero below.  And if the allocation
       blocks aren't even multiples of the specified base, no amount of
       dividing them into even parts will be a multiple either.
     */
    if (baseMultiple == 0 || allocationBlockSize % baseMultiple != 0) {
        return 512;		/* Hope for the best */
    }

    /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
       from being handled as two 6K logical blocks instead of 3 4K logical blocks.
       Even though the former (the result of the loop below) is the larger allocation
       block size, the latter is more efficient.  The shortcut is only taken when a
       page also respects the caller's limit and base multiple. */
    if ((allocationBlockSize % PAGE_SIZE == 0) &&
        (PAGE_SIZE <= blockSizeLimit) &&
        (PAGE_SIZE % baseMultiple == 0)) {
        return PAGE_SIZE;
    }

    /* No clear winner exists: pick the largest even fraction <= blockSizeLimit.
       Every trial size is blockCount * baseMultiple, so it is a multiple of the
       base by construction. */
    for (blockCount = allocationBlockSize / baseMultiple; blockCount > 0; --blockCount) {
        u_int32_t trialBlockSize = blockCount * baseMultiple;

        if ((trialBlockSize <= blockSizeLimit) &&
            (allocationBlockSize % trialBlockSize == 0)) {		/* An even fraction? */
            return trialBlockSize;
        }
    }

    /* Reached when even baseMultiple itself is above blockSizeLimit (or the
       allocation block size is zero and a page does not qualify). */
    return 512;
}
1332
1333
1334u_int32_t
1335GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1336			struct cat_attr *fattr, struct cat_fork *forkinfo)
1337{
1338	struct hfsmount * hfsmp;
1339	struct cat_desc jdesc;
1340	int lockflags;
1341	int error;
1342
1343	if (vcb->vcbSigWord != kHFSPlusSigWord)
1344		return (0);
1345
1346	hfsmp = VCBTOHFS(vcb);
1347
1348	memset(&jdesc, 0, sizeof(struct cat_desc));
1349	jdesc.cd_parentcnid = kRootDirID;
1350	jdesc.cd_nameptr = (const u_int8_t *)name;
1351	jdesc.cd_namelen = strlen(name);
1352
1353	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1354	error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
1355	hfs_systemfile_unlock(hfsmp, lockflags);
1356
1357	if (error == 0) {
1358		return (fattr->ca_fileid);
1359	} else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1360		return (0);
1361	}
1362
1363	return (0);	/* XXX what callers expect on an error */
1364}
1365
1366
1367/*
1368 * On HFS Plus Volumes, there can be orphaned files or directories
1369 * These are files or directories that were unlinked while busy.
1370 * If the volume was not cleanly unmounted then some of these may
1371 * have persisted and need to be removed.
1372 */
1373void
1374hfs_remove_orphans(struct hfsmount * hfsmp)
1375{
1376	struct BTreeIterator * iterator = NULL;
1377	struct FSBufferDescriptor btdata;
1378	struct HFSPlusCatalogFile filerec;
1379	struct HFSPlusCatalogKey * keyp;
1380	struct proc *p = current_proc();
1381	FCB *fcb;
1382	ExtendedVCB *vcb;
1383	char filename[32];
1384	char tempname[32];
1385	size_t namelen;
1386	cat_cookie_t cookie;
1387	int catlock = 0;
1388	int catreserve = 0;
1389	int started_tr = 0;
1390	int lockflags;
1391	int result;
1392	int orphaned_files = 0;
1393	int orphaned_dirs = 0;
1394
1395	bzero(&cookie, sizeof(cookie));
1396
1397	if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1398		return;
1399
1400	vcb = HFSTOVCB(hfsmp);
1401	fcb = VTOF(hfsmp->hfs_catalog_vp);
1402
1403	btdata.bufferAddress = &filerec;
1404	btdata.itemSize = sizeof(filerec);
1405	btdata.itemCount = 1;
1406
1407	MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1408	bzero(iterator, sizeof(*iterator));
1409
1410	/* Build a key to "temp" */
1411	keyp = (HFSPlusCatalogKey*)&iterator->key;
1412	keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1413	keyp->nodeName.length = 4;  /* "temp" */
1414	keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1415	keyp->nodeName.unicode[0] = 't';
1416	keyp->nodeName.unicode[1] = 'e';
1417	keyp->nodeName.unicode[2] = 'm';
1418	keyp->nodeName.unicode[3] = 'p';
1419
1420	/*
1421	 * Position the iterator just before the first real temp file/dir.
1422	 */
1423	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1424	(void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1425	hfs_systemfile_unlock(hfsmp, lockflags);
1426
1427	/* Visit all the temp files/dirs in the HFS+ private directory. */
1428	for (;;) {
1429		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1430		result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1431		hfs_systemfile_unlock(hfsmp, lockflags);
1432		if (result)
1433			break;
1434		if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1435			break;
1436
1437		(void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1438		                      (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1439
1440		(void) snprintf(tempname, sizeof(tempname), "%s%d",
1441				HFS_DELETE_PREFIX, filerec.fileID);
1442
1443		/*
1444		 * Delete all files (and directories) named "tempxxx",
1445		 * where xxx is the file's cnid in decimal.
1446		 *
1447		 */
1448		if (bcmp(tempname, filename, namelen) == 0) {
1449   			struct filefork dfork;
1450    		struct filefork rfork;
1451  			struct cnode cnode;
1452			int mode = 0;
1453
1454			bzero(&dfork, sizeof(dfork));
1455			bzero(&rfork, sizeof(rfork));
1456			bzero(&cnode, sizeof(cnode));
1457
1458			/* Delete any attributes, ignore errors */
1459			(void) hfs_removeallattr(hfsmp, filerec.fileID);
1460
1461			if (hfs_start_transaction(hfsmp) != 0) {
1462			    printf("hfs_remove_orphans: failed to start transaction\n");
1463			    goto exit;
1464			}
1465			started_tr = 1;
1466
1467			/*
1468			 * Reserve some space in the Catalog file.
1469			 */
1470			if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1471			    printf("hfs_remove_orphans: cat_preflight failed\n");
1472				goto exit;
1473			}
1474			catreserve = 1;
1475
1476			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1477			catlock = 1;
1478
1479			/* Build a fake cnode */
1480			cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1481			                &dfork.ff_data, &rfork.ff_data);
1482			cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1483			cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1484			cnode.c_desc.cd_namelen = namelen;
1485			cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1486			cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1487
1488			/* Position iterator at previous entry */
1489			if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1490			    NULL, NULL) != 0) {
1491				break;
1492			}
1493
1494			/* Truncate the file to zero (both forks) */
1495			if (dfork.ff_blocks > 0) {
1496				u_int64_t fsize;
1497
1498				dfork.ff_cp = &cnode;
1499				cnode.c_datafork = &dfork;
1500				cnode.c_rsrcfork = NULL;
1501				fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1502				while (fsize > 0) {
1503				    if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1504						fsize -= HFS_BIGFILE_SIZE;
1505					} else {
1506						fsize = 0;
1507					}
1508
1509					if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1510									  cnode.c_attr.ca_fileid, false) != 0) {
1511						printf("hfs: error truncating data fork!\n");
1512
1513						break;
1514					}
1515
1516					//
1517					// if we're iteratively truncating this file down,
1518					// then end the transaction and start a new one so
1519					// that no one transaction gets too big.
1520					//
1521					if (fsize > 0 && started_tr) {
1522						/* Drop system file locks before starting
1523						 * another transaction to preserve lock order.
1524						 */
1525						hfs_systemfile_unlock(hfsmp, lockflags);
1526						catlock = 0;
1527						hfs_end_transaction(hfsmp);
1528
1529						if (hfs_start_transaction(hfsmp) != 0) {
1530							started_tr = 0;
1531							break;
1532						}
1533						lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1534						catlock = 1;
1535					}
1536				}
1537			}
1538
1539			if (rfork.ff_blocks > 0) {
1540				rfork.ff_cp = &cnode;
1541				cnode.c_datafork = NULL;
1542				cnode.c_rsrcfork = &rfork;
1543				if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1544					printf("hfs: error truncating rsrc fork!\n");
1545					break;
1546				}
1547			}
1548
1549			/* Remove the file or folder record from the Catalog */
1550			if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1551				printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1552				hfs_systemfile_unlock(hfsmp, lockflags);
1553				catlock = 0;
1554				hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1555				break;
1556			}
1557
1558			mode = cnode.c_attr.ca_mode & S_IFMT;
1559
1560			if (mode == S_IFDIR) {
1561				orphaned_dirs++;
1562			}
1563			else {
1564				orphaned_files++;
1565			}
1566
1567			/* Update parent and volume counts */
1568			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1569			if (mode == S_IFDIR) {
1570				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1571			}
1572
1573			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1574			                 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1575
1576			/* Drop locks and end the transaction */
1577			hfs_systemfile_unlock(hfsmp, lockflags);
1578			cat_postflight(hfsmp, &cookie, p);
1579			catlock = catreserve = 0;
1580
1581			/*
1582			   Now that Catalog is unlocked, update the volume info, making
1583			   sure to differentiate between files and directories
1584			*/
1585			if (mode == S_IFDIR) {
1586				hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1587			}
1588			else{
1589 				hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1590			}
1591
1592			if (started_tr) {
1593				hfs_end_transaction(hfsmp);
1594				started_tr = 0;
1595			}
1596
1597		} /* end if */
1598	} /* end for */
1599	if (orphaned_files > 0 || orphaned_dirs > 0)
1600		printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1601exit:
1602	if (catlock) {
1603		hfs_systemfile_unlock(hfsmp, lockflags);
1604	}
1605	if (catreserve) {
1606		cat_postflight(hfsmp, &cookie, p);
1607	}
1608	if (started_tr) {
1609		hfs_end_transaction(hfsmp);
1610	}
1611
1612	FREE(iterator, M_TEMP);
1613	hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1614}
1615
1616
1617/*
1618 * This will return the correct logical block size for a given vnode.
1619 * For most files, it is the allocation block size, for meta data like
1620 * BTrees, this is kept as part of the BTree private nodeSize
1621 */
1622u_int32_t
1623GetLogicalBlockSize(struct vnode *vp)
1624{
1625u_int32_t logBlockSize;
1626
1627	DBG_ASSERT(vp != NULL);
1628
1629	/* start with default */
1630	logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1631
1632	if (vnode_issystem(vp)) {
1633		if (VTOF(vp)->fcbBTCBPtr != NULL) {
1634			BTreeInfoRec			bTreeInfo;
1635
1636			/*
1637			 * We do not lock the BTrees, because if we are getting block..then the tree
1638			 * should be locked in the first place.
1639			 * We just want the nodeSize wich will NEVER change..so even if the world
1640			 * is changing..the nodeSize should remain the same. Which argues why lock
1641			 * it in the first place??
1642			 */
1643
1644			(void) BTGetInformation	(VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1645
1646			logBlockSize = bTreeInfo.nodeSize;
1647
1648		} else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1649				logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1650		}
1651	}
1652
1653	DBG_ASSERT(logBlockSize > 0);
1654
1655	return logBlockSize;
1656}
1657
1658u_int32_t
1659hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
1660{
1661	u_int32_t freeblks;
1662	u_int32_t rsrvblks;
1663	u_int32_t loanblks;
1664
1665	/*
1666	 * We don't bother taking the mount lock
1667	 * to look at these values since the values
1668	 * themselves are each updated atomically
1669	 * on aligned addresses.
1670	 */
1671	freeblks = hfsmp->freeBlocks;
1672	rsrvblks = hfsmp->reserveBlocks;
1673	loanblks = hfsmp->loanedBlocks;
1674	if (wantreserve) {
1675		if (freeblks > rsrvblks)
1676			freeblks -= rsrvblks;
1677		else
1678			freeblks = 0;
1679	}
1680	if (freeblks > loanblks)
1681		freeblks -= loanblks;
1682	else
1683		freeblks = 0;
1684
1685#if HFS_SPARSE_DEV
1686	/*
1687	 * When the underlying device is sparse, check the
1688	 * available space on the backing store volume.
1689	 */
1690	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
1691		struct vfsstatfs *vfsp;  /* 272 bytes */
1692		u_int64_t vfreeblks;
1693		u_int32_t loanedblks;
1694		struct mount * backingfs_mp;
1695		struct timeval now;
1696
1697		backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
1698
1699		microtime(&now);
1700		if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1701		    vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1702		    hfsmp->hfs_last_backingstatfs = now.tv_sec;
1703		}
1704
1705		if ((vfsp = vfs_statfs(backingfs_mp))) {
1706			HFS_MOUNT_LOCK(hfsmp, TRUE);
1707			vfreeblks = vfsp->f_bavail;
1708			/* Normalize block count if needed. */
1709			if (vfsp->f_bsize != hfsmp->blockSize) {
1710				vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
1711			}
1712			if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
1713				vfreeblks -= hfsmp->hfs_sparsebandblks;
1714			else
1715				vfreeblks = 0;
1716
1717			/* Take into account any delayed allocations. */
1718			loanedblks = 2 * hfsmp->loanedBlocks;
1719			if (vfreeblks > loanedblks)
1720				vfreeblks -= loanedblks;
1721			else
1722				vfreeblks = 0;
1723
1724			if (hfsmp->hfs_backingfs_maxblocks) {
1725				vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
1726			}
1727			freeblks = MIN(vfreeblks, freeblks);
1728			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1729		}
1730	}
1731#endif /* HFS_SPARSE_DEV */
1732
1733	return (freeblks);
1734}
1735
1736/*
1737 * Map HFS Common errors (negative) to BSD error codes (positive).
1738 * Positive errors (ie BSD errors) are passed through unchanged.
1739 */
1740short MacToVFSError(OSErr err)
1741{
1742	if (err >= 0)
1743        	return err;
1744
1745	switch (err) {
1746	case dskFulErr:			/*    -34 */
1747	case btNoSpaceAvail:		/* -32733 */
1748		return ENOSPC;
1749	case fxOvFlErr:			/* -32750 */
1750		return EOVERFLOW;
1751
1752	case btBadNode:			/* -32731 */
1753		return EIO;
1754
1755	case memFullErr:		/*  -108 */
1756		return ENOMEM;		/*   +12 */
1757
1758	case cmExists:			/* -32718 */
1759	case btExists:			/* -32734 */
1760		return EEXIST;		/*    +17 */
1761
1762	case cmNotFound:		/* -32719 */
1763	case btNotFound:		/* -32735 */
1764		return ENOENT;		/*     28 */
1765
1766	case cmNotEmpty:		/* -32717 */
1767		return ENOTEMPTY;	/*     66 */
1768
1769	case cmFThdDirErr:		/* -32714 */
1770		return EISDIR;		/*     21 */
1771
1772	case fxRangeErr:		/* -32751 */
1773		return ERANGE;
1774
1775	case bdNamErr:			/*   -37 */
1776		return ENAMETOOLONG;	/*    63 */
1777
1778	case paramErr:			/*   -50 */
1779	case fileBoundsErr:		/* -1309 */
1780		return EINVAL;		/*   +22 */
1781
1782	case fsBTBadNodeSize:
1783		return ENXIO;
1784
1785	default:
1786		return EIO;		/*   +5 */
1787	}
1788}
1789
1790
1791/*
1792 * Find the current thread's directory hint for a given index.
1793 *
1794 * Requires an exclusive lock on directory cnode.
1795 *
1796 * Use detach if the cnode lock must be dropped while the hint is still active.
1797 */
1798__private_extern__
1799directoryhint_t *
1800hfs_getdirhint(struct cnode *dcp, int index, int detach)
1801{
1802	struct timeval tv;
1803	directoryhint_t *hint;
1804	boolean_t need_remove, need_init;
1805	const u_int8_t * name;
1806
1807	microuptime(&tv);
1808
1809	/*
1810	 *  Look for an existing hint first.  If not found, create a new one (when
1811	 *  the list is not full) or recycle the oldest hint.  Since new hints are
1812	 *  always added to the head of the list, the last hint is always the
1813	 *  oldest.
1814	 */
1815	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1816		if (hint->dh_index == index)
1817			break;
1818	}
1819	if (hint != NULL) { /* found an existing hint */
1820		need_init = false;
1821		need_remove = true;
1822	} else { /* cannot find an existing hint */
1823		need_init = true;
1824		if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1825			/* Create a default directory hint */
1826			MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1827			++dcp->c_dirhintcnt;
1828			need_remove = false;
1829		} else {				/* recycle the last (i.e., the oldest) hint */
1830			hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
1831			if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
1832			    (name = hint->dh_desc.cd_nameptr)) {
1833				hint->dh_desc.cd_nameptr = NULL;
1834				hint->dh_desc.cd_namelen = 0;
1835				hint->dh_desc.cd_flags &= ~CD_HASBUF;
1836				vfs_removename((const char *)name);
1837			}
1838			need_remove = true;
1839		}
1840	}
1841
1842	if (need_remove)
1843		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1844
1845	if (detach)
1846		--dcp->c_dirhintcnt;
1847	else
1848		TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1849
1850	if (need_init) {
1851		hint->dh_index = index;
1852		hint->dh_desc.cd_flags = 0;
1853		hint->dh_desc.cd_encoding = 0;
1854		hint->dh_desc.cd_namelen = 0;
1855		hint->dh_desc.cd_nameptr = NULL;
1856		hint->dh_desc.cd_parentcnid = dcp->c_fileid;
1857		hint->dh_desc.cd_hint = dcp->c_childhint;
1858		hint->dh_desc.cd_cnid = 0;
1859	}
1860	hint->dh_time = tv.tv_sec;
1861	return (hint);
1862}
1863
1864/*
1865 * Release a single directory hint.
1866 *
1867 * Requires an exclusive lock on directory cnode.
1868 */
1869__private_extern__
1870void
1871hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
1872{
1873	const u_int8_t * name;
1874	directoryhint_t *hint;
1875
1876	/* Check if item is on list (could be detached) */
1877	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1878		if (hint == relhint) {
1879			TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
1880			--dcp->c_dirhintcnt;
1881			break;
1882		}
1883	}
1884	name = relhint->dh_desc.cd_nameptr;
1885	if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1886		relhint->dh_desc.cd_nameptr = NULL;
1887		relhint->dh_desc.cd_namelen = 0;
1888		relhint->dh_desc.cd_flags &= ~CD_HASBUF;
1889		vfs_removename((const char *)name);
1890	}
1891	FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
1892}
1893
1894/*
1895 * Release directory hints for given directory
1896 *
1897 * Requires an exclusive lock on directory cnode.
1898 */
1899__private_extern__
1900void
1901hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
1902{
1903	struct timeval tv;
1904	directoryhint_t *hint, *prev;
1905	const u_int8_t * name;
1906
1907	if (stale_hints_only)
1908		microuptime(&tv);
1909
1910	/* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
1911	for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
1912		if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
1913			break;  /* stop here if this entry is too new */
1914		name = hint->dh_desc.cd_nameptr;
1915		if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1916			hint->dh_desc.cd_nameptr = NULL;
1917			hint->dh_desc.cd_namelen = 0;
1918			hint->dh_desc.cd_flags &= ~CD_HASBUF;
1919			vfs_removename((const char *)name);
1920		}
1921		prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
1922		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1923		FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
1924		--dcp->c_dirhintcnt;
1925	}
1926}
1927
1928/*
1929 * Insert a detached directory hint back into the list of dirhints.
1930 *
1931 * Requires an exclusive lock on directory cnode.
1932 */
1933__private_extern__
1934void
1935hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
1936{
1937	directoryhint_t *test;
1938
1939	TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
1940		if (test == hint)
1941			panic("hfs_insertdirhint: hint %p already on list!", hint);
1942	}
1943
1944	TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1945	++dcp->c_dirhintcnt;
1946}
1947
1948/*
1949 * Perform a case-insensitive compare of two UTF-8 filenames.
1950 *
1951 * Returns 0 if the strings match.
1952 */
1953__private_extern__
1954int
1955hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
1956{
1957	u_int16_t *ustr1, *ustr2;
1958	size_t ulen1, ulen2;
1959	size_t maxbytes;
1960	int cmp = -1;
1961
1962	if (len1 != len2)
1963		return (cmp);
1964
1965	maxbytes = kHFSPlusMaxFileNameChars << 1;
1966	MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
1967	ustr2 = ustr1 + (maxbytes >> 1);
1968
1969	if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
1970		goto out;
1971	if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
1972		goto out;
1973
1974	cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
1975out:
1976	FREE(ustr1, M_TEMP);
1977	return (cmp);
1978}
1979
1980
1981typedef struct jopen_cb_info {
1982	off_t   jsize;
1983	char   *desired_uuid;
1984        struct  vnode *jvp;
1985	size_t  blksize;
1986	int     need_clean;
1987	int     need_init;
1988} jopen_cb_info;
1989
1990static int
1991journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
1992{
1993	struct nameidata nd;
1994	jopen_cb_info *ji = (jopen_cb_info *)arg;
1995	char bsd_name[256];
1996	int error;
1997
1998	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
1999	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2000
2001	if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2002		return 1;   // keep iterating
2003	}
2004
2005	// if we're here, either the desired uuid matched or there was no
2006	// desired uuid so let's try to open the device for writing and
2007	// see if it works.  if it does, we'll use it.
2008
2009	NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2010	if ((error = namei(&nd))) {
2011		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2012		return 1;   // keep iterating
2013	}
2014
2015	ji->jvp = nd.ni_vp;
2016	nameidone(&nd);
2017
2018	if (ji->jvp == NULL) {
2019		printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2020	} else {
2021		error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2022		if (error == 0) {
2023			// if the journal is dirty and we didn't specify a desired
2024			// journal device uuid, then do not use the journal.  but
2025			// if the journal is just invalid (e.g. it hasn't been
2026			// initialized) then just set the need_init flag.
2027			if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2028				error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2029				if (error == EBUSY) {
2030					VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2031					vnode_put(ji->jvp);
2032					ji->jvp = NULL;
2033					return 1;    // keep iterating
2034				} else if (error == EINVAL) {
2035					ji->need_init = 1;
2036				}
2037			}
2038
2039			if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2040				strlcpy(ji->desired_uuid, uuid_str, 128);
2041			}
2042			vnode_setmountedon(ji->jvp);
2043			// printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
2044			return 0;   // stop iterating
2045		} else {
2046			vnode_put(ji->jvp);
2047			ji->jvp = NULL;
2048		}
2049	}
2050
2051	return 1;   // keep iterating
2052}
2053
2054extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
2055extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2056kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2057
2058
2059static vnode_t
2060open_journal_dev(const char *vol_device,
2061		 int need_clean,
2062		 char *uuid_str,
2063		 char *machine_serial_num,
2064		 off_t jsize,
2065		 size_t blksize,
2066		 int *need_init)
2067{
2068    int retry_counter=0;
2069    jopen_cb_info ji;
2070
2071    ji.jsize        = jsize;
2072    ji.desired_uuid = uuid_str;
2073    ji.jvp          = NULL;
2074    ji.blksize      = blksize;
2075    ji.need_clean   = need_clean;
2076    ji.need_init    = 0;
2077
2078//    if (uuid_str[0] == '\0') {
2079//	    printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2080//    } else {
2081//	    printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2082//    }
2083    while (ji.jvp == NULL && retry_counter++ < 4) {
2084	    if (retry_counter > 1) {
2085		    if (uuid_str[0]) {
2086			    printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2087		    } else {
2088			    printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2089		    }
2090		    delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2091	    }
2092
2093	    IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2094    }
2095
2096    if (ji.jvp == NULL) {
2097	    printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2098		   vol_device, uuid_str, machine_serial_num);
2099    }
2100
2101    *need_init = ji.need_init;
2102
2103    return ji.jvp;
2104}
2105
2106
2107int
2108hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2109					   void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2110					   HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2111{
2112	JournalInfoBlock *jibp;
2113	struct buf       *jinfo_bp, *bp;
2114	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2115	int               retval, write_jibp = 0;
2116	uint32_t		  blksize = hfsmp->hfs_logical_block_size;
2117	struct vnode     *devvp;
2118	struct hfs_mount_args *args = _args;
2119	u_int32_t	  jib_flags;
2120	u_int64_t	  jib_offset;
2121	u_int64_t	  jib_size;
2122	const char *dev_name;
2123
2124	devvp = hfsmp->hfs_devvp;
2125	dev_name = vnode_name(devvp);
2126	if (dev_name == NULL) {
2127		dev_name = "unknown-dev";
2128	}
2129
2130	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2131		arg_flags  = args->journal_flags;
2132		arg_tbufsz = args->journal_tbuffer_size;
2133	}
2134
2135	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2136
2137	jinfo_bp = NULL;
2138	retval = (int)buf_meta_bread(devvp,
2139						(daddr64_t)((embeddedOffset/blksize) +
2140						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2141						hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2142	if (retval) {
2143		if (jinfo_bp) {
2144			buf_brelse(jinfo_bp);
2145		}
2146		return retval;
2147	}
2148
2149	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2150	jib_flags  = SWAP_BE32(jibp->flags);
2151	jib_size   = SWAP_BE64(jibp->size);
2152
2153	if (jib_flags & kJIJournalInFSMask) {
2154		hfsmp->jvp = hfsmp->hfs_devvp;
2155		jib_offset = SWAP_BE64(jibp->offset);
2156	} else {
2157	    int need_init=0;
2158
2159	    // if the volume was unmounted cleanly then we'll pick any
2160	    // available external journal partition
2161	    //
2162	    if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2163		    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2164	    }
2165
2166	    hfsmp->jvp = open_journal_dev(dev_name,
2167					  !(jib_flags & kJIJournalNeedInitMask),
2168					  (char *)&jibp->ext_jnl_uuid[0],
2169					  (char *)&jibp->machine_serial_num[0],
2170					  jib_size,
2171					  hfsmp->hfs_logical_block_size,
2172					  &need_init);
2173	    if (hfsmp->jvp == NULL) {
2174		buf_brelse(jinfo_bp);
2175		return EROFS;
2176	    } else {
2177		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2178			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2179		    }
2180	    }
2181
2182	    jib_offset = 0;
2183	    write_jibp = 1;
2184	    if (need_init) {
2185		    jib_flags |= kJIJournalNeedInitMask;
2186	    }
2187	}
2188
2189	// save this off for the hack-y check in hfs_remove()
2190	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2191	hfsmp->jnl_size  = jib_size;
2192
2193	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2194	    // if the file system is read-only, check if the journal is empty.
2195	    // if it is, then we can allow the mount.  otherwise we have to
2196	    // return failure.
2197	    retval = journal_is_clean(hfsmp->jvp,
2198				      jib_offset + embeddedOffset,
2199				      jib_size,
2200				      devvp,
2201				      hfsmp->hfs_logical_block_size);
2202
2203	    hfsmp->jnl = NULL;
2204
2205	    buf_brelse(jinfo_bp);
2206
2207	    if (retval) {
2208		const char *name = vnode_getname(devvp);
2209	      printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2210                     name ? name : "");
2211		if (name)
2212			vnode_putname(name);
2213	    }
2214
2215	    return retval;
2216	}
2217
2218	if (jib_flags & kJIJournalNeedInitMask) {
2219		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2220			   jib_offset + embeddedOffset, jib_size);
2221		hfsmp->jnl = journal_create(hfsmp->jvp,
2222									jib_offset + embeddedOffset,
2223									jib_size,
2224									devvp,
2225									blksize,
2226									arg_flags,
2227									arg_tbufsz,
2228									hfs_sync_metadata, hfsmp->hfs_mp);
2229		if (hfsmp->jnl)
2230			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2231
2232		// no need to start a transaction here... if this were to fail
2233		// we'd just re-init it on the next mount.
2234		jib_flags &= ~kJIJournalNeedInitMask;
2235		jibp->flags  = SWAP_BE32(jib_flags);
2236		buf_bwrite(jinfo_bp);
2237		jinfo_bp = NULL;
2238		jibp     = NULL;
2239	} else {
2240		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2241		//	   jib_offset + embeddedOffset,
2242		//	   jib_size, SWAP_BE32(vhp->blockSize));
2243
2244		hfsmp->jnl = journal_open(hfsmp->jvp,
2245								  jib_offset + embeddedOffset,
2246								  jib_size,
2247								  devvp,
2248								  blksize,
2249								  arg_flags,
2250								  arg_tbufsz,
2251								  hfs_sync_metadata, hfsmp->hfs_mp);
2252		if (hfsmp->jnl)
2253			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2254
2255		if (write_jibp) {
2256			buf_bwrite(jinfo_bp);
2257		} else {
2258			buf_brelse(jinfo_bp);
2259		}
2260		jinfo_bp = NULL;
2261		jibp     = NULL;
2262
2263		if (hfsmp->jnl && mdbp) {
2264			// reload the mdb because it could have changed
2265			// if the journal had to be replayed.
2266			if (mdb_offset == 0) {
2267				mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2268			}
2269			bp = NULL;
2270			retval = (int)buf_meta_bread(devvp,
2271					HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2272					hfsmp->hfs_physical_block_size, cred, &bp);
2273			if (retval) {
2274				if (bp) {
2275					buf_brelse(bp);
2276				}
2277				printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2278					   retval);
2279				return retval;
2280			}
2281			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2282			buf_brelse(bp);
2283			bp = NULL;
2284		}
2285	}
2286
2287
2288	//printf("journal @ 0x%x\n", hfsmp->jnl);
2289
2290	// if we expected the journal to be there and we couldn't
2291	// create it or open it then we have to bail out.
2292	if (hfsmp->jnl == NULL) {
2293		printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2294		return EINVAL;
2295	}
2296
2297	return 0;
2298}
2299
2300
2301//
2302// This function will go and re-locate the .journal_info_block and
2303// the .journal files in case they moved (which can happen if you
2304// run Norton SpeedDisk).  If we fail to find either file we just
2305// disable journaling for this volume and return.  We turn off the
2306// journaling bit in the vcb and assume it will get written to disk
2307// later (if it doesn't on the next mount we'd do the same thing
2308// again which is harmless).  If we disable journaling we don't
2309// return an error so that the volume is still mountable.
2310//
2311// If the info we find for the .journal_info_block and .journal files
2312// isn't what we had stored, we re-set our cached info and proceed
2313// with opening the journal normally.
2314//
2315static int
2316hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2317{
2318	JournalInfoBlock *jibp;
2319	struct buf       *jinfo_bp;
2320	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2321	int               retval, write_jibp = 0, recreate_journal = 0;
2322	struct vnode     *devvp;
2323	struct cat_attr   jib_attr, jattr;
2324	struct cat_fork   jib_fork, jfork;
2325	ExtendedVCB      *vcb;
2326	u_int32_t            fid;
2327	struct hfs_mount_args *args = _args;
2328	u_int32_t	  jib_flags;
2329	u_int64_t	  jib_offset;
2330	u_int64_t	  jib_size;
2331
2332	devvp = hfsmp->hfs_devvp;
2333	vcb = HFSTOVCB(hfsmp);
2334
2335	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2336		if (args->journal_disable) {
2337			return 0;
2338		}
2339
2340		arg_flags  = args->journal_flags;
2341		arg_tbufsz = args->journal_tbuffer_size;
2342	}
2343
2344	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2345	if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2346		printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2347			   jib_fork.cf_extents[0].startBlock);
2348		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2349		return 0;
2350	}
2351	hfsmp->hfs_jnlinfoblkid = fid;
2352
2353	// make sure the journal_info_block begins where we think it should.
2354	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2355		printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2356			   SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2357
2358		vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2359		vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2360		recreate_journal = 1;
2361	}
2362
2363
2364	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2365	jinfo_bp = NULL;
2366	retval = (int)buf_meta_bread(devvp,
2367						(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2368						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2369						hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2370	if (retval) {
2371		if (jinfo_bp) {
2372			buf_brelse(jinfo_bp);
2373		}
2374		printf("hfs: can't read journal info block. disabling journaling.\n");
2375		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2376		return 0;
2377	}
2378
2379	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2380	jib_flags  = SWAP_BE32(jibp->flags);
2381	jib_offset = SWAP_BE64(jibp->offset);
2382	jib_size   = SWAP_BE64(jibp->size);
2383
2384	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2385	if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2386		printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2387			   jfork.cf_extents[0].startBlock);
2388		buf_brelse(jinfo_bp);
2389		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2390		return 0;
2391	}
2392	hfsmp->hfs_jnlfileid = fid;
2393
2394	// make sure the journal file begins where we think it should.
2395	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2396		printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2397			   (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2398
2399		jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2400		write_jibp   = 1;
2401		recreate_journal = 1;
2402	}
2403
2404	// check the size of the journal file.
2405	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2406		printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2407			   jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2408
2409		jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2410		write_jibp = 1;
2411		recreate_journal = 1;
2412	}
2413
2414	if (jib_flags & kJIJournalInFSMask) {
2415		hfsmp->jvp = hfsmp->hfs_devvp;
2416		jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2417	} else {
2418	    const char *dev_name;
2419	    int need_init = 0;
2420
2421	    dev_name = vnode_name(devvp);
2422	    if (dev_name == NULL) {
2423		    dev_name = "unknown-dev";
2424	    }
2425
2426            // since the journal is empty, just use any available external journal
2427	    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2428
2429	    // this fills in the uuid of the device we actually get
2430	    hfsmp->jvp = open_journal_dev(dev_name,
2431					  !(jib_flags & kJIJournalNeedInitMask),
2432					  (char *)&jibp->ext_jnl_uuid[0],
2433					  (char *)&jibp->machine_serial_num[0],
2434					  jib_size,
2435					  hfsmp->hfs_logical_block_size,
2436					  &need_init);
2437	    if (hfsmp->jvp == NULL) {
2438		buf_brelse(jinfo_bp);
2439		return EROFS;
2440	    } else {
2441		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2442			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2443		    }
2444	    }
2445	    jib_offset = 0;
2446	    recreate_journal = 1;
2447	    write_jibp = 1;
2448	    if (need_init) {
2449		    jib_flags |= kJIJournalNeedInitMask;
2450	    }
2451	}
2452
2453	// save this off for the hack-y check in hfs_remove()
2454	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2455	hfsmp->jnl_size  = jib_size;
2456
2457	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2458	    // if the file system is read-only, check if the journal is empty.
2459	    // if it is, then we can allow the mount.  otherwise we have to
2460	    // return failure.
2461	    retval = journal_is_clean(hfsmp->jvp,
2462				      jib_offset,
2463				      jib_size,
2464				      devvp,
2465		                      hfsmp->hfs_logical_block_size);
2466
2467	    hfsmp->jnl = NULL;
2468
2469	    buf_brelse(jinfo_bp);
2470
2471	    if (retval) {
2472		const char *name = vnode_getname(devvp);
2473	      printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2474		     name ? name : "");
2475		if (name)
2476			vnode_putname(name);
2477	    }
2478
2479	    return retval;
2480	}
2481
2482	if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2483		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2484			   jib_offset, jib_size);
2485		hfsmp->jnl = journal_create(hfsmp->jvp,
2486									jib_offset,
2487									jib_size,
2488									devvp,
2489									hfsmp->hfs_logical_block_size,
2490									arg_flags,
2491									arg_tbufsz,
2492									hfs_sync_metadata, hfsmp->hfs_mp);
2493		if (hfsmp->jnl)
2494			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2495
2496		// no need to start a transaction here... if this were to fail
2497		// we'd just re-init it on the next mount.
2498		jib_flags &= ~kJIJournalNeedInitMask;
2499		write_jibp   = 1;
2500
2501	} else {
2502		//
2503		// if we weren't the last person to mount this volume
2504		// then we need to throw away the journal because it
2505		// is likely that someone else mucked with the disk.
2506		// if the journal is empty this is no big deal.  if the
2507		// disk is dirty this prevents us from replaying the
2508		// journal over top of changes that someone else made.
2509		//
2510		arg_flags |= JOURNAL_RESET;
2511
2512		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2513		//	   jib_offset,
2514		//	   jib_size, SWAP_BE32(vhp->blockSize));
2515
2516		hfsmp->jnl = journal_open(hfsmp->jvp,
2517								  jib_offset,
2518								  jib_size,
2519								  devvp,
2520								  hfsmp->hfs_logical_block_size,
2521								  arg_flags,
2522								  arg_tbufsz,
2523								  hfs_sync_metadata, hfsmp->hfs_mp);
2524		if (hfsmp->jnl)
2525			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2526	}
2527
2528
2529	if (write_jibp) {
2530		jibp->flags  = SWAP_BE32(jib_flags);
2531		jibp->offset = SWAP_BE64(jib_offset);
2532		jibp->size   = SWAP_BE64(jib_size);
2533
2534		buf_bwrite(jinfo_bp);
2535	} else {
2536		buf_brelse(jinfo_bp);
2537	}
2538	jinfo_bp = NULL;
2539	jibp     = NULL;
2540
2541	//printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
2542
2543	// if we expected the journal to be there and we couldn't
2544	// create it or open it then we have to bail out.
2545	if (hfsmp->jnl == NULL) {
2546		printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2547		return EINVAL;
2548	}
2549
2550	return 0;
2551}
2552
2553/*
2554 * Calculate the allocation zone for metadata.
2555 *
2556 * This zone includes the following:
2557 *	Allocation Bitmap file
2558 *	Overflow Extents file
2559 *	Journal file
2560 *	Quota files
2561 *	Clustered Hot files
2562 *	Catalog file
2563 *
2564 *                          METADATA ALLOCATION ZONE
2565 * ____________________________________________________________________________
2566 * |    |    |     |               |                              |           |
2567 * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
2568 * |____|____|_____|_______________|______________________________|___________|
2569 *
2570 * <------------------------------- N * 128 MB ------------------------------->
2571 *
2572 */
2573#define GIGABYTE  (u_int64_t)(1024*1024*1024)
2574
2575#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2576#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2577#define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
2578#define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
2579#define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
2580#define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
2581
2582/* Initialize the metadata zone.
2583 *
2584 * If the size of  the volume is less than the minimum size for
2585 * metadata zone, metadata zone is disabled.
2586 *
2587 * If disable is true, disable metadata zone unconditionally.
2588 */
2589void
2590hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2591{
2592	ExtendedVCB  *vcb;
2593	u_int64_t  fs_size;
2594	u_int64_t  zonesize;
2595	u_int64_t  temp;
2596	u_int64_t  filesize;
2597	u_int32_t  blk;
2598	int  items, really_do_it=1;
2599
2600	vcb = HFSTOVCB(hfsmp);
2601	fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2602
2603	/*
2604	 * For volumes less than 10 GB, don't bother.
2605	 */
2606	if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2607		really_do_it = 0;
2608	}
2609
2610	/*
2611	 * Skip non-journaled volumes as well.
2612	 */
2613	if (hfsmp->jnl == NULL) {
2614		really_do_it = 0;
2615	}
2616
2617	/* If caller wants to disable metadata zone, do it */
2618	if (disable == true) {
2619		really_do_it = 0;
2620	}
2621
2622	/*
2623	 * Start with space for the boot blocks and Volume Header.
2624	 * 1536 = byte offset from start of volume to end of volume header:
2625	 * 1024 bytes is the offset from the start of the volume to the
2626	 * start of the volume header (defined by the volume format)
2627	 * + 512 bytes (the size of the volume header).
2628	 */
2629	zonesize = roundup(1536, hfsmp->blockSize);
2630
2631	/*
2632	 * Add the on-disk size of allocation bitmap.
2633	 */
2634	zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2635
2636	/*
2637	 * Add space for the Journal Info Block and Journal (if they're in
2638	 * this file system).
2639	 */
2640	if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2641		zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2642	}
2643
2644	/*
2645	 * Add the existing size of the Extents Overflow B-tree.
2646	 * (It rarely grows, so don't bother reserving additional room for it.)
2647	 */
2648	zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2649
2650	/*
2651	 * If there is an Attributes B-tree, leave room for 11 clumps worth.
2652	 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2653	 * When installing a full OS install onto a 20GB volume, we use
2654	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2655	 * us with another 3 or 4 clumps worth before we need another extent.
2656	 */
2657	if (hfsmp->hfs_attribute_cp) {
2658		zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2659	}
2660
2661	/*
2662	 * Leave room for 11 clumps of the Catalog B-tree.
2663	 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2664	 * When installing a full OS install onto a 20GB volume, we use
2665	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2666	 * us with another 3 or 4 clumps worth before we need another extent.
2667	 */
2668	zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2669
2670	/*
2671	 * Add space for hot file region.
2672	 *
2673	 * ...for now, use 5 MB per 1 GB (0.5 %)
2674	 */
2675	filesize = (fs_size / 1024) * 5;
2676	if (filesize > HOTBAND_MAXIMUM_SIZE)
2677		filesize = HOTBAND_MAXIMUM_SIZE;
2678	else if (filesize < HOTBAND_MINIMUM_SIZE)
2679		filesize = HOTBAND_MINIMUM_SIZE;
2680	/*
2681	 * Calculate user quota file requirements.
2682	 */
2683	if (hfsmp->hfs_flags & HFS_QUOTAS) {
2684		items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2685		if (items < QF_MIN_USERS)
2686			items = QF_MIN_USERS;
2687		else if (items > QF_MAX_USERS)
2688			items = QF_MAX_USERS;
2689		if (!powerof2(items)) {
2690			int x = items;
2691			items = 4;
2692			while (x>>1 != 1) {
2693				x = x >> 1;
2694				items = items << 1;
2695			}
2696		}
2697		filesize += (items + 1) * sizeof(struct dqblk);
2698		/*
2699		 * Calculate group quota file requirements.
2700		 *
2701		 */
2702		items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2703		if (items < QF_MIN_GROUPS)
2704			items = QF_MIN_GROUPS;
2705		else if (items > QF_MAX_GROUPS)
2706			items = QF_MAX_GROUPS;
2707		if (!powerof2(items)) {
2708			int x = items;
2709			items = 4;
2710			while (x>>1 != 1) {
2711				x = x >> 1;
2712				items = items << 1;
2713			}
2714		}
2715		filesize += (items + 1) * sizeof(struct dqblk);
2716	}
2717	zonesize += filesize;
2718
2719	/*
2720	 * Round up entire zone to a bitmap block's worth.
2721	 * The extra space goes to the catalog file and hot file area.
2722	 */
2723	temp = zonesize;
2724	zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2725	hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2726	/*
2727	 * If doing the round up for hfs_min_alloc_start would push us past
2728	 * allocLimit, then just reset it back to 0.  Though using a value
2729	 * bigger than allocLimit would not cause damage in the block allocator
2730	 * code, this value could get stored in the volume header and make it out
2731	 * to disk, making the volume header technically corrupt.
2732	 */
2733	if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2734		hfsmp->hfs_min_alloc_start = 0;
2735	}
2736
2737	if (really_do_it == 0) {
2738		/* If metadata zone needs to be disabled because the
2739		 * volume was truncated, clear the bit and zero out
2740		 * the values that are no longer needed.
2741		 */
2742		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2743			/* Disable metadata zone */
2744			hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2745
2746			/* Zero out mount point values that are not required */
2747			hfsmp->hfs_catalog_maxblks = 0;
2748			hfsmp->hfs_hotfile_maxblks = 0;
2749			hfsmp->hfs_hotfile_start = 0;
2750			hfsmp->hfs_hotfile_end = 0;
2751			hfsmp->hfs_hotfile_freeblks = 0;
2752			hfsmp->hfs_metazone_start = 0;
2753			hfsmp->hfs_metazone_end = 0;
2754		}
2755
2756		return;
2757	}
2758
2759	temp = zonesize - temp;  /* temp has extra space */
2760	filesize += temp / 3;
2761	hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2762
2763	hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2764
2765	/* Convert to allocation blocks. */
2766	blk = zonesize / vcb->blockSize;
2767
2768	/* The default metadata zone location is at the start of volume. */
2769	hfsmp->hfs_metazone_start = 1;
2770	hfsmp->hfs_metazone_end = blk - 1;
2771
2772	/* The default hotfile area is at the end of the zone. */
2773	hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2774	hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2775	hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2776#if 0
2777	printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2778	printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2779	printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2780#endif
2781	hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2782}
2783
2784
2785static u_int32_t
2786hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2787{
2788	ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
2789	int  lockflags;
2790	int  freeblocks;
2791
2792	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2793	freeblocks = MetaZoneFreeBlocks(vcb);
2794	hfs_systemfile_unlock(hfsmp, lockflags);
2795
2796	/* Minus Extents overflow file reserve. */
2797	freeblocks -=
2798		hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2799	/* Minus catalog file reserve. */
2800	freeblocks -=
2801		hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2802	if (freeblocks < 0)
2803		freeblocks = 0;
2804
2805	return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2806}
2807
2808/*
2809 * Determine if a file is a "virtual" metadata file.
2810 * This includes journal and quota files.
2811 */
2812int
2813hfs_virtualmetafile(struct cnode *cp)
2814{
2815	const char * filename;
2816
2817
2818	if (cp->c_parentcnid != kHFSRootFolderID)
2819		return (0);
2820
2821	filename = (const char *)cp->c_desc.cd_nameptr;
2822	if (filename == NULL)
2823		return (0);
2824
2825	if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
2826	    (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
2827	    (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
2828	    (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
2829	    (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
2830		return (1);
2831
2832	return (0);
2833}
2834
2835
2836//
2837// Fire off a timed callback to sync the disk if the
2838// volume is on ejectable media.
2839//
2840 __private_extern__
2841void
2842hfs_sync_ejectable(struct hfsmount *hfsmp)
2843{
2844	if (hfsmp->hfs_syncer)	{
2845		clock_sec_t secs;
2846		clock_usec_t usecs;
2847		uint64_t now;
2848
2849		clock_get_calendar_microtime(&secs, &usecs);
2850		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2851
2852		if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
2853			// if we have a sync scheduled but i/o is starting to pile up,
2854			// don't call thread_call_enter_delayed() again because that
2855			// will defer the sync.
2856			return;
2857		}
2858
2859		if (hfsmp->hfs_sync_scheduled == 0) {
2860			uint64_t deadline;
2861
2862			hfsmp->hfs_last_sync_request_time = now;
2863
2864			clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
2865
2866			/*
2867			 * Increment hfs_sync_scheduled on the assumption that we're the
2868			 * first thread to schedule the timer.  If some other thread beat
2869			 * us, then we'll decrement it.  If we *were* the first to
2870			 * schedule the timer, then we need to keep track that the
2871			 * callback is waiting to complete.
2872			 */
2873			OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2874			if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
2875				OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2876			else
2877				OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2878		}
2879	}
2880}
2881
2882
2883int
2884hfs_start_transaction(struct hfsmount *hfsmp)
2885{
2886	int ret, unlock_on_err=0;
2887	void * thread = current_thread();
2888
2889#ifdef HFS_CHECK_LOCK_ORDER
2890	/*
2891	 * You cannot start a transaction while holding a system
2892	 * file lock. (unless the transaction is nested.)
2893	 */
2894	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
2895		if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
2896			panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
2897		}
2898		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
2899			panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
2900		}
2901		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
2902			panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
2903		}
2904	}
2905#endif /* HFS_CHECK_LOCK_ORDER */
2906
2907	if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
2908		hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
2909		OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
2910		unlock_on_err = 1;
2911	}
2912
2913	/* If a downgrade to read-only mount is in progress, no other
2914	 * process than the downgrade process is allowed to modify
2915	 * the file system.
2916	 */
2917	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
2918			(hfsmp->hfs_downgrading_proc != thread)) {
2919		ret = EROFS;
2920		goto out;
2921	}
2922
2923	if (hfsmp->jnl) {
2924		ret = journal_start_transaction(hfsmp->jnl);
2925		if (ret == 0) {
2926			OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
2927		}
2928	} else {
2929		ret = 0;
2930	}
2931
2932out:
2933	if (ret != 0 && unlock_on_err) {
2934		hfs_unlock_global (hfsmp);
2935		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2936	}
2937
2938    return ret;
2939}
2940
2941int
2942hfs_end_transaction(struct hfsmount *hfsmp)
2943{
2944    int need_unlock=0, ret;
2945
2946    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
2947	    && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
2948	    need_unlock = 1;
2949    }
2950
2951	if (hfsmp->jnl) {
2952		ret = journal_end_transaction(hfsmp->jnl);
2953	} else {
2954		ret = 0;
2955	}
2956
2957	if (need_unlock) {
2958		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2959		hfs_unlock_global (hfsmp);
2960		hfs_sync_ejectable(hfsmp);
2961	}
2962
2963    return ret;
2964}
2965
2966
2967/*
2968 * Flush the contents of the journal to the disk.
2969 *
2970 *  Input:
2971 *  	wait_for_IO -
2972 *  	If TRUE, wait to write in-memory journal to the disk
2973 *  	consistently, and also wait to write all asynchronous
2974 *  	metadata blocks to its corresponding locations
2975 *  	consistently on the disk.  This means that the journal
2976 *  	is empty at this point and does not contain any
2977 *  	transactions.  This is overkill in normal scenarios
2978 *  	but is useful whenever the metadata blocks are required
2979 *  	to be consistent on-disk instead of just the journal
2980 *  	being consistent; like before live verification
2981 *  	and live volume resizing.
2982 *
2983 *  	If FALSE, only wait to write in-memory journal to the
2984 *  	disk consistently.  This means that the journal still
2985 *  	contains uncommitted transactions and the file system
2986 *  	metadata blocks in the journal transactions might be
2987 *  	written asynchronously to the disk.  But there is no
2988 *  	guarantee that they are written to the disk before
2989 *  	returning to the caller.  Note that this option is
2990 *  	sufficient for file system data integrity as it
2991 *  	guarantees consistent journal content on the disk.
2992 */
2993int
2994hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
2995{
2996	int ret;
2997
2998	/* Only peek at hfsmp->jnl while holding the global lock */
2999	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3000	if (hfsmp->jnl) {
3001		ret = journal_flush(hfsmp->jnl, wait_for_IO);
3002	} else {
3003		ret = 0;
3004	}
3005	hfs_unlock_global (hfsmp);
3006
3007	return ret;
3008}
3009
3010
3011/*
3012 * hfs_erase_unused_nodes
3013 *
 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3015 * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3016 * zeroes to the unused nodes.
3017 *
3018 * How do we detect when a volume needs this repair?  We can't always be
3019 * certain.  If a volume was created after a certain date, then it may have
3020 * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
3021 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3022 * that means that the entire first clump must have been written to, which means
3023 * there shouldn't be unused and unwritten nodes in that first clump, and this
3024 * repair is not needed.
3025 *
3026 * We have defined a bit in the Volume Header's attributes to indicate when the
3027 * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3028 * As will fsck_hfs when it repairs the unused nodes.
3029 */
3030int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3031{
3032	int result;
3033	struct filefork *catalog;
3034	int lockflags;
3035
3036	if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3037	{
3038		/* This volume has already been checked and repaired. */
3039		return 0;
3040	}
3041
3042	if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3043	{
3044		/* This volume is too old to have had the problem. */
3045		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3046		return 0;
3047	}
3048
3049	catalog = hfsmp->hfs_catalog_cp->c_datafork;
3050	if (catalog->ff_size > catalog->ff_clumpsize)
3051	{
3052		/* The entire first clump must have been in use at some point. */
3053		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3054		return 0;
3055	}
3056
3057	/*
3058	 * If we get here, we need to zero out those unused nodes.
3059	 *
3060	 * We start a transaction and lock the catalog since we're going to be
3061	 * making on-disk changes.  But note that BTZeroUnusedNodes doens't actually
3062	 * do its writing via the journal, because that would be too much I/O
3063	 * to fit in a transaction, and it's a pain to break it up into multiple
3064	 * transactions.  (It behaves more like growing a B-tree would.)
3065	 */
3066	printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3067	result = hfs_start_transaction(hfsmp);
3068	if (result)
3069		goto done;
3070	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3071	result = BTZeroUnusedNodes(catalog);
3072	vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3073	hfs_systemfile_unlock(hfsmp, lockflags);
3074	hfs_end_transaction(hfsmp);
3075	if (result == 0)
3076		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3077	printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3078
3079done:
3080	return result;
3081}
3082
3083
3084extern time_t snapshot_timestamp;
3085
3086int
3087check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3088{
3089	int tracked_error = 0, snapshot_error = 0;
3090
3091	if (vp == NULL) {
3092		return 0;
3093	}
3094
3095	if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
3096		// the file has the tracked bit set, so send an event to the tracked-file handler
3097		int error;
3098
3099		// printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
3100		error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
3101		if (error) {
3102			if (error == EAGAIN) {
3103				printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
3104
3105			} else if (error == EINTR) {
3106				// printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
3107				tracked_error = EINTR;
3108			}
3109		}
3110	}
3111
3112	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3113		// the change time is within this epoch
3114		int error;
3115
3116		error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3117		if (error == EDEADLK) {
3118			snapshot_error = 0;
3119		} else if (error) {
3120			if (error == EAGAIN) {
3121				printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3122			} else if (error == EINTR) {
3123				// printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3124				snapshot_error = EINTR;
3125			}
3126		}
3127	}
3128
3129	if (tracked_error) return tracked_error;
3130	if (snapshot_error) return snapshot_error;
3131
3132	return 0;
3133}
3134
3135int
3136check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3137{
3138	int error;
3139
3140	if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3141		// there's nothing to do, it's not dataless
3142		return 0;
3143	}
3144
3145	// printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3146	error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3147	if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3148		error = 0;
3149	} else if (error) {
3150		if (error == EAGAIN) {
3151			printf("hfs: dataless: timed out waiting for namespace handler...\n");
3152			// XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3153			return 0;
3154		} else if (error == EINTR) {
3155			// printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3156			return EINTR;
3157		}
3158	} else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3159		//
3160		// if we're here, the dataless bit is still set on the file
3161		// which means it didn't get handled.  we return an error
3162		// but it's presently ignored by all callers of this function.
3163		//
3164		// XXXdbg - EDATANOTPRESENT is what we really need...
3165		//
3166		return EBADF;
3167	}
3168
3169	return error;
3170}
3171