1/*
2 * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/fcntl.h>
32#include <sys/kernel.h>
33#include <sys/malloc.h>
34#include <sys/ubc.h>
35#include <sys/ubc_internal.h>
36#include <sys/vnode.h>
37#include <sys/vnode_internal.h>
38#include <sys/kauth.h>
39
40#include <hfs/hfs.h>
41#include <hfs/hfs_endian.h>
42#include <hfs/hfs_format.h>
43#include <hfs/hfs_mount.h>
44#include <hfs/hfs_hotfiles.h>
45
46#include "hfscommon/headers/BTreeScanner.h"
47
48
49#define HFC_DEBUG  0
50#define HFC_VERBOSE 0
51
52
53/*
54 * Minimum post Tiger base time.
55 * Thu Mar 31 17:00:00 2005
56 */
57#define HFC_MIN_BASE_TIME   0x424c8f00L
58
59/*
60 * Hot File List (runtime).
61 */
62typedef struct hotfileinfo {
63	u_int32_t  hf_fileid;
64	u_int32_t  hf_temperature;
65	u_int32_t  hf_blocks;
66} hotfileinfo_t;
67
68typedef struct hotfilelist {
69	u_int32_t     hfl_magic;
70	u_int32_t     hfl_version;
71	time_t        hfl_duration;    /* duration of sample period */
72	int           hfl_count;       /* count of hot files recorded */
73	int           hfl_next;        /* next file to move */
74	int           hfl_totalblocks; /* total hot file blocks */
75	int           hfl_reclaimblks; /* blocks to reclaim in HFV */
76	u_int32_t     hfl_spare[2];
77	hotfileinfo_t hfl_hotfile[1];  /* array of hot files */
78} hotfilelist_t;
79
80
81/*
82 * Hot File Entry (runtime).
83 */
84typedef struct hotfile_entry {
85	struct  hotfile_entry  *left;
86	struct  hotfile_entry  *right;
87	u_int32_t  fileid;
88	u_int32_t  temperature;
89	u_int32_t  blocks;
90} hotfile_entry_t;
91
92/*
93 * Hot File Recording Data (runtime).
94 */
95typedef struct hotfile_data {
96	struct hfsmount *hfsmp;
97	long             refcount;
98	int		 activefiles;  /* active number of hot files */
99	u_int32_t	 threshold;
100	u_int32_t	 maxblocks;
101	hotfile_entry_t	*rootentry;
102	hotfile_entry_t	*freelist;
103	hotfile_entry_t	*coldest;
104	hotfile_entry_t	 entries[1];
105} hotfile_data_t;
106
107static int  hfs_recording_start (struct hfsmount *);
108static int  hfs_recording_stop (struct hfsmount *);
109
110
111/*
112 * Hot File Data recording functions (in-memory binary tree).
113 */
114static void              hf_insert (hotfile_data_t *, hotfile_entry_t *);
115static void              hf_delete (hotfile_data_t *, u_int32_t, u_int32_t);
116static hotfile_entry_t * hf_coldest (hotfile_data_t *);
117static hotfile_entry_t * hf_getnewentry (hotfile_data_t *);
118static void              hf_getsortedlist (hotfile_data_t *, hotfilelist_t *);
119
120#if HFC_DEBUG
121static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t);
122static void  hf_maxdepth(hotfile_entry_t *, int, int *);
123static void  hf_printtree (hotfile_entry_t *);
124#endif
125
126/*
127 * Hot File misc support functions.
128 */
129static int  hotfiles_collect (struct hfsmount *);
130static int  hotfiles_age (struct hfsmount *);
131static int  hotfiles_adopt (struct hfsmount *);
132static int  hotfiles_evict (struct hfsmount *, vfs_context_t);
133static int  hotfiles_refine (struct hfsmount *);
134static int  hotextents(struct hfsmount *, HFSPlusExtentDescriptor *);
135static int  hfs_addhotfile_internal(struct vnode *);
136
137
138/*
139 * Hot File Cluster B-tree (on disk) functions.
140 */
141static int  hfc_btree_create (struct hfsmount *, unsigned int, unsigned int);
142static int  hfc_btree_open (struct hfsmount *, struct vnode **);
143static int  hfc_btree_close (struct hfsmount *, struct vnode *);
144static int  hfc_comparekeys (HotFileKey *, HotFileKey *);
145
146
147char hfc_tag[] = "CLUSTERED HOT FILES B-TREE     ";
148
149
150/*
151 *========================================================================
152 *                       HOT FILE INTERFACE ROUTINES
153 *========================================================================
154 */
155
156/*
157 * Start recording the hotest files on a file system.
158 *
159 * Requires that the hfc_mutex be held.
160 */
161static int
162hfs_recording_start(struct hfsmount *hfsmp)
163{
164	hotfile_data_t *hotdata;
165	struct timeval tv;
166	int maxentries;
167	size_t size;
168	int i;
169	int error;
170
171	if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
172	    (hfsmp->jnl == NULL) ||
173	    (hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) {
174		return (EPERM);
175	}
176	if (HFSTOVCB(hfsmp)->freeBlocks < (2 * (u_int32_t)hfsmp->hfs_hotfile_maxblks)) {
177		return (ENOSPC);
178	}
179	if (hfsmp->hfc_stage != HFC_IDLE) {
180		return (EBUSY);
181	}
182	hfsmp->hfc_stage = HFC_BUSY;
183
184	/*
185	 * Dump previous recording data.
186	 */
187	if (hfsmp->hfc_recdata) {
188		void * tmp;
189
190		tmp = hfsmp->hfc_recdata;
191		hfsmp->hfc_recdata = NULL;
192		FREE(tmp, M_TEMP);
193	}
194
195	microtime(&tv);  /* Times are base on GMT time. */
196
197	/*
198	 * On first startup check for suspended recording.
199	 */
200	if (hfsmp->hfc_timebase == 0 &&
201	    hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) == 0) {
202		HotFilesInfo hotfileinfo;
203
204		if ((BTGetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo,
205		                   sizeof(hotfileinfo)) == 0) &&
206		    (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) &&
207		    (SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
208		    (SWAP_BE32 (hotfileinfo.timebase) > 0)) {
209			hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
210			hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
211			hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
212			/* Fix up any bogus timebase values. */
213			if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
214				hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
215			}
216#if HFC_VERBOSE
217			printf("hfs: Resume recording hot files on %s (%d secs left)\n",
218				hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft));
219#endif
220		} else {
221			hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
222			hfsmp->hfc_timebase = tv.tv_sec + 1;
223			hfsmp->hfc_timeout = hfsmp->hfc_timebase + HFC_DEFAULT_DURATION;
224		}
225		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
226		hfsmp->hfc_filevp = NULL;
227	} else {
228		struct cat_attr cattr;
229		u_int32_t cnid;
230
231		/*
232		 * Make sure a btree file exists.
233		 */
234		cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
235		if ((cnid == 0) &&
236		    !S_ISREG(cattr.ca_mode) &&
237		    (error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT))) {
238			hfsmp->hfc_stage = HFC_IDLE;
239			wakeup((caddr_t)&hfsmp->hfc_stage);
240			return (error);
241		}
242#if HFC_VERBOSE
243		printf("hfs: begin recording hot files on %s\n", hfsmp->vcbVN);
244#endif
245		hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
246		hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION;
247
248		/* Reset time base.  */
249		if (hfsmp->hfc_timebase == 0) {
250			hfsmp->hfc_timebase = tv.tv_sec + 1;
251		} else {
252			time_t cumulativebase;
253
254			cumulativebase = hfsmp->hfc_timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION);
255			hfsmp->hfc_timebase = MAX(hfsmp->hfc_timebase, cumulativebase);
256		}
257	}
258
259	if ((hfsmp->hfc_maxfiles == 0) ||
260	    (hfsmp->hfc_maxfiles > HFC_MAXIMUM_FILE_COUNT)) {
261		hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
262	}
263	maxentries = hfsmp->hfc_maxfiles;
264
265	size = sizeof(hotfile_data_t) + (maxentries * sizeof(hotfile_entry_t));
266	MALLOC(hotdata, hotfile_data_t *, size, M_TEMP, M_WAITOK);
267	if (hotdata == NULL) {
268		hfsmp->hfc_recdata = NULL;
269		hfsmp->hfc_stage = HFC_IDLE;
270		wakeup((caddr_t)&hfsmp->hfc_stage);
271		return(ENOMEM);
272	}
273
274	bzero(hotdata, size);
275
276	for (i = 1; i < maxentries ; i++)
277		hotdata->entries[i-1].right = &hotdata->entries[i];
278
279	hotdata->freelist = &hotdata->entries[0];
280	/*
281	 * Establish minimum temperature and maximum file size.
282	 */
283	hotdata->threshold = HFC_MINIMUM_TEMPERATURE;
284	hotdata->maxblocks = HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize;
285	hotdata->hfsmp = hfsmp;
286
287	hfsmp->hfc_recdata = hotdata;
288	hfsmp->hfc_stage = HFC_RECORDING;
289	wakeup((caddr_t)&hfsmp->hfc_stage);
290	return (0);
291}
292
293/*
294 * Stop recording the hotest files on a file system.
295 *
296 * Requires that the hfc_mutex be held.
297 */
298static int
299hfs_recording_stop(struct hfsmount *hfsmp)
300{
301	hotfile_data_t *hotdata;
302	hotfilelist_t  *listp;
303	struct timeval tv;
304	size_t  size;
305	enum hfc_stage newstage = HFC_IDLE;
306	int  error;
307
308	if (hfsmp->hfc_stage != HFC_RECORDING)
309		return (EPERM);
310
311	hfsmp->hfc_stage = HFC_BUSY;
312
313	hotfiles_collect(hfsmp);
314
315
316	/*
317	 * Convert hot file data into a simple file id list....
318	 *
319	 * then dump the sample data
320	 */
321#if HFC_VERBOSE
322	printf("hfs: end of hot file recording on %s\n", hfsmp->vcbVN);
323#endif
324	hotdata = (hotfile_data_t *)hfsmp->hfc_recdata;
325	if (hotdata == NULL)
326		return (0);
327	hfsmp->hfc_recdata = NULL;
328	hfsmp->hfc_stage = HFC_EVALUATION;
329	wakeup((caddr_t)&hfsmp->hfc_stage);
330
331#if HFC_VERBOSE
332	printf("hfs:   curentries: %d\n", hotdata->activefiles);
333#endif
334	/*
335	 * If no hot files recorded then we're done.
336	 */
337	if (hotdata->rootentry == NULL) {
338		error = 0;
339		goto out;
340	}
341
342	/* Open the B-tree file for writing... */
343	if (hfsmp->hfc_filevp)
344		panic("hfs_recording_stop: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
345
346	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
347	if (error) {
348		goto out;
349	}
350
351	/*
352	 * Age the previous set of clustered hot files.
353	 */
354	error = hotfiles_age(hfsmp);
355	if (error) {
356		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
357		hfsmp->hfc_filevp = NULL;
358		goto out;
359	}
360
361	/*
362	 * Create a sorted list of hotest files.
363	 */
364	size = sizeof(hotfilelist_t);
365	size += sizeof(hotfileinfo_t) * (hotdata->activefiles - 1);
366	MALLOC(listp, hotfilelist_t *, size, M_TEMP, M_WAITOK);
367	if (listp == NULL) {
368		error = ENOMEM;
369		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
370		hfsmp->hfc_filevp = NULL;
371		goto out;
372	}
373
374	bzero(listp, size);
375
376	hf_getsortedlist(hotdata, listp);	/* NOTE: destroys hot file tree! */
377	microtime(&tv);
378	listp->hfl_duration = tv.tv_sec - hfsmp->hfc_timebase;
379	hfsmp->hfc_recdata = listp;
380
381	/*
382	 * Account for duplicates.
383	 */
384	error = hotfiles_refine(hfsmp);
385	if (error) {
386		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
387		hfsmp->hfc_filevp = NULL;
388		goto out;
389	}
390
391	/*
392	 * Compute the amount of space to reclaim...
393	 */
394	if (listp->hfl_totalblocks > hfsmp->hfs_hotfile_freeblks) {
395		listp->hfl_reclaimblks =
396			MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) -
397			hfsmp->hfs_hotfile_freeblks;
398#if HFC_VERBOSE
399		printf("hfs_recording_stop: need to reclaim %d blocks\n", listp->hfl_reclaimblks);
400#endif
401		if (listp->hfl_reclaimblks)
402			newstage = HFC_EVICTION;
403		else
404			newstage = HFC_ADOPTION;
405	} else {
406		newstage = HFC_ADOPTION;
407	}
408
409	if (newstage == HFC_ADOPTION && listp->hfl_totalblocks == 0) {
410		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
411		hfsmp->hfc_filevp = NULL;
412		newstage = HFC_IDLE;
413	}
414out:
415#if HFC_VERBOSE
416	if (newstage == HFC_EVICTION)
417		printf("hfs: evicting coldest files\n");
418	else if (newstage == HFC_ADOPTION)
419		printf("hfs: adopting hotest files\n");
420#endif
421	FREE(hotdata, M_TEMP);
422
423	hfsmp->hfc_stage = newstage;
424	wakeup((caddr_t)&hfsmp->hfc_stage);
425	return (error);
426}
427
428/*
429 * Suspend recording the hotest files on a file system.
430 */
431int
432hfs_recording_suspend(struct hfsmount *hfsmp)
433{
434	HotFilesInfo hotfileinfo;
435	hotfile_data_t *hotdata = NULL;
436	struct timeval tv;
437	int  error;
438
439	if (hfsmp->hfc_stage == HFC_DISABLED)
440		return (0);
441
442	lck_mtx_lock(&hfsmp->hfc_mutex);
443
444	/*
445	 * XXX NOTE
446	 * A suspend can occur during eval/evict/adopt stage.
447	 * In that case we would need to write out info and
448	 * flush our HFBT vnode. Currently we just bail.
449	 */
450
451	hotdata = (hotfile_data_t *)hfsmp->hfc_recdata;
452	if (hotdata == NULL || hfsmp->hfc_stage != HFC_RECORDING) {
453		error = 0;
454		goto out;
455	}
456	hfsmp->hfc_stage = HFC_BUSY;
457
458#if HFC_VERBOSE
459	printf("hfs: suspend hot file recording on %s\n", hfsmp->vcbVN);
460#endif
461	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
462	if (error) {
463		printf("hfs_recording_suspend: err %d opening btree\n", error);
464		goto out;
465	}
466
467	if (hfs_start_transaction(hfsmp) != 0) {
468	    error = EINVAL;
469	    goto out;
470	}
471	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
472		error = EPERM;
473		goto end_transaction;
474	}
475
476	microtime(&tv);
477	hotfileinfo.magic       = SWAP_BE32 (HFC_MAGIC);
478	hotfileinfo.version     = SWAP_BE32 (HFC_VERSION);
479	hotfileinfo.duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
480	hotfileinfo.timebase    = SWAP_BE32 (hfsmp->hfc_timebase);
481	hotfileinfo.timeleft    = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec);
482	hotfileinfo.threshold   = SWAP_BE32 (hotdata->threshold);
483	hotfileinfo.maxfileblks = SWAP_BE32 (hotdata->maxblocks);
484	hotfileinfo.maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
485	strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag);
486	(void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo));
487
488	hfs_unlock(VTOC(hfsmp->hfc_filevp));
489
490end_transaction:
491	hfs_end_transaction(hfsmp);
492
493out:
494	if (hfsmp->hfc_filevp) {
495		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
496		hfsmp->hfc_filevp = NULL;
497	}
498	if (hotdata) {
499		FREE(hotdata, M_TEMP);
500		hfsmp->hfc_recdata = NULL;
501	}
502	hfsmp->hfc_stage = HFC_DISABLED;
503	wakeup((caddr_t)&hfsmp->hfc_stage);
504
505	lck_mtx_unlock(&hfsmp->hfc_mutex);
506	return (error);
507}
508
509
510/*
511 *
512 */
513int
514hfs_recording_init(struct hfsmount *hfsmp)
515{
516	CatalogKey * keyp;
517	CatalogRecord * datap;
518	u_int32_t  dataSize;
519	HFSPlusCatalogFile *filep;
520	BTScanState scanstate;
521	BTreeIterator * iterator = NULL;
522	FSBufferDescriptor  record;
523	HotFileKey * key;
524	filefork_t * filefork;
525	u_int32_t  data;
526	struct cat_attr cattr;
527	u_int32_t  cnid;
528	int error = 0;
529
530	int inserted = 0;  /* debug variables */
531	int filecount = 0;
532
533	/*
534	 * For now, only the boot volume is supported.
535	 */
536	if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
537		hfsmp->hfc_stage = HFC_DISABLED;
538		return (EPERM);
539	}
540
541	/*
542	 * Tracking of hot files requires up-to-date access times.
543	 * So if access time updates are disabled, then we disable
544	 * hot files, too.
545	 */
546	if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
547		hfsmp->hfc_stage = HFC_DISABLED;
548		return EPERM;
549	}
550
551	/*
552	 * If the Hot File btree exists then metadata zone is ready.
553	 */
554	cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
555	if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
556		if (hfsmp->hfc_stage == HFC_DISABLED)
557			hfsmp->hfc_stage = HFC_IDLE;
558		return (0);
559	}
560
561	if (hfs_start_transaction(hfsmp) != 0) {
562		return EINVAL;
563	}
564
565	error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
566	if (error) {
567#if HFC_VERBOSE
568		printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
569#endif
570		goto out2;
571	}
572	/*
573	 * Open the Hot File B-tree file for writing.
574	 */
575	if (hfsmp->hfc_filevp)
576		panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
577	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
578	if (error) {
579#if HFC_VERBOSE
580		printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
581#endif
582		goto out2;
583	}
584	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
585	if (iterator == NULL) {
586		error = ENOMEM;
587		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
588		hfsmp->hfc_filevp = NULL;
589		goto out2;
590	}
591	bzero(iterator, sizeof(*iterator));
592	key = (HotFileKey*) &iterator->key;
593	key->keyLength = HFC_KEYLENGTH;
594
595	record.bufferAddress = &data;
596	record.itemSize = sizeof(u_int32_t);
597	record.itemCount = 1;
598#if HFC_VERBOSE
599	printf("hfs: Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN);
600#endif
601	/*
602	 * Get ready to scan the Catalog file.
603	 */
604	error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
605	                       kCatSearchBufferSize, &scanstate);
606	if (error) {
607		printf("hfs_recording_init: err %d BTScanInit\n", error);
608		goto out2;
609	}
610
611	/*
612	 * The writes to Hot File B-tree file are journaled.
613	 */
614	if (hfs_start_transaction(hfsmp) != 0) {
615	    error = EINVAL;
616	    goto out1;
617	}
618	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
619		error = EPERM;
620		goto out0;
621	}
622	filefork = VTOF(hfsmp->hfc_filevp);
623
624	/*
625	 * Visit all the catalog btree leaf records.
626	 */
627	for (;;) {
628		error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
629		if (error) {
630			if (error == btNotFound)
631				error = 0;
632			else
633				printf("hfs_recording_init: err %d BTScanNext\n", error);
634			break;
635		}
636		if ((datap->recordType != kHFSPlusFileRecord) ||
637		    (dataSize != sizeof(HFSPlusCatalogFile))) {
638			continue;
639		}
640		filep = (HFSPlusCatalogFile *)datap;
641		filecount++;
642		if (filep->dataFork.totalBlocks == 0) {
643			continue;
644		}
645		/*
646		 * Any file that has blocks inside the hot file
647		 * space is recorded for later eviction.
648		 *
649		 * For now, resource forks are ignored.
650		 */
651		if (!hotextents(hfsmp, &filep->dataFork.extents[0])) {
652			continue;
653		}
654		cnid = filep->fileID;
655
656		/* Skip over journal files. */
657		if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
658			continue;
659		}
660		/*
661		 * XXX - need to skip quota files as well.
662		 */
663
664		/* Insert a hot file entry. */
665		key->keyLength   = HFC_KEYLENGTH;
666		key->temperature = HFC_MINIMUM_TEMPERATURE;
667		key->fileID      = cnid;
668		key->forkType    = 0;
669		data = 0x3f3f3f3f;
670		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
671		if (error) {
672			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
673			error = MacToVFSError(error);
674			break;
675		}
676
677		/* Insert the corresponding thread record. */
678		key->keyLength = HFC_KEYLENGTH;
679		key->temperature = HFC_LOOKUPTAG;
680		key->fileID = cnid;
681		key->forkType = 0;
682		data = HFC_MINIMUM_TEMPERATURE;
683		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
684		if (error) {
685			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
686			error = MacToVFSError(error);
687			break;
688		}
689		inserted++;
690	}
691	(void) BTFlushPath(filefork);
692	hfs_unlock(VTOC(hfsmp->hfc_filevp));
693
694out0:
695	hfs_end_transaction(hfsmp);
696#if HFC_VERBOSE
697	printf("hfs: %d files identified out of %d\n", inserted, filecount);
698#endif
699
700out1:
701	(void) BTScanTerminate(&scanstate, &data, &data, &data);
702out2:
703	hfs_end_transaction(hfsmp);
704	if (iterator)
705		FREE(iterator, M_TEMP);
706	if (hfsmp->hfc_filevp) {
707		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
708		hfsmp->hfc_filevp = NULL;
709	}
710	if (error == 0)
711		hfsmp->hfc_stage = HFC_IDLE;
712
713	return (error);
714}
715
716/*
717 * Use sync to perform ocassional background work.
718 */
719int
720hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
721{
722	if (hfsmp->hfc_stage) {
723		struct timeval tv;
724
725		lck_mtx_lock(&hfsmp->hfc_mutex);
726
727		switch (hfsmp->hfc_stage) {
728		case HFC_IDLE:
729			(void) hfs_recording_start(hfsmp);
730			break;
731
732		case HFC_RECORDING:
733			microtime(&tv);
734			if (tv.tv_sec > hfsmp->hfc_timeout)
735				(void) hfs_recording_stop(hfsmp);
736			break;
737
738		case HFC_EVICTION:
739			(void) hotfiles_evict(hfsmp, ctx);
740			break;
741
742		case HFC_ADOPTION:
743			(void) hotfiles_adopt(hfsmp);
744			break;
745		default:
746			break;
747		}
748
749		lck_mtx_unlock(&hfsmp->hfc_mutex);
750	}
751	return (0);
752}
753
754/*
755 * Add a hot file to the recording list.
756 *
757 * This can happen when a hot file gets reclaimed or at the
758 * end of the recording period for any active hot file.
759 *
760 * NOTE: Since both the data and resource fork can  be hot,
761 * there can be two entries for the same file id.
762 *
763 * Note: the cnode is locked on entry.
764 */
765int
766hfs_addhotfile(struct vnode *vp)
767{
768	hfsmount_t *hfsmp;
769	int error;
770
771	hfsmp = VTOHFS(vp);
772	if (hfsmp->hfc_stage != HFC_RECORDING)
773		return (0);
774
775	lck_mtx_lock(&hfsmp->hfc_mutex);
776	error = hfs_addhotfile_internal(vp);
777	lck_mtx_unlock(&hfsmp->hfc_mutex);
778	return (error);
779}
780
781static int
782hfs_addhotfile_internal(struct vnode *vp)
783{
784	hotfile_data_t *hotdata;
785	hotfile_entry_t *entry;
786	hfsmount_t *hfsmp;
787	cnode_t *cp;
788	filefork_t *ffp;
789	u_int32_t temperature;
790
791	hfsmp = VTOHFS(vp);
792	if (hfsmp->hfc_stage != HFC_RECORDING)
793		return (0);
794
795	if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
796		return (0);
797	}
798	/* Skip resource forks for now. */
799	if (VNODE_IS_RSRC(vp)) {
800		return (0);
801	}
802	if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) {
803		return (0);
804	}
805	ffp = VTOF(vp);
806	cp = VTOC(vp);
807
808	if ((ffp->ff_bytesread == 0) ||
809	    (ffp->ff_blocks == 0) ||
810	    (ffp->ff_size == 0) ||
811	    (ffp->ff_blocks > hotdata->maxblocks) ||
812	    (cp->c_flag & (C_DELETED | C_NOEXISTS)) ||
813	    (cp->c_bsdflags & UF_NODUMP) ||
814	    (cp->c_atime < hfsmp->hfc_timebase)) {
815		return (0);
816	}
817
818	temperature = ffp->ff_bytesread / ffp->ff_size;
819	if (temperature < hotdata->threshold) {
820		return (0);
821	}
822	/*
823	 * If there is room or this file is hotter than
824	 * the coldest one then add it to the list.
825	 *
826	 */
827	if ((hotdata->activefiles < hfsmp->hfc_maxfiles) ||
828	    (hotdata->coldest == NULL) ||
829	    (temperature > hotdata->coldest->temperature)) {
830		++hotdata->refcount;
831		entry = hf_getnewentry(hotdata);
832		entry->temperature = temperature;
833		entry->fileid = cp->c_fileid;
834		entry->blocks = ffp->ff_blocks;
835		hf_insert(hotdata, entry);
836		--hotdata->refcount;
837	}
838
839	return (0);
840}
841
842/*
843 * Remove a hot file from the recording list.
844 *
845 * This can happen when a hot file becomes
846 * an active vnode (active hot files are
847 * not kept in the recording list until the
848 * end of the recording period).
849 *
850 * Note: the cnode is locked on entry.
851 */
852int
853hfs_removehotfile(struct vnode *vp)
854{
855	hotfile_data_t *hotdata;
856	hfsmount_t *hfsmp;
857	cnode_t *cp;
858	filefork_t *ffp;
859	u_int32_t temperature;
860
861	hfsmp = VTOHFS(vp);
862	if (hfsmp->hfc_stage != HFC_RECORDING)
863		return (0);
864
865	if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
866		return (0);
867	}
868
869	ffp = VTOF(vp);
870	cp = VTOC(vp);
871
872	if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) ||
873	    (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) {
874		return (0);
875	}
876
877	lck_mtx_lock(&hfsmp->hfc_mutex);
878	if (hfsmp->hfc_stage != HFC_RECORDING)
879		goto out;
880	if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL)
881		goto out;
882
883	temperature = ffp->ff_bytesread / ffp->ff_size;
884	if (temperature < hotdata->threshold)
885		goto out;
886
887	if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) {
888		++hotdata->refcount;
889		hf_delete(hotdata, VTOC(vp)->c_fileid, temperature);
890		--hotdata->refcount;
891	}
892out:
893	lck_mtx_unlock(&hfsmp->hfc_mutex);
894	return (0);
895}
896
897
898/*
899 *========================================================================
900 *                     HOT FILE MAINTENANCE ROUTINES
901 *========================================================================
902 */
903
904static int
905hotfiles_collect_callback(struct vnode *vp, __unused void *cargs)
906{
907        if ((vnode_isreg(vp) || vnode_islnk(vp)) && !vnode_issystem(vp))
908	        (void) hfs_addhotfile_internal(vp);
909
910	return (VNODE_RETURNED);
911}
912
913/*
914 * Add all active hot files to the recording list.
915 */
916static int
917hotfiles_collect(struct hfsmount *hfsmp)
918{
919	struct mount *mp = HFSTOVFS(hfsmp);
920
921	if (vfs_busy(mp, LK_NOWAIT))
922		return (0);
923
924	/*
925	 * hotfiles_collect_callback will be called for each vnode
926	 * hung off of this mount point
927	 * the vnode will be
928	 * properly referenced and unreferenced around the callback
929	 */
930	vnode_iterate(mp, 0, hotfiles_collect_callback, (void *)NULL);
931
932	vfs_unbusy(mp);
933
934	return (0);
935}
936
937
938/*
939 * Update the data of a btree record
940 * This is called from within BTUpdateRecord.
941 */
942static int
943update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state)
944{
945	if (key->temperature == HFC_LOOKUPTAG)
946		*data = *state;
947	return (0);
948}
949
950/*
951 * Identify files already in hot area.
952 */
953static int
954hotfiles_refine(struct hfsmount *hfsmp)
955{
956	BTreeIterator * iterator = NULL;
957	struct mount *mp;
958	filefork_t * filefork;
959	hotfilelist_t  *listp;
960	FSBufferDescriptor  record;
961	HotFileKey * key;
962	u_int32_t  data;
963	int  i;
964	int  error = 0;
965
966
967	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
968		return (0);
969
970	mp = HFSTOVFS(hfsmp);
971
972	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
973	if (iterator == NULL) {
974		error = ENOMEM;
975		goto out;
976	}
977	bzero(iterator, sizeof(*iterator));
978	key = (HotFileKey*) &iterator->key;
979
980	record.bufferAddress = &data;
981	record.itemSize = sizeof(u_int32_t);
982	record.itemCount = 1;
983
984	if (hfs_start_transaction(hfsmp) != 0) {
985	    error = EINVAL;
986	    goto out;
987	}
988	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
989		error = EPERM;
990		goto out1;
991	}
992	filefork = VTOF(hfsmp->hfc_filevp);
993
994	for (i = 0; i < listp->hfl_count; ++i) {
995		/*
996		 * Check if entry (thread) is already in hot area.
997		 */
998		key->keyLength = HFC_KEYLENGTH;
999		key->temperature = HFC_LOOKUPTAG;
1000		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1001		key->forkType = 0;
1002		(void) BTInvalidateHint(iterator);
1003		if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) != 0) {
1004			continue;  /* not in hot area, so skip */
1005		}
1006
1007		/*
1008		 * Update thread entry with latest temperature.
1009		 */
1010		error = BTUpdateRecord(filefork, iterator,
1011				(IterateCallBackProcPtr)update_callback,
1012				&listp->hfl_hotfile[i].hf_temperature);
1013		if (error) {
1014			printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID);
1015			error = MacToVFSError(error);
1016		//	break;
1017		}
1018		/*
1019		 * Re-key entry with latest temperature.
1020		 */
1021		key->keyLength = HFC_KEYLENGTH;
1022		key->temperature = data;
1023		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1024		key->forkType = 0;
1025		/* Pick up record data. */
1026		(void) BTInvalidateHint(iterator);
1027		(void) BTSearchRecord(filefork, iterator, &record, NULL, iterator);
1028		error = BTDeleteRecord(filefork, iterator);
1029		if (error) {
1030			printf("hfs: hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID);
1031			error = MacToVFSError(error);
1032			break;
1033		}
1034		key->keyLength = HFC_KEYLENGTH;
1035		key->temperature = listp->hfl_hotfile[i].hf_temperature;
1036		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1037		key->forkType = 0;
1038		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1039		if (error) {
1040			printf("hfs: hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID);
1041			error = MacToVFSError(error);
1042			break;
1043		}
1044
1045		/*
1046		 * Invalidate this entry in the list.
1047		 */
1048		listp->hfl_hotfile[i].hf_temperature = 0;
1049		listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
1050
1051	} /* end for */
1052
1053	(void) BTFlushPath(filefork);
1054	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1055
1056out1:
1057	hfs_end_transaction(hfsmp);
1058out:
1059	if (iterator)
1060		FREE(iterator, M_TEMP);
1061	return (error);
1062}
1063
1064/*
1065 * Move new hot files into hot area.
1066 *
1067 * Requires that the hfc_mutex be held.
1068 */
1069static int
1070hotfiles_adopt(struct hfsmount *hfsmp)
1071{
1072	BTreeIterator * iterator = NULL;
1073	struct vnode *vp;
1074	filefork_t * filefork;
1075	hotfilelist_t  *listp;
1076	FSBufferDescriptor  record;
1077	HotFileKey * key;
1078	u_int32_t  data;
1079	enum hfc_stage stage;
1080	int  fileblocks;
1081	int  blksmoved;
1082	int  i;
1083	int  last;
1084	int  error = 0;
1085	int  startedtrans = 0;
1086
1087	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
1088		return (0);
1089
1090	if (hfsmp->hfc_stage != HFC_ADOPTION) {
1091		return (EBUSY);
1092	}
1093	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
1094		return (EPERM);
1095	}
1096
1097	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1098	if (iterator == NULL) {
1099		hfs_unlock(VTOC(hfsmp->hfc_filevp));
1100		return (ENOMEM);
1101	}
1102
1103	stage = hfsmp->hfc_stage;
1104	hfsmp->hfc_stage = HFC_BUSY;
1105
1106	blksmoved = 0;
1107	last = listp->hfl_next + HFC_FILESPERSYNC;
1108	if (last > listp->hfl_count)
1109		last = listp->hfl_count;
1110
1111	bzero(iterator, sizeof(*iterator));
1112	key = (HotFileKey*) &iterator->key;
1113	key->keyLength = HFC_KEYLENGTH;
1114
1115	record.bufferAddress = &data;
1116	record.itemSize = sizeof(u_int32_t);
1117	record.itemCount = 1;
1118
1119	filefork = VTOF(hfsmp->hfc_filevp);
1120
1121	for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) {
1122		/*
1123		 * Skip invalid entries (already in hot area).
1124		 */
1125		if (listp->hfl_hotfile[i].hf_temperature == 0) {
1126				listp->hfl_next++;
1127				continue;
1128		}
1129		/*
1130		 * Acquire a vnode for this file.
1131		 */
1132		error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0);
1133		if (error) {
1134			if (error == ENOENT) {
1135				error = 0;
1136				listp->hfl_next++;
1137				continue;  /* stale entry, go to next */
1138			}
1139			break;
1140		}
1141		if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
1142			printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
1143			hfs_unlock(VTOC(vp));
1144			vnode_put(vp);
1145			listp->hfl_hotfile[i].hf_temperature = 0;
1146			listp->hfl_next++;
1147			continue;  /* stale entry, go to next */
1148		}
1149		if (hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
1150			hfs_unlock(VTOC(vp));
1151			vnode_put(vp);
1152			listp->hfl_hotfile[i].hf_temperature = 0;
1153			listp->hfl_next++;
1154			listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
1155			continue;  /* stale entry, go to next */
1156		}
1157		fileblocks = VTOF(vp)->ff_blocks;
1158		if (fileblocks > hfsmp->hfs_hotfile_freeblks) {
1159			hfs_unlock(VTOC(vp));
1160			vnode_put(vp);
1161			listp->hfl_next++;
1162			listp->hfl_totalblocks -= fileblocks;
1163			continue;  /* entry too big, go to next */
1164		}
1165
1166		if ((blksmoved > 0) &&
1167		    (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
1168			hfs_unlock(VTOC(vp));
1169			vnode_put(vp);
1170			break;  /* adopt this entry the next time around */
1171		}
1172		if (VTOC(vp)->c_desc.cd_nameptr)
1173			data = *(const u_int32_t *)(VTOC(vp)->c_desc.cd_nameptr);
1174		else
1175			data = 0x3f3f3f3f;
1176
1177		error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc());
1178		hfs_unlock(VTOC(vp));
1179		vnode_put(vp);
1180		if (error) {
1181			/* Move on to next item. */
1182			listp->hfl_next++;
1183			continue;
1184		}
1185		/* Keep hot file free space current. */
1186		hfsmp->hfs_hotfile_freeblks -= fileblocks;
1187		listp->hfl_totalblocks -= fileblocks;
1188
1189		/* Insert hot file entry */
1190		key->keyLength   = HFC_KEYLENGTH;
1191		key->temperature = listp->hfl_hotfile[i].hf_temperature;
1192		key->fileID      = listp->hfl_hotfile[i].hf_fileid;
1193		key->forkType    = 0;
1194
1195		/* Start a new transaction before calling BTree code. */
1196		if (hfs_start_transaction(hfsmp) != 0) {
1197		    error = EINVAL;
1198		    break;
1199		}
1200		startedtrans = 1;
1201
1202		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1203		if (error) {
1204			printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
1205			error = MacToVFSError(error);
1206			stage = HFC_IDLE;
1207			break;
1208		}
1209
1210		/* Insert thread record */
1211		key->keyLength = HFC_KEYLENGTH;
1212		key->temperature = HFC_LOOKUPTAG;
1213		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1214		key->forkType = 0;
1215		data = listp->hfl_hotfile[i].hf_temperature;
1216		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1217		if (error) {
1218			printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
1219			error = MacToVFSError(error);
1220			stage = HFC_IDLE;
1221			break;
1222		}
1223		(void) BTFlushPath(filefork);
1224
1225		/* Transaction complete. */
1226		if (startedtrans) {
1227		    hfs_end_transaction(hfsmp);
1228		    startedtrans = 0;
1229		}
1230
1231		blksmoved += fileblocks;
1232		listp->hfl_next++;
1233		if (listp->hfl_next >= listp->hfl_count) {
1234			break;
1235		}
1236		if (hfsmp->hfs_hotfile_freeblks <= 0) {
1237#if HFC_VERBOSE
1238			printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks);
1239#endif
1240			break;
1241		}
1242	} /* end for */
1243
1244#if HFC_VERBOSE
1245	printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks);
1246#endif
1247	/* Finish any outstanding transactions. */
1248	if (startedtrans) {
1249		(void) BTFlushPath(filefork);
1250		hfs_end_transaction(hfsmp);
1251		startedtrans = 0;
1252	}
1253	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1254
1255	if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) {
1256#if HFC_VERBOSE
1257		printf("hfs: hotfiles_adopt: all done relocating %d files\n", listp->hfl_count);
1258		printf("hfs: hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
1259#endif
1260		stage = HFC_IDLE;
1261	}
1262	FREE(iterator, M_TEMP);
1263
1264	if (stage != HFC_ADOPTION && hfsmp->hfc_filevp) {
1265		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
1266		hfsmp->hfc_filevp = NULL;
1267	}
1268	hfsmp->hfc_stage = stage;
1269	wakeup((caddr_t)&hfsmp->hfc_stage);
1270	return (error);
1271}
1272
1273/*
1274 * Reclaim space by evicting the coldest files.
1275 *
1276 * Requires that the hfc_mutex be held.
1277 */
1278static int
1279hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
1280{
1281	BTreeIterator * iterator = NULL;
1282	struct vnode *vp;
1283	HotFileKey * key;
1284	filefork_t * filefork;
1285	hotfilelist_t  *listp;
1286	enum hfc_stage stage;
1287	u_int32_t savedtemp;
1288	int  blksmoved;
1289	int  filesmoved;
1290	int  fileblocks;
1291	int  error = 0;
1292	int  startedtrans = 0;
1293	int  bt_op;
1294
1295	if (hfsmp->hfc_stage != HFC_EVICTION) {
1296		return (EBUSY);
1297	}
1298
1299	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
1300		return (0);
1301
1302	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
1303		return (EPERM);
1304	}
1305
1306	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1307	if (iterator == NULL) {
1308		hfs_unlock(VTOC(hfsmp->hfc_filevp));
1309		return (ENOMEM);
1310	}
1311
1312	stage = hfsmp->hfc_stage;
1313	hfsmp->hfc_stage = HFC_BUSY;
1314
1315	filesmoved = blksmoved = 0;
1316	bt_op = kBTreeFirstRecord;
1317
1318	bzero(iterator, sizeof(*iterator));
1319	key = (HotFileKey*) &iterator->key;
1320
1321	filefork = VTOF(hfsmp->hfc_filevp);
1322
1323	while (listp->hfl_reclaimblks > 0 &&
1324	       blksmoved < HFC_BLKSPERSYNC &&
1325	       filesmoved < HFC_FILESPERSYNC) {
1326
1327		/*
1328		 * Obtain the first record (ie the coldest one).
1329		 */
1330		if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
1331#if HFC_VERBOSE
1332			printf("hfs: hotfiles_evict: no more records\n");
1333#endif
1334			error = 0;
1335			stage = HFC_ADOPTION;
1336			break;
1337		}
1338		if (key->keyLength != HFC_KEYLENGTH) {
1339			printf("hfs: hotfiles_evict: invalid key length %d\n", key->keyLength);
1340			error = EFTYPE;
1341			break;
1342		}
1343		if (key->temperature == HFC_LOOKUPTAG) {
1344#if HFC_VERBOSE
1345			printf("hfs: hotfiles_evict: ran into thread records\n");
1346#endif
1347			error = 0;
1348			stage = HFC_ADOPTION;
1349			break;
1350		}
1351		/*
1352		 * Aquire the vnode for this file.
1353		 */
1354		error = hfs_vget(hfsmp, key->fileID, &vp, 0, 0);
1355		if (error) {
1356			if (error == ENOENT) {
1357				goto delete;  /* stale entry, go to next */
1358			} else {
1359				printf("hfs: hotfiles_evict: err %d getting file %d\n",
1360				       error, key->fileID);
1361			}
1362			break;
1363		}
1364		if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
1365			printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
1366			hfs_unlock(VTOC(vp));
1367			vnode_put(vp);
1368			goto delete;  /* invalid entry, go to next */
1369		}
1370		fileblocks = VTOF(vp)->ff_blocks;
1371		if ((blksmoved > 0) &&
1372		    (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
1373			hfs_unlock(VTOC(vp));
1374			vnode_put(vp);
1375			break;
1376		}
1377		/*
1378		 * Make sure file is in the hot area.
1379		 */
1380		if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
1381#if HFC_VERBOSE
1382			printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID);
1383#endif
1384			hfs_unlock(VTOC(vp));
1385			vnode_put(vp);
1386			goto delete;  /* stale entry, go to next */
1387		}
1388
1389		/*
1390		 * Relocate file out of hot area.
1391		 */
1392		error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx));
1393		if (error) {
1394			printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID);
1395			hfs_unlock(VTOC(vp));
1396			vnode_put(vp);
1397			bt_op = kBTreeNextRecord;
1398			goto next;  /* go to next */
1399		}
1400
1401		//
1402		// We do not believe that this call to hfs_fsync() is
1403		// necessary and it causes a journal transaction
1404		// deadlock so we are removing it.
1405		//
1406		// (void) hfs_fsync(vp, MNT_WAIT, 0, p);
1407
1408		hfs_unlock(VTOC(vp));
1409		vnode_put(vp);
1410
1411		hfsmp->hfs_hotfile_freeblks += fileblocks;
1412		listp->hfl_reclaimblks -= fileblocks;
1413		if (listp->hfl_reclaimblks < 0)
1414			listp->hfl_reclaimblks = 0;
1415		blksmoved += fileblocks;
1416		filesmoved++;
1417delete:
1418		/* Start a new transaction before calling BTree code. */
1419		if (hfs_start_transaction(hfsmp) != 0) {
1420		    error = EINVAL;
1421		    break;
1422		}
1423		startedtrans = 1;
1424
1425		error = BTDeleteRecord(filefork, iterator);
1426		if (error) {
1427			error = MacToVFSError(error);
1428			break;
1429		}
1430		savedtemp = key->temperature;
1431		key->temperature = HFC_LOOKUPTAG;
1432		error = BTDeleteRecord(filefork, iterator);
1433		if (error) {
1434			error = MacToVFSError(error);
1435			break;
1436		}
1437		key->temperature = savedtemp;
1438next:
1439		(void) BTFlushPath(filefork);
1440
1441		/* Transaction complete. */
1442		if (startedtrans) {
1443			hfs_end_transaction(hfsmp);
1444			startedtrans = 0;
1445		}
1446
1447	} /* end while */
1448
1449#if HFC_VERBOSE
1450	printf("hfs: hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks);
1451#endif
1452	/* Finish any outstanding transactions. */
1453	if (startedtrans) {
1454		(void) BTFlushPath(filefork);
1455		hfs_end_transaction(hfsmp);
1456		startedtrans = 0;
1457	}
1458	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1459
1460	/*
1461	 * Move to next stage when finished.
1462	 */
1463	if (listp->hfl_reclaimblks <= 0) {
1464		stage = HFC_ADOPTION;
1465#if HFC_VERBOSE
1466		printf("hfs: hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
1467#endif
1468	}
1469	FREE(iterator, M_TEMP);
1470	hfsmp->hfc_stage = stage;
1471	wakeup((caddr_t)&hfsmp->hfc_stage);
1472	return (error);
1473}
1474
1475/*
1476 * Age the existing records in the hot files b-tree.
1477 */
1478static int
1479hotfiles_age(struct hfsmount *hfsmp)
1480{
1481	BTreeInfoRec  btinfo;
1482	BTreeIterator * iterator = NULL;
1483	BTreeIterator * prev_iterator;
1484	FSBufferDescriptor  record;
1485	FSBufferDescriptor  prev_record;
1486	HotFileKey * key;
1487	HotFileKey * prev_key;
1488	filefork_t * filefork;
1489	u_int32_t  data;
1490	u_int32_t  prev_data;
1491	u_int32_t  newtemp;
1492	int  error;
1493	int  i;
1494	int  numrecs;
1495	int  aged = 0;
1496	u_int16_t  reclen;
1497
1498
1499	MALLOC(iterator, BTreeIterator *, 2 * sizeof(*iterator), M_TEMP, M_WAITOK);
1500	if (iterator == NULL) {
1501		error = ENOMEM;
1502		goto out2;
1503	}
1504	bzero(iterator, 2 * sizeof(*iterator));
1505	key = (HotFileKey*) &iterator->key;
1506
1507	prev_iterator = &iterator[1];
1508	prev_key = (HotFileKey*) &prev_iterator->key;
1509
1510	record.bufferAddress = &data;
1511	record.itemSize = sizeof(data);
1512	record.itemCount = 1;
1513	prev_record.bufferAddress = &prev_data;
1514	prev_record.itemSize = sizeof(prev_data);
1515	prev_record.itemCount = 1;
1516
1517	/*
1518	 * Capture b-tree changes inside a transaction
1519	 */
1520	if (hfs_start_transaction(hfsmp) != 0) {
1521	    error = EINVAL;
1522	    goto out2;
1523	}
1524	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) {
1525		error = EPERM;
1526		goto out1;
1527	}
1528	filefork = VTOF(hfsmp->hfc_filevp);
1529
1530	error = BTGetInformation(filefork, 0, &btinfo);
1531	if (error) {
1532		error = MacToVFSError(error);
1533		goto out;
1534	}
1535	if (btinfo.numRecords < 2) {
1536		error = 0;
1537		goto out;
1538	}
1539
1540	/* Only want 1st half of leaf records */
1541	numrecs = (btinfo.numRecords /= 2) - 1;
1542
1543	error = BTIterateRecord(filefork, kBTreeFirstRecord, iterator, &record, &reclen);
1544	if (error) {
1545		printf("hfs_agehotfiles: BTIterateRecord: %d\n", error);
1546		error = MacToVFSError(error);
1547		goto out;
1548	}
1549	bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
1550	prev_data = data;
1551
1552	for (i = 0; i < numrecs; ++i) {
1553		error = BTIterateRecord(filefork, kBTreeNextRecord, iterator, &record, &reclen);
1554		if (error == 0) {
1555			if (key->temperature < prev_key->temperature) {
1556				printf("hfs_agehotfiles: out of order keys!\n");
1557				error = EFTYPE;
1558				break;
1559			}
1560			if (reclen != sizeof(data)) {
1561				printf("hfs_agehotfiles: invalid record length %d\n", reclen);
1562				error = EFTYPE;
1563				break;
1564			}
1565			if (key->keyLength != HFC_KEYLENGTH) {
1566				printf("hfs_agehotfiles: invalid key length %d\n", key->keyLength);
1567				error = EFTYPE;
1568				break;
1569			}
1570		} else if ((error == fsBTEndOfIterationErr || error == fsBTRecordNotFoundErr) &&
1571		    (i == (numrecs - 1))) {
1572			error = 0;
1573		} else if (error) {
1574			printf("hfs_agehotfiles: %d of %d BTIterateRecord: %d\n", i, numrecs, error);
1575			error = MacToVFSError(error);
1576			break;
1577		}
1578		if (prev_key->temperature == HFC_LOOKUPTAG) {
1579#if HFC_VERBOSE
1580			printf("hfs_agehotfiles: ran into thread record\n");
1581#endif
1582			error = 0;
1583			break;
1584		}
1585		error = BTDeleteRecord(filefork, prev_iterator);
1586		if (error) {
1587			printf("hfs_agehotfiles: BTDeleteRecord failed %d (file %d)\n", error, prev_key->fileID);
1588			error = MacToVFSError(error);
1589			break;
1590		}
1591
1592		/* Age by halving the temperature (floor = 4) */
1593		newtemp = MAX(prev_key->temperature >> 1, 4);
1594		prev_key->temperature = newtemp;
1595
1596		error = BTInsertRecord(filefork, prev_iterator, &prev_record, prev_record.itemSize);
1597		if (error) {
1598			printf("hfs_agehotfiles: BTInsertRecord failed %d (file %d)\n", error, prev_key->fileID);
1599			error = MacToVFSError(error);
1600			break;
1601		}
1602		++aged;
1603		/*
1604		 * Update thread entry with latest temperature.
1605		 */
1606		prev_key->temperature = HFC_LOOKUPTAG;
1607		error = BTUpdateRecord(filefork, prev_iterator,
1608				(IterateCallBackProcPtr)update_callback,
1609				&newtemp);
1610		if (error) {
1611			printf("hfs_agehotfiles: %d of %d BTUpdateRecord failed %d (file %d, %d)\n",
1612				i, numrecs, error, prev_key->fileID, newtemp);
1613			error = MacToVFSError(error);
1614		//	break;
1615		}
1616
1617		bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
1618		prev_data = data;
1619
1620	} /* end for */
1621
1622#if HFC_VERBOSE
1623	if (error == 0)
1624		printf("hfs_agehotfiles: aged %d records out of %d\n", aged, btinfo.numRecords);
1625#endif
1626	(void) BTFlushPath(filefork);
1627out:
1628	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1629out1:
1630	hfs_end_transaction(hfsmp);
1631out2:
1632	if (iterator)
1633		FREE(iterator, M_TEMP);
1634	return (error);
1635}
1636
1637/*
1638 * Return true if any blocks (or all blocks if all is true)
1639 * are contained in the hot file region.
1640 */
1641static int
1642hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents)
1643{
1644	u_int32_t  b1, b2;
1645	int  i;
1646	int  inside = 0;
1647
1648	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
1649		b1 = extents[i].startBlock;
1650		if (b1 == 0)
1651			break;
1652		b2 = b1 + extents[i].blockCount - 1;
1653		if ((b1 >= hfsmp->hfs_hotfile_start &&
1654		     b2 <= hfsmp->hfs_hotfile_end) ||
1655		    (b1 < hfsmp->hfs_hotfile_end &&
1656		     b2 > hfsmp->hfs_hotfile_end)) {
1657			inside = 1;
1658			break;
1659		}
1660	}
1661	return (inside);
1662}
1663
1664
1665/*
1666 *========================================================================
1667 *                       HOT FILE B-TREE ROUTINES
1668 *========================================================================
1669 */
1670
1671/*
1672 * Open the hot files b-tree for writing.
1673 *
1674 * On successful exit the vnode has a reference but not an iocount.
1675 */
1676static int
1677hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
1678{
1679	proc_t p;
1680	struct vnode *vp;
1681	struct cat_desc  cdesc;
1682	struct cat_attr  cattr;
1683	struct cat_fork  cfork;
1684	static char filename[] = HFC_FILENAME;
1685	int  error;
1686	int  retry = 0;
1687	int lockflags;
1688	int newvnode_flags = 0;
1689
1690	*vpp = NULL;
1691	p = current_proc();
1692
1693	bzero(&cdesc, sizeof(cdesc));
1694	cdesc.cd_parentcnid = kRootDirID;
1695	cdesc.cd_nameptr = (const u_int8_t *)filename;
1696	cdesc.cd_namelen = strlen(filename);
1697
1698	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1699
1700	error = cat_lookup(hfsmp, &cdesc, 0, &cdesc, &cattr, &cfork, NULL);
1701
1702	hfs_systemfile_unlock(hfsmp, lockflags);
1703
1704	if (error) {
1705		printf("hfs: hfc_btree_open: cat_lookup error %d\n", error);
1706		return (error);
1707	}
1708again:
1709	cdesc.cd_flags |= CD_ISMETA;
1710	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr,
1711							&cfork, &vp, &newvnode_flags);
1712	if (error) {
1713		printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error);
1714		cat_releasedesc(&cdesc);
1715		return (error);
1716	}
1717	if (!vnode_issystem(vp)) {
1718#if HFC_VERBOSE
1719		printf("hfs: hfc_btree_open: file has UBC, try again\n");
1720#endif
1721		hfs_unlock(VTOC(vp));
1722		vnode_recycle(vp);
1723		vnode_put(vp);
1724		if (retry++ == 0)
1725			goto again;
1726		else
1727			return (EBUSY);
1728	}
1729
1730	/* Open the B-tree file for writing... */
1731	error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys);
1732	if (error) {
1733		printf("hfs: hfc_btree_open: BTOpenPath error %d\n", error);
1734		error = MacToVFSError(error);
1735	}
1736
1737	hfs_unlock(VTOC(vp));
1738	if (error == 0) {
1739		*vpp = vp;
1740		vnode_ref(vp);  /* keep a reference while its open */
1741	}
1742	vnode_put(vp);
1743
1744	if (!vnode_issystem(vp))
1745		panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp);
1746
1747	return (error);
1748}
1749
1750/*
1751 * Close the hot files b-tree.
1752 *
1753 * On entry the vnode has a reference.
1754 */
1755static int
1756hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
1757{
1758	proc_t p = current_proc();
1759	int  error = 0;
1760
1761
1762	if (hfsmp->jnl) {
1763	    hfs_journal_flush(hfsmp, FALSE);
1764	}
1765
1766	if (vnode_get(vp) == 0) {
1767		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1768		if (error == 0) {
1769			(void) hfs_fsync(vp, MNT_WAIT, 0, p);
1770			error = BTClosePath(VTOF(vp));
1771			hfs_unlock(VTOC(vp));
1772		}
1773		vnode_rele(vp);
1774		vnode_recycle(vp);
1775		vnode_put(vp);
1776	}
1777
1778	return (error);
1779}
1780
1781/*
1782 *  Create a hot files btree file.
1783 *
1784 */
1785static int
1786hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int entries)
1787{
1788	struct vnode *dvp = NULL;
1789	struct vnode *vp = NULL;
1790	struct cnode *cp = NULL;
1791	vfs_context_t ctx = vfs_context_current();
1792	struct vnode_attr va;
1793	struct componentname cname;
1794	static char filename[] = HFC_FILENAME;
1795	int  error;
1796
1797	if (hfsmp->hfc_filevp)
1798		panic("hfs: hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
1799
1800	error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx);
1801	if (error) {
1802		return (error);
1803	}
1804	cname.cn_nameiop = CREATE;
1805	cname.cn_flags = ISLASTCN;
1806	cname.cn_context = ctx;
1807	cname.cn_pnbuf = filename;
1808	cname.cn_pnlen = sizeof(filename);
1809	cname.cn_nameptr = filename;
1810	cname.cn_namelen = strlen(filename);
1811	cname.cn_hash = 0;
1812	cname.cn_consume = 0;
1813
1814	VATTR_INIT(&va);
1815	VATTR_SET(&va, va_type, VREG);
1816	VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR);
1817	VATTR_SET(&va, va_uid, 0);
1818	VATTR_SET(&va, va_gid, 0);
1819
1820	if (hfs_start_transaction(hfsmp) != 0) {
1821	    error = EINVAL;
1822	    goto out;
1823	}
1824
1825	/* call ourselves directly, ignore the higher-level VFS file creation code */
1826	error = VNOP_CREATE(dvp, &vp, &cname, &va, ctx);
1827	if (error) {
1828		printf("hfs: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1829		goto out;
1830	}
1831	if (dvp) {
1832		vnode_put(dvp);
1833		dvp = NULL;
1834	}
1835	if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
1836		goto out;
1837	}
1838	cp = VTOC(vp);
1839
1840	/* Don't use non-regular files or files with links. */
1841	if (!vnode_isreg(vp) || cp->c_linkcount != 1) {
1842		error = EFTYPE;
1843		goto out;
1844	}
1845
1846	printf("hfs: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN);
1847
1848	if (VTOF(vp)->ff_size < nodesize) {
1849		caddr_t  buffer;
1850		u_int16_t *index;
1851		u_int16_t  offset;
1852		BTNodeDescriptor  *ndp;
1853		BTHeaderRec  *bthp;
1854		HotFilesInfo *hotfileinfo;
1855		int  nodecnt;
1856		int  filesize;
1857		int  entirespernode;
1858
1859		/*
1860		 * Mark it invisible (truncate will pull these changes).
1861		 */
1862		((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |=
1863			SWAP_BE16 (kIsInvisible + kNameLocked);
1864
1865		if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize)) {
1866			error = ENOMEM;
1867			goto out;
1868		}
1869		bzero(buffer, nodesize);
1870		index = (u_int16_t *)buffer;
1871
1872		entirespernode = (nodesize - sizeof(BTNodeDescriptor) - 2) /
1873				 (sizeof(HotFileKey) + 6);
1874		nodecnt = 2 + howmany(entries * 2, entirespernode);
1875		nodecnt = roundup(nodecnt, 8);
1876		filesize = nodecnt * nodesize;
1877
1878		/* FILL IN THE NODE DESCRIPTOR:  */
1879		ndp = (BTNodeDescriptor *)buffer;
1880		ndp->kind = kBTHeaderNode;
1881		ndp->numRecords = SWAP_BE16 (3);
1882		offset = sizeof(BTNodeDescriptor);
1883		index[(nodesize / 2) - 1] = SWAP_BE16 (offset);
1884
1885		/* FILL IN THE HEADER RECORD:  */
1886		bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
1887		bthp->nodeSize     = SWAP_BE16 (nodesize);
1888		bthp->totalNodes   = SWAP_BE32 (filesize / nodesize);
1889		bthp->freeNodes    = SWAP_BE32 (nodecnt - 1);
1890		bthp->clumpSize    = SWAP_BE32 (filesize);
1891		bthp->btreeType    = kUserBTreeType; /* non-metadata */
1892		bthp->attributes  |= SWAP_BE32 (kBTBigKeysMask);
1893		bthp->maxKeyLength = SWAP_BE16 (HFC_KEYLENGTH);
1894		offset += sizeof(BTHeaderRec);
1895		index[(nodesize / 2) - 2] = SWAP_BE16 (offset);
1896
1897		/* FILL IN THE USER RECORD:  */
1898		hotfileinfo = (HotFilesInfo *)((u_int8_t *)buffer + offset);
1899		hotfileinfo->magic       = SWAP_BE32 (HFC_MAGIC);
1900		hotfileinfo->version     = SWAP_BE32 (HFC_VERSION);
1901		hotfileinfo->duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
1902		hotfileinfo->timebase    = 0;
1903		hotfileinfo->timeleft    = 0;
1904		hotfileinfo->threshold   = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
1905		hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
1906		hotfileinfo->maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
1907		strlcpy((char *)hotfileinfo->tag, hfc_tag,
1908			sizeof hotfileinfo->tag);
1909		offset += kBTreeHeaderUserBytes;
1910		index[(nodesize / 2) - 3] = SWAP_BE16 (offset);
1911
1912		/* FILL IN THE MAP RECORD (only one node in use). */
1913		*((u_int8_t *)buffer + offset) = 0x80;
1914		offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
1915				   - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
1916		index[(nodesize / 2) - 4] = SWAP_BE16 (offset);
1917
1918		vnode_setnoflush(vp);
1919		error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, 0, ctx);
1920		if (error) {
1921			printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1922			goto out;
1923		}
1924		cp->c_flag |= C_ZFWANTSYNC;
1925		cp->c_zftimeout = 1;
1926
1927		if (error == 0) {
1928			struct vnop_write_args args;
1929			uio_t auio;
1930
1931			auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
1932			uio_addiov(auio, (uintptr_t)buffer, nodesize);
1933
1934			args.a_desc = &vnop_write_desc;
1935			args.a_vp = vp;
1936			args.a_uio = auio;
1937			args.a_ioflag = 0;
1938			args.a_context = ctx;
1939
1940			hfs_unlock(cp);
1941			cp = NULL;
1942
1943			error = hfs_vnop_write(&args);
1944			if (error)
1945				printf("hfs: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1946
1947			uio_free(auio);
1948		}
1949		kmem_free(kernel_map, (vm_offset_t)buffer, nodesize);
1950	}
1951out:
1952	hfs_end_transaction(hfsmp);
1953	if (dvp) {
1954		vnode_put(dvp);
1955	}
1956	if (vp) {
1957		if (cp)
1958			hfs_unlock(cp);
1959		vnode_recycle(vp);
1960		vnode_put(vp);
1961	}
1962	return (error);
1963}
1964
1965/*
1966 * Compare two hot file b-tree keys.
1967 *
1968 * Result:   +n  search key > trial key
1969 *            0  search key = trial key
1970 *           -n  search key < trial key
1971 */
1972static int
1973hfc_comparekeys(HotFileKey *searchKey, HotFileKey *trialKey)
1974{
1975	/*
1976	 * Compared temperatures first.
1977	 */
1978	if (searchKey->temperature == trialKey->temperature) {
1979		/*
1980		 * Temperatures are equal so compare file ids.
1981		 */
1982		if (searchKey->fileID == trialKey->fileID) {
1983			/*
1984			 * File ids are equal so compare fork types.
1985			 */
1986			if (searchKey->forkType == trialKey->forkType) {
1987				return (0);
1988			} else if (searchKey->forkType > trialKey->forkType) {
1989				return (1);
1990			}
1991		} else if (searchKey->fileID > trialKey->fileID) {
1992			return (1);
1993		}
1994	} else if (searchKey->temperature > trialKey->temperature) {
1995		return (1);
1996	}
1997
1998	return (-1);
1999}
2000
2001
2002/*
2003 *========================================================================
2004 *               HOT FILE DATA COLLECTING ROUTINES
2005 *========================================================================
2006 */
2007
2008/*
2009 * Lookup a hot file entry in the tree.
2010 */
2011#if HFC_DEBUG
2012static hotfile_entry_t *
2013hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
2014{
2015	hotfile_entry_t *entry = hotdata->rootentry;
2016
2017	while (entry &&
2018	       entry->temperature != temperature &&
2019	       entry->fileid != fileid) {
2020
2021		if (temperature > entry->temperature)
2022			entry = entry->right;
2023		else if (temperature < entry->temperature)
2024			entry = entry->left;
2025		else if (fileid > entry->fileid)
2026			entry = entry->right;
2027		else
2028			entry = entry->left;
2029	}
2030	return (entry);
2031}
2032#endif
2033
2034/*
2035 * Insert a hot file entry into the tree.
2036 */
2037static void
2038hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry)
2039{
2040	hotfile_entry_t *entry = hotdata->rootentry;
2041	u_int32_t fileid = newentry->fileid;
2042	u_int32_t temperature = newentry->temperature;
2043
2044	if (entry == NULL) {
2045		hotdata->rootentry = newentry;
2046		hotdata->coldest = newentry;
2047		hotdata->activefiles++;
2048		return;
2049	}
2050
2051	while (entry) {
2052		if (temperature > entry->temperature) {
2053			if (entry->right)
2054				entry = entry->right;
2055			else {
2056				entry->right = newentry;
2057				break;
2058			}
2059		} else if (temperature < entry->temperature) {
2060			if (entry->left)
2061				entry = entry->left;
2062			else {
2063			    	entry->left = newentry;
2064				break;
2065			}
2066		} else if (fileid > entry->fileid) {
2067			if (entry->right)
2068				entry = entry->right;
2069			else {
2070	       			if (entry->fileid != fileid)
2071					entry->right = newentry;
2072				break;
2073			}
2074		} else {
2075			if (entry->left)
2076				entry = entry->left;
2077			else {
2078	       			if (entry->fileid != fileid)
2079			    		entry->left = newentry;
2080				break;
2081			}
2082		}
2083	}
2084
2085	hotdata->activefiles++;
2086}
2087
2088/*
2089 * Find the coldest entry in the tree.
2090 */
2091static hotfile_entry_t *
2092hf_coldest(hotfile_data_t *hotdata)
2093{
2094	hotfile_entry_t *entry = hotdata->rootentry;
2095
2096	if (entry) {
2097		while (entry->left)
2098			entry = entry->left;
2099	}
2100	return (entry);
2101}
2102
2103/*
2104 * Find the hottest entry in the tree.
2105 */
2106static hotfile_entry_t *
2107hf_hottest(hotfile_data_t *hotdata)
2108{
2109	hotfile_entry_t *entry = hotdata->rootentry;
2110
2111	if (entry) {
2112		while (entry->right)
2113			entry = entry->right;
2114	}
2115	return (entry);
2116}
2117
2118/*
2119 * Delete a hot file entry from the tree.
2120 */
2121static void
2122hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
2123{
2124	hotfile_entry_t *entry, *parent, *next;
2125
2126	parent = NULL;
2127	entry = hotdata->rootentry;
2128
2129	while (entry &&
2130	       entry->temperature != temperature &&
2131	       entry->fileid != fileid) {
2132
2133		parent = entry;
2134		if (temperature > entry->temperature)
2135			entry = entry->right;
2136		else if (temperature < entry->temperature)
2137			entry = entry->left;
2138		else if (fileid > entry->fileid)
2139			entry = entry->right;
2140		else
2141			entry = entry->left;
2142	}
2143
2144	if (entry) {
2145		/*
2146		 * Reorginize the sub-trees spanning from our entry.
2147		 */
2148		if ((next = entry->right)) {
2149			hotfile_entry_t *pnextl, *psub;
2150			/*
2151			 * Tree pruning: take the left branch of the
2152			 * current entry and place it at the lowest
2153			 * left branch of the current right branch
2154			 */
2155			psub = next;
2156
2157			/* Walk the Right/Left sub tree from current entry */
2158			while ((pnextl = psub->left))
2159				psub = pnextl;
2160
2161			/* Plug the old left tree to the new ->Right leftmost entry */
2162			psub->left = entry->left;
2163
2164		} else /* only left sub-tree, simple case */ {
2165			next = entry->left;
2166		}
2167		/*
2168		 * Now, plug the current entry sub tree to
2169		 * the good pointer of our parent entry.
2170		 */
2171		if (parent == NULL)
2172			hotdata->rootentry = next;
2173		else if (parent->left == entry)
2174			parent->left = next;
2175		else
2176			parent->right = next;
2177
2178		/* Place entry back on the free-list */
2179		entry->left = 0;
2180		entry->fileid = 0;
2181		entry->temperature = 0;
2182
2183		entry->right = hotdata->freelist;
2184		hotdata->freelist = entry;
2185		hotdata->activefiles--;
2186
2187		if (hotdata->coldest == entry || hotdata->coldest == NULL) {
2188			hotdata->coldest = hf_coldest(hotdata);
2189		}
2190
2191	}
2192}
2193
2194/*
2195 * Get a free hot file entry.
2196 */
2197static hotfile_entry_t *
2198hf_getnewentry(hotfile_data_t *hotdata)
2199{
2200	hotfile_entry_t * entry;
2201
2202	/*
2203	 * When the free list is empty then steal the coldest one
2204	 */
2205	if (hotdata->freelist == NULL) {
2206		entry = hf_coldest(hotdata);
2207		hf_delete(hotdata, entry->fileid, entry->temperature);
2208	}
2209	entry = hotdata->freelist;
2210	hotdata->freelist = entry->right;
2211	entry->right = 0;
2212
2213	return (entry);
2214}
2215
2216
2217/*
2218 * Generate a sorted list of hot files (hottest to coldest).
2219 *
2220 * As a side effect, every node in the hot file tree will be
2221 * deleted (moved to the free list).
2222 */
2223static void
2224hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist)
2225{
2226	int i = 0;
2227	hotfile_entry_t *entry;
2228
2229	while ((entry = hf_hottest(hotdata)) != NULL) {
2230		sortedlist->hfl_hotfile[i].hf_fileid = entry->fileid;
2231		sortedlist->hfl_hotfile[i].hf_temperature = entry->temperature;
2232		sortedlist->hfl_hotfile[i].hf_blocks = entry->blocks;
2233		sortedlist->hfl_totalblocks += entry->blocks;
2234		++i;
2235
2236		hf_delete(hotdata, entry->fileid, entry->temperature);
2237	}
2238
2239	sortedlist->hfl_count = i;
2240
2241#if HFC_VERBOSE
2242	printf("hfs: hf_getsortedlist returned %d entries\n", i);
2243#endif
2244}
2245
2246
2247#if HFC_DEBUG
2248static void
2249hf_maxdepth(hotfile_entry_t * root, int depth, int *maxdepth)
2250{
2251	if (root) {
2252		depth++;
2253		if (depth > *maxdepth)
2254			*maxdepth = depth;
2255		hf_maxdepth(root->left, depth, maxdepth);
2256		hf_maxdepth(root->right, depth, maxdepth);
2257	}
2258}
2259
2260static void
2261hf_printtree(hotfile_entry_t * root)
2262{
2263	if (root) {
2264		hf_printtree(root->left);
2265		printf("hfs: temperature: % 8d, fileid %d\n", root->temperature, root->fileid);
2266		hf_printtree(root->right);
2267	}
2268}
2269#endif
2270