1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21243674Smm
22168404Spjd/*
23219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24265751Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
25168404Spjd */
26168404Spjd
27168404Spjd#include <stdio.h>
28249643Smm#include <unistd.h>
29168404Spjd#include <stdio_ext.h>
30168404Spjd#include <stdlib.h>
31168404Spjd#include <ctype.h>
32168404Spjd#include <sys/zfs_context.h>
33168404Spjd#include <sys/spa.h>
34168404Spjd#include <sys/spa_impl.h>
35168404Spjd#include <sys/dmu.h>
36168404Spjd#include <sys/zap.h>
37168404Spjd#include <sys/fs/zfs.h>
38168404Spjd#include <sys/zfs_znode.h>
39219089Spjd#include <sys/zfs_sa.h>
40219089Spjd#include <sys/sa.h>
41219089Spjd#include <sys/sa_impl.h>
42168404Spjd#include <sys/vdev.h>
43168404Spjd#include <sys/vdev_impl.h>
44168404Spjd#include <sys/metaslab_impl.h>
45168404Spjd#include <sys/dmu_objset.h>
46168404Spjd#include <sys/dsl_dir.h>
47168404Spjd#include <sys/dsl_dataset.h>
48168404Spjd#include <sys/dsl_pool.h>
49168404Spjd#include <sys/dbuf.h>
50168404Spjd#include <sys/zil.h>
51168404Spjd#include <sys/zil_impl.h>
52168404Spjd#include <sys/stat.h>
53168404Spjd#include <sys/resource.h>
54168404Spjd#include <sys/dmu_traverse.h>
55168404Spjd#include <sys/zio_checksum.h>
56168404Spjd#include <sys/zio_compress.h>
57185029Spjd#include <sys/zfs_fuid.h>
58208047Smm#include <sys/arc.h>
59219089Spjd#include <sys/ddt.h>
60243674Smm#include <sys/zfeature.h>
61249643Smm#include <zfs_comutil.h>
62185029Spjd#undef ZFS_MAXNAMELEN
63185029Spjd#undef verify
64185029Spjd#include <libzfs.h>
65168404Spjd
/*
 * Table-lookup helpers.  Each of the three *_NAME macros turns an index
 * into a printable name and yields "UNKNOWN" when the index does not fall
 * inside the table it would otherwise be looked up in.
 */
#define	ZDB_COMPRESS_NAME(idx)						\
	((idx) < ZIO_COMPRESS_FUNCTIONS ?				\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx)						\
	((idx) < ZIO_CHECKSUM_FUNCTIONS ?				\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
/*
 * Object types below DMU_OT_NUMTYPES are named via dmu_ot[]; other types
 * that satisfy DMU_OT_IS_VALID() are named after their byteswap function.
 */
#define	ZDB_OT_NAME(idx)						\
	((idx) < DMU_OT_NUMTYPES ?					\
	dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?			\
	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
/*
 * Reduce an object type to a value in [0, DMU_OT_NUMTYPES]: types below
 * DMU_OT_NUMTYPES map to themselves, DMU_OTN_ZAP_DATA and
 * DMU_OTN_ZAP_METADATA map to DMU_OT_ZAP_OTHER, and anything else maps to
 * DMU_OT_NUMTYPES.
 */
#define	ZDB_OT_TYPE(idx)						\
	((idx) < DMU_OT_NUMTYPES ? (idx) :				\
	(((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ?	\
	DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
76219089Spjd
77219089Spjd#ifndef lint
78219089Spjdextern int zfs_recover;
79219089Spjd#else
80219089Spjdint zfs_recover;
81219089Spjd#endif
82219089Spjd
83168404Spjdconst char cmdname[] = "zdb";
84168404Spjduint8_t dump_opt[256];
85168404Spjd
86168404Spjdtypedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
87168404Spjd
88168404Spjdextern void dump_intent_log(zilog_t *);
89168404Spjduint64_t *zopt_object = NULL;
90168404Spjdint zopt_objects = 0;
91185029Spjdlibzfs_handle_t *g_zfs;
92263394Sdelphijuint64_t max_inflight = 200;
93168404Spjd
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to be used as the $UMEM_DEBUG setting.
 *
 * Declared with an explicit (void) parameter list so that this is a real
 * prototype, matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
103168404Spjd
/*
 * Returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char logging_setting[] = "fail,contents";

	return (logging_setting);
}
109168404Spjd
110168404Spjdstatic void
111168404Spjdusage(void)
112168404Spjd{
113168404Spjd	(void) fprintf(stderr,
114263394Sdelphij	    "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
115263394Sdelphij	    "[-U config] [-M inflight I/Os] poolname [object...]\n"
116263394Sdelphij	    "       %s [-divPA] [-e -p path...] [-U config] dataset "
117263394Sdelphij	    "[object...]\n"
118263394Sdelphij	    "       %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
119263394Sdelphij	    "poolname [vdev [metaslab...]]\n"
120263394Sdelphij	    "       %s -R [-A] [-e [-p path...]] poolname "
121263394Sdelphij	    "vdev:offset:size[:flags]\n"
122263394Sdelphij	    "       %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
123263394Sdelphij	    "       %s -l [-uA] device\n"
124263394Sdelphij	    "       %s -C [-A] [-U config]\n\n",
125219089Spjd	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
126168404Spjd
127219089Spjd	(void) fprintf(stderr, "    Dataset name must include at least one "
128219089Spjd	    "separator character '/' or '@'\n");
129219089Spjd	(void) fprintf(stderr, "    If dataset name is specified, only that "
130219089Spjd	    "dataset is dumped\n");
131219089Spjd	(void) fprintf(stderr, "    If object numbers are specified, only "
132219089Spjd	    "those objects are dumped\n\n");
133219089Spjd	(void) fprintf(stderr, "    Options to control amount of output:\n");
134219089Spjd	(void) fprintf(stderr, "        -u uberblock\n");
135219089Spjd	(void) fprintf(stderr, "        -d dataset(s)\n");
136219089Spjd	(void) fprintf(stderr, "        -i intent logs\n");
137219089Spjd	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
138219089Spjd	(void) fprintf(stderr, "        -h pool history\n");
139219089Spjd	(void) fprintf(stderr, "        -b block statistics\n");
140219089Spjd	(void) fprintf(stderr, "        -m metaslabs\n");
141219089Spjd	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
142209962Smm	    "all data) blocks\n");
143219089Spjd	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
144219089Spjd	(void) fprintf(stderr, "        -D dedup statistics\n");
145219089Spjd	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
146219089Spjd	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
147168404Spjd	(void) fprintf(stderr, "        -l dump label contents\n");
148209962Smm	(void) fprintf(stderr, "        -L disable leak tracking (do not "
149209962Smm	    "load spacemaps)\n");
150219089Spjd	(void) fprintf(stderr, "        -R read and display block from a "
151219089Spjd	    "device\n\n");
152219089Spjd	(void) fprintf(stderr, "    Below options are intended for use "
153219089Spjd	    "with other options (except -l):\n");
154219089Spjd	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
155219089Spjd	    "panic recovery (-AA) or both (-AAA)\n");
156219089Spjd	(void) fprintf(stderr, "        -F attempt automatic rewind within "
157219089Spjd	    "safe range of transaction groups\n");
158219089Spjd	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
159185029Spjd	    "cachefile\n");
160219089Spjd	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
161219089Spjd	    "work with dataset)\n");
162219089Spjd	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
163219089Spjd	    "has altroot/not in a cachefile\n");
164219089Spjd	(void) fprintf(stderr, "        -p <path> -- use one or more with "
165219089Spjd	    "-e to specify path to vdev dir\n");
166235701Smm	(void) fprintf(stderr, "	-P print numbers in parseable form\n");
167219089Spjd	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
168209962Smm	    "searching for uberblocks\n");
169263394Sdelphij	(void) fprintf(stderr, "        -M <number of inflight I/Os> -- "
170263394Sdelphij	    "specify the maximum number of checksumming I/Os [default is 200]");
171168404Spjd	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
172168404Spjd	    "to make only that option verbose\n");
173168404Spjd	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
174168404Spjd	exit(1);
175168404Spjd}
176168404Spjd
177209962Smm/*
178209962Smm * Called for usage errors that are discovered after a call to spa_open(),
179209962Smm * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
180209962Smm */
181209962Smm
182168404Spjdstatic void
183168404Spjdfatal(const char *fmt, ...)
184168404Spjd{
185168404Spjd	va_list ap;
186168404Spjd
187168404Spjd	va_start(ap, fmt);
188168404Spjd	(void) fprintf(stderr, "%s: ", cmdname);
189168404Spjd	(void) vfprintf(stderr, fmt, ap);
190168404Spjd	va_end(ap);
191168404Spjd	(void) fprintf(stderr, "\n");
192168404Spjd
193209962Smm	exit(1);
194168404Spjd}
195168404Spjd
196168404Spjd/* ARGSUSED */
197168404Spjdstatic void
198168404Spjddump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
199168404Spjd{
200168404Spjd	nvlist_t *nv;
201168404Spjd	size_t nvsize = *(uint64_t *)data;
202168404Spjd	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
203168404Spjd
204209962Smm	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
205168404Spjd
206168404Spjd	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
207168404Spjd
208168404Spjd	umem_free(packed, nvsize);
209168404Spjd
210168404Spjd	dump_nvlist(nv, 8);
211168404Spjd
212168404Spjd	nvlist_free(nv);
213168404Spjd}
214168404Spjd
215249643Smm/* ARGSUSED */
216219089Spjdstatic void
217249643Smmdump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
218249643Smm{
219249643Smm	spa_history_phys_t *shp = data;
220249643Smm
221249643Smm	if (shp == NULL)
222249643Smm		return;
223249643Smm
224249643Smm	(void) printf("\t\tpool_create_len = %llu\n",
225249643Smm	    (u_longlong_t)shp->sh_pool_create_len);
226249643Smm	(void) printf("\t\tphys_max_off = %llu\n",
227249643Smm	    (u_longlong_t)shp->sh_phys_max_off);
228249643Smm	(void) printf("\t\tbof = %llu\n",
229249643Smm	    (u_longlong_t)shp->sh_bof);
230249643Smm	(void) printf("\t\teof = %llu\n",
231249643Smm	    (u_longlong_t)shp->sh_eof);
232249643Smm	(void) printf("\t\trecords_lost = %llu\n",
233249643Smm	    (u_longlong_t)shp->sh_records_lost);
234249643Smm}
235249643Smm
236249643Smmstatic void
237219089Spjdzdb_nicenum(uint64_t num, char *buf)
238219089Spjd{
239219089Spjd	if (dump_opt['P'])
240219089Spjd		(void) sprintf(buf, "%llu", (longlong_t)num);
241219089Spjd	else
242219089Spjd		nicenum(num, buf);
243219089Spjd}
244219089Spjd
245249643Smmconst char histo_stars[] = "****************************************";
246249643Smmconst int histo_width = sizeof (histo_stars) - 1;
247168404Spjd
248168404Spjdstatic void
249262094Savgdump_histogram(const uint64_t *histo, int size, int offset)
250168404Spjd{
251168404Spjd	int i;
252249643Smm	int minidx = size - 1;
253168404Spjd	int maxidx = 0;
254168404Spjd	uint64_t max = 0;
255168404Spjd
256249643Smm	for (i = 0; i < size; i++) {
257168404Spjd		if (histo[i] > max)
258168404Spjd			max = histo[i];
259168404Spjd		if (histo[i] > 0 && i > maxidx)
260168404Spjd			maxidx = i;
261168404Spjd		if (histo[i] > 0 && i < minidx)
262168404Spjd			minidx = i;
263168404Spjd	}
264168404Spjd
265249643Smm	if (max < histo_width)
266249643Smm		max = histo_width;
267168404Spjd
268249643Smm	for (i = minidx; i <= maxidx; i++) {
269249643Smm		(void) printf("\t\t\t%3u: %6llu %s\n",
270262094Savg		    i + offset, (u_longlong_t)histo[i],
271249643Smm		    &histo_stars[(max - histo[i]) * histo_width / max]);
272249643Smm	}
273168404Spjd}
274168404Spjd
275168404Spjdstatic void
276168404Spjddump_zap_stats(objset_t *os, uint64_t object)
277168404Spjd{
278168404Spjd	int error;
279168404Spjd	zap_stats_t zs;
280168404Spjd
281168404Spjd	error = zap_get_stats(os, object, &zs);
282168404Spjd	if (error)
283168404Spjd		return;
284168404Spjd
285168404Spjd	if (zs.zs_ptrtbl_len == 0) {
286168404Spjd		ASSERT(zs.zs_num_blocks == 1);
287168404Spjd		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
288168404Spjd		    (u_longlong_t)zs.zs_blocksize,
289168404Spjd		    (u_longlong_t)zs.zs_num_entries);
290168404Spjd		return;
291168404Spjd	}
292168404Spjd
293168404Spjd	(void) printf("\tFat ZAP stats:\n");
294168404Spjd
295168404Spjd	(void) printf("\t\tPointer table:\n");
296168404Spjd	(void) printf("\t\t\t%llu elements\n",
297168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_len);
298168404Spjd	(void) printf("\t\t\tzt_blk: %llu\n",
299168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
300168404Spjd	(void) printf("\t\t\tzt_numblks: %llu\n",
301168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
302168404Spjd	(void) printf("\t\t\tzt_shift: %llu\n",
303168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
304168404Spjd	(void) printf("\t\t\tzt_blks_copied: %llu\n",
305168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
306168404Spjd	(void) printf("\t\t\tzt_nextblk: %llu\n",
307168404Spjd	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
308168404Spjd
309168404Spjd	(void) printf("\t\tZAP entries: %llu\n",
310168404Spjd	    (u_longlong_t)zs.zs_num_entries);
311168404Spjd	(void) printf("\t\tLeaf blocks: %llu\n",
312168404Spjd	    (u_longlong_t)zs.zs_num_leafs);
313168404Spjd	(void) printf("\t\tTotal blocks: %llu\n",
314168404Spjd	    (u_longlong_t)zs.zs_num_blocks);
315168404Spjd	(void) printf("\t\tzap_block_type: 0x%llx\n",
316168404Spjd	    (u_longlong_t)zs.zs_block_type);
317168404Spjd	(void) printf("\t\tzap_magic: 0x%llx\n",
318168404Spjd	    (u_longlong_t)zs.zs_magic);
319168404Spjd	(void) printf("\t\tzap_salt: 0x%llx\n",
320168404Spjd	    (u_longlong_t)zs.zs_salt);
321168404Spjd
322168404Spjd	(void) printf("\t\tLeafs with 2^n pointers:\n");
323262094Savg	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
324168404Spjd
325168404Spjd	(void) printf("\t\tBlocks with n*5 entries:\n");
326262094Savg	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
327168404Spjd
328168404Spjd	(void) printf("\t\tBlocks n/10 full:\n");
329262094Savg	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
330168404Spjd
331168404Spjd	(void) printf("\t\tEntries with n chunks:\n");
332262094Savg	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
333168404Spjd
334168404Spjd	(void) printf("\t\tBuckets with n entries:\n");
335262094Savg	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
336168404Spjd}
337168404Spjd
338168404Spjd/*ARGSUSED*/
339168404Spjdstatic void
340168404Spjddump_none(objset_t *os, uint64_t object, void *data, size_t size)
341168404Spjd{
342168404Spjd}
343168404Spjd
344168404Spjd/*ARGSUSED*/
345219089Spjdstatic void
346219089Spjddump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
347219089Spjd{
348219089Spjd	(void) printf("\tUNKNOWN OBJECT TYPE\n");
349219089Spjd}
350219089Spjd
351219089Spjd/*ARGSUSED*/
352168404Spjdvoid
353168404Spjddump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
354168404Spjd{
355168404Spjd}
356168404Spjd
357168404Spjd/*ARGSUSED*/
358168404Spjdstatic void
359168404Spjddump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
360168404Spjd{
361168404Spjd}
362168404Spjd
363168404Spjd/*ARGSUSED*/
364168404Spjdstatic void
365168404Spjddump_zap(objset_t *os, uint64_t object, void *data, size_t size)
366168404Spjd{
367168404Spjd	zap_cursor_t zc;
368168404Spjd	zap_attribute_t attr;
369168404Spjd	void *prop;
370168404Spjd	int i;
371168404Spjd
372168404Spjd	dump_zap_stats(os, object);
373168404Spjd	(void) printf("\n");
374168404Spjd
375168404Spjd	for (zap_cursor_init(&zc, os, object);
376168404Spjd	    zap_cursor_retrieve(&zc, &attr) == 0;
377168404Spjd	    zap_cursor_advance(&zc)) {
378168404Spjd		(void) printf("\t\t%s = ", attr.za_name);
379168404Spjd		if (attr.za_num_integers == 0) {
380168404Spjd			(void) printf("\n");
381168404Spjd			continue;
382168404Spjd		}
383168404Spjd		prop = umem_zalloc(attr.za_num_integers *
384168404Spjd		    attr.za_integer_length, UMEM_NOFAIL);
385168404Spjd		(void) zap_lookup(os, object, attr.za_name,
386168404Spjd		    attr.za_integer_length, attr.za_num_integers, prop);
387168404Spjd		if (attr.za_integer_length == 1) {
388168404Spjd			(void) printf("%s", (char *)prop);
389168404Spjd		} else {
390168404Spjd			for (i = 0; i < attr.za_num_integers; i++) {
391168404Spjd				switch (attr.za_integer_length) {
392168404Spjd				case 2:
393168404Spjd					(void) printf("%u ",
394168404Spjd					    ((uint16_t *)prop)[i]);
395168404Spjd					break;
396168404Spjd				case 4:
397168404Spjd					(void) printf("%u ",
398168404Spjd					    ((uint32_t *)prop)[i]);
399168404Spjd					break;
400168404Spjd				case 8:
401168404Spjd					(void) printf("%lld ",
402168404Spjd					    (u_longlong_t)((int64_t *)prop)[i]);
403168404Spjd					break;
404168404Spjd				}
405168404Spjd			}
406168404Spjd		}
407168404Spjd		(void) printf("\n");
408168404Spjd		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
409168404Spjd	}
410168404Spjd	zap_cursor_fini(&zc);
411168404Spjd}
412168404Spjd
413185029Spjd/*ARGSUSED*/
414168404Spjdstatic void
415219089Spjddump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
416219089Spjd{
417219089Spjd	dump_zap_stats(os, object);
418219089Spjd	/* contents are printed elsewhere, properly decoded */
419219089Spjd}
420219089Spjd
421219089Spjd/*ARGSUSED*/
422219089Spjdstatic void
423219089Spjddump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
424219089Spjd{
425219089Spjd	zap_cursor_t zc;
426219089Spjd	zap_attribute_t attr;
427219089Spjd
428219089Spjd	dump_zap_stats(os, object);
429219089Spjd	(void) printf("\n");
430219089Spjd
431219089Spjd	for (zap_cursor_init(&zc, os, object);
432219089Spjd	    zap_cursor_retrieve(&zc, &attr) == 0;
433219089Spjd	    zap_cursor_advance(&zc)) {
434219089Spjd		(void) printf("\t\t%s = ", attr.za_name);
435219089Spjd		if (attr.za_num_integers == 0) {
436219089Spjd			(void) printf("\n");
437219089Spjd			continue;
438219089Spjd		}
439219089Spjd		(void) printf(" %llx : [%d:%d:%d]\n",
440219089Spjd		    (u_longlong_t)attr.za_first_integer,
441219089Spjd		    (int)ATTR_LENGTH(attr.za_first_integer),
442219089Spjd		    (int)ATTR_BSWAP(attr.za_first_integer),
443219089Spjd		    (int)ATTR_NUM(attr.za_first_integer));
444219089Spjd	}
445219089Spjd	zap_cursor_fini(&zc);
446219089Spjd}
447219089Spjd
448219089Spjd/*ARGSUSED*/
449219089Spjdstatic void
450219089Spjddump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
451219089Spjd{
452219089Spjd	zap_cursor_t zc;
453219089Spjd	zap_attribute_t attr;
454219089Spjd	uint16_t *layout_attrs;
455219089Spjd	int i;
456219089Spjd
457219089Spjd	dump_zap_stats(os, object);
458219089Spjd	(void) printf("\n");
459219089Spjd
460219089Spjd	for (zap_cursor_init(&zc, os, object);
461219089Spjd	    zap_cursor_retrieve(&zc, &attr) == 0;
462219089Spjd	    zap_cursor_advance(&zc)) {
463219089Spjd		(void) printf("\t\t%s = [", attr.za_name);
464219089Spjd		if (attr.za_num_integers == 0) {
465219089Spjd			(void) printf("\n");
466219089Spjd			continue;
467219089Spjd		}
468219089Spjd
469219089Spjd		VERIFY(attr.za_integer_length == 2);
470219089Spjd		layout_attrs = umem_zalloc(attr.za_num_integers *
471219089Spjd		    attr.za_integer_length, UMEM_NOFAIL);
472219089Spjd
473219089Spjd		VERIFY(zap_lookup(os, object, attr.za_name,
474219089Spjd		    attr.za_integer_length,
475219089Spjd		    attr.za_num_integers, layout_attrs) == 0);
476219089Spjd
477219089Spjd		for (i = 0; i != attr.za_num_integers; i++)
478219089Spjd			(void) printf(" %d ", (int)layout_attrs[i]);
479219089Spjd		(void) printf("]\n");
480219089Spjd		umem_free(layout_attrs,
481219089Spjd		    attr.za_num_integers * attr.za_integer_length);
482219089Spjd	}
483219089Spjd	zap_cursor_fini(&zc);
484219089Spjd}
485219089Spjd
486219089Spjd/*ARGSUSED*/
487219089Spjdstatic void
488185029Spjddump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
489185029Spjd{
490185029Spjd	zap_cursor_t zc;
491185029Spjd	zap_attribute_t attr;
492185029Spjd	const char *typenames[] = {
493185029Spjd		/* 0 */ "not specified",
494185029Spjd		/* 1 */ "FIFO",
495185029Spjd		/* 2 */ "Character Device",
496185029Spjd		/* 3 */ "3 (invalid)",
497185029Spjd		/* 4 */ "Directory",
498185029Spjd		/* 5 */ "5 (invalid)",
499185029Spjd		/* 6 */ "Block Device",
500185029Spjd		/* 7 */ "7 (invalid)",
501185029Spjd		/* 8 */ "Regular File",
502185029Spjd		/* 9 */ "9 (invalid)",
503185029Spjd		/* 10 */ "Symbolic Link",
504185029Spjd		/* 11 */ "11 (invalid)",
505185029Spjd		/* 12 */ "Socket",
506185029Spjd		/* 13 */ "Door",
507185029Spjd		/* 14 */ "Event Port",
508185029Spjd		/* 15 */ "15 (invalid)",
509185029Spjd	};
510185029Spjd
511185029Spjd	dump_zap_stats(os, object);
512185029Spjd	(void) printf("\n");
513185029Spjd
514185029Spjd	for (zap_cursor_init(&zc, os, object);
515185029Spjd	    zap_cursor_retrieve(&zc, &attr) == 0;
516185029Spjd	    zap_cursor_advance(&zc)) {
517185029Spjd		(void) printf("\t\t%s = %lld (type: %s)\n",
518185029Spjd		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
519185029Spjd		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
520185029Spjd	}
521185029Spjd	zap_cursor_fini(&zc);
522185029Spjd}
523185029Spjd
524262094Savgint
525262094Savgget_dtl_refcount(vdev_t *vd)
526262094Savg{
527262094Savg	int refcount = 0;
528262094Savg
529262094Savg	if (vd->vdev_ops->vdev_op_leaf) {
530262094Savg		space_map_t *sm = vd->vdev_dtl_sm;
531262094Savg
532262094Savg		if (sm != NULL &&
533262094Savg		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
534262094Savg			return (1);
535262094Savg		return (0);
536262094Savg	}
537262094Savg
538262094Savg	for (int c = 0; c < vd->vdev_children; c++)
539262094Savg		refcount += get_dtl_refcount(vd->vdev_child[c]);
540262094Savg	return (refcount);
541262094Savg}
542262094Savg
543262094Savgint
544262094Savgget_metaslab_refcount(vdev_t *vd)
545262094Savg{
546262094Savg	int refcount = 0;
547262094Savg
548262094Savg	if (vd->vdev_top == vd) {
549262094Savg		for (int m = 0; m < vd->vdev_ms_count; m++) {
550262094Savg			space_map_t *sm = vd->vdev_ms[m]->ms_sm;
551262094Savg
552262094Savg			if (sm != NULL &&
553262094Savg			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
554262094Savg				refcount++;
555262094Savg		}
556262094Savg	}
557262094Savg	for (int c = 0; c < vd->vdev_children; c++)
558262094Savg		refcount += get_metaslab_refcount(vd->vdev_child[c]);
559262094Savg
560262094Savg	return (refcount);
561262094Savg}
562262094Savg
563262094Savgstatic int
564262094Savgverify_spacemap_refcounts(spa_t *spa)
565262094Savg{
566263391Sdelphij	uint64_t expected_refcount = 0;
567263391Sdelphij	uint64_t actual_refcount;
568262094Savg
569263391Sdelphij	(void) feature_get_refcount(spa,
570263391Sdelphij	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
571263391Sdelphij	    &expected_refcount);
572262094Savg	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
573262094Savg	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
574262094Savg
575262094Savg	if (expected_refcount != actual_refcount) {
576263391Sdelphij		(void) printf("space map refcount mismatch: expected %lld != "
577263391Sdelphij		    "actual %lld\n",
578263391Sdelphij		    (longlong_t)expected_refcount,
579263391Sdelphij		    (longlong_t)actual_refcount);
580262094Savg		return (2);
581262094Savg	}
582262094Savg	return (0);
583262094Savg}
584262094Savg
585185029Spjdstatic void
586262094Savgdump_spacemap(objset_t *os, space_map_t *sm)
587168404Spjd{
588168404Spjd	uint64_t alloc, offset, entry;
589168404Spjd	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
590168404Spjd			    "INVALID", "INVALID", "INVALID", "INVALID" };
591168404Spjd
592262094Savg	if (sm == NULL)
593168404Spjd		return;
594168404Spjd
595168404Spjd	/*
596168404Spjd	 * Print out the freelist entries in both encoded and decoded form.
597168404Spjd	 */
598168404Spjd	alloc = 0;
599262094Savg	for (offset = 0; offset < space_map_length(sm);
600262094Savg	    offset += sizeof (entry)) {
601262094Savg		uint8_t mapshift = sm->sm_shift;
602262094Savg
603262094Savg		VERIFY0(dmu_read(os, space_map_object(sm), offset,
604209962Smm		    sizeof (entry), &entry, DMU_READ_PREFETCH));
605168404Spjd		if (SM_DEBUG_DECODE(entry)) {
606262094Savg
607219089Spjd			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
608168404Spjd			    (u_longlong_t)(offset / sizeof (entry)),
609168404Spjd			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
610168404Spjd			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
611168404Spjd			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
612168404Spjd		} else {
613219089Spjd			(void) printf("\t    [%6llu]    %c  range:"
614219089Spjd			    " %010llx-%010llx  size: %06llx\n",
615168404Spjd			    (u_longlong_t)(offset / sizeof (entry)),
616168404Spjd			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
617168404Spjd			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
618262094Savg			    mapshift) + sm->sm_start),
619168404Spjd			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
620262094Savg			    mapshift) + sm->sm_start +
621262094Savg			    (SM_RUN_DECODE(entry) << mapshift)),
622168404Spjd			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
623168404Spjd			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
624168404Spjd				alloc += SM_RUN_DECODE(entry) << mapshift;
625168404Spjd			else
626168404Spjd				alloc -= SM_RUN_DECODE(entry) << mapshift;
627168404Spjd		}
628168404Spjd	}
629262094Savg	if (alloc != space_map_allocated(sm)) {
630168404Spjd		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
631168404Spjd		    "with space map summary (%llu)\n",
632262094Savg		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
633168404Spjd	}
634168404Spjd}
635168404Spjd
636168404Spjdstatic void
637209962Smmdump_metaslab_stats(metaslab_t *msp)
638209962Smm{
639219089Spjd	char maxbuf[32];
640262094Savg	range_tree_t *rt = msp->ms_tree;
641262094Savg	avl_tree_t *t = &msp->ms_size_tree;
642262094Savg	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
643209962Smm
644262094Savg	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf);
645209962Smm
646219089Spjd	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
647209962Smm	    "segments", avl_numnodes(t), "maxsize", maxbuf,
648209962Smm	    "freepct", free_pct);
649262094Savg	(void) printf("\tIn-memory histogram:\n");
650262094Savg	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
651209962Smm}
652209962Smm
653209962Smmstatic void
654168404Spjddump_metaslab(metaslab_t *msp)
655168404Spjd{
656168404Spjd	vdev_t *vd = msp->ms_group->mg_vd;
657168404Spjd	spa_t *spa = vd->vdev_spa;
658262094Savg	space_map_t *sm = msp->ms_sm;
659219089Spjd	char freebuf[32];
660168404Spjd
661262094Savg	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf);
662168404Spjd
663168404Spjd	(void) printf(
664219089Spjd	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
665262094Savg	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
666262094Savg	    (u_longlong_t)space_map_object(sm), freebuf);
667168404Spjd
668262094Savg	if (dump_opt['m'] > 2 && !dump_opt['L']) {
669209962Smm		mutex_enter(&msp->ms_lock);
670262094Savg		metaslab_load_wait(msp);
671262094Savg		if (!msp->ms_loaded) {
672262094Savg			VERIFY0(metaslab_load(msp));
673262094Savg			range_tree_stat_verify(msp->ms_tree);
674262094Savg		}
675209962Smm		dump_metaslab_stats(msp);
676262094Savg		metaslab_unload(msp);
677209962Smm		mutex_exit(&msp->ms_lock);
678209962Smm	}
679168404Spjd
680262094Savg	if (dump_opt['m'] > 1 && sm != NULL &&
681263391Sdelphij	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
682262094Savg		/*
683262094Savg		 * The space map histogram represents free space in chunks
684262094Savg		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
685262094Savg		 */
686262094Savg		(void) printf("\tOn-disk histogram:\n");
687262094Savg		dump_histogram(sm->sm_phys->smp_histogram,
688262094Savg		    SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift);
689262094Savg	}
690209962Smm
691262094Savg	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
692262094Savg		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
693262094Savg
694209962Smm		mutex_enter(&msp->ms_lock);
695262094Savg		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
696209962Smm		mutex_exit(&msp->ms_lock);
697209962Smm	}
698168404Spjd}
699168404Spjd
700168404Spjdstatic void
701219089Spjdprint_vdev_metaslab_header(vdev_t *vd)
702219089Spjd{
703219089Spjd	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
704219089Spjd	    (u_longlong_t)vd->vdev_id,
705219089Spjd	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
706219089Spjd	    "offset", "spacemap", "free");
707219089Spjd	(void) printf("\t%15s   %19s   %15s   %10s\n",
708219089Spjd	    "---------------", "-------------------",
709219089Spjd	    "---------------", "-------------");
710219089Spjd}
711219089Spjd
712219089Spjdstatic void
713168404Spjddump_metaslabs(spa_t *spa)
714168404Spjd{
715219089Spjd	vdev_t *vd, *rvd = spa->spa_root_vdev;
716219089Spjd	uint64_t m, c = 0, children = rvd->vdev_children;
717168404Spjd
718168404Spjd	(void) printf("\nMetaslabs:\n");
719168404Spjd
720219089Spjd	if (!dump_opt['d'] && zopt_objects > 0) {
721219089Spjd		c = zopt_object[0];
722219089Spjd
723219089Spjd		if (c >= children)
724219089Spjd			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
725219089Spjd
726219089Spjd		if (zopt_objects > 1) {
727219089Spjd			vd = rvd->vdev_child[c];
728219089Spjd			print_vdev_metaslab_header(vd);
729219089Spjd
730219089Spjd			for (m = 1; m < zopt_objects; m++) {
731219089Spjd				if (zopt_object[m] < vd->vdev_ms_count)
732219089Spjd					dump_metaslab(
733219089Spjd					    vd->vdev_ms[zopt_object[m]]);
734219089Spjd				else
735219089Spjd					(void) fprintf(stderr, "bad metaslab "
736219089Spjd					    "number %llu\n",
737219089Spjd					    (u_longlong_t)zopt_object[m]);
738219089Spjd			}
739219089Spjd			(void) printf("\n");
740219089Spjd			return;
741219089Spjd		}
742219089Spjd		children = c + 1;
743219089Spjd	}
744219089Spjd	for (; c < children; c++) {
745168404Spjd		vd = rvd->vdev_child[c];
746219089Spjd		print_vdev_metaslab_header(vd);
747168404Spjd
748168404Spjd		for (m = 0; m < vd->vdev_ms_count; m++)
749168404Spjd			dump_metaslab(vd->vdev_ms[m]);
750168404Spjd		(void) printf("\n");
751168404Spjd	}
752168404Spjd}
753168404Spjd
754168404Spjdstatic void
755219089Spjddump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
756219089Spjd{
757219089Spjd	const ddt_phys_t *ddp = dde->dde_phys;
758219089Spjd	const ddt_key_t *ddk = &dde->dde_key;
759219089Spjd	char *types[4] = { "ditto", "single", "double", "triple" };
760219089Spjd	char blkbuf[BP_SPRINTF_LEN];
761219089Spjd	blkptr_t blk;
762219089Spjd
763219089Spjd	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
764219089Spjd		if (ddp->ddp_phys_birth == 0)
765219089Spjd			continue;
766219089Spjd		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
767263398Sdelphij		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
768219089Spjd		(void) printf("index %llx refcnt %llu %s %s\n",
769219089Spjd		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
770219089Spjd		    types[p], blkbuf);
771219089Spjd	}
772219089Spjd}
773219089Spjd
774219089Spjdstatic void
775219089Spjddump_dedup_ratio(const ddt_stat_t *dds)
776219089Spjd{
777219089Spjd	double rL, rP, rD, D, dedup, compress, copies;
778219089Spjd
779219089Spjd	if (dds->dds_blocks == 0)
780219089Spjd		return;
781219089Spjd
782219089Spjd	rL = (double)dds->dds_ref_lsize;
783219089Spjd	rP = (double)dds->dds_ref_psize;
784219089Spjd	rD = (double)dds->dds_ref_dsize;
785219089Spjd	D = (double)dds->dds_dsize;
786219089Spjd
787219089Spjd	dedup = rD / D;
788219089Spjd	compress = rL / rP;
789219089Spjd	copies = rD / rP;
790219089Spjd
791219089Spjd	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
792219089Spjd	    "dedup * compress / copies = %.2f\n\n",
793219089Spjd	    dedup, compress, copies, dedup * compress / copies);
794219089Spjd}
795219089Spjd
796219089Spjdstatic void
797219089Spjddump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
798219089Spjd{
799219089Spjd	char name[DDT_NAMELEN];
800219089Spjd	ddt_entry_t dde;
801219089Spjd	uint64_t walk = 0;
802219089Spjd	dmu_object_info_t doi;
803219089Spjd	uint64_t count, dspace, mspace;
804219089Spjd	int error;
805219089Spjd
806219089Spjd	error = ddt_object_info(ddt, type, class, &doi);
807219089Spjd
808219089Spjd	if (error == ENOENT)
809219089Spjd		return;
810219089Spjd	ASSERT(error == 0);
811219089Spjd
812246574Sdelphij	error = ddt_object_count(ddt, type, class, &count);
813246574Sdelphij	ASSERT(error == 0);
814246574Sdelphij	if (count == 0)
815219089Spjd		return;
816219089Spjd
817219089Spjd	dspace = doi.doi_physical_blocks_512 << 9;
818219089Spjd	mspace = doi.doi_fill_count * doi.doi_data_block_size;
819219089Spjd
820219089Spjd	ddt_object_name(ddt, type, class, name);
821219089Spjd
822219089Spjd	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
823219089Spjd	    name,
824219089Spjd	    (u_longlong_t)count,
825219089Spjd	    (u_longlong_t)(dspace / count),
826219089Spjd	    (u_longlong_t)(mspace / count));
827219089Spjd
828219089Spjd	if (dump_opt['D'] < 3)
829219089Spjd		return;
830219089Spjd
831219089Spjd	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
832219089Spjd
833219089Spjd	if (dump_opt['D'] < 4)
834219089Spjd		return;
835219089Spjd
836219089Spjd	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
837219089Spjd		return;
838219089Spjd
839219089Spjd	(void) printf("%s contents:\n\n", name);
840219089Spjd
841219089Spjd	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
842219089Spjd		dump_dde(ddt, &dde, walk);
843219089Spjd
844219089Spjd	ASSERT(error == ENOENT);
845219089Spjd
846219089Spjd	(void) printf("\n");
847219089Spjd}
848219089Spjd
849219089Spjdstatic void
850219089Spjddump_all_ddts(spa_t *spa)
851219089Spjd{
852219089Spjd	ddt_histogram_t ddh_total = { 0 };
853219089Spjd	ddt_stat_t dds_total = { 0 };
854219089Spjd
855219089Spjd	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
856219089Spjd		ddt_t *ddt = spa->spa_ddt[c];
857219089Spjd		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
858219089Spjd			for (enum ddt_class class = 0; class < DDT_CLASSES;
859219089Spjd			    class++) {
860219089Spjd				dump_ddt(ddt, type, class);
861219089Spjd			}
862219089Spjd		}
863219089Spjd	}
864219089Spjd
865219089Spjd	ddt_get_dedup_stats(spa, &dds_total);
866219089Spjd
867219089Spjd	if (dds_total.dds_blocks == 0) {
868219089Spjd		(void) printf("All DDTs are empty\n");
869219089Spjd		return;
870219089Spjd	}
871219089Spjd
872219089Spjd	(void) printf("\n");
873219089Spjd
874219089Spjd	if (dump_opt['D'] > 1) {
875219089Spjd		(void) printf("DDT histogram (aggregated over all DDTs):\n");
876219089Spjd		ddt_get_dedup_histogram(spa, &ddh_total);
877219089Spjd		zpool_dump_ddt(&dds_total, &ddh_total);
878219089Spjd	}
879219089Spjd
880219089Spjd	dump_dedup_ratio(&dds_total);
881219089Spjd}
882219089Spjd
883219089Spjdstatic void
884262094Savgdump_dtl_seg(void *arg, uint64_t start, uint64_t size)
885209962Smm{
886262094Savg	char *prefix = arg;
887209962Smm
888209962Smm	(void) printf("%s [%llu,%llu) length %llu\n",
889209962Smm	    prefix,
890209962Smm	    (u_longlong_t)start,
891209962Smm	    (u_longlong_t)(start + size),
892209962Smm	    (u_longlong_t)(size));
893209962Smm}
894209962Smm
895209962Smmstatic void
896168404Spjddump_dtl(vdev_t *vd, int indent)
897168404Spjd{
898209962Smm	spa_t *spa = vd->vdev_spa;
899209962Smm	boolean_t required;
900209962Smm	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
901209962Smm	char prefix[256];
902168404Spjd
903219089Spjd	spa_vdev_state_enter(spa, SCL_NONE);
904209962Smm	required = vdev_dtl_required(vd);
905209962Smm	(void) spa_vdev_state_exit(spa, NULL, 0);
906209962Smm
907168404Spjd	if (indent == 0)
908168404Spjd		(void) printf("\nDirty time logs:\n\n");
909168404Spjd
910209962Smm	(void) printf("\t%*s%s [%s]\n", indent, "",
911185029Spjd	    vd->vdev_path ? vd->vdev_path :
912209962Smm	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
913209962Smm	    required ? "DTL-required" : "DTL-expendable");
914168404Spjd
915209962Smm	for (int t = 0; t < DTL_TYPES; t++) {
916262094Savg		range_tree_t *rt = vd->vdev_dtl[t];
917262094Savg		if (range_tree_space(rt) == 0)
918209962Smm			continue;
919209962Smm		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
920209962Smm		    indent + 2, "", name[t]);
921262094Savg		mutex_enter(rt->rt_lock);
922262094Savg		range_tree_walk(rt, dump_dtl_seg, prefix);
923262094Savg		mutex_exit(rt->rt_lock);
924209962Smm		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
925262094Savg			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
926168404Spjd	}
927168404Spjd
928209962Smm	for (int c = 0; c < vd->vdev_children; c++)
929168404Spjd		dump_dtl(vd->vdev_child[c], indent + 4);
930168404Spjd}
931168404Spjd
932265040Sdelphij/* from spa_history.c: spa_history_create_obj() */
933265040Sdelphij#define	HIS_BUF_LEN_DEF	(128 << 10)
934265040Sdelphij#define	HIS_BUF_LEN_MAX	(1 << 30)
935265040Sdelphij
936219089Spjdstatic void
937219089Spjddump_history(spa_t *spa)
938219089Spjd{
939219089Spjd	nvlist_t **events = NULL;
940265040Sdelphij	char *buf = NULL;
941265040Sdelphij	uint64_t bufsize = HIS_BUF_LEN_DEF;
942219089Spjd	uint64_t resid, len, off = 0;
943219089Spjd	uint_t num = 0;
944219089Spjd	int error;
945219089Spjd	time_t tsec;
946219089Spjd	struct tm t;
947219089Spjd	char tbuf[30];
948219089Spjd	char internalstr[MAXPATHLEN];
949219089Spjd
950265040Sdelphij	if ((buf = malloc(bufsize)) == NULL)
951265040Sdelphij		(void) fprintf(stderr, "Unable to read history: "
952265040Sdelphij		    "out of memory\n");
953219089Spjd	do {
954265040Sdelphij		len = bufsize;
955219089Spjd
956219089Spjd		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
957219089Spjd			(void) fprintf(stderr, "Unable to read history: "
958219089Spjd			    "error %d\n", error);
959219089Spjd			return;
960219089Spjd		}
961219089Spjd
962219089Spjd		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
963219089Spjd			break;
964265040Sdelphij		off -= resid;
965219089Spjd
966265040Sdelphij		/*
967265040Sdelphij		 * If the history block is too big, double the buffer
968265040Sdelphij		 * size and try again.
969265040Sdelphij		 */
970265040Sdelphij		if (resid == len) {
971265040Sdelphij			free(buf);
972265040Sdelphij			buf = NULL;
973265040Sdelphij
974265040Sdelphij			bufsize <<= 1;
975265040Sdelphij			if ((bufsize >= HIS_BUF_LEN_MAX) ||
976265040Sdelphij			    ((buf = malloc(bufsize)) == NULL)) {
977265040Sdelphij				(void) fprintf(stderr, "Unable to read history: "
978265040Sdelphij				    "out of memory\n");
979265040Sdelphij				return;
980265040Sdelphij			}
981265040Sdelphij		}
982219089Spjd	} while (len != 0);
983265040Sdelphij	free(buf);
984219089Spjd
985219089Spjd	(void) printf("\nHistory:\n");
986219089Spjd	for (int i = 0; i < num; i++) {
987219089Spjd		uint64_t time, txg, ievent;
988219089Spjd		char *cmd, *intstr;
989249643Smm		boolean_t printed = B_FALSE;
990219089Spjd
991219089Spjd		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
992219089Spjd		    &time) != 0)
993249643Smm			goto next;
994219089Spjd		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
995219089Spjd		    &cmd) != 0) {
996219089Spjd			if (nvlist_lookup_uint64(events[i],
997219089Spjd			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
998249643Smm				goto next;
999219089Spjd			verify(nvlist_lookup_uint64(events[i],
1000219089Spjd			    ZPOOL_HIST_TXG, &txg) == 0);
1001219089Spjd			verify(nvlist_lookup_string(events[i],
1002219089Spjd			    ZPOOL_HIST_INT_STR, &intstr) == 0);
1003249643Smm			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1004249643Smm				goto next;
1005219089Spjd
1006219089Spjd			(void) snprintf(internalstr,
1007219089Spjd			    sizeof (internalstr),
1008219089Spjd			    "[internal %s txg:%lld] %s",
1009219089Spjd			    zfs_history_event_names[ievent], txg,
1010219089Spjd			    intstr);
1011219089Spjd			cmd = internalstr;
1012219089Spjd		}
1013219089Spjd		tsec = time;
1014219089Spjd		(void) localtime_r(&tsec, &t);
1015219089Spjd		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1016219089Spjd		(void) printf("%s %s\n", tbuf, cmd);
1017249643Smm		printed = B_TRUE;
1018249643Smm
1019249643Smmnext:
1020249643Smm		if (dump_opt['h'] > 1) {
1021249643Smm			if (!printed)
1022249643Smm				(void) printf("unrecognized record:\n");
1023249643Smm			dump_nvlist(events[i], 2);
1024249643Smm		}
1025219089Spjd	}
1026219089Spjd}
1027219089Spjd
1028168404Spjd/*ARGSUSED*/
1029168404Spjdstatic void
1030168404Spjddump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
1031168404Spjd{
1032168404Spjd}
1033168404Spjd
1034168404Spjdstatic uint64_t
1035219089Spjdblkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
1036168404Spjd{
1037219089Spjd	if (dnp == NULL) {
1038219089Spjd		ASSERT(zb->zb_level < 0);
1039219089Spjd		if (zb->zb_object == 0)
1040219089Spjd			return (zb->zb_blkid);
1041219089Spjd		return (zb->zb_blkid * BP_GET_LSIZE(bp));
1042219089Spjd	}
1043168404Spjd
1044219089Spjd	ASSERT(zb->zb_level >= 0);
1045219089Spjd
1046219089Spjd	return ((zb->zb_blkid <<
1047219089Spjd	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1048168404Spjd	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1049168404Spjd}
1050168404Spjd
1051168404Spjdstatic void
1052263398Sdelphijsnprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1053168404Spjd{
1054219089Spjd	const dva_t *dva = bp->blk_dva;
1055219089Spjd	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1056168404Spjd
1057249643Smm	if (dump_opt['b'] >= 6) {
1058263398Sdelphij		snprintf_blkptr(blkbuf, buflen, bp);
1059219089Spjd		return;
1060219089Spjd	}
1061219089Spjd
1062168404Spjd	blkbuf[0] = '\0';
1063168404Spjd
1064219089Spjd	for (int i = 0; i < ndvas; i++)
1065263398Sdelphij		(void) snprintf(blkbuf + strlen(blkbuf),
1066263398Sdelphij		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1067168404Spjd		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1068168404Spjd		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1069168404Spjd		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1070168404Spjd
1071263398Sdelphij	if (BP_IS_HOLE(bp)) {
1072263398Sdelphij		(void) snprintf(blkbuf + strlen(blkbuf),
1073263398Sdelphij		    buflen - strlen(blkbuf), "B=%llu",
1074263398Sdelphij		    (u_longlong_t)bp->blk_birth);
1075263398Sdelphij	} else {
1076263398Sdelphij		(void) snprintf(blkbuf + strlen(blkbuf),
1077263398Sdelphij		    buflen - strlen(blkbuf),
1078263398Sdelphij		    "%llxL/%llxP F=%llu B=%llu/%llu",
1079263398Sdelphij		    (u_longlong_t)BP_GET_LSIZE(bp),
1080263398Sdelphij		    (u_longlong_t)BP_GET_PSIZE(bp),
1081263398Sdelphij		    (u_longlong_t)bp->blk_fill,
1082263398Sdelphij		    (u_longlong_t)bp->blk_birth,
1083263398Sdelphij		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1084263398Sdelphij	}
1085168404Spjd}
1086168404Spjd
1087208047Smmstatic void
1088208047Smmprint_indirect(blkptr_t *bp, const zbookmark_t *zb,
1089208047Smm    const dnode_phys_t *dnp)
1090168404Spjd{
1091208047Smm	char blkbuf[BP_SPRINTF_LEN];
1092168404Spjd	int l;
1093168404Spjd
1094208047Smm	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1095208047Smm	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1096168404Spjd
1097219089Spjd	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1098168404Spjd
1099208047Smm	ASSERT(zb->zb_level >= 0);
1100168404Spjd
1101208047Smm	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1102208047Smm		if (l == zb->zb_level) {
1103208047Smm			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
1104208047Smm		} else {
1105208047Smm			(void) printf(" ");
1106168404Spjd		}
1107168404Spjd	}
1108168404Spjd
1109263398Sdelphij	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1110208047Smm	(void) printf("%s\n", blkbuf);
1111208047Smm}
1112208047Smm
1113208047Smmstatic int
1114208047Smmvisit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1115208047Smm    blkptr_t *bp, const zbookmark_t *zb)
1116208047Smm{
1117219089Spjd	int err = 0;
1118208047Smm
1119208047Smm	if (bp->blk_birth == 0)
1120208047Smm		return (0);
1121208047Smm
1122208047Smm	print_indirect(bp, zb, dnp);
1123208047Smm
1124263398Sdelphij	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1125208047Smm		uint32_t flags = ARC_WAIT;
1126208047Smm		int i;
1127208047Smm		blkptr_t *cbp;
1128208047Smm		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1129208047Smm		arc_buf_t *buf;
1130168404Spjd		uint64_t fill = 0;
1131168404Spjd
1132247406Smm		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1133208047Smm		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1134208047Smm		if (err)
1135208047Smm			return (err);
1136219089Spjd		ASSERT(buf->b_data);
1137168404Spjd
1138208047Smm		/* recursively visit blocks below this */
1139208047Smm		cbp = buf->b_data;
1140208047Smm		for (i = 0; i < epb; i++, cbp++) {
1141208047Smm			zbookmark_t czb;
1142168404Spjd
1143208047Smm			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1144208047Smm			    zb->zb_level - 1,
1145208047Smm			    zb->zb_blkid * epb + i);
1146208047Smm			err = visit_indirect(spa, dnp, cbp, &czb);
1147208047Smm			if (err)
1148208047Smm				break;
1149208047Smm			fill += cbp->blk_fill;
1150168404Spjd		}
1151209962Smm		if (!err)
1152209962Smm			ASSERT3U(fill, ==, bp->blk_fill);
1153208047Smm		(void) arc_buf_remove_ref(buf, &buf);
1154168404Spjd	}
1155168404Spjd
1156208047Smm	return (err);
1157168404Spjd}
1158168404Spjd
1159168404Spjd/*ARGSUSED*/
1160168404Spjdstatic void
1161208047Smmdump_indirect(dnode_t *dn)
1162168404Spjd{
1163208047Smm	dnode_phys_t *dnp = dn->dn_phys;
1164208047Smm	int j;
1165208047Smm	zbookmark_t czb;
1166168404Spjd
1167168404Spjd	(void) printf("Indirect blocks:\n");
1168168404Spjd
1169219089Spjd	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1170208047Smm	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1171208047Smm	for (j = 0; j < dnp->dn_nblkptr; j++) {
1172208047Smm		czb.zb_blkid = j;
1173219089Spjd		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1174208047Smm		    &dnp->dn_blkptr[j], &czb);
1175208047Smm	}
1176168404Spjd
1177168404Spjd	(void) printf("\n");
1178168404Spjd}
1179168404Spjd
1180168404Spjd/*ARGSUSED*/
1181168404Spjdstatic void
1182168404Spjddump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1183168404Spjd{
1184168404Spjd	dsl_dir_phys_t *dd = data;
1185168404Spjd	time_t crtime;
1186219089Spjd	char nice[32];
1187168404Spjd
1188168404Spjd	if (dd == NULL)
1189168404Spjd		return;
1190168404Spjd
1191185029Spjd	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1192168404Spjd
1193168404Spjd	crtime = dd->dd_creation_time;
1194168404Spjd	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1195168404Spjd	(void) printf("\t\thead_dataset_obj = %llu\n",
1196168404Spjd	    (u_longlong_t)dd->dd_head_dataset_obj);
1197168404Spjd	(void) printf("\t\tparent_dir_obj = %llu\n",
1198168404Spjd	    (u_longlong_t)dd->dd_parent_obj);
1199185029Spjd	(void) printf("\t\torigin_obj = %llu\n",
1200185029Spjd	    (u_longlong_t)dd->dd_origin_obj);
1201168404Spjd	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1202168404Spjd	    (u_longlong_t)dd->dd_child_dir_zapobj);
1203219089Spjd	zdb_nicenum(dd->dd_used_bytes, nice);
1204185029Spjd	(void) printf("\t\tused_bytes = %s\n", nice);
1205219089Spjd	zdb_nicenum(dd->dd_compressed_bytes, nice);
1206185029Spjd	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1207219089Spjd	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1208185029Spjd	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1209219089Spjd	zdb_nicenum(dd->dd_quota, nice);
1210185029Spjd	(void) printf("\t\tquota = %s\n", nice);
1211219089Spjd	zdb_nicenum(dd->dd_reserved, nice);
1212185029Spjd	(void) printf("\t\treserved = %s\n", nice);
1213168404Spjd	(void) printf("\t\tprops_zapobj = %llu\n",
1214168404Spjd	    (u_longlong_t)dd->dd_props_zapobj);
1215185029Spjd	(void) printf("\t\tdeleg_zapobj = %llu\n",
1216185029Spjd	    (u_longlong_t)dd->dd_deleg_zapobj);
1217185029Spjd	(void) printf("\t\tflags = %llx\n",
1218185029Spjd	    (u_longlong_t)dd->dd_flags);
1219185029Spjd
1220185029Spjd#define	DO(which) \
1221219089Spjd	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1222185029Spjd	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1223185029Spjd	DO(HEAD);
1224185029Spjd	DO(SNAP);
1225185029Spjd	DO(CHILD);
1226185029Spjd	DO(CHILD_RSRV);
1227185029Spjd	DO(REFRSRV);
1228185029Spjd#undef DO
1229168404Spjd}
1230168404Spjd
1231168404Spjd/*ARGSUSED*/
1232168404Spjdstatic void
1233168404Spjddump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1234168404Spjd{
1235168404Spjd	dsl_dataset_phys_t *ds = data;
1236168404Spjd	time_t crtime;
1237219089Spjd	char used[32], compressed[32], uncompressed[32], unique[32];
1238168404Spjd	char blkbuf[BP_SPRINTF_LEN];
1239168404Spjd
1240168404Spjd	if (ds == NULL)
1241168404Spjd		return;
1242168404Spjd
1243168404Spjd	ASSERT(size == sizeof (*ds));
1244168404Spjd	crtime = ds->ds_creation_time;
1245243674Smm	zdb_nicenum(ds->ds_referenced_bytes, used);
1246219089Spjd	zdb_nicenum(ds->ds_compressed_bytes, compressed);
1247219089Spjd	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1248219089Spjd	zdb_nicenum(ds->ds_unique_bytes, unique);
1249263398Sdelphij	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1250168404Spjd
1251185029Spjd	(void) printf("\t\tdir_obj = %llu\n",
1252168404Spjd	    (u_longlong_t)ds->ds_dir_obj);
1253168404Spjd	(void) printf("\t\tprev_snap_obj = %llu\n",
1254168404Spjd	    (u_longlong_t)ds->ds_prev_snap_obj);
1255168404Spjd	(void) printf("\t\tprev_snap_txg = %llu\n",
1256168404Spjd	    (u_longlong_t)ds->ds_prev_snap_txg);
1257168404Spjd	(void) printf("\t\tnext_snap_obj = %llu\n",
1258168404Spjd	    (u_longlong_t)ds->ds_next_snap_obj);
1259168404Spjd	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1260168404Spjd	    (u_longlong_t)ds->ds_snapnames_zapobj);
1261168404Spjd	(void) printf("\t\tnum_children = %llu\n",
1262168404Spjd	    (u_longlong_t)ds->ds_num_children);
1263219089Spjd	(void) printf("\t\tuserrefs_obj = %llu\n",
1264219089Spjd	    (u_longlong_t)ds->ds_userrefs_obj);
1265168404Spjd	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1266168404Spjd	(void) printf("\t\tcreation_txg = %llu\n",
1267168404Spjd	    (u_longlong_t)ds->ds_creation_txg);
1268168404Spjd	(void) printf("\t\tdeadlist_obj = %llu\n",
1269168404Spjd	    (u_longlong_t)ds->ds_deadlist_obj);
1270168404Spjd	(void) printf("\t\tused_bytes = %s\n", used);
1271168404Spjd	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1272168404Spjd	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1273168404Spjd	(void) printf("\t\tunique = %s\n", unique);
1274168404Spjd	(void) printf("\t\tfsid_guid = %llu\n",
1275168404Spjd	    (u_longlong_t)ds->ds_fsid_guid);
1276168404Spjd	(void) printf("\t\tguid = %llu\n",
1277168404Spjd	    (u_longlong_t)ds->ds_guid);
1278168404Spjd	(void) printf("\t\tflags = %llx\n",
1279168404Spjd	    (u_longlong_t)ds->ds_flags);
1280185029Spjd	(void) printf("\t\tnext_clones_obj = %llu\n",
1281185029Spjd	    (u_longlong_t)ds->ds_next_clones_obj);
1282185029Spjd	(void) printf("\t\tprops_obj = %llu\n",
1283185029Spjd	    (u_longlong_t)ds->ds_props_obj);
1284168404Spjd	(void) printf("\t\tbp = %s\n", blkbuf);
1285168404Spjd}
1286168404Spjd
1287219089Spjd/* ARGSUSED */
1288219089Spjdstatic int
1289243674Smmdump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1290243674Smm{
1291243674Smm	char blkbuf[BP_SPRINTF_LEN];
1292243674Smm
1293243674Smm	if (bp->blk_birth != 0) {
1294263398Sdelphij		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1295243674Smm		(void) printf("\t%s\n", blkbuf);
1296243674Smm	}
1297243674Smm	return (0);
1298243674Smm}
1299243674Smm
1300243674Smmstatic void
1301243674Smmdump_bptree(objset_t *os, uint64_t obj, char *name)
1302243674Smm{
1303243674Smm	char bytes[32];
1304243674Smm	bptree_phys_t *bt;
1305243674Smm	dmu_buf_t *db;
1306243674Smm
1307243674Smm	if (dump_opt['d'] < 3)
1308243674Smm		return;
1309243674Smm
1310243674Smm	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1311243674Smm	bt = db->db_data;
1312243674Smm	zdb_nicenum(bt->bt_bytes, bytes);
1313243674Smm	(void) printf("\n    %s: %llu datasets, %s\n",
1314243674Smm	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1315243674Smm	dmu_buf_rele(db, FTAG);
1316243674Smm
1317243674Smm	if (dump_opt['d'] < 5)
1318243674Smm		return;
1319243674Smm
1320243674Smm	(void) printf("\n");
1321243674Smm
1322243674Smm	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1323243674Smm}
1324243674Smm
1325243674Smm/* ARGSUSED */
1326243674Smmstatic int
1327219089Spjddump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1328219089Spjd{
1329219089Spjd	char blkbuf[BP_SPRINTF_LEN];
1330219089Spjd
1331219089Spjd	ASSERT(bp->blk_birth != 0);
1332263398Sdelphij	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1333219089Spjd	(void) printf("\t%s\n", blkbuf);
1334219089Spjd	return (0);
1335219089Spjd}
1336219089Spjd
1337168404Spjdstatic void
1338248369Smmdump_bpobj(bpobj_t *bpo, char *name, int indent)
1339168404Spjd{
1340219089Spjd	char bytes[32];
1341219089Spjd	char comp[32];
1342219089Spjd	char uncomp[32];
1343168404Spjd
1344168404Spjd	if (dump_opt['d'] < 3)
1345168404Spjd		return;
1346168404Spjd
1347219089Spjd	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1348248369Smm	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1349219089Spjd		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1350219089Spjd		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1351248369Smm		(void) printf("    %*s: object %llu, %llu local blkptrs, "
1352248369Smm		    "%llu subobjs, %s (%s/%s comp)\n",
1353248369Smm		    indent * 8, name,
1354248369Smm		    (u_longlong_t)bpo->bpo_object,
1355248369Smm		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1356219089Spjd		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1357168404Spjd		    bytes, comp, uncomp);
1358248369Smm
1359248369Smm		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1360248369Smm			uint64_t subobj;
1361248369Smm			bpobj_t subbpo;
1362248369Smm			int error;
1363248369Smm			VERIFY0(dmu_read(bpo->bpo_os,
1364248369Smm			    bpo->bpo_phys->bpo_subobjs,
1365248369Smm			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1366248369Smm			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1367248369Smm			if (error != 0) {
1368248369Smm				(void) printf("ERROR %u while trying to open "
1369248369Smm				    "subobj id %llu\n",
1370248369Smm				    error, (u_longlong_t)subobj);
1371248369Smm				continue;
1372248369Smm			}
1373248369Smm			dump_bpobj(&subbpo, "subobj", indent + 1);
1374248369Smm			bpobj_close(&subbpo);
1375248369Smm		}
1376168404Spjd	} else {
1377248369Smm		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1378248369Smm		    indent * 8, name,
1379248369Smm		    (u_longlong_t)bpo->bpo_object,
1380248369Smm		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1381248369Smm		    bytes);
1382168404Spjd	}
1383168404Spjd
1384219089Spjd	if (dump_opt['d'] < 5)
1385168404Spjd		return;
1386168404Spjd
1387168404Spjd
1388248369Smm	if (indent == 0) {
1389248369Smm		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1390248369Smm		(void) printf("\n");
1391248369Smm	}
1392219089Spjd}
1393168404Spjd
1394219089Spjdstatic void
1395219089Spjddump_deadlist(dsl_deadlist_t *dl)
1396219089Spjd{
1397219089Spjd	dsl_deadlist_entry_t *dle;
1398248369Smm	uint64_t unused;
1399219089Spjd	char bytes[32];
1400219089Spjd	char comp[32];
1401219089Spjd	char uncomp[32];
1402219089Spjd
1403219089Spjd	if (dump_opt['d'] < 3)
1404219089Spjd		return;
1405219089Spjd
1406219089Spjd	zdb_nicenum(dl->dl_phys->dl_used, bytes);
1407219089Spjd	zdb_nicenum(dl->dl_phys->dl_comp, comp);
1408219089Spjd	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1409219089Spjd	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1410219089Spjd	    bytes, comp, uncomp);
1411219089Spjd
1412219089Spjd	if (dump_opt['d'] < 4)
1413219089Spjd		return;
1414219089Spjd
1415219089Spjd	(void) printf("\n");
1416219089Spjd
1417248369Smm	/* force the tree to be loaded */
1418248369Smm	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1419248369Smm
1420219089Spjd	for (dle = avl_first(&dl->dl_tree); dle;
1421219089Spjd	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
1422248369Smm		if (dump_opt['d'] >= 5) {
1423248369Smm			char buf[128];
1424248369Smm			(void) snprintf(buf, sizeof (buf), "mintxg %llu -> ",
1425248369Smm			    (longlong_t)dle->dle_mintxg,
1426248369Smm			    (longlong_t)dle->dle_bpobj.bpo_object);
1427219089Spjd
1428248369Smm			dump_bpobj(&dle->dle_bpobj, buf, 0);
1429248369Smm		} else {
1430248369Smm			(void) printf("mintxg %llu -> obj %llu\n",
1431248369Smm			    (longlong_t)dle->dle_mintxg,
1432248369Smm			    (longlong_t)dle->dle_bpobj.bpo_object);
1433248369Smm
1434248369Smm		}
1435168404Spjd	}
1436168404Spjd}
1437168404Spjd
1438185029Spjdstatic avl_tree_t idx_tree;
1439185029Spjdstatic avl_tree_t domain_tree;
1440185029Spjdstatic boolean_t fuid_table_loaded;
1441219089Spjdstatic boolean_t sa_loaded;
1442219089Spjdsa_attr_type_t *sa_attr_table;
1443185029Spjd
1444185029Spjdstatic void
1445185029Spjdfuid_table_destroy()
1446185029Spjd{
1447185029Spjd	if (fuid_table_loaded) {
1448185029Spjd		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1449185029Spjd		fuid_table_loaded = B_FALSE;
1450185029Spjd	}
1451185029Spjd}
1452185029Spjd
1453185029Spjd/*
1454185029Spjd * print uid or gid information.
1455185029Spjd * For normal POSIX id just the id is printed in decimal format.
1456185029Spjd * For CIFS files with FUID the fuid is printed in hex followed by
1457248369Smm * the domain-rid string.
1458185029Spjd */
1459185029Spjdstatic void
1460185029Spjdprint_idstr(uint64_t id, const char *id_type)
1461185029Spjd{
1462185029Spjd	if (FUID_INDEX(id)) {
1463185029Spjd		char *domain;
1464185029Spjd
1465185029Spjd		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1466185029Spjd		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1467185029Spjd		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1468185029Spjd	} else {
1469185029Spjd		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1470185029Spjd	}
1471185029Spjd
1472185029Spjd}
1473185029Spjd
1474185029Spjdstatic void
1475219089Spjddump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1476185029Spjd{
1477185029Spjd	uint32_t uid_idx, gid_idx;
1478185029Spjd
1479219089Spjd	uid_idx = FUID_INDEX(uid);
1480219089Spjd	gid_idx = FUID_INDEX(gid);
1481185029Spjd
1482185029Spjd	/* Load domain table, if not already loaded */
1483185029Spjd	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1484185029Spjd		uint64_t fuid_obj;
1485185029Spjd
1486185029Spjd		/* first find the fuid object.  It lives in the master node */
1487185029Spjd		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1488185029Spjd		    8, 1, &fuid_obj) == 0);
1489209962Smm		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1490185029Spjd		(void) zfs_fuid_table_load(os, fuid_obj,
1491185029Spjd		    &idx_tree, &domain_tree);
1492185029Spjd		fuid_table_loaded = B_TRUE;
1493185029Spjd	}
1494185029Spjd
1495219089Spjd	print_idstr(uid, "uid");
1496219089Spjd	print_idstr(gid, "gid");
1497185029Spjd}
1498185029Spjd
1499168404Spjd/*ARGSUSED*/
1500168404Spjdstatic void
1501168404Spjddump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1502168404Spjd{
1503219089Spjd	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1504219089Spjd	sa_handle_t *hdl;
1505219089Spjd	uint64_t xattr, rdev, gen;
1506219089Spjd	uint64_t uid, gid, mode, fsize, parent, links;
1507219089Spjd	uint64_t pflags;
1508219089Spjd	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1509168404Spjd	time_t z_crtime, z_atime, z_mtime, z_ctime;
1510219089Spjd	sa_bulk_attr_t bulk[12];
1511219089Spjd	int idx = 0;
1512168404Spjd	int error;
1513168404Spjd
1514219089Spjd	if (!sa_loaded) {
1515219089Spjd		uint64_t sa_attrs = 0;
1516219089Spjd		uint64_t version;
1517168404Spjd
1518219089Spjd		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1519219089Spjd		    8, 1, &version) == 0);
1520219089Spjd		if (version >= ZPL_VERSION_SA) {
1521219089Spjd			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1522219089Spjd			    8, 1, &sa_attrs) == 0);
1523219089Spjd		}
1524219089Spjd		if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
1525219089Spjd		    ZPL_END, &sa_attr_table)) != 0) {
1526219089Spjd			(void) printf("sa_setup failed errno %d, can't "
1527219089Spjd			    "display znode contents\n", error);
1528219089Spjd			return;
1529219089Spjd		}
1530219089Spjd		sa_loaded = B_TRUE;
1531219089Spjd	}
1532219089Spjd
1533219089Spjd	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1534219089Spjd		(void) printf("Failed to get handle for SA znode\n");
1535219089Spjd		return;
1536219089Spjd	}
1537219089Spjd
1538219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1539219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1540219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1541219089Spjd	    &links, 8);
1542219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1543219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1544219089Spjd	    &mode, 8);
1545219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1546219089Spjd	    NULL, &parent, 8);
1547219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1548219089Spjd	    &fsize, 8);
1549219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1550219089Spjd	    acctm, 16);
1551219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1552219089Spjd	    modtm, 16);
1553219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1554219089Spjd	    crtm, 16);
1555219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1556219089Spjd	    chgtm, 16);
1557219089Spjd	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1558219089Spjd	    &pflags, 8);
1559219089Spjd
1560219089Spjd	if (sa_bulk_lookup(hdl, bulk, idx)) {
1561219089Spjd		(void) sa_handle_destroy(hdl);
1562219089Spjd		return;
1563219089Spjd	}
1564219089Spjd
1565168404Spjd	error = zfs_obj_to_path(os, object, path, sizeof (path));
1566168404Spjd	if (error != 0) {
1567168404Spjd		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1568168404Spjd		    (u_longlong_t)object);
1569168404Spjd	}
1570168404Spjd	if (dump_opt['d'] < 3) {
1571168404Spjd		(void) printf("\t%s\n", path);
1572219089Spjd		(void) sa_handle_destroy(hdl);
1573168404Spjd		return;
1574168404Spjd	}
1575168404Spjd
1576219089Spjd	z_crtime = (time_t)crtm[0];
1577219089Spjd	z_atime = (time_t)acctm[0];
1578219089Spjd	z_mtime = (time_t)modtm[0];
1579219089Spjd	z_ctime = (time_t)chgtm[0];
1580168404Spjd
1581168404Spjd	(void) printf("\tpath	%s\n", path);
1582219089Spjd	dump_uidgid(os, uid, gid);
1583168404Spjd	(void) printf("\tatime	%s", ctime(&z_atime));
1584168404Spjd	(void) printf("\tmtime	%s", ctime(&z_mtime));
1585168404Spjd	(void) printf("\tctime	%s", ctime(&z_ctime));
1586168404Spjd	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1587219089Spjd	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1588219089Spjd	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1589219089Spjd	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1590219089Spjd	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1591219089Spjd	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1592219089Spjd	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
1593219089Spjd	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1594219089Spjd	    sizeof (uint64_t)) == 0)
1595219089Spjd		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1596219089Spjd	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1597219089Spjd	    sizeof (uint64_t)) == 0)
1598219089Spjd		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1599219089Spjd	sa_handle_destroy(hdl);
1600168404Spjd}
1601168404Spjd
/*
 * Object viewer used by object_viewer[] for the "ZFS V0 ACL" and "ZFS ACL"
 * slots.  The body is empty: nothing is printed for these objects and all
 * four arguments are ignored.
 */
1602168404Spjd/*ARGSUSED*/
1603168404Spjdstatic void
1604168404Spjddump_acl(objset_t *os, uint64_t object, void *data, size_t size)
1605168404Spjd{
1606168404Spjd}
1607168404Spjd
/*
 * Object viewer used by object_viewer[] for the "DMU objset" slot.  The body
 * is empty: nothing is printed for these objects and all four arguments are
 * ignored.
 */
1608168404Spjd/*ARGSUSED*/
1609168404Spjdstatic void
1610168404Spjddump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
1611168404Spjd{
1612168404Spjd}
1613168404Spjd
/*
 * Table of per-type dump routines.  dump_object() indexes it with
 * ZDB_OT_TYPE() of an object's type and of its bonus type.  The table has
 * DMU_OT_NUMTYPES + 1 slots; the entries are positional, so their order must
 * be kept as is, and the final slot (dump_unknown) must stay last.  The
 * trailing comment on each entry names the object type that slot serves.
 */
1614219089Spjdstatic object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
1615168404Spjd	dump_none,		/* unallocated			*/
1616168404Spjd	dump_zap,		/* object directory		*/
1617168404Spjd	dump_uint64,		/* object array			*/
1618168404Spjd	dump_none,		/* packed nvlist		*/
1619168404Spjd	dump_packed_nvlist,	/* packed nvlist size		*/
1620168404Spjd	dump_none,		/* bplist			*/
1621168404Spjd	dump_none,		/* bplist header		*/
1622168404Spjd	dump_none,		/* SPA space map header		*/
1623168404Spjd	dump_none,		/* SPA space map		*/
1624168404Spjd	dump_none,		/* ZIL intent log		*/
1625168404Spjd	dump_dnode,		/* DMU dnode			*/
1626168404Spjd	dump_dmu_objset,	/* DMU objset			*/
1627168404Spjd	dump_dsl_dir,		/* DSL directory		*/
1628168404Spjd	dump_zap,		/* DSL directory child map	*/
1629168404Spjd	dump_zap,		/* DSL dataset snap map		*/
1630168404Spjd	dump_zap,		/* DSL props			*/
1631168404Spjd	dump_dsl_dataset,	/* DSL dataset			*/
1632168404Spjd	dump_znode,		/* ZFS znode			*/
1633185029Spjd	dump_acl,		/* ZFS V0 ACL			*/
1634168404Spjd	dump_uint8,		/* ZFS plain file		*/
1635185029Spjd	dump_zpldir,		/* ZFS directory		*/
1636168404Spjd	dump_zap,		/* ZFS master node		*/
1637168404Spjd	dump_zap,		/* ZFS delete queue		*/
1638168404Spjd	dump_uint8,		/* zvol object			*/
1639168404Spjd	dump_zap,		/* zvol prop			*/
1640168404Spjd	dump_uint8,		/* other uint8[]		*/
1641168404Spjd	dump_uint64,		/* other uint64[]		*/
1642168404Spjd	dump_zap,		/* other ZAP			*/
1643168404Spjd	dump_zap,		/* persistent error log		*/
1644168404Spjd	dump_uint8,		/* SPA history			*/
1645249643Smm	dump_history_offsets,	/* SPA history offsets		*/
1646168404Spjd	dump_zap,		/* Pool properties		*/
1647185029Spjd	dump_zap,		/* DSL permissions		*/
1648185029Spjd	dump_acl,		/* ZFS ACL			*/
1649185029Spjd	dump_uint8,		/* ZFS SYSACL			*/
1650185029Spjd	dump_none,		/* FUID nvlist			*/
1651185029Spjd	dump_packed_nvlist,	/* FUID nvlist size		*/
1652185029Spjd	dump_zap,		/* DSL dataset next clones	*/
1653185029Spjd	dump_zap,		/* DSL scrub queue		*/
1654209962Smm	dump_zap,		/* ZFS user/group used		*/
1655209962Smm	dump_zap,		/* ZFS user/group quota		*/
1656219089Spjd	dump_zap,		/* snapshot refcount tags	*/
1657219089Spjd	dump_ddt_zap,		/* DDT ZAP object		*/
1658219089Spjd	dump_zap,		/* DDT statistics		*/
1659219089Spjd	dump_znode,		/* SA object			*/
1660219089Spjd	dump_zap,		/* SA Master Node		*/
1661219089Spjd	dump_sa_attrs,		/* SA attribute registration	*/
1662219089Spjd	dump_sa_layouts,	/* SA attribute layouts		*/
1663219089Spjd	dump_zap,		/* DSL scrub translations	*/
1664219089Spjd	dump_none,		/* fake dedup BP		*/
1665219089Spjd	dump_zap,		/* deadlist			*/
1666219089Spjd	dump_none,		/* deadlist hdr			*/
1667219089Spjd	dump_zap,		/* dsl clones			*/
1668219089Spjd	dump_none,		/* bpobj subobjs		*/
1669219089Spjd	dump_unknown,		/* Unknown type, must be last	*/
1670168404Spjd};
1671168404Spjd
1672168404Spjdstatic void
1673168404Spjddump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1674168404Spjd{
1675168404Spjd	dmu_buf_t *db = NULL;
1676168404Spjd	dmu_object_info_t doi;
1677168404Spjd	dnode_t *dn;
1678168404Spjd	void *bonus = NULL;
1679168404Spjd	size_t bsize = 0;
1680219089Spjd	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1681219089Spjd	char bonus_size[32];
1682168404Spjd	char aux[50];
1683168404Spjd	int error;
1684168404Spjd
1685168404Spjd	if (*print_header) {
1686219089Spjd		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1687219089Spjd		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1688219089Spjd		    "%full", "type");
1689168404Spjd		*print_header = 0;
1690168404Spjd	}
1691168404Spjd
1692168404Spjd	if (object == 0) {
1693219089Spjd		dn = DMU_META_DNODE(os);
1694168404Spjd	} else {
1695168404Spjd		error = dmu_bonus_hold(os, object, FTAG, &db);
1696168404Spjd		if (error)
1697168404Spjd			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1698168404Spjd			    object, error);
1699168404Spjd		bonus = db->db_data;
1700168404Spjd		bsize = db->db_size;
1701219089Spjd		dn = DB_DNODE((dmu_buf_impl_t *)db);
1702168404Spjd	}
1703168404Spjd	dmu_object_info_from_dnode(dn, &doi);
1704168404Spjd
1705219089Spjd	zdb_nicenum(doi.doi_metadata_block_size, iblk);
1706219089Spjd	zdb_nicenum(doi.doi_data_block_size, dblk);
1707219089Spjd	zdb_nicenum(doi.doi_max_offset, lsize);
1708219089Spjd	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1709219089Spjd	zdb_nicenum(doi.doi_bonus_size, bonus_size);
1710219089Spjd	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1711219089Spjd	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1712219089Spjd	    doi.doi_max_offset);
1713168404Spjd
1714168404Spjd	aux[0] = '\0';
1715168404Spjd
1716185029Spjd	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1717168404Spjd		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1718219089Spjd		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1719185029Spjd	}
1720168404Spjd
1721185029Spjd	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1722168404Spjd		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1723219089Spjd		    ZDB_COMPRESS_NAME(doi.doi_compress));
1724185029Spjd	}
1725168404Spjd
1726219089Spjd	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1727219089Spjd	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1728219089Spjd	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1729168404Spjd
1730168404Spjd	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1731219089Spjd		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1732219089Spjd		    "", "", "", "", "", bonus_size, "bonus",
1733219089Spjd		    ZDB_OT_NAME(doi.doi_bonus_type));
1734168404Spjd	}
1735168404Spjd
1736168404Spjd	if (verbosity >= 4) {
1737219089Spjd		(void) printf("\tdnode flags: %s%s%s\n",
1738209962Smm		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1739209962Smm		    "USED_BYTES " : "",
1740209962Smm		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1741219089Spjd		    "USERUSED_ACCOUNTED " : "",
1742219089Spjd		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1743219089Spjd		    "SPILL_BLKPTR" : "");
1744209962Smm		(void) printf("\tdnode maxblkid: %llu\n",
1745209962Smm		    (longlong_t)dn->dn_phys->dn_maxblkid);
1746209962Smm
1747219089Spjd		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1748219089Spjd		    bonus, bsize);
1749219089Spjd		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1750168404Spjd		*print_header = 1;
1751168404Spjd	}
1752168404Spjd
1753168404Spjd	if (verbosity >= 5)
1754208047Smm		dump_indirect(dn);
1755168404Spjd
1756168404Spjd	if (verbosity >= 5) {
1757168404Spjd		/*
1758168404Spjd		 * Report the list of segments that comprise the object.
1759168404Spjd		 */
1760168404Spjd		uint64_t start = 0;
1761168404Spjd		uint64_t end;
1762168404Spjd		uint64_t blkfill = 1;
1763168404Spjd		int minlvl = 1;
1764168404Spjd
1765168404Spjd		if (dn->dn_type == DMU_OT_DNODE) {
1766168404Spjd			minlvl = 0;
1767168404Spjd			blkfill = DNODES_PER_BLOCK;
1768168404Spjd		}
1769168404Spjd
1770168404Spjd		for (;;) {
1771219089Spjd			char segsize[32];
1772185029Spjd			error = dnode_next_offset(dn,
1773185029Spjd			    0, &start, minlvl, blkfill, 0);
1774168404Spjd			if (error)
1775168404Spjd				break;
1776168404Spjd			end = start;
1777185029Spjd			error = dnode_next_offset(dn,
1778185029Spjd			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1779219089Spjd			zdb_nicenum(end - start, segsize);
1780168404Spjd			(void) printf("\t\tsegment [%016llx, %016llx)"
1781168404Spjd			    " size %5s\n", (u_longlong_t)start,
1782168404Spjd			    (u_longlong_t)end, segsize);
1783168404Spjd			if (error)
1784168404Spjd				break;
1785168404Spjd			start = end;
1786168404Spjd		}
1787168404Spjd	}
1788168404Spjd
1789168404Spjd	if (db != NULL)
1790168404Spjd		dmu_buf_rele(db, FTAG);
1791168404Spjd}
1792168404Spjd
/*
 * Printable names for objset types; dump_dir() indexes this with dds_type
 * after checking that it is below DMU_OST_NUMTYPES.
 */
1793168404Spjdstatic char *objset_types[DMU_OST_NUMTYPES] = {
1794168404Spjd	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1795168404Spjd
/*
 * Print a one-line summary of the objset 'os' followed, depending on the
 * options, by its contents.
 *
 * If specific objects were requested (zopt_objects != 0) only those objects
 * are dumped.  Otherwise the intent log is dumped when dump_opt['i'] is set
 * or verbosity >= 2, the deadlist is dumped when the objset has a dataset,
 * and with verbosity >= 2 every object is dumped: the meta dnode first, then
 * the user/group-used objects when present, then each object returned by
 * dmu_object_next().  The process aborts if that iteration ends with any
 * error other than ESRCH.
 */
1796168404Spjdstatic void
1797168404Spjddump_dir(objset_t *os)
1798168404Spjd{
1799168404Spjd	dmu_objset_stats_t dds;
1800168404Spjd	uint64_t object, object_count;
1801168404Spjd	uint64_t refdbytes, usedobjs, scratch;
1802219089Spjd	char numbuf[32];
1803209962Smm	char blkbuf[BP_SPRINTF_LEN + 20];
1804168404Spjd	char osname[MAXNAMELEN];
1805168404Spjd	char *type = "UNKNOWN";
1806168404Spjd	int verbosity = dump_opt['d'];
1807168404Spjd	int print_header = 1;
1808168404Spjd	int i, error;
1809168404Spjd
	/* The stats are gathered with the pool config entered. */
1810249643Smm	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
1811168404Spjd	dmu_objset_fast_stat(os, &dds);
1812249643Smm	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
1813168404Spjd
	/* Out-of-range types keep the "UNKNOWN" label. */
1814168404Spjd	if (dds.dds_type < DMU_OST_NUMTYPES)
1815168404Spjd		type = objset_types[dds.dds_type];
1816168404Spjd
	/*
	 * The meta objset takes its numbers from the root block pointer and
	 * the MOS dsl_dir rather than from dmu_objset_space().
	 */
1817168404Spjd	if (dds.dds_type == DMU_OST_META) {
1818168404Spjd		dds.dds_creation_txg = TXG_INITIAL;
1819219089Spjd		usedobjs = os->os_rootbp->blk_fill;
1820219089Spjd		refdbytes = os->os_spa->spa_dsl_pool->
1821185029Spjd		    dp_mos_dir->dd_phys->dd_used_bytes;
1822168404Spjd	} else {
1823168404Spjd		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1824168404Spjd	}
1825168404Spjd
1826219089Spjd	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1827168404Spjd
1828219089Spjd	zdb_nicenum(refdbytes, numbuf);
1829168404Spjd
	/* The root block pointer is appended to the summary only at -dddd. */
1830168404Spjd	if (verbosity >= 4) {
1831263398Sdelphij		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
1832263398Sdelphij		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
1833263398Sdelphij		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
1834168404Spjd	} else {
1835168404Spjd		blkbuf[0] = '\0';
1836168404Spjd	}
1837168404Spjd
1838168404Spjd	dmu_objset_name(os, osname);
1839168404Spjd
1840168404Spjd	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1841168404Spjd	    "%s, %llu objects%s\n",
1842168404Spjd	    osname, type, (u_longlong_t)dmu_objset_id(os),
1843168404Spjd	    (u_longlong_t)dds.dds_creation_txg,
1844168404Spjd	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1845168404Spjd
	/* Explicitly requested objects: dump just those and stop. */
1846219089Spjd	if (zopt_objects != 0) {
1847219089Spjd		for (i = 0; i < zopt_objects; i++)
1848219089Spjd			dump_object(os, zopt_object[i], verbosity,
1849219089Spjd			    &print_header);
1850219089Spjd		(void) printf("\n");
1851219089Spjd		return;
1852219089Spjd	}
1853168404Spjd
1854219089Spjd	if (dump_opt['i'] != 0 || verbosity >= 2)
1855219089Spjd		dump_intent_log(dmu_objset_zil(os));
1856219089Spjd
1857168404Spjd	if (dmu_objset_ds(os) != NULL)
1858219089Spjd		dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
1859168404Spjd
1860168404Spjd	if (verbosity < 2)
1861168404Spjd		return;
1862168404Spjd
	/* Nothing to walk when the root block pointer is a hole. */
1863263398Sdelphij	if (BP_IS_HOLE(os->os_rootbp))
1864185029Spjd		return;
1865185029Spjd
	/*
	 * Object 0 and the user/group-used objects are dumped here but are
	 * not added to object_count; only objects found by dmu_object_next()
	 * are counted and compared against usedobjs below.
	 */
1866168404Spjd	dump_object(os, 0, verbosity, &print_header);
1867209962Smm	object_count = 0;
1868219089Spjd	if (DMU_USERUSED_DNODE(os) != NULL &&
1869219089Spjd	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
1870209962Smm		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1871209962Smm		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1872209962Smm	}
1873168404Spjd
1874168404Spjd	object = 0;
1875168404Spjd	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1876168404Spjd		dump_object(os, object, verbosity, &print_header);
1877168404Spjd		object_count++;
1878168404Spjd	}
1879168404Spjd
1880168404Spjd	ASSERT3U(object_count, ==, usedobjs);
1881168404Spjd
1882168404Spjd	(void) printf("\n");
1883168404Spjd
	/* ESRCH is the only accepted way for the iteration to end. */
1884209962Smm	if (error != ESRCH) {
1885209962Smm		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1886209962Smm		abort();
1887209962Smm	}
1888168404Spjd}
1889168404Spjd
1890168404Spjdstatic void
1891219089Spjddump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1892168404Spjd{
1893168404Spjd	time_t timestamp = ub->ub_timestamp;
1894168404Spjd
1895219089Spjd	(void) printf(header ? header : "");
1896168404Spjd	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1897168404Spjd	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1898168404Spjd	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1899168404Spjd	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1900168404Spjd	(void) printf("\ttimestamp = %llu UTC = %s",
1901168404Spjd	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1902168404Spjd	if (dump_opt['u'] >= 3) {
1903168404Spjd		char blkbuf[BP_SPRINTF_LEN];
1904263398Sdelphij		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
1905168404Spjd		(void) printf("\trootbp = %s\n", blkbuf);
1906168404Spjd	}
1907219089Spjd	(void) printf(footer ? footer : "");
1908168404Spjd}
1909168404Spjd
1910168404Spjdstatic void
1911219089Spjddump_config(spa_t *spa)
1912168404Spjd{
1913219089Spjd	dmu_buf_t *db;
1914219089Spjd	size_t nvsize = 0;
1915219089Spjd	int error = 0;
1916168404Spjd
1917219089Spjd
1918219089Spjd	error = dmu_bonus_hold(spa->spa_meta_objset,
1919219089Spjd	    spa->spa_config_object, FTAG, &db);
1920219089Spjd
1921219089Spjd	if (error == 0) {
1922219089Spjd		nvsize = *(uint64_t *)db->db_data;
1923219089Spjd		dmu_buf_rele(db, FTAG);
1924219089Spjd
1925219089Spjd		(void) printf("\nMOS Configuration:\n");
1926219089Spjd		dump_packed_nvlist(spa->spa_meta_objset,
1927219089Spjd		    spa->spa_config_object, (void *)&nvsize, 1);
1928219089Spjd	} else {
1929219089Spjd		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1930219089Spjd		    (u_longlong_t)spa->spa_config_object, error);
1931168404Spjd	}
1932168404Spjd}
1933168404Spjd
1934168404Spjdstatic void
1935185029Spjddump_cachefile(const char *cachefile)
1936185029Spjd{
1937185029Spjd	int fd;
1938185029Spjd	struct stat64 statbuf;
1939185029Spjd	char *buf;
1940185029Spjd	nvlist_t *config;
1941185029Spjd
1942185029Spjd	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1943185029Spjd		(void) printf("cannot open '%s': %s\n", cachefile,
1944185029Spjd		    strerror(errno));
1945185029Spjd		exit(1);
1946185029Spjd	}
1947185029Spjd
1948185029Spjd	if (fstat64(fd, &statbuf) != 0) {
1949185029Spjd		(void) printf("failed to stat '%s': %s\n", cachefile,
1950185029Spjd		    strerror(errno));
1951185029Spjd		exit(1);
1952185029Spjd	}
1953185029Spjd
1954185029Spjd	if ((buf = malloc(statbuf.st_size)) == NULL) {
1955185029Spjd		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1956185029Spjd		    (u_longlong_t)statbuf.st_size);
1957185029Spjd		exit(1);
1958185029Spjd	}
1959185029Spjd
1960185029Spjd	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1961185029Spjd		(void) fprintf(stderr, "failed to read %llu bytes\n",
1962185029Spjd		    (u_longlong_t)statbuf.st_size);
1963185029Spjd		exit(1);
1964185029Spjd	}
1965185029Spjd
1966185029Spjd	(void) close(fd);
1967185029Spjd
1968185029Spjd	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1969185029Spjd		(void) fprintf(stderr, "failed to unpack nvlist\n");
1970185029Spjd		exit(1);
1971185029Spjd	}
1972185029Spjd
1973185029Spjd	free(buf);
1974185029Spjd
1975185029Spjd	dump_nvlist(config, 0);
1976185029Spjd
1977185029Spjd	nvlist_free(config);
1978185029Spjd}
1979185029Spjd
1980219089Spjd#define	ZDB_MAX_UB_HEADER_SIZE 32
1981219089Spjd
1982185029Spjdstatic void
1983219089Spjddump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1984219089Spjd{
1985219089Spjd	vdev_t vd;
1986219089Spjd	vdev_t *vdp = &vd;
1987219089Spjd	char header[ZDB_MAX_UB_HEADER_SIZE];
1988219089Spjd
1989219089Spjd	vd.vdev_ashift = ashift;
1990219089Spjd	vdp->vdev_top = vdp;
1991219089Spjd
1992219089Spjd	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1993219089Spjd		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1994219089Spjd		uberblock_t *ub = (void *)((char *)lbl + uoff);
1995219089Spjd
1996219089Spjd		if (uberblock_verify(ub))
1997219089Spjd			continue;
1998219089Spjd		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1999219089Spjd		    "Uberblock[%d]\n", i);
2000219089Spjd		dump_uberblock(ub, header, "");
2001219089Spjd	}
2002219089Spjd}
2003219089Spjd
2004219089Spjdstatic void
2005168404Spjddump_label(const char *dev)
2006168404Spjd{
2007168404Spjd	int fd;
2008168404Spjd	vdev_label_t label;
2009219089Spjd	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
2010168404Spjd	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2011168404Spjd	struct stat64 statbuf;
2012219089Spjd	uint64_t psize, ashift;
2013219089Spjd	int len = strlen(dev) + 1;
2014168404Spjd
2015219089Spjd	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
2016219089Spjd		len++;
2017219089Spjd		path = malloc(len);
2018219089Spjd		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
2019219089Spjd	} else {
2020219089Spjd		path = strdup(dev);
2021219089Spjd	}
2022219089Spjd
2023219089Spjd	if ((fd = open64(path, O_RDONLY)) < 0) {
2024219089Spjd		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
2025219089Spjd		free(path);
2026168404Spjd		exit(1);
2027168404Spjd	}
2028168404Spjd
2029168404Spjd	if (fstat64(fd, &statbuf) != 0) {
2030219089Spjd		(void) printf("failed to stat '%s': %s\n", path,
2031168404Spjd		    strerror(errno));
2032219089Spjd		free(path);
2033219089Spjd		(void) close(fd);
2034168404Spjd		exit(1);
2035168404Spjd	}
2036168404Spjd
2037219089Spjd	if (S_ISBLK(statbuf.st_mode)) {
2038219089Spjd		(void) printf("cannot use '%s': character device required\n",
2039219089Spjd		    path);
2040219089Spjd		free(path);
2041219089Spjd		(void) close(fd);
2042219089Spjd		exit(1);
2043196928Spjd	}
2044196928Spjd
2045185029Spjd	psize = statbuf.st_size;
2046168404Spjd	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2047168404Spjd
2048219089Spjd	for (int l = 0; l < VDEV_LABELS; l++) {
2049168404Spjd		nvlist_t *config = NULL;
2050168404Spjd
2051168404Spjd		(void) printf("--------------------------------------------\n");
2052168404Spjd		(void) printf("LABEL %d\n", l);
2053168404Spjd		(void) printf("--------------------------------------------\n");
2054168404Spjd
2055168404Spjd		if (pread64(fd, &label, sizeof (label),
2056168404Spjd		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2057168404Spjd			(void) printf("failed to read label %d\n", l);
2058168404Spjd			continue;
2059168404Spjd		}
2060168404Spjd
2061168404Spjd		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2062168404Spjd			(void) printf("failed to unpack label %d\n", l);
2063219089Spjd			ashift = SPA_MINBLOCKSHIFT;
2064219089Spjd		} else {
2065219089Spjd			nvlist_t *vdev_tree = NULL;
2066219089Spjd
2067219089Spjd			dump_nvlist(config, 4);
2068219089Spjd			if ((nvlist_lookup_nvlist(config,
2069219089Spjd			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2070219089Spjd			    (nvlist_lookup_uint64(vdev_tree,
2071219089Spjd			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2072219089Spjd				ashift = SPA_MINBLOCKSHIFT;
2073219089Spjd			nvlist_free(config);
2074168404Spjd		}
2075219089Spjd		if (dump_opt['u'])
2076219089Spjd			dump_label_uberblocks(&label, ashift);
2077168404Spjd	}
2078219089Spjd
2079219089Spjd	free(path);
2080219089Spjd	(void) close(fd);
2081168404Spjd}
2082168404Spjd
2083168404Spjd/*ARGSUSED*/
2084168404Spjdstatic int
2085219089Spjddump_one_dir(const char *dsname, void *arg)
2086168404Spjd{
2087168404Spjd	int error;
2088168404Spjd	objset_t *os;
2089168404Spjd
2090219089Spjd	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
2091168404Spjd	if (error) {
2092219089Spjd		(void) printf("Could not open %s, error %d\n", dsname, error);
2093168404Spjd		return (0);
2094168404Spjd	}
2095168404Spjd	dump_dir(os);
2096219089Spjd	dmu_objset_disown(os, FTAG);
2097185029Spjd	fuid_table_destroy();
2098219089Spjd	sa_loaded = B_FALSE;
2099168404Spjd	return (0);
2100168404Spjd}
2101168404Spjd
2102168404Spjd/*
2103219089Spjd * Block statistics.
 *
 * One zdb_blkstats_t accumulates totals for a set of block pointers;
 * zdb_count_block() updates every field for each block it counts.  The
 * histogram is indexed by the block's physical size shifted right by
 * SPA_MINBLOCKSHIFT, hence one slot per multiple of SPA_MINBLOCKSIZE up to
 * and including SPA_MAXBLOCKSIZE.
2104168404Spjd */
2105249643Smm#define	PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
2106168404Spjdtypedef struct zdb_blkstats {
2107249643Smm	uint64_t zb_asize;	/* sum of BP_GET_ASIZE() */
2108249643Smm	uint64_t zb_lsize;	/* sum of BP_GET_LSIZE() */
2109249643Smm	uint64_t zb_psize;	/* sum of BP_GET_PSIZE() */
2110249643Smm	uint64_t zb_count;	/* number of blocks counted */
2111249643Smm	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];	/* blocks per psize */
2112168404Spjd} zdb_blkstats_t;
2113168404Spjd
2114219089Spjd/*
2115219089Spjd * Extended object types to report deferred frees and dedup auto-ditto blocks.
 * They are numbered directly after the DMU object types, so they can share
 * the type dimension of zdb_cb_t's zcb_type[] array; ZDB_OT_TOTAL is the
 * last one.
2116219089Spjd */
2117219089Spjd#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
2118219089Spjd#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
2119243674Smm#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
2120243674Smm#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)
2121168404Spjd
/*
 * Display names for the extended types, listed in the same order as the
 * ZDB_OT_* offsets above (+0 .. +3).
 */
2122219089Spjdstatic char *zdb_ot_extname[] = {
2123219089Spjd	"deferred free",
2124219089Spjd	"dedup ditto",
2125243674Smm	"other",
2126219089Spjd	"Total",
2127219089Spjd};
2128219089Spjd
/*
 * Extra index in the level dimension of zcb_type[]: the slot that
 * zdb_count_block() uses to accumulate totals across all levels.
 */
2129208047Smm#define	ZB_TOTAL	DN_MAX_LEVELS
2130168404Spjd
/*
 * State shared by the block-traversal callbacks.
 */
2131168404Spjdtypedef struct zdb_cb {
	/* Statistics indexed by [block level or ZB_TOTAL][type or ZDB_OT_*]. */
2132219089Spjd	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* Accumulated by zdb_ddt_leak_init(): asize * (refcnt - 1), and count. */
2133219089Spjd	uint64_t	zcb_dedup_asize;
2134219089Spjd	uint64_t	zcb_dedup_blocks;
	/* Used by zdb_blkptr_cb() for its progress/ETA line (gethrtime units). */
2135249643Smm	uint64_t	zcb_start;
2136249643Smm	uint64_t	zcb_lastprint;
2137249643Smm	uint64_t	zcb_totalasize;
	/* Per-error-number counters, bumped by zdb_blkptr_done(). */
2138168404Spjd	uint64_t	zcb_errors[256];
	/* Reset to 0 by zdb_blkptr_cb() for every block visited. */
2139168404Spjd	int		zcb_readfails;
	/* Set to 1 by zdb_blkptr_done() when a non-speculative read fails. */
2140168404Spjd	int		zcb_haderrors;
	/* Pool being examined; set by zdb_leak_init(). */
2141219089Spjd	spa_t		*zcb_spa;
2142168404Spjd} zdb_cb_t;
2143168404Spjd
/*
 * Account for one block pointer in the statistics and, unless
 * dump_opt['L'] is set, claim it for leak detection.
 *
 *	zcb	traversal state whose zcb_type[] counters are updated
 *	zilog	when non-NULL, the block is first added to this log's bp tree
 *		and is ignored if zil_bp_tree_add() returns non-zero
 *		(presumably because it was already seen -- confirm against
 *		zil_bp_tree_add())
 *	bp	block pointer to count
 *	type	statistics bucket; must be below ZDB_OT_TOTAL
 */
2144168404Spjdstatic void
2145219089Spjdzdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2146219089Spjd    dmu_object_type_t type)
2147168404Spjd{
2148219089Spjd	uint64_t refcnt = 0;
2149219089Spjd
2150219089Spjd	ASSERT(type < ZDB_OT_TOTAL);
2151219089Spjd
2152219089Spjd	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2153219089Spjd		return;
2154219089Spjd
	/*
	 * Four buckets are updated per block: (level, total), (level, type),
	 * (all levels, total) and (all levels, type), in that order.
	 */
2155185029Spjd	for (int i = 0; i < 4; i++) {
2156168404Spjd		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2157219089Spjd		int t = (i & 1) ? type : ZDB_OT_TOTAL;
2158168404Spjd		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2159168404Spjd
2160168404Spjd		zb->zb_asize += BP_GET_ASIZE(bp);
2161168404Spjd		zb->zb_lsize += BP_GET_LSIZE(bp);
2162168404Spjd		zb->zb_psize += BP_GET_PSIZE(bp);
2163168404Spjd		zb->zb_count++;
2164249643Smm		zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
2165168404Spjd	}
2166168404Spjd
	/* With leak detection disabled there is nothing to claim. */
2167219089Spjd	if (dump_opt['L'])
2168219089Spjd		return;
2169168404Spjd
	/*
	 * For a dedup'd block, drop one reference from its DDT entry and
	 * remember how many references remain; the entry is removed once its
	 * total refcount reaches zero.
	 */
2170219089Spjd	if (BP_GET_DEDUP(bp)) {
2171219089Spjd		ddt_t *ddt;
2172219089Spjd		ddt_entry_t *dde;
2173168404Spjd
2174219089Spjd		ddt = ddt_select(zcb->zcb_spa, bp);
2175219089Spjd		ddt_enter(ddt);
2176219089Spjd		dde = ddt_lookup(ddt, bp, B_FALSE);
2177168404Spjd
2178219089Spjd		if (dde == NULL) {
2179219089Spjd			refcnt = 0;
2180219089Spjd		} else {
2181219089Spjd			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2182219089Spjd			ddt_phys_decref(ddp);
2183219089Spjd			refcnt = ddp->ddp_refcnt;
2184219089Spjd			if (ddt_phys_total_refcnt(dde) == 0)
2185219089Spjd				ddt_remove(ddt, dde);
2186185029Spjd		}
2187219089Spjd		ddt_exit(ddt);
2188185029Spjd	}
2189168404Spjd
	/*
	 * Claim the block; the claim must succeed.  The txg argument is 0
	 * while DDT references remain and spa_first_txg() otherwise.
	 * NOTE(review): presumably txg 0 makes the claim a no-op so the block
	 * is only really claimed on its last reference -- confirm against
	 * zio_claim().
	 */
2190219089Spjd	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2191219089Spjd	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
2192219089Spjd	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2193168404Spjd}
2194168404Spjd
2195263394Sdelphij/* ARGSUSED */
2196263394Sdelphijstatic void
2197263394Sdelphijzdb_blkptr_done(zio_t *zio)
2198263394Sdelphij{
2199263394Sdelphij	spa_t *spa = zio->io_spa;
2200263394Sdelphij	blkptr_t *bp = zio->io_bp;
2201263394Sdelphij	int ioerr = zio->io_error;
2202263394Sdelphij	zdb_cb_t *zcb = zio->io_private;
2203263394Sdelphij	zbookmark_t *zb = &zio->io_bookmark;
2204263394Sdelphij
2205263394Sdelphij	zio_data_buf_free(zio->io_data, zio->io_size);
2206263394Sdelphij
2207263394Sdelphij	mutex_enter(&spa->spa_scrub_lock);
2208263394Sdelphij	spa->spa_scrub_inflight--;
2209263394Sdelphij	cv_broadcast(&spa->spa_scrub_io_cv);
2210263394Sdelphij
2211263394Sdelphij	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2212263394Sdelphij		char blkbuf[BP_SPRINTF_LEN];
2213263394Sdelphij
2214263394Sdelphij		zcb->zcb_haderrors = 1;
2215263394Sdelphij		zcb->zcb_errors[ioerr]++;
2216263394Sdelphij
2217263394Sdelphij		if (dump_opt['b'] >= 2)
2218263398Sdelphij			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2219263394Sdelphij		else
2220263394Sdelphij			blkbuf[0] = '\0';
2221263394Sdelphij
2222263394Sdelphij		(void) printf("zdb_blkptr_cb: "
2223263394Sdelphij		    "Got error %d reading "
2224263394Sdelphij		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2225263394Sdelphij		    ioerr,
2226263394Sdelphij		    (u_longlong_t)zb->zb_objset,
2227263394Sdelphij		    (u_longlong_t)zb->zb_object,
2228263394Sdelphij		    (u_longlong_t)zb->zb_level,
2229263394Sdelphij		    (u_longlong_t)zb->zb_blkid,
2230263394Sdelphij		    blkbuf);
2231263394Sdelphij	}
2232263394Sdelphij	mutex_exit(&spa->spa_scrub_lock);
2233263394Sdelphij}
2234263394Sdelphij
/*
 * Per-block-pointer traversal callback.
 *
 * For every non-hole block pointer the block is counted through
 * zdb_count_block().  When dump_opt['c'] is greater than 1, or when it is
 * set and the block is metadata, the block is additionally read back
 * asynchronously, with zdb_blkptr_done() as the completion routine.  While
 * stderr is a terminal and dump_opt['b'] is below 5, a progress line is
 * rewritten on stderr at most once per NANOSEC of gethrtime().
 *
 * 'arg' is the zdb_cb_t for the traversal.  Always returns 0.
 */
2235263394Sdelphij/* ARGSUSED */
2236168404Spjdstatic int
2237247406Smmzdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2238219089Spjd    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2239168404Spjd{
2240168404Spjd	zdb_cb_t *zcb = arg;
2241209962Smm	dmu_object_type_t type;
2242219089Spjd	boolean_t is_metadata;
2243168404Spjd
	/* At -bbbbb every block pointer with a non-zero birth is printed. */
2244263398Sdelphij	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
2245263398Sdelphij		char blkbuf[BP_SPRINTF_LEN];
2246263398Sdelphij		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2247263398Sdelphij		(void) printf("objset %llu object %llu "
2248263398Sdelphij		    "level %lld offset 0x%llx %s\n",
2249263398Sdelphij		    (u_longlong_t)zb->zb_objset,
2250263398Sdelphij		    (u_longlong_t)zb->zb_object,
2251263398Sdelphij		    (longlong_t)zb->zb_level,
2252263398Sdelphij		    (u_longlong_t)blkid2offset(dnp, bp, zb),
2253263398Sdelphij		    blkbuf);
2254263398Sdelphij	}
2255263398Sdelphij
	/* Holes are neither counted nor read. */
2256263398Sdelphij	if (BP_IS_HOLE(bp))
2257208047Smm		return (0);
2258185029Spjd
2259209962Smm	type = BP_GET_TYPE(bp);
2260185029Spjd
	/* Types with the DMU_OT_NEWTYPE bit set share the "other" bucket. */
2261243674Smm	zdb_count_block(zcb, zilog, bp,
2262243674Smm	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2263209962Smm
	/* Indirect blocks (level != 0) are treated as metadata as well. */
2264243674Smm	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2265209962Smm
2266219089Spjd	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
2267219089Spjd		size_t size = BP_GET_PSIZE(bp);
		/* Released by zdb_blkptr_done() when the read completes. */
2268263394Sdelphij		void *data = zio_data_buf_alloc(size);
2269219089Spjd		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2270208047Smm
2271219089Spjd		/* If it's an intent log block, failure is expected. */
2272219089Spjd		if (zb->zb_level == ZB_ZIL_LEVEL)
2273219089Spjd			flags |= ZIO_FLAG_SPECULATIVE;
2274219089Spjd
		/*
		 * Throttle: wait while more than max_inflight reads are
		 * outstanding; zdb_blkptr_done() decrements the counter and
		 * broadcasts on the condition variable.
		 */
2275263394Sdelphij		mutex_enter(&spa->spa_scrub_lock);
2276263394Sdelphij		while (spa->spa_scrub_inflight > max_inflight)
2277263394Sdelphij			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
2278263394Sdelphij		spa->spa_scrub_inflight++;
2279263394Sdelphij		mutex_exit(&spa->spa_scrub_lock);
2280219089Spjd
2281263394Sdelphij		zio_nowait(zio_read(NULL, spa, bp, data, size,
2282263394Sdelphij		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2283168404Spjd	}
2284168404Spjd
2285168404Spjd	zcb->zcb_readfails = 0;
2286168404Spjd
2287249643Smm	if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
2288249643Smm	    gethrtime() > zcb->zcb_lastprint + NANOSEC) {
2289249643Smm		uint64_t now = gethrtime();
2290249643Smm		char buf[10];
2291249643Smm		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
		/*
		 * Elapsed time is divided by 1000 twice; assuming gethrtime()
		 * counts nanoseconds that is milliseconds, so bytes per
		 * millisecond approximates KB/s -- TODO confirm.  The "1 +"
		 * terms keep both divisors non-zero.
		 */
2292249643Smm		int kb_per_sec =
2293249643Smm		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
2294249643Smm		int sec_remaining =
2295249643Smm		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
2296249643Smm
2297249643Smm		zfs_nicenum(bytes, buf, sizeof (buf));
		/* Leading '\r' rewrites the same terminal line each time. */
2298249643Smm		(void) fprintf(stderr,
2299249643Smm		    "\r%5s completed (%4dMB/s) "
2300249643Smm		    "estimated time remaining: %uhr %02umin %02usec        ",
2301249643Smm		    buf, kb_per_sec / 1024,
2302249643Smm		    sec_remaining / 60 / 60,
2303249643Smm		    sec_remaining / 60 % 60,
2304249643Smm		    sec_remaining % 60);
2305249643Smm
2306249643Smm		zcb->zcb_lastprint = now;
2307249643Smm	}
2308249643Smm
2309168404Spjd	return (0);
2310168404Spjd}
2311168404Spjd
2312219089Spjdstatic void
2313262094Savgzdb_leak(void *arg, uint64_t start, uint64_t size)
2314219089Spjd{
2315262094Savg	vdev_t *vd = arg;
2316219089Spjd
2317219089Spjd	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2318219089Spjd	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2319219089Spjd}
2320219089Spjd
/*
 * Metaslab ops vector with both callbacks left NULL.  zdb_leak_init()
 * installs it on every metaslab whose space map it loads for leak
 * detection.
 */
2321262094Savgstatic metaslab_ops_t zdb_metaslab_ops = {
2322219089Spjd	NULL,	/* alloc */
2323262094Savg	NULL	/* fragmented */
2324219089Spjd};
2325219089Spjd
/*
 * Walk the pool's dedup table and account for its entries before the block
 * traversal starts.
 *
 * The walk stops (returns) as soon as the bookmark reaches
 * DDT_CLASS_UNIQUE.  For each entry visited before that, every physical
 * variant with a non-zero birth is turned into a block pointer: a ditto
 * variant is counted through zdb_count_block() as ZDB_OT_DITTO, any other
 * variant adds asize * (refcnt - 1) to zcb_dedup_asize and bumps
 * zcb_dedup_blocks.  Unless dump_opt['L'] is set, the entry is then looked
 * up in the in-core DDT with the "add" flag set, and that lookup must
 * succeed.
 */
2326219089Spjdstatic void
2327219089Spjdzdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2328219089Spjd{
2329219089Spjd	ddt_bookmark_t ddb = { 0 };
2330219089Spjd	ddt_entry_t dde;
2331219089Spjd	int error;
2332219089Spjd
2333219089Spjd	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2334219089Spjd		blkptr_t blk;
2335219089Spjd		ddt_phys_t *ddp = dde.dde_phys;
2336219089Spjd
		/* NOTE(review): assumes unique entries are walked last. */
2337219089Spjd		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2338219089Spjd			return;
2339219089Spjd
2340219089Spjd		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2341219089Spjd
		/* ddp advances in step with p over the entry's variants. */
2342219089Spjd		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2343219089Spjd			if (ddp->ddp_phys_birth == 0)
2344219089Spjd				continue;
2345219089Spjd			ddt_bp_create(ddb.ddb_checksum,
2346219089Spjd			    &dde.dde_key, ddp, &blk);
2347219089Spjd			if (p == DDT_PHYS_DITTO) {
2348219089Spjd				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2349219089Spjd			} else {
2350219089Spjd				zcb->zcb_dedup_asize +=
2351219089Spjd				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2352219089Spjd				zcb->zcb_dedup_blocks++;
2353219089Spjd			}
2354219089Spjd		}
		/*
		 * 'blk' here is whichever block pointer the loop above built
		 * last.  Presumably this lookup loads the entry into the
		 * in-core table so zdb_count_block() can find and decrement
		 * it later -- confirm against ddt_lookup().
		 */
2355219089Spjd		if (!dump_opt['L']) {
2356219089Spjd			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2357219089Spjd			ddt_enter(ddt);
2358219089Spjd			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2359219089Spjd			ddt_exit(ddt);
2360219089Spjd		}
2361219089Spjd	}
2362219089Spjd
	/* Reached only when the walk itself ended; ENOENT is expected. */
2363219089Spjd	ASSERT(error == ENOENT);
2364219089Spjd}
2365219089Spjd
2366219089Spjdstatic void
2367219089Spjdzdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2368219089Spjd{
2369219089Spjd	zcb->zcb_spa = spa;
2370219089Spjd
2371219089Spjd	if (!dump_opt['L']) {
2372219089Spjd		vdev_t *rvd = spa->spa_root_vdev;
2373219089Spjd		for (int c = 0; c < rvd->vdev_children; c++) {
2374219089Spjd			vdev_t *vd = rvd->vdev_child[c];
2375219089Spjd			for (int m = 0; m < vd->vdev_ms_count; m++) {
2376219089Spjd				metaslab_t *msp = vd->vdev_ms[m];
2377219089Spjd				mutex_enter(&msp->ms_lock);
2378262094Savg				metaslab_unload(msp);
2379262094Savg
2380262094Savg				/*
2381262094Savg				 * For leak detection, we overload the metaslab
2382262094Savg				 * ms_tree to contain allocated segments
2383262094Savg				 * instead of free segments. As a result,
2384262094Savg				 * we can't use the normal metaslab_load/unload
2385262094Savg				 * interfaces.
2386262094Savg				 */
2387262094Savg				if (msp->ms_sm != NULL) {
2388262094Savg					msp->ms_ops = &zdb_metaslab_ops;
2389262094Savg					VERIFY0(space_map_load(msp->ms_sm,
2390262094Savg					    msp->ms_tree, SM_ALLOC));
2391262094Savg					msp->ms_loaded = B_TRUE;
2392262094Savg				}
2393219089Spjd				mutex_exit(&msp->ms_lock);
2394219089Spjd			}
2395219089Spjd		}
2396219089Spjd	}
2397219089Spjd
2398219089Spjd	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2399219089Spjd
2400219089Spjd	zdb_ddt_leak_init(spa, zcb);
2401219089Spjd
2402219089Spjd	spa_config_exit(spa, SCL_CONFIG, FTAG);
2403219089Spjd}
2404219089Spjd
2405219089Spjdstatic void
2406219089Spjdzdb_leak_fini(spa_t *spa)
2407219089Spjd{
2408219089Spjd	if (!dump_opt['L']) {
2409219089Spjd		vdev_t *rvd = spa->spa_root_vdev;
2410219089Spjd		for (int c = 0; c < rvd->vdev_children; c++) {
2411219089Spjd			vdev_t *vd = rvd->vdev_child[c];
2412219089Spjd			for (int m = 0; m < vd->vdev_ms_count; m++) {
2413219089Spjd				metaslab_t *msp = vd->vdev_ms[m];
2414219089Spjd				mutex_enter(&msp->ms_lock);
2415262094Savg
2416262094Savg				/*
2417262094Savg				 * The ms_tree has been overloaded to
2418262094Savg				 * contain allocated segments. Now that we
2419262094Savg				 * finished traversing all blocks, any
2420262094Savg				 * block that remains in the ms_tree
2421262094Savg				 * represents an allocated block that we
2422262094Savg				 * did not claim during the traversal.
2423262094Savg				 * Claimed blocks would have been removed
2424262094Savg				 * from the ms_tree.
2425262094Savg				 */
2426262094Savg				range_tree_vacate(msp->ms_tree, zdb_leak, vd);
2427262094Savg				msp->ms_loaded = B_FALSE;
2428262094Savg
2429219089Spjd				mutex_exit(&msp->ms_lock);
2430219089Spjd			}
2431219089Spjd		}
2432219089Spjd	}
2433219089Spjd}
2434219089Spjd
2435219089Spjd/* ARGSUSED */
2436168404Spjdstatic int
2437219089Spjdcount_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2438219089Spjd{
2439219089Spjd	zdb_cb_t *zcb = arg;
2440219089Spjd
2441249643Smm	if (dump_opt['b'] >= 5) {
2442219089Spjd		char blkbuf[BP_SPRINTF_LEN];
2443263398Sdelphij		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2444219089Spjd		(void) printf("[%s] %s\n",
2445219089Spjd		    "deferred free", blkbuf);
2446219089Spjd	}
2447219089Spjd	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2448219089Spjd	return (0);
2449219089Spjd}
2450219089Spjd
2451219089Spjdstatic int
2452168404Spjddump_block_stats(spa_t *spa)
2453168404Spjd{
2454168404Spjd	zdb_cb_t zcb = { 0 };
2455168404Spjd	zdb_blkstats_t *zb, *tzb;
2456219089Spjd	uint64_t norm_alloc, norm_space, total_alloc, total_found;
2457219089Spjd	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2458168404Spjd	int leaks = 0;
2459168404Spjd
2460249643Smm	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
2461219089Spjd	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2462219089Spjd	    (dump_opt['c'] == 1) ? "metadata " : "",
2463219089Spjd	    dump_opt['c'] ? "checksums " : "",
2464219089Spjd	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2465219089Spjd	    !dump_opt['L'] ? "nothing leaked " : "");
2466168404Spjd
2467168404Spjd	/*
2468185029Spjd	 * Load all space maps as SM_ALLOC maps, then traverse the pool
2469185029Spjd	 * claiming each block we discover.  If the pool is perfectly
2470185029Spjd	 * consistent, the space maps will be empty when we're done.
2471185029Spjd	 * Anything left over is a leak; any block we can't claim (because
2472185029Spjd	 * it's not part of any space map) is a double allocation,
2473185029Spjd	 * reference to a freed block, or an unclaimed log block.
2474168404Spjd	 */
2475219089Spjd	zdb_leak_init(spa, &zcb);
2476168404Spjd
2477168404Spjd	/*
2478168404Spjd	 * If there's a deferred-free bplist, process that first.
2479168404Spjd	 */
2480219089Spjd	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2481219089Spjd	    count_block_cb, &zcb, NULL);
2482227703Spjd	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2483227703Spjd		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2484227703Spjd		    count_block_cb, &zcb, NULL);
2485227703Spjd	}
2486263391Sdelphij	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
2487243674Smm		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
2488243674Smm		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
2489243674Smm		    &zcb, NULL));
2490243674Smm	}
2491168404Spjd
2492219089Spjd	if (dump_opt['c'] > 1)
2493219089Spjd		flags |= TRAVERSE_PREFETCH_DATA;
2494168404Spjd
2495249643Smm	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
2496249643Smm	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
2497219089Spjd	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2498168404Spjd
2499263394Sdelphij	/*
2500263394Sdelphij	 * If we've traversed the data blocks then we need to wait for those
2501263394Sdelphij	 * I/Os to complete. We leverage "The Godfather" zio to wait on
2502263394Sdelphij	 * all async I/Os to complete.
2503263394Sdelphij	 */
2504263394Sdelphij	if (dump_opt['c']) {
2505263394Sdelphij		(void) zio_wait(spa->spa_async_zio_root);
2506263394Sdelphij		spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
2507263394Sdelphij		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
2508263394Sdelphij		    ZIO_FLAG_GODFATHER);
2509263394Sdelphij	}
2510263394Sdelphij
2511219089Spjd	if (zcb.zcb_haderrors) {
2512168404Spjd		(void) printf("\nError counts:\n\n");
2513168404Spjd		(void) printf("\t%5s  %s\n", "errno", "count");
2514219089Spjd		for (int e = 0; e < 256; e++) {
2515168404Spjd			if (zcb.zcb_errors[e] != 0) {
2516168404Spjd				(void) printf("\t%5d  %llu\n",
2517168404Spjd				    e, (u_longlong_t)zcb.zcb_errors[e]);
2518168404Spjd			}
2519168404Spjd		}
2520168404Spjd	}
2521168404Spjd
2522168404Spjd	/*
2523168404Spjd	 * Report any leaked segments.
2524168404Spjd	 */
2525219089Spjd	zdb_leak_fini(spa);
2526168404Spjd
2527219089Spjd	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2528185029Spjd
2529219089Spjd	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2530219089Spjd	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2531168404Spjd
2532219089Spjd	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2533219089Spjd	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2534185029Spjd
2535219089Spjd	if (total_found == total_alloc) {
2536209962Smm		if (!dump_opt['L'])
2537209962Smm			(void) printf("\n\tNo leaks (block sum matches space"
2538209962Smm			    " maps exactly)\n");
2539168404Spjd	} else {
2540168404Spjd		(void) printf("block traversal size %llu != alloc %llu "
2541209962Smm		    "(%s %lld)\n",
2542219089Spjd		    (u_longlong_t)total_found,
2543219089Spjd		    (u_longlong_t)total_alloc,
2544209962Smm		    (dump_opt['L']) ? "unreachable" : "leaked",
2545219089Spjd		    (longlong_t)(total_alloc - total_found));
2546168404Spjd		leaks = 1;
2547168404Spjd	}
2548168404Spjd
2549168404Spjd	if (tzb->zb_count == 0)
2550168404Spjd		return (2);
2551168404Spjd
2552168404Spjd	(void) printf("\n");
2553168404Spjd	(void) printf("\tbp count:      %10llu\n",
2554168404Spjd	    (u_longlong_t)tzb->zb_count);
2555219089Spjd	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2556168404Spjd	    (u_longlong_t)tzb->zb_lsize,
2557168404Spjd	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2558219089Spjd	(void) printf("\tbp physical:   %10llu      avg:"
2559219089Spjd	    " %6llu     compression: %6.2f\n",
2560168404Spjd	    (u_longlong_t)tzb->zb_psize,
2561168404Spjd	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2562168404Spjd	    (double)tzb->zb_lsize / tzb->zb_psize);
2563219089Spjd	(void) printf("\tbp allocated:  %10llu      avg:"
2564219089Spjd	    " %6llu     compression: %6.2f\n",
2565168404Spjd	    (u_longlong_t)tzb->zb_asize,
2566168404Spjd	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2567168404Spjd	    (double)tzb->zb_lsize / tzb->zb_asize);
2568219089Spjd	(void) printf("\tbp deduped:    %10llu    ref>1:"
2569219089Spjd	    " %6llu   deduplication: %6.2f\n",
2570219089Spjd	    (u_longlong_t)zcb.zcb_dedup_asize,
2571219089Spjd	    (u_longlong_t)zcb.zcb_dedup_blocks,
2572219089Spjd	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2573219089Spjd	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2574219089Spjd	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2575168404Spjd
2576168404Spjd	if (dump_opt['b'] >= 2) {
2577168404Spjd		int l, t, level;
2578168404Spjd		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2579168404Spjd		    "\t  avg\t comp\t%%Total\tType\n");
2580168404Spjd
2581219089Spjd		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2582219089Spjd			char csize[32], lsize[32], psize[32], asize[32];
2583219089Spjd			char avg[32];
2584168404Spjd			char *typename;
2585168404Spjd
2586219089Spjd			if (t < DMU_OT_NUMTYPES)
2587219089Spjd				typename = dmu_ot[t].ot_name;
2588219089Spjd			else
2589219089Spjd				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2590168404Spjd
2591168404Spjd			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2592168404Spjd				(void) printf("%6s\t%5s\t%5s\t%5s"
2593168404Spjd				    "\t%5s\t%5s\t%6s\t%s\n",
2594168404Spjd				    "-",
2595168404Spjd				    "-",
2596168404Spjd				    "-",
2597168404Spjd				    "-",
2598168404Spjd				    "-",
2599168404Spjd				    "-",
2600168404Spjd				    "-",
2601168404Spjd				    typename);
2602168404Spjd				continue;
2603168404Spjd			}
2604168404Spjd
2605168404Spjd			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2606168404Spjd				level = (l == -1 ? ZB_TOTAL : l);
2607168404Spjd				zb = &zcb.zcb_type[level][t];
2608168404Spjd
2609168404Spjd				if (zb->zb_asize == 0)
2610168404Spjd					continue;
2611168404Spjd
2612168404Spjd				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2613168404Spjd					continue;
2614168404Spjd
2615168404Spjd				if (level == 0 && zb->zb_asize ==
2616168404Spjd				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2617168404Spjd					continue;
2618168404Spjd
2619219089Spjd				zdb_nicenum(zb->zb_count, csize);
2620219089Spjd				zdb_nicenum(zb->zb_lsize, lsize);
2621219089Spjd				zdb_nicenum(zb->zb_psize, psize);
2622219089Spjd				zdb_nicenum(zb->zb_asize, asize);
2623219089Spjd				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2624168404Spjd
2625168404Spjd				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2626168404Spjd				    "\t%5.2f\t%6.2f\t",
2627168404Spjd				    csize, lsize, psize, asize, avg,
2628168404Spjd				    (double)zb->zb_lsize / zb->zb_psize,
2629168404Spjd				    100.0 * zb->zb_asize / tzb->zb_asize);
2630168404Spjd
2631168404Spjd				if (level == ZB_TOTAL)
2632168404Spjd					(void) printf("%s\n", typename);
2633168404Spjd				else
2634168404Spjd					(void) printf("    L%d %s\n",
2635168404Spjd					    level, typename);
2636249643Smm
2637249643Smm				if (dump_opt['b'] >= 4) {
2638249643Smm					(void) printf("psize "
2639249643Smm					    "(in 512-byte sectors): "
2640249643Smm					    "number of blocks\n");
2641249643Smm					dump_histogram(zb->zb_psize_histogram,
2642262094Savg					    PSIZE_HISTO_SIZE, 0);
2643249643Smm				}
2644168404Spjd			}
2645168404Spjd		}
2646168404Spjd	}
2647168404Spjd
2648168404Spjd	(void) printf("\n");
2649168404Spjd
2650168404Spjd	if (leaks)
2651168404Spjd		return (2);
2652168404Spjd
2653168404Spjd	if (zcb.zcb_haderrors)
2654168404Spjd		return (3);
2655168404Spjd
2656168404Spjd	return (0);
2657168404Spjd}
2658168404Spjd
2659219089Spjdtypedef struct zdb_ddt_entry {
2660219089Spjd	ddt_key_t	zdde_key;
2661219089Spjd	uint64_t	zdde_ref_blocks;
2662219089Spjd	uint64_t	zdde_ref_lsize;
2663219089Spjd	uint64_t	zdde_ref_psize;
2664219089Spjd	uint64_t	zdde_ref_dsize;
2665219089Spjd	avl_node_t	zdde_node;
2666219089Spjd} zdb_ddt_entry_t;
2667219089Spjd
2668219089Spjd/* ARGSUSED */
2669219089Spjdstatic int
2670219089Spjdzdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2671247406Smm    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2672219089Spjd{
2673219089Spjd	avl_tree_t *t = arg;
2674219089Spjd	avl_index_t where;
2675219089Spjd	zdb_ddt_entry_t *zdde, zdde_search;
2676219089Spjd
2677263398Sdelphij	if (BP_IS_HOLE(bp))
2678219089Spjd		return (0);
2679219089Spjd
2680219089Spjd	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2681219089Spjd		(void) printf("traversing objset %llu, %llu objects, "
2682219089Spjd		    "%lu blocks so far\n",
2683219089Spjd		    (u_longlong_t)zb->zb_objset,
2684219089Spjd		    (u_longlong_t)bp->blk_fill,
2685219089Spjd		    avl_numnodes(t));
2686219089Spjd	}
2687219089Spjd
2688219089Spjd	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2689243674Smm	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
2690219089Spjd		return (0);
2691219089Spjd
2692219089Spjd	ddt_key_fill(&zdde_search.zdde_key, bp);
2693219089Spjd
2694219089Spjd	zdde = avl_find(t, &zdde_search, &where);
2695219089Spjd
2696219089Spjd	if (zdde == NULL) {
2697219089Spjd		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2698219089Spjd		zdde->zdde_key = zdde_search.zdde_key;
2699219089Spjd		avl_insert(t, zdde, where);
2700219089Spjd	}
2701219089Spjd
2702219089Spjd	zdde->zdde_ref_blocks += 1;
2703219089Spjd	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2704219089Spjd	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2705219089Spjd	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2706219089Spjd
2707219089Spjd	return (0);
2708219089Spjd}
2709219089Spjd
2710168404Spjdstatic void
2711219089Spjddump_simulated_ddt(spa_t *spa)
2712219089Spjd{
2713219089Spjd	avl_tree_t t;
2714219089Spjd	void *cookie = NULL;
2715219089Spjd	zdb_ddt_entry_t *zdde;
2716219089Spjd	ddt_histogram_t ddh_total = { 0 };
2717219089Spjd	ddt_stat_t dds_total = { 0 };
2718219089Spjd
2719219089Spjd	avl_create(&t, ddt_entry_compare,
2720219089Spjd	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2721219089Spjd
2722219089Spjd	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2723219089Spjd
2724219089Spjd	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2725219089Spjd	    zdb_ddt_add_cb, &t);
2726219089Spjd
2727219089Spjd	spa_config_exit(spa, SCL_CONFIG, FTAG);
2728219089Spjd
2729219089Spjd	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2730219089Spjd		ddt_stat_t dds;
2731219089Spjd		uint64_t refcnt = zdde->zdde_ref_blocks;
2732219089Spjd		ASSERT(refcnt != 0);
2733219089Spjd
2734219089Spjd		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2735219089Spjd		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2736219089Spjd		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2737219089Spjd		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2738219089Spjd
2739219089Spjd		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2740219089Spjd		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2741219089Spjd		dds.dds_ref_psize = zdde->zdde_ref_psize;
2742219089Spjd		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2743219089Spjd
2744265751Sdelphij		ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
2745265751Sdelphij		    &dds, 0);
2746219089Spjd
2747219089Spjd		umem_free(zdde, sizeof (*zdde));
2748219089Spjd	}
2749219089Spjd
2750219089Spjd	avl_destroy(&t);
2751219089Spjd
2752219089Spjd	ddt_histogram_stat(&dds_total, &ddh_total);
2753219089Spjd
2754219089Spjd	(void) printf("Simulated DDT histogram:\n");
2755219089Spjd
2756219089Spjd	zpool_dump_ddt(&dds_total, &ddh_total);
2757219089Spjd
2758219089Spjd	dump_dedup_ratio(&dds_total);
2759219089Spjd}
2760219089Spjd
2761219089Spjdstatic void
2762168404Spjddump_zpool(spa_t *spa)
2763168404Spjd{
2764168404Spjd	dsl_pool_t *dp = spa_get_dsl(spa);
2765168404Spjd	int rc = 0;
2766168404Spjd
2767219089Spjd	if (dump_opt['S']) {
2768219089Spjd		dump_simulated_ddt(spa);
2769219089Spjd		return;
2770219089Spjd	}
2771219089Spjd
2772219089Spjd	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2773219089Spjd		(void) printf("\nCached configuration:\n");
2774219089Spjd		dump_nvlist(spa->spa_config, 8);
2775219089Spjd	}
2776219089Spjd
2777219089Spjd	if (dump_opt['C'])
2778219089Spjd		dump_config(spa);
2779219089Spjd
2780168404Spjd	if (dump_opt['u'])
2781219089Spjd		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2782168404Spjd
2783219089Spjd	if (dump_opt['D'])
2784219089Spjd		dump_all_ddts(spa);
2785219089Spjd
2786219089Spjd	if (dump_opt['d'] > 2 || dump_opt['m'])
2787219089Spjd		dump_metaslabs(spa);
2788219089Spjd
2789219089Spjd	if (dump_opt['d'] || dump_opt['i']) {
2790168404Spjd		dump_dir(dp->dp_meta_objset);
2791168404Spjd		if (dump_opt['d'] >= 3) {
2792248369Smm			dump_bpobj(&spa->spa_deferred_bpobj,
2793248369Smm			    "Deferred frees", 0);
2794219089Spjd			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2795219089Spjd				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
2796248369Smm				    "Pool snapshot frees", 0);
2797219089Spjd			}
2798243674Smm
2799243674Smm			if (spa_feature_is_active(spa,
2800263391Sdelphij			    SPA_FEATURE_ASYNC_DESTROY)) {
2801243674Smm				dump_bptree(spa->spa_meta_objset,
2802243674Smm				    spa->spa_dsl_pool->dp_bptree_obj,
2803243674Smm				    "Pool dataset frees");
2804243674Smm			}
2805168404Spjd			dump_dtl(spa->spa_root_vdev, 0);
2806209962Smm		}
2807219089Spjd		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2808219089Spjd		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2809168404Spjd	}
2810219089Spjd	if (dump_opt['b'] || dump_opt['c'])
2811168404Spjd		rc = dump_block_stats(spa);
2812168404Spjd
2813262094Savg	if (rc == 0)
2814262094Savg		rc = verify_spacemap_refcounts(spa);
2815262094Savg
2816168404Spjd	if (dump_opt['s'])
2817168404Spjd		show_pool_stats(spa);
2818168404Spjd
2819219089Spjd	if (dump_opt['h'])
2820219089Spjd		dump_history(spa);
2821219089Spjd
2822168404Spjd	if (rc != 0)
2823168404Spjd		exit(rc);
2824168404Spjd}
2825168404Spjd
/*
 * Option bits for the block-read command (see zdb_read_block()).  Each
 * is a distinct single bit so they can be OR'ed together.
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)	/* takes an argument */
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)	/* try to decompress data */
#define	ZDB_FLAG_BSWAP		(1 << 2)	/* byteswap before dumping */
#define	ZDB_FLAG_GBH		(1 << 3)	/* gang block header */
#define	ZDB_FLAG_INDIRECT	(1 << 4)	/* array of block pointers */
#define	ZDB_FLAG_PHYS		(1 << 5)	/* presumably physical offset */
#define	ZDB_FLAG_RAW		(1 << 6)	/* raw bytes to stdout */
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)	/* decode a blkptr at offset */

/* Maps a flag character (used as index) to its ZDB_FLAG_* bit; 0 = invalid. */
int flagbits[256];
2836168404Spjd
2837168404Spjdstatic void
2838168404Spjdzdb_print_blkptr(blkptr_t *bp, int flags)
2839168404Spjd{
2840219089Spjd	char blkbuf[BP_SPRINTF_LEN];
2841168404Spjd
2842168404Spjd	if (flags & ZDB_FLAG_BSWAP)
2843168404Spjd		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2844219089Spjd
2845263398Sdelphij	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2846219089Spjd	(void) printf("%s\n", blkbuf);
2847168404Spjd}
2848168404Spjd
2849168404Spjdstatic void
2850168404Spjdzdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2851168404Spjd{
2852168404Spjd	int i;
2853168404Spjd
2854168404Spjd	for (i = 0; i < nbps; i++)
2855168404Spjd		zdb_print_blkptr(&bp[i], flags);
2856168404Spjd}
2857168404Spjd
2858168404Spjdstatic void
2859168404Spjdzdb_dump_gbh(void *buf, int flags)
2860168404Spjd{
2861168404Spjd	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2862168404Spjd}
2863168404Spjd
2864168404Spjdstatic void
2865168404Spjdzdb_dump_block_raw(void *buf, uint64_t size, int flags)
2866168404Spjd{
2867168404Spjd	if (flags & ZDB_FLAG_BSWAP)
2868168404Spjd		byteswap_uint64_array(buf, size);
2869219089Spjd	(void) write(1, buf, size);
2870168404Spjd}
2871168404Spjd
2872168404Spjdstatic void
2873168404Spjdzdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2874168404Spjd{
2875168404Spjd	uint64_t *d = (uint64_t *)buf;
2876168404Spjd	int nwords = size / sizeof (uint64_t);
2877168404Spjd	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2878168404Spjd	int i, j;
2879168404Spjd	char *hdr, *c;
2880168404Spjd
2881168404Spjd
2882168404Spjd	if (do_bswap)
2883168404Spjd		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2884168404Spjd	else
2885168404Spjd		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2886168404Spjd
2887168404Spjd	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2888168404Spjd
2889168404Spjd	for (i = 0; i < nwords; i += 2) {
2890168404Spjd		(void) printf("%06llx:  %016llx  %016llx  ",
2891168404Spjd		    (u_longlong_t)(i * sizeof (uint64_t)),
2892168404Spjd		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2893168404Spjd		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2894168404Spjd
2895168404Spjd		c = (char *)&d[i];
2896168404Spjd		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2897168404Spjd			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2898168404Spjd		(void) printf("\n");
2899168404Spjd	}
2900168404Spjd}
2901168404Spjd
2902168404Spjd/*
2903168404Spjd * There are two acceptable formats:
2904168404Spjd *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2905168404Spjd *	child[.child]*    - For example: 0.1.1
2906168404Spjd *
2907168404Spjd * The second form can be used to specify arbitrary vdevs anywhere
2908168404Spjd * in the heirarchy.  For example, in a pool with a mirror of
2909168404Spjd * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2910168404Spjd */
2911168404Spjdstatic vdev_t *
2912168404Spjdzdb_vdev_lookup(vdev_t *vdev, char *path)
2913168404Spjd{
2914168404Spjd	char *s, *p, *q;
2915168404Spjd	int i;
2916168404Spjd
2917168404Spjd	if (vdev == NULL)
2918168404Spjd		return (NULL);
2919168404Spjd
2920168404Spjd	/* First, assume the x.x.x.x format */
2921168404Spjd	i = (int)strtoul(path, &s, 10);
2922168404Spjd	if (s == path || (s && *s != '.' && *s != '\0'))
2923168404Spjd		goto name;
2924168404Spjd	if (i < 0 || i >= vdev->vdev_children)
2925168404Spjd		return (NULL);
2926168404Spjd
2927168404Spjd	vdev = vdev->vdev_child[i];
2928168404Spjd	if (*s == '\0')
2929168404Spjd		return (vdev);
2930168404Spjd	return (zdb_vdev_lookup(vdev, s+1));
2931168404Spjd
2932168404Spjdname:
2933168404Spjd	for (i = 0; i < vdev->vdev_children; i++) {
2934168404Spjd		vdev_t *vc = vdev->vdev_child[i];
2935168404Spjd
2936168404Spjd		if (vc->vdev_path == NULL) {
2937168404Spjd			vc = zdb_vdev_lookup(vc, path);
2938168404Spjd			if (vc == NULL)
2939168404Spjd				continue;
2940168404Spjd			else
2941168404Spjd				return (vc);
2942168404Spjd		}
2943168404Spjd
2944168404Spjd		p = strrchr(vc->vdev_path, '/');
2945168404Spjd		p = p ? p + 1 : vc->vdev_path;
2946168404Spjd		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2947168404Spjd
2948168404Spjd		if (strcmp(vc->vdev_path, path) == 0)
2949168404Spjd			return (vc);
2950168404Spjd		if (strcmp(p, path) == 0)
2951168404Spjd			return (vc);
2952168404Spjd		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2953168404Spjd			return (vc);
2954168404Spjd	}
2955168404Spjd
2956168404Spjd	return (NULL);
2957168404Spjd}
2958168404Spjd
2959168404Spjd/*
2960168404Spjd * Read a block from a pool and print it out.  The syntax of the
2961168404Spjd * block descriptor is:
2962168404Spjd *
2963168404Spjd *	pool:vdev_specifier:offset:size[:flags]
2964168404Spjd *
2965168404Spjd *	pool           - The name of the pool you wish to read from
2966168404Spjd *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2967168404Spjd *	offset         - offset, in hex, in bytes
2968168404Spjd *	size           - Amount of data to read, in hex, in bytes
2969168404Spjd *	flags          - A string of characters specifying options
2970168404Spjd *		 b: Decode a blkptr at given offset within block
2971168404Spjd *		*c: Calculate and display checksums
2972219089Spjd *		 d: Decompress data before dumping
2973168404Spjd *		 e: Byteswap data before dumping
2974219089Spjd *		 g: Display data as a gang block header
2975219089Spjd *		 i: Display as an indirect block
2976168404Spjd *		 p: Do I/O to physical offset
2977168404Spjd *		 r: Dump raw data to stdout
2978168404Spjd *
2979168404Spjd *              * = not yet implemented
2980168404Spjd */
2981168404Spjdstatic void
2982219089Spjdzdb_read_block(char *thing, spa_t *spa)
2983168404Spjd{
2984219089Spjd	blkptr_t blk, *bp = &blk;
2985219089Spjd	dva_t *dva = bp->blk_dva;
2986168404Spjd	int flags = 0;
2987219089Spjd	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2988168404Spjd	zio_t *zio;
2989168404Spjd	vdev_t *vd;
2990219089Spjd	void *pbuf, *lbuf, *buf;
2991219089Spjd	char *s, *p, *dup, *vdev, *flagstr;
2992219089Spjd	int i, error;
2993168404Spjd
2994168404Spjd	dup = strdup(thing);
2995168404Spjd	s = strtok(dup, ":");
2996168404Spjd	vdev = s ? s : "";
2997168404Spjd	s = strtok(NULL, ":");
2998168404Spjd	offset = strtoull(s ? s : "", NULL, 16);
2999168404Spjd	s = strtok(NULL, ":");
3000168404Spjd	size = strtoull(s ? s : "", NULL, 16);
3001168404Spjd	s = strtok(NULL, ":");
3002168404Spjd	flagstr = s ? s : "";
3003168404Spjd
3004168404Spjd	s = NULL;
3005168404Spjd	if (size == 0)
3006168404Spjd		s = "size must not be zero";
3007168404Spjd	if (!IS_P2ALIGNED(size, DEV_BSIZE))
3008168404Spjd		s = "size must be a multiple of sector size";
3009168404Spjd	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
3010168404Spjd		s = "offset must be a multiple of sector size";
3011168404Spjd	if (s) {
3012168404Spjd		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
3013168404Spjd		free(dup);
3014168404Spjd		return;
3015168404Spjd	}
3016168404Spjd
3017168404Spjd	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
3018168404Spjd		for (i = 0; flagstr[i]; i++) {
3019168404Spjd			int bit = flagbits[(uchar_t)flagstr[i]];
3020168404Spjd
3021168404Spjd			if (bit == 0) {
3022168404Spjd				(void) printf("***Invalid flag: %c\n",
3023168404Spjd				    flagstr[i]);
3024168404Spjd				continue;
3025168404Spjd			}
3026168404Spjd			flags |= bit;
3027168404Spjd
3028168404Spjd			/* If it's not something with an argument, keep going */
3029219089Spjd			if ((bit & (ZDB_FLAG_CHECKSUM |
3030168404Spjd			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
3031168404Spjd				continue;
3032168404Spjd
3033168404Spjd			p = &flagstr[i + 1];
3034168404Spjd			if (bit == ZDB_FLAG_PRINT_BLKPTR)
3035168404Spjd				blkptr_offset = strtoull(p, &p, 16);
3036168404Spjd			if (*p != ':' && *p != '\0') {
3037168404Spjd				(void) printf("***Invalid flag arg: '%s'\n", s);
3038168404Spjd				free(dup);
3039168404Spjd				return;
3040168404Spjd			}
3041262105Savg			i += p - &flagstr[i + 1]; /* skip over the number */
3042168404Spjd		}
3043168404Spjd	}
3044168404Spjd
3045168404Spjd	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
3046168404Spjd	if (vd == NULL) {
3047168404Spjd		(void) printf("***Invalid vdev: %s\n", vdev);
3048168404Spjd		free(dup);
3049168404Spjd		return;
3050168404Spjd	} else {
3051168404Spjd		if (vd->vdev_path)
3052219089Spjd			(void) fprintf(stderr, "Found vdev: %s\n",
3053219089Spjd			    vd->vdev_path);
3054168404Spjd		else
3055219089Spjd			(void) fprintf(stderr, "Found vdev type: %s\n",
3056168404Spjd			    vd->vdev_ops->vdev_op_type);
3057168404Spjd	}
3058168404Spjd
3059219089Spjd	psize = size;
3060219089Spjd	lsize = size;
3061168404Spjd
3062219089Spjd	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3063219089Spjd	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3064168404Spjd
3065219089Spjd	BP_ZERO(bp);
3066219089Spjd
3067219089Spjd	DVA_SET_VDEV(&dva[0], vd->vdev_id);
3068219089Spjd	DVA_SET_OFFSET(&dva[0], offset);
3069219089Spjd	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
3070219089Spjd	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
3071219089Spjd
3072219089Spjd	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
3073219089Spjd
3074219089Spjd	BP_SET_LSIZE(bp, lsize);
3075219089Spjd	BP_SET_PSIZE(bp, psize);
3076219089Spjd	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
3077219089Spjd	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
3078219089Spjd	BP_SET_TYPE(bp, DMU_OT_NONE);
3079219089Spjd	BP_SET_LEVEL(bp, 0);
3080219089Spjd	BP_SET_DEDUP(bp, 0);
3081219089Spjd	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
3082219089Spjd
3083185029Spjd	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
3084168404Spjd	zio = zio_root(spa, NULL, NULL, 0);
3085219089Spjd
3086219089Spjd	if (vd == vd->vdev_top) {
3087219089Spjd		/*
3088219089Spjd		 * Treat this as a normal block read.
3089219089Spjd		 */
3090219089Spjd		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
3091219089Spjd		    ZIO_PRIORITY_SYNC_READ,
3092219089Spjd		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
3093219089Spjd	} else {
3094219089Spjd		/*
3095219089Spjd		 * Treat this as a vdev child I/O.
3096219089Spjd		 */
3097219089Spjd		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
3098219089Spjd		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
3099219089Spjd		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
3100219089Spjd		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
3101219089Spjd		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
3102219089Spjd	}
3103219089Spjd
3104168404Spjd	error = zio_wait(zio);
3105185029Spjd	spa_config_exit(spa, SCL_STATE, FTAG);
3106168404Spjd
3107168404Spjd	if (error) {
3108168404Spjd		(void) printf("Read of %s failed, error: %d\n", thing, error);
3109168404Spjd		goto out;
3110168404Spjd	}
3111168404Spjd
3112219089Spjd	if (flags & ZDB_FLAG_DECOMPRESS) {
3113219089Spjd		/*
3114219089Spjd		 * We don't know how the data was compressed, so just try
3115219089Spjd		 * every decompress function at every inflated blocksize.
3116219089Spjd		 */
3117219089Spjd		enum zio_compress c;
3118219089Spjd		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3119219089Spjd		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3120219089Spjd
3121219089Spjd		bcopy(pbuf, pbuf2, psize);
3122219089Spjd
3123219089Spjd		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
3124219089Spjd		    SPA_MAXBLOCKSIZE - psize) == 0);
3125219089Spjd
3126219089Spjd		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
3127219089Spjd		    SPA_MAXBLOCKSIZE - psize) == 0);
3128219089Spjd
3129219089Spjd		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
3130219089Spjd		    lsize -= SPA_MINBLOCKSIZE) {
3131219089Spjd			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
3132219089Spjd				if (zio_decompress_data(c, pbuf, lbuf,
3133219089Spjd				    psize, lsize) == 0 &&
3134219089Spjd				    zio_decompress_data(c, pbuf2, lbuf2,
3135219089Spjd				    psize, lsize) == 0 &&
3136219089Spjd				    bcmp(lbuf, lbuf2, lsize) == 0)
3137219089Spjd					break;
3138219089Spjd			}
3139219089Spjd			if (c != ZIO_COMPRESS_FUNCTIONS)
3140219089Spjd				break;
3141219089Spjd			lsize -= SPA_MINBLOCKSIZE;
3142219089Spjd		}
3143219089Spjd
3144219089Spjd		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
3145219089Spjd		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
3146219089Spjd
3147219089Spjd		if (lsize <= psize) {
3148219089Spjd			(void) printf("Decompress of %s failed\n", thing);
3149219089Spjd			goto out;
3150219089Spjd		}
3151219089Spjd		buf = lbuf;
3152219089Spjd		size = lsize;
3153219089Spjd	} else {
3154219089Spjd		buf = pbuf;
3155219089Spjd		size = psize;
3156219089Spjd	}
3157219089Spjd
3158168404Spjd	if (flags & ZDB_FLAG_PRINT_BLKPTR)
3159168404Spjd		zdb_print_blkptr((blkptr_t *)(void *)
3160168404Spjd		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
3161168404Spjd	else if (flags & ZDB_FLAG_RAW)
3162168404Spjd		zdb_dump_block_raw(buf, size, flags);
3163168404Spjd	else if (flags & ZDB_FLAG_INDIRECT)
3164168404Spjd		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
3165168404Spjd		    flags);
3166168404Spjd	else if (flags & ZDB_FLAG_GBH)
3167168404Spjd		zdb_dump_gbh(buf, flags);
3168168404Spjd	else
3169168404Spjd		zdb_dump_block(thing, buf, size, flags);
3170168404Spjd
3171168404Spjdout:
3172219089Spjd	umem_free(pbuf, SPA_MAXBLOCKSIZE);
3173219089Spjd	umem_free(lbuf, SPA_MAXBLOCKSIZE);
3174168404Spjd	free(dup);
3175168404Spjd}
3176168404Spjd
3177185029Spjdstatic boolean_t
3178219089Spjdpool_match(nvlist_t *cfg, char *tgt)
3179185029Spjd{
3180219089Spjd	uint64_t v, guid = strtoull(tgt, NULL, 0);
3181185029Spjd	char *s;
3182185029Spjd
3183219089Spjd	if (guid != 0) {
3184219089Spjd		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
3185219089Spjd			return (v == guid);
3186219089Spjd	} else {
3187219089Spjd		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
3188219089Spjd			return (strcmp(s, tgt) == 0);
3189185029Spjd	}
3190185029Spjd	return (B_FALSE);
3191185029Spjd}
3192185029Spjd
3193219089Spjdstatic char *
3194219089Spjdfind_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
3195185029Spjd{
3196219089Spjd	nvlist_t *pools;
3197219089Spjd	nvlist_t *match = NULL;
3198219089Spjd	char *name = NULL;
3199219089Spjd	char *sepp = NULL;
3200219089Spjd	char sep;
3201219089Spjd	int count = 0;
3202219089Spjd	importargs_t args = { 0 };
3203185029Spjd
3204219089Spjd	args.paths = dirc;
3205219089Spjd	args.path = dirv;
3206219089Spjd	args.can_be_active = B_TRUE;
3207185029Spjd
3208219089Spjd	if ((sepp = strpbrk(*target, "/@")) != NULL) {
3209219089Spjd		sep = *sepp;
3210219089Spjd		*sepp = '\0';
3211185029Spjd	}
3212185029Spjd
3213219089Spjd	pools = zpool_search_import(g_zfs, &args);
3214185029Spjd
3215185029Spjd	if (pools != NULL) {
3216185029Spjd		nvpair_t *elem = NULL;
3217185029Spjd		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
3218185029Spjd			verify(nvpair_value_nvlist(elem, configp) == 0);
3219219089Spjd			if (pool_match(*configp, *target)) {
3220219089Spjd				count++;
3221185029Spjd				if (match != NULL) {
3222219089Spjd					/* print previously found config */
3223219089Spjd					if (name != NULL) {
3224219089Spjd						(void) printf("%s\n", name);
3225219089Spjd						dump_nvlist(match, 8);
3226219089Spjd						name = NULL;
3227219089Spjd					}
3228219089Spjd					(void) printf("%s\n",
3229219089Spjd					    nvpair_name(elem));
3230219089Spjd					dump_nvlist(*configp, 8);
3231185029Spjd				} else {
3232185029Spjd					match = *configp;
3233219089Spjd					name = nvpair_name(elem);
3234185029Spjd				}
3235185029Spjd			}
3236185029Spjd		}
3237185029Spjd	}
3238219089Spjd	if (count > 1)
3239219089Spjd		(void) fatal("\tMatched %d pools - use pool GUID "
3240219089Spjd		    "instead of pool name or \n"
3241219089Spjd		    "\tpool name part of a dataset name to select pool", count);
3242185029Spjd
3243219089Spjd	if (sepp)
3244219089Spjd		*sepp = sep;
3245219089Spjd	/*
3246219089Spjd	 * If pool GUID was specified for pool id, replace it with pool name
3247219089Spjd	 */
3248219089Spjd	if (name && (strstr(*target, name) != *target)) {
3249219089Spjd		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
3250185029Spjd
3251219089Spjd		*target = umem_alloc(sz, UMEM_NOFAIL);
3252219089Spjd		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
3253219089Spjd	}
3254219089Spjd
3255219089Spjd	*configp = name ? match : NULL;
3256219089Spjd
3257219089Spjd	return (name);
3258185029Spjd}
3259185029Spjd
3260168404Spjdint
3261168404Spjdmain(int argc, char **argv)
3262168404Spjd{
3263168404Spjd	int i, c;
3264168404Spjd	struct rlimit rl = { 1024, 1024 };
3265219089Spjd	spa_t *spa = NULL;
3266168404Spjd	objset_t *os = NULL;
3267168404Spjd	int dump_all = 1;
3268168404Spjd	int verbose = 0;
3269219089Spjd	int error = 0;
3270219089Spjd	char **searchdirs = NULL;
3271219089Spjd	int nsearch = 0;
3272219089Spjd	char *target;
3273219089Spjd	nvlist_t *policy = NULL;
3274219089Spjd	uint64_t max_txg = UINT64_MAX;
3275219089Spjd	int rewind = ZPOOL_NEVER_REWIND;
3276168404Spjd
3277168404Spjd	(void) setrlimit(RLIMIT_NOFILE, &rl);
3278168404Spjd	(void) enable_extended_FILE_stdio(-1, -1);
3279168404Spjd
3280168404Spjd	dprintf_setup(&argc, argv);
3281168404Spjd
3282263394Sdelphij	while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
3283168404Spjd		switch (c) {
3284219089Spjd		case 'b':
3285219089Spjd		case 'c':
3286168404Spjd		case 'd':
3287219089Spjd		case 'h':
3288168404Spjd		case 'i':
3289219089Spjd		case 'l':
3290209962Smm		case 'm':
3291168404Spjd		case 's':
3292219089Spjd		case 'u':
3293168404Spjd		case 'C':
3294219089Spjd		case 'D':
3295168404Spjd		case 'R':
3296219089Spjd		case 'S':
3297168404Spjd			dump_opt[c]++;
3298168404Spjd			dump_all = 0;
3299168404Spjd			break;
3300219089Spjd		case 'A':
3301219089Spjd		case 'F':
3302209962Smm		case 'L':
3303219089Spjd		case 'X':
3304219089Spjd		case 'e':
3305219089Spjd		case 'P':
3306209962Smm			dump_opt[c]++;
3307209962Smm			break;
3308168404Spjd		case 'v':
3309168404Spjd			verbose++;
3310168404Spjd			break;
3311263394Sdelphij		case 'M':
3312263394Sdelphij			max_inflight = strtoull(optarg, NULL, 0);
3313263394Sdelphij			if (max_inflight == 0) {
3314263394Sdelphij				(void) fprintf(stderr, "maximum number "
3315263394Sdelphij				    "of inflight I/Os must be greater "
3316263394Sdelphij				    "than 0\n");
3317263394Sdelphij				usage();
3318263394Sdelphij			}
3319263394Sdelphij			break;
3320185029Spjd		case 'p':
3321219089Spjd			if (searchdirs == NULL) {
3322219089Spjd				searchdirs = umem_alloc(sizeof (char *),
3323219089Spjd				    UMEM_NOFAIL);
3324219089Spjd			} else {
3325219089Spjd				char **tmp = umem_alloc((nsearch + 1) *
3326219089Spjd				    sizeof (char *), UMEM_NOFAIL);
3327219089Spjd				bcopy(searchdirs, tmp, nsearch *
3328219089Spjd				    sizeof (char *));
3329219089Spjd				umem_free(searchdirs,
3330219089Spjd				    nsearch * sizeof (char *));
3331219089Spjd				searchdirs = tmp;
3332219089Spjd			}
3333219089Spjd			searchdirs[nsearch++] = optarg;
3334185029Spjd			break;
3335209962Smm		case 't':
3336219089Spjd			max_txg = strtoull(optarg, NULL, 0);
3337219089Spjd			if (max_txg < TXG_INITIAL) {
3338209962Smm				(void) fprintf(stderr, "incorrect txg "
3339209962Smm				    "specified: %s\n", optarg);
3340209962Smm				usage();
3341209962Smm			}
3342209962Smm			break;
3343219089Spjd		case 'U':
3344219089Spjd			spa_config_path = optarg;
3345219089Spjd			break;
3346168404Spjd		default:
3347168404Spjd			usage();
3348168404Spjd			break;
3349168404Spjd		}
3350168404Spjd	}
3351168404Spjd
3352219089Spjd	if (!dump_opt['e'] && searchdirs != NULL) {
3353208047Smm		(void) fprintf(stderr, "-p option requires use of -e\n");
3354208047Smm		usage();
3355208047Smm	}
3356185029Spjd
3357168404Spjd	kernel_init(FREAD);
3358185029Spjd	g_zfs = libzfs_init();
3359185029Spjd	ASSERT(g_zfs != NULL);
3360168404Spjd
3361219089Spjd	if (dump_all)
3362219089Spjd		verbose = MAX(verbose, 1);
3363219089Spjd
3364168404Spjd	for (c = 0; c < 256; c++) {
3365219089Spjd		if (dump_all && !strchr("elAFLRSXP", c))
3366168404Spjd			dump_opt[c] = 1;
3367168404Spjd		if (dump_opt[c])
3368168404Spjd			dump_opt[c] += verbose;
3369168404Spjd	}
3370168404Spjd
3371219089Spjd	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3372219089Spjd	zfs_recover = (dump_opt['A'] > 1);
3373219089Spjd
3374168404Spjd	argc -= optind;
3375168404Spjd	argv += optind;
3376168404Spjd
3377219089Spjd	if (argc < 2 && dump_opt['R'])
3378219089Spjd		usage();
3379168404Spjd	if (argc < 1) {
3380219089Spjd		if (!dump_opt['e'] && dump_opt['C']) {
3381185029Spjd			dump_cachefile(spa_config_path);
3382168404Spjd			return (0);
3383168404Spjd		}
3384168404Spjd		usage();
3385168404Spjd	}
3386168404Spjd
3387168404Spjd	if (dump_opt['l']) {
3388168404Spjd		dump_label(argv[0]);
3389168404Spjd		return (0);
3390168404Spjd	}
3391168404Spjd
3392219089Spjd	if (dump_opt['X'] || dump_opt['F'])
3393219089Spjd		rewind = ZPOOL_DO_REWIND |
3394219089Spjd		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3395168404Spjd
3396219089Spjd	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3397219089Spjd	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3398219089Spjd	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3399219089Spjd		fatal("internal error: %s", strerror(ENOMEM));
3400168404Spjd
3401185029Spjd	error = 0;
3402219089Spjd	target = argv[0];
3403185029Spjd
3404219089Spjd	if (dump_opt['e']) {
3405219089Spjd		nvlist_t *cfg = NULL;
3406219089Spjd		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3407185029Spjd
3408219089Spjd		error = ENOENT;
3409219089Spjd		if (name) {
3410219089Spjd			if (dump_opt['C'] > 1) {
3411219089Spjd				(void) printf("\nConfiguration for import:\n");
3412219089Spjd				dump_nvlist(cfg, 8);
3413185029Spjd			}
3414219089Spjd			if (nvlist_add_nvlist(cfg,
3415219089Spjd			    ZPOOL_REWIND_POLICY, policy) != 0) {
3416219089Spjd				fatal("can't open '%s': %s",
3417219089Spjd				    target, strerror(ENOMEM));
3418219089Spjd			}
3419219089Spjd			if ((error = spa_import(name, cfg, NULL,
3420219089Spjd			    ZFS_IMPORT_MISSING_LOG)) != 0) {
3421219089Spjd				error = spa_import(name, cfg, NULL,
3422219089Spjd				    ZFS_IMPORT_VERBATIM);
3423219089Spjd			}
3424185029Spjd		}
3425168404Spjd	}
3426168404Spjd
3427185029Spjd	if (error == 0) {
3428219089Spjd		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3429219089Spjd			error = spa_open_rewind(target, &spa, FTAG, policy,
3430219089Spjd			    NULL);
3431219089Spjd			if (error) {
3432219089Spjd				/*
3433219089Spjd				 * If we're missing the log device then
3434219089Spjd				 * try opening the pool after clearing the
3435219089Spjd				 * log state.
3436219089Spjd				 */
3437219089Spjd				mutex_enter(&spa_namespace_lock);
3438219089Spjd				if ((spa = spa_lookup(target)) != NULL &&
3439219089Spjd				    spa->spa_log_state == SPA_LOG_MISSING) {
3440219089Spjd					spa->spa_log_state = SPA_LOG_CLEAR;
3441219089Spjd					error = 0;
3442219089Spjd				}
3443219089Spjd				mutex_exit(&spa_namespace_lock);
3444219089Spjd
3445219089Spjd				if (!error) {
3446219089Spjd					error = spa_open_rewind(target, &spa,
3447219089Spjd					    FTAG, policy, NULL);
3448219089Spjd				}
3449219089Spjd			}
3450185029Spjd		} else {
3451219089Spjd			error = dmu_objset_own(target, DMU_OST_ANY,
3452219089Spjd			    B_TRUE, FTAG, &os);
3453185029Spjd		}
3454185029Spjd	}
3455219089Spjd	nvlist_free(policy);
3456185029Spjd
3457168404Spjd	if (error)
3458219089Spjd		fatal("can't open '%s': %s", target, strerror(error));
3459168404Spjd
3460168404Spjd	argv++;
3461219089Spjd	argc--;
3462219089Spjd	if (!dump_opt['R']) {
3463219089Spjd		if (argc > 0) {
3464219089Spjd			zopt_objects = argc;
3465219089Spjd			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3466219089Spjd			for (i = 0; i < zopt_objects; i++) {
3467219089Spjd				errno = 0;
3468219089Spjd				zopt_object[i] = strtoull(argv[i], NULL, 0);
3469219089Spjd				if (zopt_object[i] == 0 && errno != 0)
3470219089Spjd					fatal("bad number %s: %s",
3471219089Spjd					    argv[i], strerror(errno));
3472219089Spjd			}
3473168404Spjd		}
3474244602Smm		if (os != NULL) {
3475244602Smm			dump_dir(os);
3476244602Smm		} else if (zopt_objects > 0 && !dump_opt['m']) {
3477244602Smm			dump_dir(spa->spa_meta_objset);
3478244602Smm		} else {
3479244602Smm			dump_zpool(spa);
3480244602Smm		}
3481219089Spjd	} else {
3482219089Spjd		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3483219089Spjd		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3484219089Spjd		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3485219089Spjd		flagbits['e'] = ZDB_FLAG_BSWAP;
3486219089Spjd		flagbits['g'] = ZDB_FLAG_GBH;
3487219089Spjd		flagbits['i'] = ZDB_FLAG_INDIRECT;
3488219089Spjd		flagbits['p'] = ZDB_FLAG_PHYS;
3489219089Spjd		flagbits['r'] = ZDB_FLAG_RAW;
3490168404Spjd
3491219089Spjd		for (i = 0; i < argc; i++)
3492219089Spjd			zdb_read_block(argv[i], spa);
3493168404Spjd	}
3494168404Spjd
3495219089Spjd	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3496219089Spjd
3497185029Spjd	fuid_table_destroy();
3498219089Spjd	sa_loaded = B_FALSE;
3499185029Spjd
3500185029Spjd	libzfs_fini(g_zfs);
3501168404Spjd	kernel_fini();
3502168404Spjd
3503168404Spjd	return (0);
3504168404Spjd}
3505