1185029Spjd/*
2185029Spjd * CDDL HEADER START
3185029Spjd *
4185029Spjd * The contents of this file are subject to the terms of the
5185029Spjd * Common Development and Distribution License (the "License").
6185029Spjd * You may not use this file except in compliance with the License.
7185029Spjd *
8185029Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9185029Spjd * or http://www.opensolaris.org/os/licensing.
10185029Spjd * See the License for the specific language governing permissions
11185029Spjd * and limitations under the License.
12185029Spjd *
13185029Spjd * When distributing Covered Code, include this CDDL HEADER in each
14185029Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15185029Spjd * If applicable, add the following below this CDDL HEADER, with the
16185029Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17185029Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18185029Spjd *
19185029Spjd * CDDL HEADER END
20185029Spjd */
21185029Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23247265Smm * Copyright (c) 2012 by Delphix. All rights reserved.
24185029Spjd */
25185029Spjd
26185029Spjd/*
27185029Spjd * ZFS Fault Injector
28185029Spjd *
29185029Spjd * This userland component takes a set of options and uses libzpool to translate
30185029Spjd * from a user-visible object type and name to an internal representation.
31185029Spjd * There are two basic types of faults: device faults and data faults.
32185029Spjd *
33185029Spjd *
34185029Spjd * DEVICE FAULTS
35185029Spjd *
36185029Spjd * Errors can be injected into a particular vdev using the '-d' option.  This
37185029Spjd * option takes a path or vdev GUID to uniquely identify the device within a
38185029Spjd * pool.  There are two types of errors that can be injected, EIO and ENXIO,
39185029Spjd * that can be controlled through the '-e' option.  The default is ENXIO.  For
40185029Spjd * EIO failures, any attempt to read data from the device will return EIO, but
41185029Spjd * subsequent attempts to reopen the device will succeed.  For ENXIO failures,
42185029Spjd * any attempt to read from the device will return EIO, but any attempt to
43185029Spjd * reopen the device will also return ENXIO.
44185029Spjd * For label faults, the -L option must be specified. This allows faults
45219089Spjd * to be injected into either the nvlist, uberblock, pad1, or pad2 region
46219089Spjd * of all the labels for the specified device.
47185029Spjd *
48185029Spjd * This form of the command looks like:
49185029Spjd *
50219089Spjd * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
51185029Spjd *
52185029Spjd *
53185029Spjd * DATA FAULTS
54185029Spjd *
55185029Spjd * We begin with a tuple of the form:
56185029Spjd *
57185029Spjd * 	<type,level,range,object>
58185029Spjd *
59185029Spjd * 	type	A string describing the type of data to target.  Each type
60185029Spjd * 		implicitly describes how to interpret 'object'. Currently,
61185029Spjd * 		the following values are supported:
62185029Spjd *
63185029Spjd * 		data		User data for a file
64185029Spjd * 		dnode		Dnode for a file or directory
65185029Spjd *
66185029Spjd *		The following MOS objects are special.  Instead of injecting
67185029Spjd *		errors on a particular object or blkid, we inject errors across
68185029Spjd *		all objects of the given type.
69185029Spjd *
70185029Spjd * 		mos		Any data in the MOS
71185029Spjd * 		mosdir		object directory
72185029Spjd * 		config		pool configuration
73219089Spjd * 		bpobj		blkptr list
74185029Spjd * 		spacemap	spacemap
75185029Spjd * 		metaslab	metaslab
76185029Spjd * 		errlog		persistent error log
77185029Spjd *
78185029Spjd * 	level	Object level.  Defaults to '0', not applicable to all types.  If
79185029Spjd * 		a range is given, this corresponds to the indirect block
80185029Spjd * 		corresponding to the specific range.
81185029Spjd *
82185029Spjd *	range	A numerical range [start,end) within the object.  Defaults to
83185029Spjd *		the full size of the file.
84185029Spjd *
85185029Spjd * 	object	A string describing the logical location of the object.  For
86185029Spjd * 		files and directories (currently the only supported types),
87185029Spjd * 		this is the path of the object on disk.
88185029Spjd *
89185029Spjd * This is translated, via libzpool, into the following internal representation:
90185029Spjd *
91185029Spjd * 	<type,objset,object,level,range>
92185029Spjd *
93185029Spjd * These types should be self-explanatory.  This tuple is then passed to the
94185029Spjd * kernel via a special ioctl() to initiate fault injection for the given
95185029Spjd * object.  Note that 'type' is not strictly necessary for fault injection, but
96185029Spjd * is used when translating existing faults into a human-readable string.
97185029Spjd *
98185029Spjd *
99185029Spjd * The command itself takes one of the forms:
100185029Spjd *
101185029Spjd * 	zinject
102185029Spjd * 	zinject <-a | -u pool>
103185029Spjd * 	zinject -c <id|all>
104185029Spjd * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105185029Spjd *	    [-r range] <object>
106185029Spjd * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107185029Spjd *
108185029Spjd * With no arguments, the command prints all currently registered injection
109185029Spjd * handlers, with their numeric identifiers.
110185029Spjd *
111185029Spjd * The '-c' option will clear the given handler, or all handlers if 'all' is
112185029Spjd * specified.
113185029Spjd *
114185029Spjd * The '-e' option takes a string describing the errno to simulate.  This must
115185029Spjd * be either 'io' or 'checksum'.  In most cases this will result in the same
116185029Spjd * behavior, but RAID-Z will produce a different set of ereports for this
117185029Spjd * situation.
118185029Spjd *
119185029Spjd * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
120185029Spjd * specified, then the ARC cache is flushed appropriately.  If '-u' is
121185029Spjd * specified, then the underlying SPA is unloaded.  Either of these flags can be
122185029Spjd * specified independently of any other handlers.  The '-m' flag automatically
123185029Spjd * does an unmount and remount of the underlying dataset to aid in flushing the
124185029Spjd * cache.
125185029Spjd *
126185029Spjd * The '-f' flag controls the frequency of errors injected, expressed as an
127185029Spjd * integer percentage between 1 and 100.  The default is 100.
128185029Spjd *
129185029Spjd * The '-t' form is responsible for actually injecting the handler into the
130185029Spjd * framework.  It takes the arguments described above, translates them to the
131185029Spjd * internal tuple using libzpool, and then issues an ioctl() to register the
132185029Spjd * handler.
133185029Spjd *
134185029Spjd * The final form can target a specific bookmark, regardless of whether a
135185029Spjd * human-readable interface has been designed.  It allows developers to specify
136185029Spjd * a particular block by number.
137185029Spjd */
138185029Spjd
139185029Spjd#include <errno.h>
140185029Spjd#include <fcntl.h>
141185029Spjd#include <stdio.h>
142185029Spjd#include <stdlib.h>
143185029Spjd#include <strings.h>
144185029Spjd#include <unistd.h>
145185029Spjd
146185029Spjd#include <sys/fs/zfs.h>
147186568Srwatson#include <sys/param.h>
148185029Spjd#include <sys/mount.h>
149185029Spjd
150185029Spjd#include <libzfs.h>
151185029Spjd
152185029Spjd#undef verify	/* both libzfs.h and zfs_context.h want to define this */
153185029Spjd
154185029Spjd#include "zinject.h"
155185029Spjd
156185029Spjdlibzfs_handle_t *g_zfs;
157185029Spjdint zfs_fd;
158185029Spjd
159185029Spjd#ifndef ECKSUM
160185029Spjd#define	ECKSUM	EBADE
161185029Spjd#endif
162185029Spjd
163185029Spjdstatic const char *errtable[TYPE_INVAL] = {
164185029Spjd	"data",
165185029Spjd	"dnode",
166185029Spjd	"mos",
167185029Spjd	"mosdir",
168185029Spjd	"metaslab",
169185029Spjd	"config",
170219089Spjd	"bpobj",
171185029Spjd	"spacemap",
172185029Spjd	"errlog",
173185029Spjd	"uber",
174219089Spjd	"nvlist",
175219089Spjd	"pad1",
176219089Spjd	"pad2"
177185029Spjd};
178185029Spjd
179185029Spjdstatic err_type_t
180185029Spjdname_to_type(const char *arg)
181185029Spjd{
182185029Spjd	int i;
183185029Spjd	for (i = 0; i < TYPE_INVAL; i++)
184185029Spjd		if (strcmp(errtable[i], arg) == 0)
185185029Spjd			return (i);
186185029Spjd
187185029Spjd	return (TYPE_INVAL);
188185029Spjd}
189185029Spjd
190185029Spjdstatic const char *
191185029Spjdtype_to_name(uint64_t type)
192185029Spjd{
193185029Spjd	switch (type) {
194185029Spjd	case DMU_OT_OBJECT_DIRECTORY:
195185029Spjd		return ("mosdir");
196185029Spjd	case DMU_OT_OBJECT_ARRAY:
197185029Spjd		return ("metaslab");
198185029Spjd	case DMU_OT_PACKED_NVLIST:
199185029Spjd		return ("config");
200219089Spjd	case DMU_OT_BPOBJ:
201219089Spjd		return ("bpobj");
202185029Spjd	case DMU_OT_SPACE_MAP:
203185029Spjd		return ("spacemap");
204185029Spjd	case DMU_OT_ERROR_LOG:
205185029Spjd		return ("errlog");
206185029Spjd	default:
207185029Spjd		return ("-");
208185029Spjd	}
209185029Spjd}
210185029Spjd
211185029Spjd
/*
 * Print the usage message to stdout.
 *
 * The text is emitted with a single printf() call built from adjacent
 * string literals; each literal corresponds to one line of output, except
 * for the '-l' description, which is split across two literals.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t    [-T <read|write|free|claim|all>] pool\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> pool\n"
	    "\t\tPerform a specific action on a particular device\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
	    "\t\t\t'checksum'.  Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
296185029Spjd
297185029Spjdstatic int
298185029Spjditer_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
299185029Spjd    void *data)
300185029Spjd{
301239774Smm	zfs_cmd_t zc = { 0 };
302185029Spjd	int ret;
303185029Spjd
304185029Spjd	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
305185029Spjd		if ((ret = func((int)zc.zc_guid, zc.zc_name,
306185029Spjd		    &zc.zc_inject_record, data)) != 0)
307185029Spjd			return (ret);
308185029Spjd
309219089Spjd	if (errno != ENOENT) {
310219089Spjd		(void) fprintf(stderr, "Unable to list handlers: %s\n",
311219089Spjd		    strerror(errno));
312219089Spjd		return (-1);
313219089Spjd	}
314219089Spjd
315185029Spjd	return (0);
316185029Spjd}
317185029Spjd
318185029Spjdstatic int
319185029Spjdprint_data_handler(int id, const char *pool, zinject_record_t *record,
320185029Spjd    void *data)
321185029Spjd{
322185029Spjd	int *count = data;
323185029Spjd
324219089Spjd	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
325185029Spjd		return (0);
326185029Spjd
327185029Spjd	if (*count == 0) {
328185029Spjd		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
329185029Spjd		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
330185029Spjd		(void) printf("---  ---------------  ------  "
331185029Spjd		    "------  --------  ---  ---------------\n");
332185029Spjd	}
333185029Spjd
334185029Spjd	*count += 1;
335185029Spjd
336185029Spjd	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
337185029Spjd	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
338185029Spjd	    type_to_name(record->zi_type), record->zi_level);
339185029Spjd
340185029Spjd	if (record->zi_start == 0 &&
341185029Spjd	    record->zi_end == -1ULL)
342185029Spjd		(void) printf("all\n");
343185029Spjd	else
344185029Spjd		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
345185029Spjd		    (u_longlong_t)record->zi_end);
346185029Spjd
347185029Spjd	return (0);
348185029Spjd}
349185029Spjd
350185029Spjdstatic int
351185029Spjdprint_device_handler(int id, const char *pool, zinject_record_t *record,
352185029Spjd    void *data)
353185029Spjd{
354185029Spjd	int *count = data;
355185029Spjd
356219089Spjd	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
357185029Spjd		return (0);
358185029Spjd
359185029Spjd	if (*count == 0) {
360185029Spjd		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
361185029Spjd		(void) printf("---  ---------------  ----------------\n");
362185029Spjd	}
363185029Spjd
364185029Spjd	*count += 1;
365185029Spjd
366185029Spjd	(void) printf("%3d  %-15s  %llx\n", id, pool,
367185029Spjd	    (u_longlong_t)record->zi_guid);
368185029Spjd
369185029Spjd	return (0);
370185029Spjd}
371185029Spjd
372219089Spjdstatic int
373219089Spjdprint_panic_handler(int id, const char *pool, zinject_record_t *record,
374219089Spjd    void *data)
375219089Spjd{
376219089Spjd	int *count = data;
377219089Spjd
378219089Spjd	if (record->zi_func[0] == '\0')
379219089Spjd		return (0);
380219089Spjd
381219089Spjd	if (*count == 0) {
382219089Spjd		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
383219089Spjd		(void) printf("---  ---------------  ----------------\n");
384219089Spjd	}
385219089Spjd
386219089Spjd	*count += 1;
387219089Spjd
388219089Spjd	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
389219089Spjd
390219089Spjd	return (0);
391219089Spjd}
392219089Spjd
393185029Spjd/*
394185029Spjd * Print all registered error handlers.  Returns the number of handlers
395185029Spjd * registered.
396185029Spjd */
397185029Spjdstatic int
398185029Spjdprint_all_handlers(void)
399185029Spjd{
400219089Spjd	int count = 0, total = 0;
401185029Spjd
402185029Spjd	(void) iter_handlers(print_device_handler, &count);
403219089Spjd	if (count > 0) {
404219089Spjd		total += count;
405219089Spjd		(void) printf("\n");
406219089Spjd		count = 0;
407219089Spjd	}
408219089Spjd
409185029Spjd	(void) iter_handlers(print_data_handler, &count);
410219089Spjd	if (count > 0) {
411219089Spjd		total += count;
412219089Spjd		(void) printf("\n");
413219089Spjd		count = 0;
414219089Spjd	}
415185029Spjd
416219089Spjd	(void) iter_handlers(print_panic_handler, &count);
417219089Spjd
418219089Spjd	return (count + total);
419185029Spjd}
420185029Spjd
421185029Spjd/* ARGSUSED */
422185029Spjdstatic int
423185029Spjdcancel_one_handler(int id, const char *pool, zinject_record_t *record,
424185029Spjd    void *data)
425185029Spjd{
426239774Smm	zfs_cmd_t zc = { 0 };
427185029Spjd
428185029Spjd	zc.zc_guid = (uint64_t)id;
429185029Spjd
430185029Spjd	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
431185029Spjd		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
432185029Spjd		    id, strerror(errno));
433185029Spjd		return (1);
434185029Spjd	}
435185029Spjd
436185029Spjd	return (0);
437185029Spjd}
438185029Spjd
439185029Spjd/*
440185029Spjd * Remove all fault injection handlers.
441185029Spjd */
442185029Spjdstatic int
443185029Spjdcancel_all_handlers(void)
444185029Spjd{
445185029Spjd	int ret = iter_handlers(cancel_one_handler, NULL);
446185029Spjd
447219089Spjd	if (ret == 0)
448219089Spjd		(void) printf("removed all registered handlers\n");
449185029Spjd
450185029Spjd	return (ret);
451185029Spjd}
452185029Spjd
453185029Spjd/*
454185029Spjd * Remove a specific fault injection handler.
455185029Spjd */
456185029Spjdstatic int
457185029Spjdcancel_handler(int id)
458185029Spjd{
459239774Smm	zfs_cmd_t zc = { 0 };
460185029Spjd
461185029Spjd	zc.zc_guid = (uint64_t)id;
462185029Spjd
463185029Spjd	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
464185029Spjd		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
465185029Spjd		    id, strerror(errno));
466185029Spjd		return (1);
467185029Spjd	}
468185029Spjd
469185029Spjd	(void) printf("removed handler %d\n", id);
470185029Spjd
471185029Spjd	return (0);
472185029Spjd}
473185029Spjd
474185029Spjd/*
475185029Spjd * Register a new fault injection handler.
476185029Spjd */
477185029Spjdstatic int
478185029Spjdregister_handler(const char *pool, int flags, zinject_record_t *record,
479185029Spjd    int quiet)
480185029Spjd{
481239774Smm	zfs_cmd_t zc = { 0 };
482185029Spjd
483185029Spjd	(void) strcpy(zc.zc_name, pool);
484185029Spjd	zc.zc_inject_record = *record;
485185029Spjd	zc.zc_guid = flags;
486185029Spjd
487185029Spjd	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
488185029Spjd		(void) fprintf(stderr, "failed to add handler: %s\n",
489185029Spjd		    strerror(errno));
490185029Spjd		return (1);
491185029Spjd	}
492185029Spjd
493185029Spjd	if (flags & ZINJECT_NULL)
494185029Spjd		return (0);
495185029Spjd
496185029Spjd	if (quiet) {
497185029Spjd		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
498185029Spjd	} else {
499185029Spjd		(void) printf("Added handler %llu with the following "
500185029Spjd		    "properties:\n", (u_longlong_t)zc.zc_guid);
501185029Spjd		(void) printf("  pool: %s\n", pool);
502185029Spjd		if (record->zi_guid) {
503185029Spjd			(void) printf("  vdev: %llx\n",
504185029Spjd			    (u_longlong_t)record->zi_guid);
505219089Spjd		} else if (record->zi_func[0] != '\0') {
506219089Spjd			(void) printf("  panic function: %s\n",
507219089Spjd			    record->zi_func);
508219089Spjd		} else if (record->zi_duration > 0) {
509219089Spjd			(void) printf(" time: %lld seconds\n",
510219089Spjd			    (u_longlong_t)record->zi_duration);
511219089Spjd		} else if (record->zi_duration < 0) {
512219089Spjd			(void) printf(" txgs: %lld \n",
513219089Spjd			    (u_longlong_t)-record->zi_duration);
514185029Spjd		} else {
515185029Spjd			(void) printf("objset: %llu\n",
516185029Spjd			    (u_longlong_t)record->zi_objset);
517185029Spjd			(void) printf("object: %llu\n",
518185029Spjd			    (u_longlong_t)record->zi_object);
519185029Spjd			(void) printf("  type: %llu\n",
520185029Spjd			    (u_longlong_t)record->zi_type);
521185029Spjd			(void) printf(" level: %d\n", record->zi_level);
522185029Spjd			if (record->zi_start == 0 &&
523185029Spjd			    record->zi_end == -1ULL)
524185029Spjd				(void) printf(" range: all\n");
525185029Spjd			else
526185029Spjd				(void) printf(" range: [%llu, %llu)\n",
527185029Spjd				    (u_longlong_t)record->zi_start,
528185029Spjd				    (u_longlong_t)record->zi_end);
529185029Spjd		}
530185029Spjd	}
531185029Spjd
532185029Spjd	return (0);
533185029Spjd}
534185029Spjd
535185029Spjdint
536219089Spjdperform_action(const char *pool, zinject_record_t *record, int cmd)
537219089Spjd{
538239774Smm	zfs_cmd_t zc = { 0 };
539219089Spjd
540219089Spjd	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
541219089Spjd	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
542219089Spjd	zc.zc_guid = record->zi_guid;
543219089Spjd	zc.zc_cookie = cmd;
544219089Spjd
545219089Spjd	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
546219089Spjd		return (0);
547219089Spjd
548219089Spjd	return (1);
549219089Spjd}
550219089Spjd
551219089Spjdint
552185029Spjdmain(int argc, char **argv)
553185029Spjd{
554185029Spjd	int c;
555185029Spjd	char *range = NULL;
556185029Spjd	char *cancel = NULL;
557185029Spjd	char *end;
558185029Spjd	char *raw = NULL;
559185029Spjd	char *device = NULL;
560185029Spjd	int level = 0;
561185029Spjd	int quiet = 0;
562185029Spjd	int error = 0;
563185029Spjd	int domount = 0;
564219089Spjd	int io_type = ZIO_TYPES;
565219089Spjd	int action = VDEV_STATE_UNKNOWN;
566185029Spjd	err_type_t type = TYPE_INVAL;
567185029Spjd	err_type_t label = TYPE_INVAL;
568185029Spjd	zinject_record_t record = { 0 };
569185029Spjd	char pool[MAXNAMELEN];
570185029Spjd	char dataset[MAXNAMELEN];
571185029Spjd	zfs_handle_t *zhp;
572219089Spjd	int nowrites = 0;
573219089Spjd	int dur_txg = 0;
574219089Spjd	int dur_secs = 0;
575185029Spjd	int ret;
576185029Spjd	int flags = 0;
577185029Spjd
578185029Spjd	if ((g_zfs = libzfs_init()) == NULL) {
579185029Spjd		(void) fprintf(stderr, "internal error: failed to "
580185029Spjd		    "initialize ZFS library\n");
581185029Spjd		return (1);
582185029Spjd	}
583185029Spjd
584185029Spjd	libzfs_print_on_error(g_zfs, B_TRUE);
585185029Spjd
586185029Spjd	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
587185029Spjd		(void) fprintf(stderr, "failed to open ZFS device\n");
588185029Spjd		return (1);
589185029Spjd	}
590185029Spjd
591185029Spjd	if (argc == 1) {
592185029Spjd		/*
593185029Spjd		 * No arguments.  Print the available handlers.  If there are no
594185029Spjd		 * available handlers, direct the user to '-h' for help
595185029Spjd		 * information.
596185029Spjd		 */
597185029Spjd		if (print_all_handlers() == 0) {
598185029Spjd			(void) printf("No handlers registered.\n");
599185029Spjd			(void) printf("Run 'zinject -h' for usage "
600185029Spjd			    "information.\n");
601185029Spjd		}
602185029Spjd
603185029Spjd		return (0);
604185029Spjd	}
605185029Spjd
606219089Spjd	while ((c = getopt(argc, argv,
607247265Smm	    ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
608185029Spjd		switch (c) {
609185029Spjd		case 'a':
610185029Spjd			flags |= ZINJECT_FLUSH_ARC;
611185029Spjd			break;
612219089Spjd		case 'A':
613219089Spjd			if (strcasecmp(optarg, "degrade") == 0) {
614219089Spjd				action = VDEV_STATE_DEGRADED;
615219089Spjd			} else if (strcasecmp(optarg, "fault") == 0) {
616219089Spjd				action = VDEV_STATE_FAULTED;
617219089Spjd			} else {
618219089Spjd				(void) fprintf(stderr, "invalid action '%s': "
619219089Spjd				    "must be 'degrade' or 'fault'\n", optarg);
620219089Spjd				usage();
621219089Spjd				return (1);
622219089Spjd			}
623219089Spjd			break;
624185029Spjd		case 'b':
625185029Spjd			raw = optarg;
626185029Spjd			break;
627185029Spjd		case 'c':
628185029Spjd			cancel = optarg;
629185029Spjd			break;
630185029Spjd		case 'd':
631185029Spjd			device = optarg;
632185029Spjd			break;
633247265Smm		case 'D':
634247265Smm			record.zi_timer = strtoull(optarg, &end, 10);
635247265Smm			if (errno != 0 || *end != '\0') {
636247265Smm				(void) fprintf(stderr, "invalid i/o delay "
637247265Smm				    "value: '%s'\n", optarg);
638247265Smm				usage();
639247265Smm				return (1);
640247265Smm			}
641247265Smm			break;
642185029Spjd		case 'e':
643185029Spjd			if (strcasecmp(optarg, "io") == 0) {
644185029Spjd				error = EIO;
645185029Spjd			} else if (strcasecmp(optarg, "checksum") == 0) {
646185029Spjd				error = ECKSUM;
647185029Spjd			} else if (strcasecmp(optarg, "nxio") == 0) {
648185029Spjd				error = ENXIO;
649219089Spjd			} else if (strcasecmp(optarg, "dtl") == 0) {
650219089Spjd				error = ECHILD;
651185029Spjd			} else {
652185029Spjd				(void) fprintf(stderr, "invalid error type "
653185029Spjd				    "'%s': must be 'io', 'checksum' or "
654185029Spjd				    "'nxio'\n", optarg);
655185029Spjd				usage();
656185029Spjd				return (1);
657185029Spjd			}
658185029Spjd			break;
659185029Spjd		case 'f':
660185029Spjd			record.zi_freq = atoi(optarg);
661185029Spjd			if (record.zi_freq < 1 || record.zi_freq > 100) {
662185029Spjd				(void) fprintf(stderr, "frequency range must "
663185029Spjd				    "be in the range (0, 100]\n");
664185029Spjd				return (1);
665185029Spjd			}
666185029Spjd			break;
667213198Smm		case 'F':
668213198Smm			record.zi_failfast = B_TRUE;
669213198Smm			break;
670219089Spjd		case 'g':
671219089Spjd			dur_txg = 1;
672219089Spjd			record.zi_duration = (int)strtol(optarg, &end, 10);
673219089Spjd			if (record.zi_duration <= 0 || *end != '\0') {
674219089Spjd				(void) fprintf(stderr, "invalid duration '%s': "
675219089Spjd				    "must be a positive integer\n", optarg);
676219089Spjd				usage();
677219089Spjd				return (1);
678219089Spjd			}
679219089Spjd			/* store duration of txgs as its negative */
680219089Spjd			record.zi_duration *= -1;
681219089Spjd			break;
682185029Spjd		case 'h':
683185029Spjd			usage();
684185029Spjd			return (0);
685219089Spjd		case 'I':
686219089Spjd			/* default duration, if one hasn't yet been defined */
687219089Spjd			nowrites = 1;
688219089Spjd			if (dur_secs == 0 && dur_txg == 0)
689219089Spjd				record.zi_duration = 30;
690219089Spjd			break;
691185029Spjd		case 'l':
692185029Spjd			level = (int)strtol(optarg, &end, 10);
693185029Spjd			if (*end != '\0') {
694185029Spjd				(void) fprintf(stderr, "invalid level '%s': "
695185029Spjd				    "must be an integer\n", optarg);
696185029Spjd				usage();
697185029Spjd				return (1);
698185029Spjd			}
699185029Spjd			break;
700185029Spjd		case 'm':
701185029Spjd			domount = 1;
702185029Spjd			break;
703219089Spjd		case 'p':
704219089Spjd			(void) strlcpy(record.zi_func, optarg,
705219089Spjd			    sizeof (record.zi_func));
706247265Smm			record.zi_cmd = ZINJECT_PANIC;
707219089Spjd			break;
708185029Spjd		case 'q':
709185029Spjd			quiet = 1;
710185029Spjd			break;
711185029Spjd		case 'r':
712185029Spjd			range = optarg;
713185029Spjd			break;
714219089Spjd		case 's':
715219089Spjd			dur_secs = 1;
716219089Spjd			record.zi_duration = (int)strtol(optarg, &end, 10);
717219089Spjd			if (record.zi_duration <= 0 || *end != '\0') {
718219089Spjd				(void) fprintf(stderr, "invalid duration '%s': "
719219089Spjd				    "must be a positive integer\n", optarg);
720219089Spjd				usage();
721219089Spjd				return (1);
722219089Spjd			}
723219089Spjd			break;
724219089Spjd		case 'T':
725219089Spjd			if (strcasecmp(optarg, "read") == 0) {
726219089Spjd				io_type = ZIO_TYPE_READ;
727219089Spjd			} else if (strcasecmp(optarg, "write") == 0) {
728219089Spjd				io_type = ZIO_TYPE_WRITE;
729219089Spjd			} else if (strcasecmp(optarg, "free") == 0) {
730219089Spjd				io_type = ZIO_TYPE_FREE;
731219089Spjd			} else if (strcasecmp(optarg, "claim") == 0) {
732219089Spjd				io_type = ZIO_TYPE_CLAIM;
733219089Spjd			} else if (strcasecmp(optarg, "all") == 0) {
734219089Spjd				io_type = ZIO_TYPES;
735219089Spjd			} else {
736219089Spjd				(void) fprintf(stderr, "invalid I/O type "
737219089Spjd				    "'%s': must be 'read', 'write', 'free', "
738219089Spjd				    "'claim' or 'all'\n", optarg);
739219089Spjd				usage();
740219089Spjd				return (1);
741219089Spjd			}
742219089Spjd			break;
743185029Spjd		case 't':
744185029Spjd			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
745185029Spjd			    !MOS_TYPE(type)) {
746185029Spjd				(void) fprintf(stderr, "invalid type '%s'\n",
747185029Spjd				    optarg);
748185029Spjd				usage();
749185029Spjd				return (1);
750185029Spjd			}
751185029Spjd			break;
752185029Spjd		case 'u':
753185029Spjd			flags |= ZINJECT_UNLOAD_SPA;
754185029Spjd			break;
755185029Spjd		case 'L':
756185029Spjd			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
757185029Spjd			    !LABEL_TYPE(type)) {
758185029Spjd				(void) fprintf(stderr, "invalid label type "
759185029Spjd				    "'%s'\n", optarg);
760185029Spjd				usage();
761185029Spjd				return (1);
762185029Spjd			}
763185029Spjd			break;
764185029Spjd		case ':':
765185029Spjd			(void) fprintf(stderr, "option -%c requires an "
766185029Spjd			    "operand\n", optopt);
767185029Spjd			usage();
768185029Spjd			return (1);
769185029Spjd		case '?':
770185029Spjd			(void) fprintf(stderr, "invalid option '%c'\n",
771185029Spjd			    optopt);
772185029Spjd			usage();
773185029Spjd			return (2);
774185029Spjd		}
775185029Spjd	}
776185029Spjd
777185029Spjd	argc -= optind;
778185029Spjd	argv += optind;
779185029Spjd
780247265Smm	if (record.zi_duration != 0)
781247265Smm		record.zi_cmd = ZINJECT_IGNORED_WRITES;
782247265Smm
783185029Spjd	if (cancel != NULL) {
784185029Spjd		/*
785185029Spjd		 * '-c' is invalid with any other options.
786185029Spjd		 */
787185029Spjd		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
788247265Smm		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
789185029Spjd			(void) fprintf(stderr, "cancel (-c) incompatible with "
790185029Spjd			    "any other options\n");
791185029Spjd			usage();
792185029Spjd			return (2);
793185029Spjd		}
794185029Spjd		if (argc != 0) {
795185029Spjd			(void) fprintf(stderr, "extraneous argument to '-c'\n");
796185029Spjd			usage();
797185029Spjd			return (2);
798185029Spjd		}
799185029Spjd
800185029Spjd		if (strcmp(cancel, "all") == 0) {
801185029Spjd			return (cancel_all_handlers());
802185029Spjd		} else {
803185029Spjd			int id = (int)strtol(cancel, &end, 10);
804185029Spjd			if (*end != '\0') {
805185029Spjd				(void) fprintf(stderr, "invalid handle id '%s':"
806185029Spjd				    " must be an integer or 'all'\n", cancel);
807185029Spjd				usage();
808185029Spjd				return (1);
809185029Spjd			}
810185029Spjd			return (cancel_handler(id));
811185029Spjd		}
812185029Spjd	}
813185029Spjd
814185029Spjd	if (device != NULL) {
815185029Spjd		/*
816185029Spjd		 * Device (-d) injection uses a completely different mechanism
817185029Spjd		 * for doing injection, so handle it separately here.
818185029Spjd		 */
819185029Spjd		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
820247265Smm		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
821185029Spjd			(void) fprintf(stderr, "device (-d) incompatible with "
822185029Spjd			    "data error injection\n");
823185029Spjd			usage();
824185029Spjd			return (2);
825185029Spjd		}
826185029Spjd
827185029Spjd		if (argc != 1) {
828185029Spjd			(void) fprintf(stderr, "device (-d) injection requires "
829185029Spjd			    "a single pool name\n");
830185029Spjd			usage();
831185029Spjd			return (2);
832185029Spjd		}
833185029Spjd
834185029Spjd		(void) strcpy(pool, argv[0]);
835185029Spjd		dataset[0] = '\0';
836185029Spjd
837185029Spjd		if (error == ECKSUM) {
838185029Spjd			(void) fprintf(stderr, "device error type must be "
839185029Spjd			    "'io' or 'nxio'\n");
840185029Spjd			return (1);
841185029Spjd		}
842185029Spjd
843219089Spjd		record.zi_iotype = io_type;
844185029Spjd		if (translate_device(pool, device, label, &record) != 0)
845185029Spjd			return (1);
846185029Spjd		if (!error)
847185029Spjd			error = ENXIO;
848219089Spjd
849219089Spjd		if (action != VDEV_STATE_UNKNOWN)
850219089Spjd			return (perform_action(pool, &record, action));
851219089Spjd
852185029Spjd	} else if (raw != NULL) {
853219089Spjd		if (range != NULL || type != TYPE_INVAL || level != 0 ||
854247265Smm		    record.zi_cmd != ZINJECT_UNINITIALIZED) {
855185029Spjd			(void) fprintf(stderr, "raw (-b) format with "
856185029Spjd			    "any other options\n");
857185029Spjd			usage();
858185029Spjd			return (2);
859185029Spjd		}
860185029Spjd
861185029Spjd		if (argc != 1) {
862185029Spjd			(void) fprintf(stderr, "raw (-b) format expects a "
863185029Spjd			    "single pool name\n");
864185029Spjd			usage();
865185029Spjd			return (2);
866185029Spjd		}
867185029Spjd
868185029Spjd		(void) strcpy(pool, argv[0]);
869185029Spjd		dataset[0] = '\0';
870185029Spjd
871185029Spjd		if (error == ENXIO) {
872185029Spjd			(void) fprintf(stderr, "data error type must be "
873185029Spjd			    "'checksum' or 'io'\n");
874185029Spjd			return (1);
875185029Spjd		}
876185029Spjd
877247265Smm		record.zi_cmd = ZINJECT_DATA_FAULT;
878185029Spjd		if (translate_raw(raw, &record) != 0)
879185029Spjd			return (1);
880185029Spjd		if (!error)
881185029Spjd			error = EIO;
882247265Smm	} else if (record.zi_cmd == ZINJECT_PANIC) {
883219089Spjd		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
884247265Smm		    level != 0 || device != NULL) {
885219089Spjd			(void) fprintf(stderr, "panic (-p) incompatible with "
886219089Spjd			    "other options\n");
887219089Spjd			usage();
888219089Spjd			return (2);
889219089Spjd		}
890219089Spjd
891219089Spjd		if (argc < 1 || argc > 2) {
892219089Spjd			(void) fprintf(stderr, "panic (-p) injection requires "
893219089Spjd			    "a single pool name and an optional id\n");
894219089Spjd			usage();
895219089Spjd			return (2);
896219089Spjd		}
897219089Spjd
898219089Spjd		(void) strcpy(pool, argv[0]);
899219089Spjd		if (argv[1] != NULL)
900219089Spjd			record.zi_type = atoi(argv[1]);
901219089Spjd		dataset[0] = '\0';
902247265Smm	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
903219089Spjd		if (nowrites == 0) {
904219089Spjd			(void) fprintf(stderr, "-s or -g meaningless "
905219089Spjd			    "without -I (ignore writes)\n");
906219089Spjd			usage();
907219089Spjd			return (2);
908219089Spjd		} else if (dur_secs && dur_txg) {
909219089Spjd			(void) fprintf(stderr, "choose a duration either "
910219089Spjd			    "in seconds (-s) or a number of txgs (-g) "
911219089Spjd			    "but not both\n");
912219089Spjd			usage();
913219089Spjd			return (2);
914219089Spjd		} else if (argc != 1) {
915219089Spjd			(void) fprintf(stderr, "ignore writes (-I) "
916219089Spjd			    "injection requires a single pool name\n");
917219089Spjd			usage();
918219089Spjd			return (2);
919219089Spjd		}
920219089Spjd
921219089Spjd		(void) strcpy(pool, argv[0]);
922219089Spjd		dataset[0] = '\0';
923185029Spjd	} else if (type == TYPE_INVAL) {
924185029Spjd		if (flags == 0) {
925185029Spjd			(void) fprintf(stderr, "at least one of '-b', '-d', "
926219089Spjd			    "'-t', '-a', '-p', '-I' or '-u' "
927219089Spjd			    "must be specified\n");
928185029Spjd			usage();
929185029Spjd			return (2);
930185029Spjd		}
931185029Spjd
932185029Spjd		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
933185029Spjd			(void) strcpy(pool, argv[0]);
934185029Spjd			dataset[0] = '\0';
935185029Spjd		} else if (argc != 0) {
936185029Spjd			(void) fprintf(stderr, "extraneous argument for "
937185029Spjd			    "'-f'\n");
938185029Spjd			usage();
939185029Spjd			return (2);
940185029Spjd		}
941185029Spjd
942185029Spjd		flags |= ZINJECT_NULL;
943185029Spjd	} else {
944185029Spjd		if (argc != 1) {
945185029Spjd			(void) fprintf(stderr, "missing object\n");
946185029Spjd			usage();
947185029Spjd			return (2);
948185029Spjd		}
949185029Spjd
950185029Spjd		if (error == ENXIO) {
951185029Spjd			(void) fprintf(stderr, "data error type must be "
952185029Spjd			    "'checksum' or 'io'\n");
953185029Spjd			return (1);
954185029Spjd		}
955185029Spjd
956247265Smm		record.zi_cmd = ZINJECT_DATA_FAULT;
957185029Spjd		if (translate_record(type, argv[0], range, level, &record, pool,
958185029Spjd		    dataset) != 0)
959185029Spjd			return (1);
960185029Spjd		if (!error)
961185029Spjd			error = EIO;
962185029Spjd	}
963185029Spjd
964185029Spjd	/*
965185029Spjd	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
966185029Spjd	 * unload the pool, so that we trigger spa-wide reopen of metadata next
967185029Spjd	 * time we access the pool.
968185029Spjd	 */
969185029Spjd	if (dataset[0] != '\0' && domount) {
970185029Spjd		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
971185029Spjd			return (1);
972185029Spjd
973185029Spjd		if (zfs_unmount(zhp, NULL, 0) != 0)
974185029Spjd			return (1);
975185029Spjd	}
976185029Spjd
977185029Spjd	record.zi_error = error;
978185029Spjd
979185029Spjd	ret = register_handler(pool, flags, &record, quiet);
980185029Spjd
981185029Spjd	if (dataset[0] != '\0' && domount)
982185029Spjd		ret = (zfs_mount(zhp, NULL, 0) != 0);
983185029Spjd
984185029Spjd	libzfs_fini(g_zfs);
985185029Spjd
986185029Spjd	return (ret);
987185029Spjd}
988