1185029Spjd/* 2185029Spjd * CDDL HEADER START 3185029Spjd * 4185029Spjd * The contents of this file are subject to the terms of the 5185029Spjd * Common Development and Distribution License (the "License"). 6185029Spjd * You may not use this file except in compliance with the License. 7185029Spjd * 8185029Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9185029Spjd * or http://www.opensolaris.org/os/licensing. 10185029Spjd * See the License for the specific language governing permissions 11185029Spjd * and limitations under the License. 12185029Spjd * 13185029Spjd * When distributing Covered Code, include this CDDL HEADER in each 14185029Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15185029Spjd * If applicable, add the following below this CDDL HEADER, with the 16185029Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17185029Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18185029Spjd * 19185029Spjd * CDDL HEADER END 20185029Spjd */ 21185029Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23247265Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24185029Spjd */ 25185029Spjd 26185029Spjd/* 27185029Spjd * ZFS Fault Injector 28185029Spjd * 29185029Spjd * This userland component takes a set of options and uses libzpool to translate 30185029Spjd * from a user-visible object type and name to an internal representation. 31185029Spjd * There are two basic types of faults: device faults and data faults. 32185029Spjd * 33185029Spjd * 34185029Spjd * DEVICE FAULTS 35185029Spjd * 36185029Spjd * Errors can be injected into a particular vdev using the '-d' option. This 37185029Spjd * option takes a path or vdev GUID to uniquely identify the device within a 38185029Spjd * pool. There are two types of errors that can be injected, EIO and ENXIO, 39185029Spjd * that can be controlled through the '-e' option. 
The default is ENXIO. For 40185029Spjd * EIO failures, any attempt to read data from the device will return EIO, but 41185029Spjd * subsequent attempts to reopen the device will succeed. For ENXIO failures, 42185029Spjd * any attempt to read from the device will return EIO, and any attempt to 43185029Spjd * reopen the device will return ENXIO. 44185029Spjd * For label faults, the -L option must be specified. This allows faults 45219089Spjd * to be injected into either the nvlist, uberblock, pad1, or pad2 region 46219089Spjd * of all the labels for the specified device. 47185029Spjd * 48185029Spjd * This form of the command looks like: 49185029Spjd * 50219089Spjd * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool 51185029Spjd * 52185029Spjd * 53185029Spjd * DATA FAULTS 54185029Spjd * 55185029Spjd * We begin with a tuple of the form: 56185029Spjd * 57185029Spjd * <type,level,range,object> 58185029Spjd * 59185029Spjd * type A string describing the type of data to target. Each type 60185029Spjd * implicitly describes how to interpret 'object'. Currently, 61185029Spjd * the following values are supported: 62185029Spjd * 63185029Spjd * data User data for a file 64185029Spjd * dnode Dnode for a file or directory 65185029Spjd * 66185029Spjd * The following MOS objects are special. Instead of injecting 67185029Spjd * errors on a particular object or blkid, we inject errors across 68185029Spjd * all objects of the given type. 69185029Spjd * 70185029Spjd * mos Any data in the MOS 71185029Spjd * mosdir object directory 72185029Spjd * config pool configuration 73219089Spjd * bpobj blkptr list 74185029Spjd * spacemap spacemap 75185029Spjd * metaslab metaslab 76185029Spjd * errlog persistent error log 77185029Spjd * 78185029Spjd * level Object level. Defaults to '0', not applicable to all types. If 79185029Spjd * a range is given, this corresponds to the indirect block 80185029Spjd * corresponding to the specific range. 
81185029Spjd * 82185029Spjd * range A numerical range [start,end) within the object. Defaults to 83185029Spjd * the full size of the file. 84185029Spjd * 85185029Spjd * object A string describing the logical location of the object. For 86185029Spjd * files and directories (currently the only supported types), 87185029Spjd * this is the path of the object on disk. 88185029Spjd * 89185029Spjd * This is translated, via libzpool, into the following internal representation: 90185029Spjd * 91185029Spjd * <type,objset,object,level,range> 92185029Spjd * 93185029Spjd * These types should be self-explanatory. This tuple is then passed to the 94185029Spjd * kernel via a special ioctl() to initiate fault injection for the given 95185029Spjd * object. Note that 'type' is not strictly necessary for fault injection, but 96185029Spjd * is used when translating existing faults into a human-readable string. 97185029Spjd * 98185029Spjd * 99185029Spjd * The command itself takes one of the forms: 100185029Spjd * 101185029Spjd * zinject 102185029Spjd * zinject <-a | -u pool> 103185029Spjd * zinject -c <id|all> 104185029Spjd * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] 105185029Spjd * [-r range] <object> 106185029Spjd * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool 107185029Spjd * 108185029Spjd * With no arguments, the command prints all currently registered injection 109185029Spjd * handlers, with their numeric identifiers. 110185029Spjd * 111185029Spjd * The '-c' option will clear the given handler, or all handlers if 'all' is 112185029Spjd * specified. 113185029Spjd * 114185029Spjd * The '-e' option takes a string describing the errno to simulate. This must 115185029Spjd * be either 'io' or 'checksum'. In most cases this will result in the same 116185029Spjd * behavior, but RAID-Z will produce a different set of ereports for this 117185029Spjd * situation. 
118185029Spjd * 119185029Spjd * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is 120185029Spjd * specified, then the ARC cache is flushed appropriately. If '-u' is 121185029Spjd * specified, then the underlying SPA is unloaded. Either of these flags can be 122185029Spjd * specified independently of any other handlers. The '-m' flag automatically 123185029Spjd * does an unmount and remount of the underlying dataset to aid in flushing the 124185029Spjd * cache. 125185029Spjd * 126185029Spjd * The '-f' flag controls the frequency of errors injected, expressed as an 127185029Spjd * integer percentage between 1 and 100. The default is 100. 128185029Spjd * 129185029Spjd * This form is responsible for actually injecting the handler into the 130185029Spjd * framework. It takes the arguments described above, translates them to the 131185029Spjd * internal tuple using libzpool, and then issues an ioctl() to register the 132185029Spjd * handler. 133185029Spjd * 134185029Spjd * The final form can target a specific bookmark, regardless of whether a 135185029Spjd * human-readable interface has been designed. It allows developers to specify 136185029Spjd * a particular block by number. 
137185029Spjd */ 138185029Spjd 139185029Spjd#include <errno.h> 140185029Spjd#include <fcntl.h> 141185029Spjd#include <stdio.h> 142185029Spjd#include <stdlib.h> 143185029Spjd#include <strings.h> 144185029Spjd#include <unistd.h> 145185029Spjd 146185029Spjd#include <sys/fs/zfs.h> 147186568Srwatson#include <sys/param.h> 148185029Spjd#include <sys/mount.h> 149185029Spjd 150185029Spjd#include <libzfs.h> 151185029Spjd 152185029Spjd#undef verify /* both libzfs.h and zfs_context.h want to define this */ 153185029Spjd 154185029Spjd#include "zinject.h" 155185029Spjd 156185029Spjdlibzfs_handle_t *g_zfs; 157185029Spjdint zfs_fd; 158185029Spjd 159185029Spjd#ifndef ECKSUM 160185029Spjd#define ECKSUM EBADE 161185029Spjd#endif 162185029Spjd 163185029Spjdstatic const char *errtable[TYPE_INVAL] = { 164185029Spjd "data", 165185029Spjd "dnode", 166185029Spjd "mos", 167185029Spjd "mosdir", 168185029Spjd "metaslab", 169185029Spjd "config", 170219089Spjd "bpobj", 171185029Spjd "spacemap", 172185029Spjd "errlog", 173185029Spjd "uber", 174219089Spjd "nvlist", 175219089Spjd "pad1", 176219089Spjd "pad2" 177185029Spjd}; 178185029Spjd 179185029Spjdstatic err_type_t 180185029Spjdname_to_type(const char *arg) 181185029Spjd{ 182185029Spjd int i; 183185029Spjd for (i = 0; i < TYPE_INVAL; i++) 184185029Spjd if (strcmp(errtable[i], arg) == 0) 185185029Spjd return (i); 186185029Spjd 187185029Spjd return (TYPE_INVAL); 188185029Spjd} 189185029Spjd 190185029Spjdstatic const char * 191185029Spjdtype_to_name(uint64_t type) 192185029Spjd{ 193185029Spjd switch (type) { 194185029Spjd case DMU_OT_OBJECT_DIRECTORY: 195185029Spjd return ("mosdir"); 196185029Spjd case DMU_OT_OBJECT_ARRAY: 197185029Spjd return ("metaslab"); 198185029Spjd case DMU_OT_PACKED_NVLIST: 199185029Spjd return ("config"); 200219089Spjd case DMU_OT_BPOBJ: 201219089Spjd return ("bpobj"); 202185029Spjd case DMU_OT_SPACE_MAP: 203185029Spjd return ("spacemap"); 204185029Spjd case DMU_OT_ERROR_LOG: 205185029Spjd return ("errlog"); 206185029Spjd 
default: 207185029Spjd return ("-"); 208185029Spjd } 209185029Spjd} 210185029Spjd 211185029Spjd 212185029Spjd/* 213185029Spjd * Print usage message. 214185029Spjd */ 215185029Spjdvoid 216185029Spjdusage(void) 217185029Spjd{ 218185029Spjd (void) printf( 219185029Spjd "usage:\n" 220185029Spjd "\n" 221185029Spjd "\tzinject\n" 222185029Spjd "\n" 223185029Spjd "\t\tList all active injection records.\n" 224185029Spjd "\n" 225185029Spjd "\tzinject -c <id|all>\n" 226185029Spjd "\n" 227185029Spjd "\t\tClear the particular record (if given a numeric ID), or\n" 228185029Spjd "\t\tall records if 'all' is specificed.\n" 229185029Spjd "\n" 230219089Spjd "\tzinject -p <function name> pool\n" 231219089Spjd "\t\tInject a panic fault at the specified function. Only \n" 232219089Spjd "\t\tfunctions which call spa_vdev_config_exit(), or \n" 233219089Spjd "\t\tspa_vdev_exit() will trigger a panic.\n" 234219089Spjd "\n" 235219089Spjd "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n" 236219089Spjd "\t [-T <read|write|free|claim|all> pool\n" 237185029Spjd "\t\tInject a fault into a particular device or the device's\n" 238219089Spjd "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " 239219089Spjd "\t\t'pad1', or 'pad2'.\n" 240219089Spjd "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" 241185029Spjd "\n" 242219089Spjd "\tzinject -d device -A <degrade|fault> pool\n" 243219089Spjd "\t\tPerform a specific action on a particular device\n" 244219089Spjd "\n" 245219089Spjd "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" 246219089Spjd "\t\tCause the pool to stop writing blocks yet not\n" 247219089Spjd "\t\treport errors for a duration. Simulates buggy hardware\n" 248219089Spjd "\t\tthat fails to honor cache flush requests.\n" 249219089Spjd "\t\tDefault duration is 30 seconds. 
The machine is panicked\n" 250219089Spjd "\t\tat the end of the duration.\n" 251219089Spjd "\n" 252185029Spjd "\tzinject -b objset:object:level:blkid pool\n" 253185029Spjd "\n" 254185029Spjd "\t\tInject an error into pool 'pool' with the numeric bookmark\n" 255185029Spjd "\t\tspecified by the remaining tuple. Each number is in\n" 256185029Spjd "\t\thexidecimal, and only one block can be specified.\n" 257185029Spjd "\n" 258185029Spjd "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" 259185029Spjd "\t [-a] [-m] [-u] [-f freq] <object>\n" 260185029Spjd "\n" 261185029Spjd "\t\tInject an error into the object specified by the '-t' option\n" 262185029Spjd "\t\tand the object descriptor. The 'object' parameter is\n" 263185029Spjd "\t\tinterperted depending on the '-t' option.\n" 264185029Spjd "\n" 265185029Spjd "\t\t-q\tQuiet mode. Only print out the handler number added.\n" 266185029Spjd "\t\t-e\tInject a specific error. Must be either 'io' or\n" 267185029Spjd "\t\t\t'checksum'. Default is 'io'.\n" 268185029Spjd "\t\t-l\tInject error at a particular block level. Default is " 269185029Spjd "0.\n" 270185029Spjd "\t\t-m\tAutomatically remount underlying filesystem.\n" 271185029Spjd "\t\t-r\tInject error over a particular logical range of an\n" 272185029Spjd "\t\t\tobject. Will be translated to the appropriate blkid\n" 273185029Spjd "\t\t\trange according to the object's properties.\n" 274185029Spjd "\t\t-a\tFlush the ARC cache. Can be specified without any\n" 275185029Spjd "\t\t\tassociated object.\n" 276185029Spjd "\t\t-u\tUnload the associated pool. Can be specified with only\n" 277185029Spjd "\t\t\ta pool object.\n" 278185029Spjd "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" 279185029Spjd "\t\t\ta percentage between 1 and 100.\n" 280185029Spjd "\n" 281185029Spjd "\t-t data\t\tInject an error into the plain file contents of a\n" 282185029Spjd "\t\t\tfile. 
The object must be specified as a complete path\n" 283185029Spjd "\t\t\tto a file on a ZFS filesystem.\n" 284185029Spjd "\n" 285185029Spjd "\t-t dnode\tInject an error into the metadnode in the block\n" 286185029Spjd "\t\t\tcorresponding to the dnode for a file or directory. The\n" 287185029Spjd "\t\t\t'-r' option is incompatible with this mode. The object\n" 288185029Spjd "\t\t\tis specified as a complete path to a file or directory\n" 289185029Spjd "\t\t\ton a ZFS filesystem.\n" 290185029Spjd "\n" 291185029Spjd "\t-t <mos>\tInject errors into the MOS for objects of the given\n" 292219089Spjd "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n" 293185029Spjd "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" 294185029Spjd "\t\t\tthe poolname.\n"); 295185029Spjd} 296185029Spjd 297185029Spjdstatic int 298185029Spjditer_handlers(int (*func)(int, const char *, zinject_record_t *, void *), 299185029Spjd void *data) 300185029Spjd{ 301239774Smm zfs_cmd_t zc = { 0 }; 302185029Spjd int ret; 303185029Spjd 304185029Spjd while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) 305185029Spjd if ((ret = func((int)zc.zc_guid, zc.zc_name, 306185029Spjd &zc.zc_inject_record, data)) != 0) 307185029Spjd return (ret); 308185029Spjd 309219089Spjd if (errno != ENOENT) { 310219089Spjd (void) fprintf(stderr, "Unable to list handlers: %s\n", 311219089Spjd strerror(errno)); 312219089Spjd return (-1); 313219089Spjd } 314219089Spjd 315185029Spjd return (0); 316185029Spjd} 317185029Spjd 318185029Spjdstatic int 319185029Spjdprint_data_handler(int id, const char *pool, zinject_record_t *record, 320185029Spjd void *data) 321185029Spjd{ 322185029Spjd int *count = data; 323185029Spjd 324219089Spjd if (record->zi_guid != 0 || record->zi_func[0] != '\0') 325185029Spjd return (0); 326185029Spjd 327185029Spjd if (*count == 0) { 328185029Spjd (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", 329185029Spjd "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); 
330185029Spjd (void) printf("--- --------------- ------ " 331185029Spjd "------ -------- --- ---------------\n"); 332185029Spjd } 333185029Spjd 334185029Spjd *count += 1; 335185029Spjd 336185029Spjd (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, 337185029Spjd (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, 338185029Spjd type_to_name(record->zi_type), record->zi_level); 339185029Spjd 340185029Spjd if (record->zi_start == 0 && 341185029Spjd record->zi_end == -1ULL) 342185029Spjd (void) printf("all\n"); 343185029Spjd else 344185029Spjd (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, 345185029Spjd (u_longlong_t)record->zi_end); 346185029Spjd 347185029Spjd return (0); 348185029Spjd} 349185029Spjd 350185029Spjdstatic int 351185029Spjdprint_device_handler(int id, const char *pool, zinject_record_t *record, 352185029Spjd void *data) 353185029Spjd{ 354185029Spjd int *count = data; 355185029Spjd 356219089Spjd if (record->zi_guid == 0 || record->zi_func[0] != '\0') 357185029Spjd return (0); 358185029Spjd 359185029Spjd if (*count == 0) { 360185029Spjd (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); 361185029Spjd (void) printf("--- --------------- ----------------\n"); 362185029Spjd } 363185029Spjd 364185029Spjd *count += 1; 365185029Spjd 366185029Spjd (void) printf("%3d %-15s %llx\n", id, pool, 367185029Spjd (u_longlong_t)record->zi_guid); 368185029Spjd 369185029Spjd return (0); 370185029Spjd} 371185029Spjd 372219089Spjdstatic int 373219089Spjdprint_panic_handler(int id, const char *pool, zinject_record_t *record, 374219089Spjd void *data) 375219089Spjd{ 376219089Spjd int *count = data; 377219089Spjd 378219089Spjd if (record->zi_func[0] == '\0') 379219089Spjd return (0); 380219089Spjd 381219089Spjd if (*count == 0) { 382219089Spjd (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); 383219089Spjd (void) printf("--- --------------- ----------------\n"); 384219089Spjd } 385219089Spjd 386219089Spjd *count += 1; 
387219089Spjd 388219089Spjd (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); 389219089Spjd 390219089Spjd return (0); 391219089Spjd} 392219089Spjd 393185029Spjd/* 394185029Spjd * Print all registered error handlers. Returns the number of handlers 395185029Spjd * registered. 396185029Spjd */ 397185029Spjdstatic int 398185029Spjdprint_all_handlers(void) 399185029Spjd{ 400219089Spjd int count = 0, total = 0; 401185029Spjd 402185029Spjd (void) iter_handlers(print_device_handler, &count); 403219089Spjd if (count > 0) { 404219089Spjd total += count; 405219089Spjd (void) printf("\n"); 406219089Spjd count = 0; 407219089Spjd } 408219089Spjd 409185029Spjd (void) iter_handlers(print_data_handler, &count); 410219089Spjd if (count > 0) { 411219089Spjd total += count; 412219089Spjd (void) printf("\n"); 413219089Spjd count = 0; 414219089Spjd } 415185029Spjd 416219089Spjd (void) iter_handlers(print_panic_handler, &count); 417219089Spjd 418219089Spjd return (count + total); 419185029Spjd} 420185029Spjd 421185029Spjd/* ARGSUSED */ 422185029Spjdstatic int 423185029Spjdcancel_one_handler(int id, const char *pool, zinject_record_t *record, 424185029Spjd void *data) 425185029Spjd{ 426239774Smm zfs_cmd_t zc = { 0 }; 427185029Spjd 428185029Spjd zc.zc_guid = (uint64_t)id; 429185029Spjd 430185029Spjd if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 431185029Spjd (void) fprintf(stderr, "failed to remove handler %d: %s\n", 432185029Spjd id, strerror(errno)); 433185029Spjd return (1); 434185029Spjd } 435185029Spjd 436185029Spjd return (0); 437185029Spjd} 438185029Spjd 439185029Spjd/* 440185029Spjd * Remove all fault injection handlers. 
441185029Spjd */ 442185029Spjdstatic int 443185029Spjdcancel_all_handlers(void) 444185029Spjd{ 445185029Spjd int ret = iter_handlers(cancel_one_handler, NULL); 446185029Spjd 447219089Spjd if (ret == 0) 448219089Spjd (void) printf("removed all registered handlers\n"); 449185029Spjd 450185029Spjd return (ret); 451185029Spjd} 452185029Spjd 453185029Spjd/* 454185029Spjd * Remove a specific fault injection handler. 455185029Spjd */ 456185029Spjdstatic int 457185029Spjdcancel_handler(int id) 458185029Spjd{ 459239774Smm zfs_cmd_t zc = { 0 }; 460185029Spjd 461185029Spjd zc.zc_guid = (uint64_t)id; 462185029Spjd 463185029Spjd if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { 464185029Spjd (void) fprintf(stderr, "failed to remove handler %d: %s\n", 465185029Spjd id, strerror(errno)); 466185029Spjd return (1); 467185029Spjd } 468185029Spjd 469185029Spjd (void) printf("removed handler %d\n", id); 470185029Spjd 471185029Spjd return (0); 472185029Spjd} 473185029Spjd 474185029Spjd/* 475185029Spjd * Register a new fault injection handler. 
476185029Spjd */ 477185029Spjdstatic int 478185029Spjdregister_handler(const char *pool, int flags, zinject_record_t *record, 479185029Spjd int quiet) 480185029Spjd{ 481239774Smm zfs_cmd_t zc = { 0 }; 482185029Spjd 483185029Spjd (void) strcpy(zc.zc_name, pool); 484185029Spjd zc.zc_inject_record = *record; 485185029Spjd zc.zc_guid = flags; 486185029Spjd 487185029Spjd if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { 488185029Spjd (void) fprintf(stderr, "failed to add handler: %s\n", 489185029Spjd strerror(errno)); 490185029Spjd return (1); 491185029Spjd } 492185029Spjd 493185029Spjd if (flags & ZINJECT_NULL) 494185029Spjd return (0); 495185029Spjd 496185029Spjd if (quiet) { 497185029Spjd (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); 498185029Spjd } else { 499185029Spjd (void) printf("Added handler %llu with the following " 500185029Spjd "properties:\n", (u_longlong_t)zc.zc_guid); 501185029Spjd (void) printf(" pool: %s\n", pool); 502185029Spjd if (record->zi_guid) { 503185029Spjd (void) printf(" vdev: %llx\n", 504185029Spjd (u_longlong_t)record->zi_guid); 505219089Spjd } else if (record->zi_func[0] != '\0') { 506219089Spjd (void) printf(" panic function: %s\n", 507219089Spjd record->zi_func); 508219089Spjd } else if (record->zi_duration > 0) { 509219089Spjd (void) printf(" time: %lld seconds\n", 510219089Spjd (u_longlong_t)record->zi_duration); 511219089Spjd } else if (record->zi_duration < 0) { 512219089Spjd (void) printf(" txgs: %lld \n", 513219089Spjd (u_longlong_t)-record->zi_duration); 514185029Spjd } else { 515185029Spjd (void) printf("objset: %llu\n", 516185029Spjd (u_longlong_t)record->zi_objset); 517185029Spjd (void) printf("object: %llu\n", 518185029Spjd (u_longlong_t)record->zi_object); 519185029Spjd (void) printf(" type: %llu\n", 520185029Spjd (u_longlong_t)record->zi_type); 521185029Spjd (void) printf(" level: %d\n", record->zi_level); 522185029Spjd if (record->zi_start == 0 && 523185029Spjd record->zi_end == -1ULL) 524185029Spjd (void) printf(" 
range: all\n"); 525185029Spjd else 526185029Spjd (void) printf(" range: [%llu, %llu)\n", 527185029Spjd (u_longlong_t)record->zi_start, 528185029Spjd (u_longlong_t)record->zi_end); 529185029Spjd } 530185029Spjd } 531185029Spjd 532185029Spjd return (0); 533185029Spjd} 534185029Spjd 535185029Spjdint 536219089Spjdperform_action(const char *pool, zinject_record_t *record, int cmd) 537219089Spjd{ 538239774Smm zfs_cmd_t zc = { 0 }; 539219089Spjd 540219089Spjd ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED); 541219089Spjd (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); 542219089Spjd zc.zc_guid = record->zi_guid; 543219089Spjd zc.zc_cookie = cmd; 544219089Spjd 545219089Spjd if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) 546219089Spjd return (0); 547219089Spjd 548219089Spjd return (1); 549219089Spjd} 550219089Spjd 551219089Spjdint 552185029Spjdmain(int argc, char **argv) 553185029Spjd{ 554185029Spjd int c; 555185029Spjd char *range = NULL; 556185029Spjd char *cancel = NULL; 557185029Spjd char *end; 558185029Spjd char *raw = NULL; 559185029Spjd char *device = NULL; 560185029Spjd int level = 0; 561185029Spjd int quiet = 0; 562185029Spjd int error = 0; 563185029Spjd int domount = 0; 564219089Spjd int io_type = ZIO_TYPES; 565219089Spjd int action = VDEV_STATE_UNKNOWN; 566185029Spjd err_type_t type = TYPE_INVAL; 567185029Spjd err_type_t label = TYPE_INVAL; 568185029Spjd zinject_record_t record = { 0 }; 569185029Spjd char pool[MAXNAMELEN]; 570185029Spjd char dataset[MAXNAMELEN]; 571185029Spjd zfs_handle_t *zhp; 572219089Spjd int nowrites = 0; 573219089Spjd int dur_txg = 0; 574219089Spjd int dur_secs = 0; 575185029Spjd int ret; 576185029Spjd int flags = 0; 577185029Spjd 578185029Spjd if ((g_zfs = libzfs_init()) == NULL) { 579185029Spjd (void) fprintf(stderr, "internal error: failed to " 580185029Spjd "initialize ZFS library\n"); 581185029Spjd return (1); 582185029Spjd } 583185029Spjd 584185029Spjd libzfs_print_on_error(g_zfs, B_TRUE); 585185029Spjd 
586185029Spjd if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { 587185029Spjd (void) fprintf(stderr, "failed to open ZFS device\n"); 588185029Spjd return (1); 589185029Spjd } 590185029Spjd 591185029Spjd if (argc == 1) { 592185029Spjd /* 593185029Spjd * No arguments. Print the available handlers. If there are no 594185029Spjd * available handlers, direct the user to '-h' for help 595185029Spjd * information. 596185029Spjd */ 597185029Spjd if (print_all_handlers() == 0) { 598185029Spjd (void) printf("No handlers registered.\n"); 599185029Spjd (void) printf("Run 'zinject -h' for usage " 600185029Spjd "information.\n"); 601185029Spjd } 602185029Spjd 603185029Spjd return (0); 604185029Spjd } 605185029Spjd 606219089Spjd while ((c = getopt(argc, argv, 607247265Smm ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) { 608185029Spjd switch (c) { 609185029Spjd case 'a': 610185029Spjd flags |= ZINJECT_FLUSH_ARC; 611185029Spjd break; 612219089Spjd case 'A': 613219089Spjd if (strcasecmp(optarg, "degrade") == 0) { 614219089Spjd action = VDEV_STATE_DEGRADED; 615219089Spjd } else if (strcasecmp(optarg, "fault") == 0) { 616219089Spjd action = VDEV_STATE_FAULTED; 617219089Spjd } else { 618219089Spjd (void) fprintf(stderr, "invalid action '%s': " 619219089Spjd "must be 'degrade' or 'fault'\n", optarg); 620219089Spjd usage(); 621219089Spjd return (1); 622219089Spjd } 623219089Spjd break; 624185029Spjd case 'b': 625185029Spjd raw = optarg; 626185029Spjd break; 627185029Spjd case 'c': 628185029Spjd cancel = optarg; 629185029Spjd break; 630185029Spjd case 'd': 631185029Spjd device = optarg; 632185029Spjd break; 633247265Smm case 'D': 634247265Smm record.zi_timer = strtoull(optarg, &end, 10); 635247265Smm if (errno != 0 || *end != '\0') { 636247265Smm (void) fprintf(stderr, "invalid i/o delay " 637247265Smm "value: '%s'\n", optarg); 638247265Smm usage(); 639247265Smm return (1); 640247265Smm } 641247265Smm break; 642185029Spjd case 'e': 643185029Spjd if (strcasecmp(optarg, "io") == 0) { 
644185029Spjd error = EIO; 645185029Spjd } else if (strcasecmp(optarg, "checksum") == 0) { 646185029Spjd error = ECKSUM; 647185029Spjd } else if (strcasecmp(optarg, "nxio") == 0) { 648185029Spjd error = ENXIO; 649219089Spjd } else if (strcasecmp(optarg, "dtl") == 0) { 650219089Spjd error = ECHILD; 651185029Spjd } else { 652185029Spjd (void) fprintf(stderr, "invalid error type " 653185029Spjd "'%s': must be 'io', 'checksum' or " 654185029Spjd "'nxio'\n", optarg); 655185029Spjd usage(); 656185029Spjd return (1); 657185029Spjd } 658185029Spjd break; 659185029Spjd case 'f': 660185029Spjd record.zi_freq = atoi(optarg); 661185029Spjd if (record.zi_freq < 1 || record.zi_freq > 100) { 662185029Spjd (void) fprintf(stderr, "frequency range must " 663185029Spjd "be in the range (0, 100]\n"); 664185029Spjd return (1); 665185029Spjd } 666185029Spjd break; 667213198Smm case 'F': 668213198Smm record.zi_failfast = B_TRUE; 669213198Smm break; 670219089Spjd case 'g': 671219089Spjd dur_txg = 1; 672219089Spjd record.zi_duration = (int)strtol(optarg, &end, 10); 673219089Spjd if (record.zi_duration <= 0 || *end != '\0') { 674219089Spjd (void) fprintf(stderr, "invalid duration '%s': " 675219089Spjd "must be a positive integer\n", optarg); 676219089Spjd usage(); 677219089Spjd return (1); 678219089Spjd } 679219089Spjd /* store duration of txgs as its negative */ 680219089Spjd record.zi_duration *= -1; 681219089Spjd break; 682185029Spjd case 'h': 683185029Spjd usage(); 684185029Spjd return (0); 685219089Spjd case 'I': 686219089Spjd /* default duration, if one hasn't yet been defined */ 687219089Spjd nowrites = 1; 688219089Spjd if (dur_secs == 0 && dur_txg == 0) 689219089Spjd record.zi_duration = 30; 690219089Spjd break; 691185029Spjd case 'l': 692185029Spjd level = (int)strtol(optarg, &end, 10); 693185029Spjd if (*end != '\0') { 694185029Spjd (void) fprintf(stderr, "invalid level '%s': " 695185029Spjd "must be an integer\n", optarg); 696185029Spjd usage(); 697185029Spjd return (1); 
698185029Spjd } 699185029Spjd break; 700185029Spjd case 'm': 701185029Spjd domount = 1; 702185029Spjd break; 703219089Spjd case 'p': 704219089Spjd (void) strlcpy(record.zi_func, optarg, 705219089Spjd sizeof (record.zi_func)); 706247265Smm record.zi_cmd = ZINJECT_PANIC; 707219089Spjd break; 708185029Spjd case 'q': 709185029Spjd quiet = 1; 710185029Spjd break; 711185029Spjd case 'r': 712185029Spjd range = optarg; 713185029Spjd break; 714219089Spjd case 's': 715219089Spjd dur_secs = 1; 716219089Spjd record.zi_duration = (int)strtol(optarg, &end, 10); 717219089Spjd if (record.zi_duration <= 0 || *end != '\0') { 718219089Spjd (void) fprintf(stderr, "invalid duration '%s': " 719219089Spjd "must be a positive integer\n", optarg); 720219089Spjd usage(); 721219089Spjd return (1); 722219089Spjd } 723219089Spjd break; 724219089Spjd case 'T': 725219089Spjd if (strcasecmp(optarg, "read") == 0) { 726219089Spjd io_type = ZIO_TYPE_READ; 727219089Spjd } else if (strcasecmp(optarg, "write") == 0) { 728219089Spjd io_type = ZIO_TYPE_WRITE; 729219089Spjd } else if (strcasecmp(optarg, "free") == 0) { 730219089Spjd io_type = ZIO_TYPE_FREE; 731219089Spjd } else if (strcasecmp(optarg, "claim") == 0) { 732219089Spjd io_type = ZIO_TYPE_CLAIM; 733219089Spjd } else if (strcasecmp(optarg, "all") == 0) { 734219089Spjd io_type = ZIO_TYPES; 735219089Spjd } else { 736219089Spjd (void) fprintf(stderr, "invalid I/O type " 737219089Spjd "'%s': must be 'read', 'write', 'free', " 738219089Spjd "'claim' or 'all'\n", optarg); 739219089Spjd usage(); 740219089Spjd return (1); 741219089Spjd } 742219089Spjd break; 743185029Spjd case 't': 744185029Spjd if ((type = name_to_type(optarg)) == TYPE_INVAL && 745185029Spjd !MOS_TYPE(type)) { 746185029Spjd (void) fprintf(stderr, "invalid type '%s'\n", 747185029Spjd optarg); 748185029Spjd usage(); 749185029Spjd return (1); 750185029Spjd } 751185029Spjd break; 752185029Spjd case 'u': 753185029Spjd flags |= ZINJECT_UNLOAD_SPA; 754185029Spjd break; 755185029Spjd case 'L': 
756185029Spjd if ((label = name_to_type(optarg)) == TYPE_INVAL && 757185029Spjd !LABEL_TYPE(type)) { 758185029Spjd (void) fprintf(stderr, "invalid label type " 759185029Spjd "'%s'\n", optarg); 760185029Spjd usage(); 761185029Spjd return (1); 762185029Spjd } 763185029Spjd break; 764185029Spjd case ':': 765185029Spjd (void) fprintf(stderr, "option -%c requires an " 766185029Spjd "operand\n", optopt); 767185029Spjd usage(); 768185029Spjd return (1); 769185029Spjd case '?': 770185029Spjd (void) fprintf(stderr, "invalid option '%c'\n", 771185029Spjd optopt); 772185029Spjd usage(); 773185029Spjd return (2); 774185029Spjd } 775185029Spjd } 776185029Spjd 777185029Spjd argc -= optind; 778185029Spjd argv += optind; 779185029Spjd 780247265Smm if (record.zi_duration != 0) 781247265Smm record.zi_cmd = ZINJECT_IGNORED_WRITES; 782247265Smm 783185029Spjd if (cancel != NULL) { 784185029Spjd /* 785185029Spjd * '-c' is invalid with any other options. 786185029Spjd */ 787185029Spjd if (raw != NULL || range != NULL || type != TYPE_INVAL || 788247265Smm level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) { 789185029Spjd (void) fprintf(stderr, "cancel (-c) incompatible with " 790185029Spjd "any other options\n"); 791185029Spjd usage(); 792185029Spjd return (2); 793185029Spjd } 794185029Spjd if (argc != 0) { 795185029Spjd (void) fprintf(stderr, "extraneous argument to '-c'\n"); 796185029Spjd usage(); 797185029Spjd return (2); 798185029Spjd } 799185029Spjd 800185029Spjd if (strcmp(cancel, "all") == 0) { 801185029Spjd return (cancel_all_handlers()); 802185029Spjd } else { 803185029Spjd int id = (int)strtol(cancel, &end, 10); 804185029Spjd if (*end != '\0') { 805185029Spjd (void) fprintf(stderr, "invalid handle id '%s':" 806185029Spjd " must be an integer or 'all'\n", cancel); 807185029Spjd usage(); 808185029Spjd return (1); 809185029Spjd } 810185029Spjd return (cancel_handler(id)); 811185029Spjd } 812185029Spjd } 813185029Spjd 814185029Spjd if (device != NULL) { 815185029Spjd /* 
816185029Spjd * Device (-d) injection uses a completely different mechanism 817185029Spjd * for doing injection, so handle it separately here. 818185029Spjd */ 819185029Spjd if (raw != NULL || range != NULL || type != TYPE_INVAL || 820247265Smm level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) { 821185029Spjd (void) fprintf(stderr, "device (-d) incompatible with " 822185029Spjd "data error injection\n"); 823185029Spjd usage(); 824185029Spjd return (2); 825185029Spjd } 826185029Spjd 827185029Spjd if (argc != 1) { 828185029Spjd (void) fprintf(stderr, "device (-d) injection requires " 829185029Spjd "a single pool name\n"); 830185029Spjd usage(); 831185029Spjd return (2); 832185029Spjd } 833185029Spjd 834185029Spjd (void) strcpy(pool, argv[0]); 835185029Spjd dataset[0] = '\0'; 836185029Spjd 837185029Spjd if (error == ECKSUM) { 838185029Spjd (void) fprintf(stderr, "device error type must be " 839185029Spjd "'io' or 'nxio'\n"); 840185029Spjd return (1); 841185029Spjd } 842185029Spjd 843219089Spjd record.zi_iotype = io_type; 844185029Spjd if (translate_device(pool, device, label, &record) != 0) 845185029Spjd return (1); 846185029Spjd if (!error) 847185029Spjd error = ENXIO; 848219089Spjd 849219089Spjd if (action != VDEV_STATE_UNKNOWN) 850219089Spjd return (perform_action(pool, &record, action)); 851219089Spjd 852185029Spjd } else if (raw != NULL) { 853219089Spjd if (range != NULL || type != TYPE_INVAL || level != 0 || 854247265Smm record.zi_cmd != ZINJECT_UNINITIALIZED) { 855185029Spjd (void) fprintf(stderr, "raw (-b) format with " 856185029Spjd "any other options\n"); 857185029Spjd usage(); 858185029Spjd return (2); 859185029Spjd } 860185029Spjd 861185029Spjd if (argc != 1) { 862185029Spjd (void) fprintf(stderr, "raw (-b) format expects a " 863185029Spjd "single pool name\n"); 864185029Spjd usage(); 865185029Spjd return (2); 866185029Spjd } 867185029Spjd 868185029Spjd (void) strcpy(pool, argv[0]); 869185029Spjd dataset[0] = '\0'; 870185029Spjd 871185029Spjd if (error 
== ENXIO) { 872185029Spjd (void) fprintf(stderr, "data error type must be " 873185029Spjd "'checksum' or 'io'\n"); 874185029Spjd return (1); 875185029Spjd } 876185029Spjd 877247265Smm record.zi_cmd = ZINJECT_DATA_FAULT; 878185029Spjd if (translate_raw(raw, &record) != 0) 879185029Spjd return (1); 880185029Spjd if (!error) 881185029Spjd error = EIO; 882247265Smm } else if (record.zi_cmd == ZINJECT_PANIC) { 883219089Spjd if (raw != NULL || range != NULL || type != TYPE_INVAL || 884247265Smm level != 0 || device != NULL) { 885219089Spjd (void) fprintf(stderr, "panic (-p) incompatible with " 886219089Spjd "other options\n"); 887219089Spjd usage(); 888219089Spjd return (2); 889219089Spjd } 890219089Spjd 891219089Spjd if (argc < 1 || argc > 2) { 892219089Spjd (void) fprintf(stderr, "panic (-p) injection requires " 893219089Spjd "a single pool name and an optional id\n"); 894219089Spjd usage(); 895219089Spjd return (2); 896219089Spjd } 897219089Spjd 898219089Spjd (void) strcpy(pool, argv[0]); 899219089Spjd if (argv[1] != NULL) 900219089Spjd record.zi_type = atoi(argv[1]); 901219089Spjd dataset[0] = '\0'; 902247265Smm } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) { 903219089Spjd if (nowrites == 0) { 904219089Spjd (void) fprintf(stderr, "-s or -g meaningless " 905219089Spjd "without -I (ignore writes)\n"); 906219089Spjd usage(); 907219089Spjd return (2); 908219089Spjd } else if (dur_secs && dur_txg) { 909219089Spjd (void) fprintf(stderr, "choose a duration either " 910219089Spjd "in seconds (-s) or a number of txgs (-g) " 911219089Spjd "but not both\n"); 912219089Spjd usage(); 913219089Spjd return (2); 914219089Spjd } else if (argc != 1) { 915219089Spjd (void) fprintf(stderr, "ignore writes (-I) " 916219089Spjd "injection requires a single pool name\n"); 917219089Spjd usage(); 918219089Spjd return (2); 919219089Spjd } 920219089Spjd 921219089Spjd (void) strcpy(pool, argv[0]); 922219089Spjd dataset[0] = '\0'; 923185029Spjd } else if (type == TYPE_INVAL) { 924185029Spjd 
if (flags == 0) { 925185029Spjd (void) fprintf(stderr, "at least one of '-b', '-d', " 926219089Spjd "'-t', '-a', '-p', '-I' or '-u' " 927219089Spjd "must be specified\n"); 928185029Spjd usage(); 929185029Spjd return (2); 930185029Spjd } 931185029Spjd 932185029Spjd if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { 933185029Spjd (void) strcpy(pool, argv[0]); 934185029Spjd dataset[0] = '\0'; 935185029Spjd } else if (argc != 0) { 936185029Spjd (void) fprintf(stderr, "extraneous argument for " 937185029Spjd "'-f'\n"); 938185029Spjd usage(); 939185029Spjd return (2); 940185029Spjd } 941185029Spjd 942185029Spjd flags |= ZINJECT_NULL; 943185029Spjd } else { 944185029Spjd if (argc != 1) { 945185029Spjd (void) fprintf(stderr, "missing object\n"); 946185029Spjd usage(); 947185029Spjd return (2); 948185029Spjd } 949185029Spjd 950185029Spjd if (error == ENXIO) { 951185029Spjd (void) fprintf(stderr, "data error type must be " 952185029Spjd "'checksum' or 'io'\n"); 953185029Spjd return (1); 954185029Spjd } 955185029Spjd 956247265Smm record.zi_cmd = ZINJECT_DATA_FAULT; 957185029Spjd if (translate_record(type, argv[0], range, level, &record, pool, 958185029Spjd dataset) != 0) 959185029Spjd return (1); 960185029Spjd if (!error) 961185029Spjd error = EIO; 962185029Spjd } 963185029Spjd 964185029Spjd /* 965185029Spjd * If this is pool-wide metadata, unmount everything. The ioctl() will 966185029Spjd * unload the pool, so that we trigger spa-wide reopen of metadata next 967185029Spjd * time we access the pool. 
968185029Spjd */ 969185029Spjd if (dataset[0] != '\0' && domount) { 970185029Spjd if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) 971185029Spjd return (1); 972185029Spjd 973185029Spjd if (zfs_unmount(zhp, NULL, 0) != 0) 974185029Spjd return (1); 975185029Spjd } 976185029Spjd 977185029Spjd record.zi_error = error; 978185029Spjd 979185029Spjd ret = register_handler(pool, flags, &record, quiet); 980185029Spjd 981185029Spjd if (dataset[0] != '\0' && domount) 982185029Spjd ret = (zfs_mount(zhp, NULL, 0) != 0); 983185029Spjd 984185029Spjd libzfs_fini(g_zfs); 985185029Spjd 986185029Spjd return (ret); 987185029Spjd} 988