1/*-
2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions, and the following disclaimer,
10 *    without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 *    substantially similar to the "NO WARRANTY" disclaimer below
13 *    ("Disclaimer") and any redistribution must be conditioned upon
14 *    including a substantially similar Disclaimer requirement for further
15 *    binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
29 *
30 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31 */
32
33/**
34 * \file zfsd_event.cc
35 */
36#include <sys/cdefs.h>
37#include <sys/byteorder.h>
38#include <sys/time.h>
39#include <sys/fs/zfs.h>
40#include <sys/vdev_impl.h>
41
42#include <syslog.h>
43
44#include <libzfs.h>
45#include <libzutil.h>
46/*
47 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
48 * C++ flush methods
49 */
50#undef   flush
51#undef	__init
52#include <list>
53#include <map>
54#include <sstream>
55#include <string>
56
57#include <devdctl/guid.h>
58#include <devdctl/event.h>
59#include <devdctl/event_factory.h>
60#include <devdctl/exception.h>
61#include <devdctl/consumer.h>
62
63#include "callout.h"
64#include "vdev_iterator.h"
65#include "zfsd_event.h"
66#include "case_file.h"
67#include "vdev.h"
68#include "zfsd.h"
69#include "zfsd_exception.h"
70#include "zpool_list.h"
71/*============================ Namespace Control =============================*/
72using DevdCtl::Event;
73using DevdCtl::Guid;
74using DevdCtl::NVPairMap;
75using std::stringstream;
76
77/*=========================== Class Implementations ==========================*/
78
79/*-------------------------------- GeomEvent --------------------------------*/
80
81//- GeomEvent Static Public Methods -------------------------------------------
82Event *
83GeomEvent::Builder(Event::Type type,
84		   NVPairMap &nvPairs,
85		   const string &eventString)
86{
87	return (new GeomEvent(type, nvPairs, eventString));
88}
89
90//- GeomEvent Virtual Public Methods ------------------------------------------
91Event *
92GeomEvent::DeepCopy() const
93{
94	return (new GeomEvent(*this));
95}
96
97bool
98GeomEvent::Process() const
99{
100	/*
101	 * We only use GEOM events to repair damaged pools.  So return early if
102	 * there are no damaged pools
103	 */
104	if (CaseFile::Empty())
105		return (false);
106
107	/*
108	 * We are only concerned with arrivals and physical path changes,
109	 * because those can be used to satisfy online and autoreplace
110	 * operations
111	 */
112	if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
113		return (false);
114
115	/* Log the event since it is of interest. */
116	Log(LOG_INFO);
117
118	string devPath;
119	if (!DevPath(devPath))
120		return (false);
121
122	int devFd(open(devPath.c_str(), O_RDONLY));
123	if (devFd == -1)
124		return (false);
125
126	bool inUse;
127	bool degraded;
128	nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));
129
130	string physPath;
131        bool havePhysPath(PhysicalPath(physPath));
132
133	string devName;
134	DevName(devName);
135	close(devFd);
136
137	if (inUse && devLabel != NULL) {
138		OnlineByLabel(devPath, physPath, devLabel);
139	} else if (degraded) {
140		syslog(LOG_INFO, "%s is marked degraded.  Ignoring "
141		       "as a replace by physical path candidate.\n",
142		       devName.c_str());
143	} else if (havePhysPath) {
144		/*
145		 * TODO: attempt to resolve events using every casefile
146		 * that matches this physpath
147		 */
148		CaseFile *caseFile(CaseFile::Find(physPath));
149		if (caseFile != NULL) {
150			syslog(LOG_INFO,
151			       "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
152			       caseFile->PoolGUIDString().c_str(),
153			       caseFile->VdevGUIDString().c_str(),
154			       zpool_state_to_name(caseFile->VdevState(),
155						   VDEV_AUX_NONE));
156			caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
157		}
158	}
159	return (false);
160}
161
162//- GeomEvent Protected Methods -----------------------------------------------
163GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
164			       const string &eventString)
165 : DevdCtl::GeomEvent(type, nvpairs, eventString)
166{
167}
168
169GeomEvent::GeomEvent(const GeomEvent &src)
170 : DevdCtl::GeomEvent::GeomEvent(src)
171{
172}
173
174nvlist_t *
175GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
176{
177	pool_state_t poolState;
178	char        *poolName;
179	boolean_t    b_inuse;
180	int          nlabels;
181
182	inUse    = false;
183	degraded = false;
184	poolName = NULL;
185	if (zpool_in_use(g_zfsHandle, devFd, &poolState,
186			 &poolName, &b_inuse) == 0) {
187		nvlist_t *devLabel = NULL;
188
189		inUse = b_inuse == B_TRUE;
190		if (poolName != NULL)
191			free(poolName);
192
193		if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
194			return (NULL);
195		/*
196		 * If we find a disk with fewer than the maximum number of
197		 * labels, it might be the whole disk of a partitioned disk
198		 * where ZFS resides on a partition.  In that case, we should do
199		 * nothing and wait for the partition to appear.  Or, the disk
200		 * might be damaged.  In that case, zfsd should do nothing and
201		 * wait for the sysadmin to decide.
202		 */
203		if (nlabels != VDEV_LABELS || devLabel == NULL) {
204			nvlist_free(devLabel);
205			return (NULL);
206		}
207
208		try {
209			Vdev vdev(devLabel);
210			degraded = vdev.State() != VDEV_STATE_HEALTHY;
211			return (devLabel);
212		} catch (ZfsdException &exp) {
213			string devName = fdevname(devFd);
214			string devPath = _PATH_DEV + devName;
215			string context("GeomEvent::ReadLabel: "
216				     + devPath + ": ");
217
218			exp.GetString().insert(0, context);
219			exp.Log();
220			nvlist_free(devLabel);
221		}
222	}
223	return (NULL);
224}
225
226bool
227GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
228			      nvlist_t *devConfig)
229{
230	bool ret = false;
231	try {
232		CaseFileList case_list;
233		/*
234		 * A device with ZFS label information has been
235		 * inserted.  If it matches a device for which we
236		 * have a case, see if we can solve that case.
237		 */
238		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
239		       devPath.c_str());
240		Vdev vdev(devConfig);
241		CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
242		for (CaseFileList::iterator curr = case_list.begin();
243		    curr != case_list.end(); curr++) {
244			ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
245		}
246		return (ret);
247
248	} catch (ZfsdException &exp) {
249		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
250
251		exp.GetString().insert(0, context);
252		exp.Log();
253	}
254	return (ret);
255}
256
257
258/*--------------------------------- ZfsEvent ---------------------------------*/
259//- ZfsEvent Static Public Methods ---------------------------------------------
260DevdCtl::Event *
261ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
262		  const string &eventString)
263{
264	return (new ZfsEvent(type, nvpairs, eventString));
265}
266
267//- ZfsEvent Virtual Public Methods --------------------------------------------
268Event *
269ZfsEvent::DeepCopy() const
270{
271	return (new ZfsEvent(*this));
272}
273
274bool
275ZfsEvent::Process() const
276{
277	string logstr("");
278
279	if (!Contains("class") && !Contains("type")) {
280		syslog(LOG_ERR,
281		       "ZfsEvent::Process: Missing class or type data.");
282		return (false);
283	}
284
285	/* On config syncs, replay any queued events first. */
286	if (Value("type").find("sysevent.fs.zfs.config_sync") == 0) {
287		/*
288		 * Even if saved events are unconsumed the second time
289		 * around, drop them.  Any events that still can't be
290		 * consumed are probably referring to vdevs or pools that
291		 * no longer exist.
292		 */
293		ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
294		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
295	}
296
297	if (Value("type").find("sysevent.fs.zfs.") == 0) {
298		/* Configuration changes, resilver events, etc. */
299		ProcessPoolEvent();
300		return (false);
301	}
302
303	if (!Contains("pool_guid") || !Contains("vdev_guid")) {
304		/* Only currently interested in Vdev related events. */
305		return (false);
306	}
307
308	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
309	if (caseFile != NULL) {
310		Log(LOG_INFO);
311		syslog(LOG_INFO, "Evaluating existing case file\n");
312		caseFile->ReEvaluate(*this);
313		return (false);
314	}
315
316	/* Skip events that can't be handled. */
317	Guid poolGUID(PoolGUID());
318	/* If there are no replicas for a pool, then it's not manageable. */
319	if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
320		stringstream msg;
321		msg << "No replicas available for pool "  << poolGUID;
322		msg << ", ignoring";
323		Log(LOG_INFO);
324		syslog(LOG_INFO, "%s", msg.str().c_str());
325		return (false);
326	}
327
328	/*
329	 * Create a case file for this vdev, and have it
330	 * evaluate the event.
331	 */
332	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
333	if (zpl.empty()) {
334		stringstream msg;
335		int priority = LOG_INFO;
336		msg << "ZfsEvent::Process: Event for unknown pool ";
337		msg << poolGUID << " ";
338		msg << "queued";
339		Log(LOG_INFO);
340		syslog(priority, "%s", msg.str().c_str());
341		return (true);
342	}
343
344	nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
345	if (vdevConfig == NULL) {
346		stringstream msg;
347		int priority = LOG_INFO;
348		msg << "ZfsEvent::Process: Event for unknown vdev ";
349		msg << VdevGUID() << " ";
350		msg << "queued";
351		Log(LOG_INFO);
352		syslog(priority, "%s", msg.str().c_str());
353		return (true);
354	}
355
356	Vdev vdev(zpl.front(), vdevConfig);
357	caseFile = &CaseFile::Create(vdev);
358	if (caseFile->ReEvaluate(*this) == false) {
359		stringstream msg;
360		int priority = LOG_INFO;
361		msg << "ZfsEvent::Process: Unconsumed event for vdev(";
362		msg << zpool_get_name(zpl.front()) << ",";
363		msg << vdev.GUID() << ") ";
364		msg << "queued";
365		Log(LOG_INFO);
366		syslog(priority, "%s", msg.str().c_str());
367		return (true);
368	}
369	return (false);
370}
371
372//- ZfsEvent Protected Methods -------------------------------------------------
373ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
374			   const string &eventString)
375 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
376{
377}
378
379ZfsEvent::ZfsEvent(const ZfsEvent &src)
380 : DevdCtl::ZfsEvent(src)
381{
382}
383
384/*
385 * Sometimes the kernel won't detach a spare when it is no longer needed.  This
386 * can happen for example if a drive is removed, then either the pool is
387 * exported or the machine is powered off, then the drive is reinserted, then
388 * the machine is powered on or the pool is imported.  ZFSD must detach these
389 * spares itself.
390 */
391void
392ZfsEvent::CleanupSpares() const
393{
394	Guid poolGUID(PoolGUID());
395	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
396	if (!zpl.empty()) {
397		zpool_handle_t* hdl;
398
399		hdl = zpl.front();
400		VdevIterator(hdl).Each(TryDetach, (void*)hdl);
401	}
402}
403
404void
405ZfsEvent::ProcessPoolEvent() const
406{
407	bool degradedDevice(false);
408
409	/* The pool is destroyed.  Discard any open cases */
410	if (Value("type") == "sysevent.fs.zfs.pool_destroy") {
411		Log(LOG_INFO);
412		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
413		return;
414	}
415
416	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
417	if (caseFile != NULL) {
418		if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
419		 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
420			degradedDevice = true;
421
422		Log(LOG_INFO);
423		caseFile->ReEvaluate(*this);
424	}
425	else if (Value("type") == "sysevent.fs.zfs.resilver_finish")
426	{
427		/*
428		 * It's possible to get a resilver_finish event with no
429		 * corresponding casefile.  For example, if a damaged pool were
430		 * exported, repaired, then reimported.
431		 */
432		Log(LOG_INFO);
433		CleanupSpares();
434	}
435
436	if (Value("type") == "sysevent.fs.zfs.vdev_remove"
437	 && degradedDevice == false) {
438
439		/* See if any other cases can make use of this device. */
440		Log(LOG_INFO);
441		ZfsDaemon::RequestSystemRescan();
442	}
443}
444
445bool
446ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
447{
448	/*
449	 * Outline:
450	 * if this device is a spare, and its parent includes one healthy,
451	 * non-spare child, then detach this device.
452	 */
453	zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));
454
455	if (vdev.IsSpare()) {
456		std::list<Vdev> siblings;
457		std::list<Vdev>::iterator siblings_it;
458		boolean_t cleanup = B_FALSE;
459
460		Vdev parent = vdev.Parent();
461		siblings = parent.Children();
462
463		/* Determine whether the parent should be cleaned up */
464		for (siblings_it = siblings.begin();
465		     siblings_it != siblings.end();
466		     siblings_it++) {
467			Vdev sibling = *siblings_it;
468
469			if (!sibling.IsSpare() &&
470			     sibling.State() == VDEV_STATE_HEALTHY) {
471				cleanup = B_TRUE;
472				break;
473			}
474		}
475
476		if (cleanup) {
477			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
478			       vdev.Path().c_str(), zpool_get_name(hdl));
479			zpool_vdev_detach(hdl, vdev.Path().c_str());
480		}
481
482	}
483
484	/* Always return false, because there may be other spares to detach */
485	return (false);
486}
487