// case_file.cc — FreeBSD stable/11 revision 314431
1/*-
2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions, and the following disclaimer,
10 *    without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 *    substantially similar to the "NO WARRANTY" disclaimer below
13 *    ("Disclaimer") and any redistribution must be conditioned upon
14 *    including a substantially similar Disclaimer requirement for further
15 *    binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
29 *
30 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31 */
32
33/**
34 * \file case_file.cc
35 *
36 * We keep case files for any leaf vdev that is not in the optimal state.
37 * However, we only serialize to disk those events that need to be preserved
38 * across reboots.  For now, this is just a log of soft errors which we
39 * accumulate in order to mark a device as degraded.
40 */
41#include <sys/cdefs.h>
42#include <sys/time.h>
43
44#include <sys/fs/zfs.h>
45
46#include <dirent.h>
47#include <iomanip>
48#include <fstream>
49#include <functional>
50#include <sstream>
51#include <syslog.h>
52#include <unistd.h>
53
54#include <libzfs.h>
55
56#include <list>
57#include <map>
58#include <string>
59
60#include <devdctl/guid.h>
61#include <devdctl/event.h>
62#include <devdctl/event_factory.h>
63#include <devdctl/exception.h>
64#include <devdctl/consumer.h>
65
66#include "callout.h"
67#include "vdev_iterator.h"
68#include "zfsd_event.h"
69#include "case_file.h"
70#include "vdev.h"
71#include "zfsd.h"
72#include "zfsd_exception.h"
73#include "zpool_list.h"
74
75__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 314431 2017-02-28 23:03:51Z asomers $");
76
77/*============================ Namespace Control =============================*/
78using std::auto_ptr;
79using std::hex;
80using std::ifstream;
81using std::stringstream;
82using std::setfill;
83using std::setw;
84
85using DevdCtl::Event;
86using DevdCtl::EventFactory;
87using DevdCtl::EventList;
88using DevdCtl::Guid;
89using DevdCtl::ParseException;
90
91/*--------------------------------- CaseFile ---------------------------------*/
92//- CaseFile Static Data -------------------------------------------------------
93
/* All cases currently open, across every pool zfsd monitors. */
CaseFileList  CaseFile::s_activeCases;
/* Directory in which cases are serialized across reboots. */
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
/* How long tentative I/O/checksum events are aged before being confirmed. */
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
97
98//- CaseFile Static Public Methods ---------------------------------------------
99CaseFile *
100CaseFile::Find(Guid poolGUID, Guid vdevGUID)
101{
102	for (CaseFileList::iterator curCase = s_activeCases.begin();
103	     curCase != s_activeCases.end(); curCase++) {
104
105		if ((*curCase)->PoolGUID() != poolGUID
106		 || (*curCase)->VdevGUID() != vdevGUID)
107			continue;
108
109		/*
110		 * We only carry one active case per-vdev.
111		 */
112		return (*curCase);
113	}
114	return (NULL);
115}
116
117CaseFile *
118CaseFile::Find(const string &physPath)
119{
120	CaseFile *result = NULL;
121
122	for (CaseFileList::iterator curCase = s_activeCases.begin();
123	     curCase != s_activeCases.end(); curCase++) {
124
125		if ((*curCase)->PhysicalPath() != physPath)
126			continue;
127
128		if (result != NULL) {
129			syslog(LOG_WARNING, "Multiple casefiles found for "
130			    "physical path %s.  "
131			    "This is most likely a bug in zfsd",
132			    physPath.c_str());
133		}
134		result = *curCase;
135	}
136	return (result);
137}
138
139
void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
	CaseFileList::iterator casefile;

	/*
	 * Offer the event to every active case belonging to the given
	 * pool.  ReEvaluate() may Close() (and therefore delete) the
	 * current case, which removes it from s_activeCases, so the next
	 * iterator must be captured before ReEvaluate() is invoked.
	 */
	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
		CaseFileList::iterator next = casefile;
		next++;
		if (poolGUID == (*casefile)->PoolGUID())
			(*casefile)->ReEvaluate(event);
		casefile = next;
	}
}
152
153CaseFile &
154CaseFile::Create(Vdev &vdev)
155{
156	CaseFile *activeCase;
157
158	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
159	if (activeCase == NULL)
160		activeCase = new CaseFile(vdev);
161
162	return (*activeCase);
163}
164
165void
166CaseFile::DeSerialize()
167{
168	struct dirent **caseFiles;
169
170	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
171			 DeSerializeSelector, /*compar*/NULL));
172
173	if (numCaseFiles == -1)
174		return;
175	if (numCaseFiles == 0) {
176		free(caseFiles);
177		return;
178	}
179
180	for (int i = 0; i < numCaseFiles; i++) {
181
182		DeSerializeFile(caseFiles[i]->d_name);
183		free(caseFiles[i]);
184	}
185	free(caseFiles);
186}
187
188void
189CaseFile::LogAll()
190{
191	for (CaseFileList::iterator curCase = s_activeCases.begin();
192	     curCase != s_activeCases.end(); curCase++)
193		(*curCase)->Log();
194}
195
196void
197CaseFile::PurgeAll()
198{
199	/*
200	 * Serialize casefiles before deleting them so that they can be reread
201	 * and revalidated during BuildCaseFiles.
202	 * CaseFiles remove themselves from this list on destruction.
203	 */
204	while (s_activeCases.size() != 0) {
205		CaseFile *casefile = s_activeCases.front();
206		casefile->Serialize();
207		delete casefile;
208	}
209
210}
211
212//- CaseFile Public Methods ----------------------------------------------------
213bool
214CaseFile::RefreshVdevState()
215{
216	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
217	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
218	if (casePool == NULL)
219		return (false);
220
221	Vdev vd(casePool, CaseVdev(casePool));
222	if (vd.DoesNotExist())
223		return (false);
224
225	m_vdevState    = vd.State();
226	m_vdevPhysPath = vd.PhysicalPath();
227	return (true);
228}
229
/*
 * Consider a newly arrived device (devPath/physPath, with an optional
 * already-parsed Vdev) as a potential resolution for this case.
 * Returns true iff the arrival event was consumed (used to online or
 * replace the missing vdev).  May delete this CaseFile via Close().
 */
bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

	if (pool == NULL || !RefreshVdevState()) {
		/*
		 * The pool or vdev for this case file is no longer
		 * part of the configuration.  This can happen
		 * if we process a device arrival notification
		 * before seeing the ZFS configuration change
		 * event.
		 */
		syslog(LOG_INFO,
		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
		       "Closing\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str());
		/* Close() deletes this object; do not touch members after. */
		Close();

		/*
		 * Since this event was not used to close this
		 * case, do not report it as consumed.
		 */
		return (/*consumed*/false);
	}

	if (VdevState() > VDEV_STATE_CANT_OPEN) {
		/*
		 * For now, newly discovered devices only help for
		 * devices that are missing.  In the future, we might
		 * use a newly inserted spare to replace a degraded
		 * or faulted device.
		 */
		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
		return (/*consumed*/false);
	}

	/* The arrived device is this case's own vdev: bring it online. */
	if (vdev != NULL
	 && vdev->PoolGUID() == m_poolGUID
	 && vdev->GUID() == m_vdevGUID) {

		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
				  &m_vdevState);
		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
		       zpool_get_name(pool), vdev->GUIDString().c_str(),
		       devPath.c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

		/*
		 * Check the vdev state post the online action to see
		 * if we can retire this case.
		 */
		CloseIfSolved();

		return (/*consumed*/true);
	}

	/*
	 * If the auto-replace policy is enabled, and we have physical
	 * path information, try a physical path replacement.
	 */
	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (PhysicalPath().empty()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): No physical path information.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (physPath != PhysicalPath()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	/* Write a label on the newly inserted disk. */
	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
		syslog(LOG_ERR,
		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
		       zpool_get_name(pool), VdevGUIDString().c_str(),
		       libzfs_error_action(g_zfsHandle),
		       libzfs_error_description(g_zfsHandle));
		return (/*consumed*/false);
	}

	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	    devPath.c_str());
	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}
340
/*
 * Re-assess this case in light of a new ZFS event.  Returns true if
 * the event was consumed by this case or if processing it allowed the
 * case to close.  May delete this CaseFile via Close().
 */
bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
	bool consumed(false);

	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
		/*
		 * The Vdev we represent has been removed from the
		 * configuration.  This case is no longer of value.
		 */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
		/* This Pool has been destroyed.  Discard the case */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
		/*
		 * A configuration sync may have brought in a new spare;
		 * refresh and try to activate one if we're still unhealthy.
		 */
		RefreshVdevState();
		if (VdevState() < VDEV_STATE_HEALTHY)
			consumed = ActivateSpare();
	}


	if (event.Value("class") == "resource.fs.zfs.removed") {
		bool spare_activated;

		if (!RefreshVdevState()) {
			/*
			 * The pool or vdev for this case file is no longer
			 * part of the configuration.  This can happen
			 * if we process a device arrival notification
			 * before seeing the ZFS configuration change
			 * event.
			 */
			syslog(LOG_INFO,
			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
			       "unconfigured.  Closing\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			/*
			 * Close the case now so we won't waste cycles in the
			 * system rescan
			 */
			Close();

			/*
			 * Since this event was not used to close this
			 * case, do not report it as consumed.
			 */
			return (/*consumed*/false);
		}

		/*
		 * Discard any tentative I/O error events for
		 * this case.  They were most likely caused by the
		 * hot-unplug of this device.
		 */
		PurgeTentativeEvents();

		/* Try to activate spares if they are available */
		spare_activated = ActivateSpare();

		/*
		 * Rescan the drives in the system to see if a recent
		 * drive arrival can be used to solve this case.
		 */
		ZfsDaemon::RequestSystemRescan();

		/*
		 * Consume the event if we successfully activated a spare.
		 * Otherwise, leave it in the unconsumed events list so that the
		 * future addition of a spare to this pool might be able to
		 * close the case
		 */
		consumed = spare_activated;
	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
		RefreshVdevState();
		/*
		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
		 * activate a hotspare.  Otherwise, ignore the event
		 */
		if (VdevState() == VDEV_STATE_FAULTED ||
		    VdevState() == VDEV_STATE_DEGRADED ||
		    VdevState() == VDEV_STATE_CANT_OPEN)
			(void) ActivateSpare();
		consumed = true;
	}
	else if (event.Value("class") == "ereport.fs.zfs.io" ||
	         event.Value("class") == "ereport.fs.zfs.checksum") {

		/*
		 * Accumulate soft errors as tentative events; they are
		 * confirmed (or discarded) when the grace period expires.
		 */
		m_tentativeEvents.push_front(event.DeepCopy());
		RegisterCallout(event);
		consumed = true;
	}

	bool closed(CloseIfSolved());

	return (consumed || closed);
}
442
443
/*
 * Attempt to replace this case's vdev with one of the pool's
 * configured hot spares.  Returns true iff a usable spare was found
 * and the replacement was initiated via Replace().
 */
bool
CaseFile::ActivateSpare() {
	nvlist_t	*config, *nvroot;
	nvlist_t       **spares;
	char		*devPath, *vdev_type;
	const char	*poolname;
	u_int		 nspares, i;
	int		 error;

	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	config = zpool_get_config(zhp, NULL);
	if (config == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "config for pool %s", poolname);
		return (false);
	}
	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
	if (error != 0){
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
		       "tree for pool %s", poolname);
		return (false);
	}
	/* nspares stays 0 when the pool has no ZPOOL_CONFIG_SPARES entry. */
	nspares = 0;
	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
				   &nspares);
	if (nspares == 0) {
		/* The pool has no spares configured */
		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
		       "No spares available for pool %s", poolname);
		return (false);
	}
	/* Scan for the first spare that is healthy and not already in use. */
	for (i = 0; i < nspares; i++) {
		uint64_t    *nvlist_array;
		vdev_stat_t *vs;
		uint_t	     nstats;

		if (nvlist_lookup_uint64_array(spares[i],
		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
			       "find vdev stats for pool %s, spare %d",
			       poolname, i);
			return (false);
		}
		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

		if ((vs->vs_aux != VDEV_AUX_SPARED)
		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
			/* We found a usable spare */
			break;
		}
	}

	if (i == nspares) {
		/* No available spares were found */
		return (false);
	}

	/* Pull the device path and vdev type needed to attach the spare. */
	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the path of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the vdev type of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	return (Replace(vdev_type, devPath, /*isspare*/true));
}
526
/*
 * Arm (or tighten) the grace-period timer used to age tentative
 * events.  The countdown is s_removeGracePeriod minus the time already
 * elapsed since the event was posted, clamped to a minimum of 1us so
 * the callout always fires.
 */
void
CaseFile::RegisterCallout(const Event &event)
{
	timeval now, countdown, elapsed, timestamp, zero, remaining;

	gettimeofday(&now, 0);
	timestamp = event.GetTimestamp();
	timersub(&now, &timestamp, &elapsed);
	timersub(&s_removeGracePeriod, &elapsed, &countdown);
	/*
	 * If countdown is <= zero, Reset the timer to the
	 * smallest positive time value instead
	 */
	timerclear(&zero);
	if (timercmp(&countdown, &zero, <=)) {
		timerclear(&countdown);
		countdown.tv_usec = 1;
	}

	remaining = m_tentativeTimer.TimeRemaining();

	/*
	 * Only reset the timer if it is not already pending, or if this
	 * event's deadline is sooner than the one currently armed.
	 */
	if (!m_tentativeTimer.IsPending()
	 || timercmp(&countdown, &remaining, <))
		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}
552
553
/*
 * Close this case if it no longer holds any event data and the vdev
 * has returned to health.  Returns true iff the case was closed (and
 * therefore this object deleted); callers must not touch the object
 * after a true return.
 */
bool
CaseFile::CloseIfSolved()
{
	if (m_events.empty()
	 && m_tentativeEvents.empty()) {

		/*
		 * We currently do not track or take actions on
		 * devices in the degraded or faulted state.
		 * Once we have support for spare pools, we'll
		 * retain these cases so that any spares added in
		 * the future can be applied to them.
		 */
		switch (VdevState()) {
		case VDEV_STATE_HEALTHY:
			/* No need to keep cases for healthy vdevs */
			Close();
			return (true);
		case VDEV_STATE_REMOVED:
		case VDEV_STATE_CANT_OPEN:
			/*
			 * Keep open.  We may solve it with a newly inserted
			 * device.  (Intentional fallthrough.)
			 */
		case VDEV_STATE_FAULTED:
		case VDEV_STATE_DEGRADED:
			/*
			 * Keep open.  We may solve it with the future
			 * addition of a spare to the pool
			 * (Intentional fallthrough.)
			 */
		case VDEV_STATE_UNKNOWN:
		case VDEV_STATE_CLOSED:
		case VDEV_STATE_OFFLINE:
			/*
			 * Keep open?  This may not be the correct behavior,
			 * but it's what we've always done
			 */
			;
		}

		/*
		 * Re-serialize the case in order to remove any
		 * previous event data.
		 */
		Serialize();
	}

	return (false);
}
603
604void
605CaseFile::Log()
606{
607	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
608	       VdevGUIDString().c_str(), PhysicalPath().c_str());
609	syslog(LOG_INFO, "\tVdev State = %s\n",
610	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
611	if (m_tentativeEvents.size() != 0) {
612		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
613		for (EventList::iterator event(m_tentativeEvents.begin());
614		     event != m_tentativeEvents.end(); event++)
615			(*event)->Log(LOG_INFO);
616	}
617	if (m_events.size() != 0) {
618		syslog(LOG_INFO, "\t=== Events ===\n");
619		for (EventList::iterator event(m_events.begin());
620		     event != m_events.end(); event++)
621			(*event)->Log(LOG_INFO);
622	}
623}
624
625//- CaseFile Static Protected Methods ------------------------------------------
626void
627CaseFile::OnGracePeriodEnded(void *arg)
628{
629	CaseFile &casefile(*static_cast<CaseFile *>(arg));
630
631	casefile.OnGracePeriodEnded();
632}
633
634int
635CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
636{
637	uint64_t poolGUID;
638	uint64_t vdevGUID;
639
640	if (dirEntry->d_type == DT_REG
641	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
642		   &poolGUID, &vdevGUID) == 2)
643		return (1);
644	return (0);
645}
646
647void
648CaseFile::DeSerializeFile(const char *fileName)
649{
650	string	  fullName(s_caseFilePath + '/' + fileName);
651	CaseFile *existingCaseFile(NULL);
652	CaseFile *caseFile(NULL);
653
654	try {
655		uint64_t poolGUID;
656		uint64_t vdevGUID;
657		nvlist_t *vdevConf;
658
659		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
660		       &poolGUID, &vdevGUID) != 2) {
661			throw ZfsdException("CaseFile::DeSerialize: "
662			    "Unintelligible CaseFile filename %s.\n", fileName);
663		}
664		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
665		if (existingCaseFile != NULL) {
666			/*
667			 * If the vdev is already degraded or faulted,
668			 * there's no point in keeping the state around
669			 * that we use to put a drive into the degraded
670			 * state.  However, if the vdev is simply missing,
671			 * preserve the case data in the hopes that it will
672			 * return.
673			 */
674			caseFile = existingCaseFile;
675			vdev_state curState(caseFile->VdevState());
676			if (curState > VDEV_STATE_CANT_OPEN
677			 && curState < VDEV_STATE_HEALTHY) {
678				unlink(fileName);
679				return;
680			}
681		} else {
682			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
683			if (zpl.empty()
684			 || (vdevConf = VdevIterator(zpl.front())
685						    .Find(vdevGUID)) == NULL) {
686				/*
687				 * Either the pool no longer exists
688				 * or this vdev is no longer a member of
689				 * the pool.
690				 */
691				unlink(fullName.c_str());
692				return;
693			}
694
695			/*
696			 * Any vdev we find that does not have a case file
697			 * must be in the healthy state and thus worthy of
698			 * continued SERD data tracking.
699			 */
700			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
701		}
702
703		ifstream caseStream(fullName.c_str());
704		if (!caseStream)
705			throw ZfsdException("CaseFile::DeSerialize: Unable to "
706					    "read %s.\n", fileName);
707
708		caseFile->DeSerialize(caseStream);
709	} catch (const ParseException &exp) {
710
711		exp.Log();
712		if (caseFile != existingCaseFile)
713			delete caseFile;
714
715		/*
716		 * Since we can't parse the file, unlink it so we don't
717		 * trip over it again.
718		 */
719		unlink(fileName);
720	} catch (const ZfsdException &zfsException) {
721
722		zfsException.Log();
723		if (caseFile != existingCaseFile)
724			delete caseFile;
725	}
726}
727
728//- CaseFile Protected Methods -------------------------------------------------
729CaseFile::CaseFile(const Vdev &vdev)
730 : m_poolGUID(vdev.PoolGUID()),
731   m_vdevGUID(vdev.GUID()),
732   m_vdevState(vdev.State()),
733   m_vdevPhysPath(vdev.PhysicalPath())
734{
735	stringstream guidString;
736
737	guidString << m_vdevGUID;
738	m_vdevGUIDString = guidString.str();
739	guidString.str("");
740	guidString << m_poolGUID;
741	m_poolGUIDString = guidString.str();
742
743	s_activeCases.push_back(this);
744
745	syslog(LOG_INFO, "Creating new CaseFile:\n");
746	Log();
747}
748
CaseFile::~CaseFile()
{
	/*
	 * Free the event copies this case owns, cancel any pending
	 * grace-period timer, and unregister from the active case list.
	 */
	PurgeEvents();
	PurgeTentativeEvents();
	m_tentativeTimer.Stop();
	s_activeCases.remove(this);
}
756
757void
758CaseFile::PurgeEvents()
759{
760	for (EventList::iterator event(m_events.begin());
761	     event != m_events.end(); event++)
762		delete *event;
763
764	m_events.clear();
765}
766
767void
768CaseFile::PurgeTentativeEvents()
769{
770	for (EventList::iterator event(m_tentativeEvents.begin());
771	     event != m_tentativeEvents.end(); event++)
772		delete *event;
773
774	m_tentativeEvents.clear();
775}
776
/*
 * Append each event's string form (optionally preceded by prefix) to
 * the already-open file descriptor fd.
 *
 * NOTE(review): "events" is passed by value, copying the whole list on
 * every call; a const reference would avoid the copy, but the
 * signature must stay in sync with the class declaration in the
 * header — confirm before changing.
 */
void
CaseFile::SerializeEvList(const EventList events, int fd,
		const char* prefix) const
{
	if (events.empty())
		return;
	for (EventList::const_iterator curEvent = events.begin();
	     curEvent != events.end(); curEvent++) {
		const string &eventString((*curEvent)->GetEventString());

		// TODO: replace many write(2) calls with a single writev(2)
		if (prefix)
			write(fd, prefix, strlen(prefix));
		write(fd, eventString.c_str(), eventString.length());
	}
}
793
/*
 * Persist this case's event data to
 * s_caseFilePath/pool_<guid>_vdev_<guid>.case.  A case with no events
 * has nothing worth preserving, so its file is removed instead —
 * which is how Close() clears a case's on-disk state.
 */
void
CaseFile::Serialize()
{
	stringstream saveFile;

	saveFile << setfill('0')
		 << s_caseFilePath << "/"
		 << "pool_" << PoolGUIDString()
		 << "_vdev_" << VdevGUIDString()
		 << ".case";

	if (m_events.empty() && m_tentativeEvents.empty()) {
		unlink(saveFile.str().c_str());
		return;
	}

	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
	if (fd == -1) {
		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
		       saveFile.str().c_str());
		return;
	}
	/* Confirmed events first, then tentative ones tagged with a prefix. */
	SerializeEvList(m_events, fd);
	SerializeEvList(m_tentativeEvents, fd, "tentative ");
	close(fd);
}
820
821/*
822 * XXX: This method assumes that events may not contain embedded newlines.  If
823 * ever events can contain embedded newlines, then CaseFile must switch
824 * serialization formats
825 */
/*
 * Rebuild this case's event lists from a previously Serialize()d
 * stream.  Each line is one event; lines starting with "tentative "
 * are routed to m_tentativeEvents, the rest to m_events.  Events are
 * re-registered with the grace-period callout as they are restored.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
	string	      evString;
	const EventFactory &factory(ZfsDaemon::Get().GetFactory());

	/* Preserve embedded whitespace, but skip leading blank lines. */
	caseStream >> std::noskipws >> std::ws;
	while (caseStream.good()) {
		/*
		 * Outline:
		 * read the beginning of a line and check it for
		 * "tentative".  If found, discard "tentative".
		 * Create a new event
		 * continue
		 */
		EventList* destEvents;
		const string tentFlag("tentative ");
		string line;
		std::stringbuf lineBuf;

		caseStream.get(lineBuf);
		caseStream.ignore();  /*discard the newline character*/
		line = lineBuf.str();
		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
			/* Discard "tentative" */
			line.erase(0, tentFlag.size());
			destEvents = &m_tentativeEvents;
		} else {
			destEvents = &m_events;
		}
		Event *event(Event::CreateEvent(factory, line));
		if (event != NULL) {
			destEvents->push_back(event);
			RegisterCallout(*event);
		}
	}
}
863
void
CaseFile::Close()
{
	/*
	 * This case is no longer relevant.  Clean up our
	 * serialization file, and delete the case.
	 */
	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	/*
	 * Serialization of a Case with no event data, clears the
	 * Serialization data for that event.
	 */
	PurgeEvents();
	Serialize();

	/* Self-destruct; the destructor removes us from s_activeCases. */
	delete this;
}
884
885void
886CaseFile::OnGracePeriodEnded()
887{
888	bool should_fault, should_degrade;
889	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
890	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
891
892	m_events.splice(m_events.begin(), m_tentativeEvents);
893	should_fault = ShouldFault();
894	should_degrade = ShouldDegrade();
895
896	if (should_fault || should_degrade) {
897		if (zhp == NULL
898		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
899			/*
900			 * Either the pool no longer exists
901			 * or this vdev is no longer a member of
902			 * the pool.
903			 */
904			Close();
905			return;
906		}
907
908	}
909
910	/* A fault condition has priority over a degrade condition */
911	if (ShouldFault()) {
912		/* Fault the vdev and close the case. */
913		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
914				       VDEV_AUX_ERR_EXCEEDED) == 0) {
915			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
916			       PoolGUIDString().c_str(),
917			       VdevGUIDString().c_str());
918			Close();
919			return;
920		}
921		else {
922			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
923			       PoolGUIDString().c_str(),
924			       VdevGUIDString().c_str(),
925			       libzfs_error_action(g_zfsHandle),
926			       libzfs_error_description(g_zfsHandle));
927		}
928	}
929	else if (ShouldDegrade()) {
930		/* Degrade the vdev and close the case. */
931		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
932				       VDEV_AUX_ERR_EXCEEDED) == 0) {
933			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
934			       PoolGUIDString().c_str(),
935			       VdevGUIDString().c_str());
936			Close();
937			return;
938		}
939		else {
940			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
941			       PoolGUIDString().c_str(),
942			       VdevGUIDString().c_str(),
943			       libzfs_error_action(g_zfsHandle),
944			       libzfs_error_description(g_zfsHandle));
945		}
946	}
947	Serialize();
948}
949
/*
 * Determine whether this case's vdev is already being replaced by a
 * hot spare, and if so return that spare; returns NonexistentVdev
 * otherwise.
 */
Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
	Vdev vd(zhp, CaseVdev(zhp));
	std::list<Vdev> children;
	std::list<Vdev>::iterator children_it;

	Vdev parent(vd.Parent());
	Vdev replacing(NonexistentVdev);

	/*
	 * To determine whether we are being replaced by another spare that
	 * is still working, then make sure that it is currently spared and
	 * that the spare is either resilvering or healthy.  If any of these
	 * conditions fail, then we are not being replaced by a spare.
	 *
	 * If the spare is healthy, then the case file should be closed very
	 * soon after this check.
	 */
	if (parent.DoesNotExist()
	 || parent.Name(zhp, /*verbose*/false) != "spare")
		return (NonexistentVdev);

	children = parent.Children();
	children_it = children.begin();
	for (;children_it != children.end(); children_it++) {
		Vdev child = *children_it;

		/* Skip our vdev. */
		if (child.GUID() == VdevGUID())
			continue;
		/*
		 * Accept the first child that doesn't match our GUID, or
		 * any resilvering/healthy device if one exists.
		 */
		if (replacing.DoesNotExist() || child.IsResilvering()
		 || child.State() == VDEV_STATE_HEALTHY)
			replacing = child;
	}

	return (replacing);
}
991
992bool
993CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
994	nvlist_t *nvroot, *newvd;
995	const char *poolname;
996	string oldstr(VdevGUIDString());
997	bool retval = true;
998
999	/* Figure out what pool we're working on */
1000	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1001	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1002	if (zhp == NULL) {
1003		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1004		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1005		return (false);
1006	}
1007	poolname = zpool_get_name(zhp);
1008	Vdev vd(zhp, CaseVdev(zhp));
1009	Vdev replaced(BeingReplacedBy(zhp));
1010
1011	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1012		/* If we are already being replaced by a working spare, pass. */
1013		if (replaced.IsResilvering()
1014		 || replaced.State() == VDEV_STATE_HEALTHY) {
1015			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1016			    "replaced", VdevGUIDString().c_str(), path);
1017			return (/*consumed*/false);
1018		}
1019		/*
1020		 * If we have already been replaced by a spare, but that spare
1021		 * is broken, we must spare the spare, not the original device.
1022		 */
1023		oldstr = replaced.GUIDString();
1024		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1025		    "broken spare %s instead", VdevGUIDString().c_str(),
1026		    path, oldstr.c_str());
1027	}
1028
1029	/*
1030	 * Build a root vdev/leaf vdev configuration suitable for
1031	 * zpool_vdev_attach. Only enough data for the kernel to find
1032	 * the device (i.e. type and disk device node path) are needed.
1033	 */
1034	nvroot = NULL;
1035	newvd = NULL;
1036
1037	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1038	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1039		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1040		    "configuration data.", poolname, oldstr.c_str());
1041		if (nvroot != NULL)
1042			nvlist_free(nvroot);
1043		return (false);
1044	}
1045	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1046	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1047	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1048	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1049				    &newvd, 1) != 0) {
1050		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1051		    "configuration data.", poolname, oldstr.c_str());
1052		nvlist_free(newvd);
1053		nvlist_free(nvroot);
1054		return (true);
1055	}
1056
1057	/* Data was copied when added to the root vdev. */
1058	nvlist_free(newvd);
1059
1060	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1061	    /*replace*/B_TRUE) == 0);
1062	if (retval)
1063		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1064		    poolname, oldstr.c_str(), path);
1065	else
1066		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1067		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1068		    libzfs_error_description(g_zfsHandle));
1069	nvlist_free(nvroot);
1070
1071	return (retval);
1072}
1073
1074/* Does the argument event refer to a checksum error? */
1075static bool
1076IsChecksumEvent(const Event* const event)
1077{
1078	return ("ereport.fs.zfs.checksum" == event->Value("type"));
1079}
1080
1081/* Does the argument event refer to an IO error? */
1082static bool
1083IsIOEvent(const Event* const event)
1084{
1085	return ("ereport.fs.zfs.io" == event->Value("type"));
1086}
1087
bool
CaseFile::ShouldDegrade() const
{
	/*
	 * Degrade once the number of confirmed checksum-error events
	 * exceeds the threshold.  NOTE(review): the constant is named
	 * ZFS_DEGRADE_IO_COUNT but is applied to checksum events here —
	 * confirm the shared threshold is intentional.
	 */
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}
1094
bool
CaseFile::ShouldFault() const
{
	/*
	 * Fault once the number of confirmed I/O-error events exceeds
	 * the threshold.
	 */
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}
1101
1102nvlist_t *
1103CaseFile::CaseVdev(zpool_handle_t *zhp) const
1104{
1105	return (VdevIterator(zhp).Find(VdevGUID()));
1106}
1107