control.c revision 213003
/*-
 * Copyright (c) 2009-2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Pawel Jakub Dawidek under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2967754Smsmith
30217365Sjkim#include <sys/cdefs.h>
31217365Sjkim__FBSDID("$FreeBSD: head/sbin/hastd/control.c 213003 2010-09-22 18:38:02Z pjd $");
32217365Sjkim
33217365Sjkim#include <sys/types.h>
34217365Sjkim#include <sys/wait.h>
35217365Sjkim
36217365Sjkim#include <assert.h>
37217365Sjkim#include <errno.h>
38217365Sjkim#include <pthread.h>
39217365Sjkim#include <signal.h>
40217365Sjkim#include <stdio.h>
41217365Sjkim#include <string.h>
42217365Sjkim
4367754Smsmith#include "hast.h"
4467754Smsmith#include "hastd.h"
4567754Smsmith#include "hast_proto.h"
46193341Sjkim#include "hooks.h"
47193341Sjkim#include "nv.h"
48193341Sjkim#include "pjdlog.h"
49193341Sjkim#include "proto.h"
5067754Smsmith#include "subr.h"
5167754Smsmith
5277424Smsmith#include "control.h"
5391116Smsmith
5467754Smsmithstatic void
5567754Smsmithcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout,
5667754Smsmith    uint8_t role, struct hast_resource *res, const char *name, unsigned int no)
5767754Smsmith{
5891116Smsmith	int oldrole;
5967754Smsmith
6091116Smsmith	/* Name is always needed. */
6191116Smsmith	if (name != NULL)
6291116Smsmith		nv_add_string(nvout, name, "resource%u", no);
6367754Smsmith
64193267Sjkim	if (res == NULL) {
65193267Sjkim		assert(cfg != NULL);
6691116Smsmith		assert(name != NULL);
6767754Smsmith
6891116Smsmith		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
6967754Smsmith			if (strcmp(res->hr_name, name) == 0)
7067754Smsmith				break;
7167754Smsmith		}
72193267Sjkim		if (res == NULL) {
7391116Smsmith			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
7491116Smsmith			return;
7591116Smsmith		}
76114237Snjl	}
7767754Smsmith	assert(res != NULL);
7899679Siwasaki
7967754Smsmith	/* Send previous role back. */
8067754Smsmith	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
8167754Smsmith
82167802Sjkim	/* Nothing changed, return here. */
8367754Smsmith	if (role == res->hr_role)
8467754Smsmith		return;
8591116Smsmith
8691116Smsmith	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
8791116Smsmith	pjdlog_info("Role changed to %s.", role2str(role));
8891116Smsmith
8967754Smsmith	/* Change role to the new one. */
9091116Smsmith	oldrole = res->hr_role;
9191116Smsmith	res->hr_role = role;
92193267Sjkim	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
9367754Smsmith
9467754Smsmith	/*
9591116Smsmith	 * If previous role was primary or secondary we have to kill process
9667754Smsmith	 * doing that work.
9791116Smsmith	 */
9891116Smsmith	if (res->hr_workerpid != 0) {
9967754Smsmith		if (kill(res->hr_workerpid, SIGTERM) < 0) {
10091116Smsmith			pjdlog_errno(LOG_WARNING,
10191116Smsmith			    "Unable to kill worker process %u",
10291116Smsmith			    (unsigned int)res->hr_workerpid);
10367754Smsmith		} else if (waitpid(res->hr_workerpid, NULL, 0) !=
10491116Smsmith		    res->hr_workerpid) {
10591116Smsmith			pjdlog_errno(LOG_WARNING,
106117521Snjl			    "Error while waiting for worker process %u",
107209746Sjkim			    (unsigned int)res->hr_workerpid);
10891116Smsmith		} else {
10991116Smsmith			pjdlog_debug(1, "Worker process %u stopped.",
11091116Smsmith			    (unsigned int)res->hr_workerpid);
11191116Smsmith		}
112114237Snjl		res->hr_workerpid = 0;
11367754Smsmith	}
11467754Smsmith
11591116Smsmith	/* Start worker process if we are changing to primary. */
11667754Smsmith	if (role == HAST_ROLE_PRIMARY)
11791116Smsmith		hastd_primary(res);
11867754Smsmith	pjdlog_prefix_set("%s", "");
11991116Smsmith	hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole),
12067754Smsmith	    role2str(res->hr_role), NULL);
121167802Sjkim}
122204773Sjkim
12399679Siwasakivoid
124193267Sjkimcontrol_set_role(struct hast_resource *res, uint8_t role)
125193267Sjkim{
12667754Smsmith
12767754Smsmith	control_set_role_common(NULL, NULL, role, res, NULL, 0);
128193267Sjkim}
12991116Smsmith
13067754Smsmithstatic void
13167754Smsmithcontrol_status_worker(struct hast_resource *res, struct nv *nvout,
13291116Smsmith    unsigned int no)
13391116Smsmith{
13491116Smsmith	struct nv *cnvin, *cnvout;
13591116Smsmith	const char *str;
136151937Sjkim	int error;
13791116Smsmith
13891116Smsmith	cnvin = cnvout = NULL;
13991116Smsmith	error = 0;
14091116Smsmith
14191116Smsmith	/*
142193267Sjkim	 * Prepare and send command to worker process.
143193267Sjkim	 */
14491116Smsmith	cnvout = nv_alloc();
14591116Smsmith	nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
14667754Smsmith	error = nv_error(cnvout);
147114237Snjl	if (error != 0) {
14891116Smsmith		/* LOG */
14991116Smsmith		goto end;
15091116Smsmith	}
151193267Sjkim	if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
152114237Snjl		error = errno;
15391116Smsmith		/* LOG */
15467754Smsmith		goto end;
15567754Smsmith	}
156167802Sjkim
15767754Smsmith	/*
15891116Smsmith	 * Receive response.
15991116Smsmith	 */
16091116Smsmith	if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
16191116Smsmith		error = errno;
162193267Sjkim		/* LOG */
163193267Sjkim		goto end;
164193267Sjkim	}
165193267Sjkim
16691116Smsmith	error = nv_get_int64(cnvin, "error");
16791116Smsmith	if (error != 0)
16891116Smsmith		goto end;
169167802Sjkim
17091116Smsmith	if ((str = nv_get_string(cnvin, "status")) == NULL) {
17167754Smsmith		error = ENOENT;
172193267Sjkim		/* LOG */
17391116Smsmith		goto end;
17467754Smsmith	}
17567754Smsmith	nv_add_string(nvout, str, "status%u", no);
17691116Smsmith	nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
17791116Smsmith	nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
178193267Sjkim	    "extentsize%u", no);
179193267Sjkim	nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
180193267Sjkim	    "keepdirty%u", no);
181193267Sjkimend:
182193267Sjkim	if (cnvin != NULL)
183193267Sjkim		nv_free(cnvin);
184193267Sjkim	if (cnvout != NULL)
18567754Smsmith		nv_free(cnvout);
18667754Smsmith	if (error != 0)
18767754Smsmith		nv_add_int16(nvout, error, "error");
18867754Smsmith}
18967754Smsmith
19067754Smsmithstatic void
19173561Smsmithcontrol_status(struct hastd_config *cfg, struct nv *nvout,
19273561Smsmith    struct hast_resource *res, const char *name, unsigned int no)
19373561Smsmith{
19473561Smsmith
19573561Smsmith	assert(cfg != NULL);
19673561Smsmith	assert(nvout != NULL);
19773561Smsmith	assert(name != NULL);
19873561Smsmith
19973561Smsmith	/* Name is always needed. */
20073561Smsmith	nv_add_string(nvout, name, "resource%u", no);
20191116Smsmith
20273561Smsmith	if (res == NULL) {
20373561Smsmith		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
20473561Smsmith			if (strcmp(res->hr_name, name) == 0)
20591116Smsmith				break;
20673561Smsmith		}
20773561Smsmith		if (res == NULL) {
20883174Smsmith			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
20991116Smsmith			return;
21083174Smsmith		}
21183174Smsmith	}
21273561Smsmith	assert(res != NULL);
21373561Smsmith	nv_add_string(nvout, res->hr_provname, "provname%u", no);
21473561Smsmith	nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
21573561Smsmith	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);
21691116Smsmith	switch (res->hr_replication) {
21791116Smsmith	case HAST_REPLICATION_FULLSYNC:
21891116Smsmith		nv_add_string(nvout, "fullsync", "replication%u", no);
219100966Siwasaki		break;
22073561Smsmith	case HAST_REPLICATION_MEMSYNC:
221193267Sjkim		nv_add_string(nvout, "memsync", "replication%u", no);
222193267Sjkim		break;
223193267Sjkim	case HAST_REPLICATION_ASYNC:
224193267Sjkim		nv_add_string(nvout, "async", "replication%u", no);
225193267Sjkim		break;
226193267Sjkim	default:
227193267Sjkim		nv_add_string(nvout, "unknown", "replication%u", no);
228114237Snjl		break;
229209746Sjkim	}
23073561Smsmith	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
23173561Smsmith
232138287Smarks	switch (res->hr_role) {
233138287Smarks	case HAST_ROLE_PRIMARY:
234193267Sjkim		assert(res->hr_workerpid != 0);
235138287Smarks		/* FALLTHROUGH */
236138287Smarks	case HAST_ROLE_SECONDARY:
237138287Smarks		if (res->hr_workerpid != 0)
23873561Smsmith			break;
23973561Smsmith		/* FALLTHROUGH */
24073561Smsmith	default:
24173561Smsmith		return;
24273561Smsmith	}
24367754Smsmith
24467754Smsmith	/*
24567754Smsmith	 * If we are here, it means that we have a worker process, which we
24667754Smsmith	 * want to ask some questions.
24791116Smsmith	 */
24867754Smsmith	control_status_worker(res, nvout, no);
24967754Smsmith}
25067754Smsmith
25167754Smsmithvoid
25267754Smsmithcontrol_handle(struct hastd_config *cfg)
25367754Smsmith{
25467754Smsmith	struct proto_conn *conn;
25567754Smsmith	struct nv *nvin, *nvout;
25667754Smsmith	unsigned int ii;
25767754Smsmith	const char *str;
25891116Smsmith	uint8_t cmd, role;
25967754Smsmith	int error;
26091116Smsmith
26167754Smsmith	if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
26291116Smsmith		pjdlog_errno(LOG_ERR, "Unable to accept control connection");
26367754Smsmith		return;
26483174Smsmith	}
265167802Sjkim
26667754Smsmith	nvin = nvout = NULL;
26767754Smsmith	role = HAST_ROLE_UNDEF;
268200553Sjkim
26967754Smsmith	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
27067754Smsmith		pjdlog_errno(LOG_ERR, "Unable to receive control header");
27167754Smsmith		nvin = NULL;
27267754Smsmith		goto close;
27367754Smsmith	}
27491116Smsmith
27567754Smsmith	/* Obtain command code. 0 means that nv_get_uint8() failed. */
27691116Smsmith	cmd = nv_get_uint8(nvin, "cmd");
277193267Sjkim	if (cmd == 0) {
278193267Sjkim		pjdlog_error("Control header is missing 'cmd' field.");
279193267Sjkim		error = EHAST_INVALID;
280193267Sjkim		goto close;
28169746Smsmith	}
28291116Smsmith
28369746Smsmith	/* Allocate outgoing nv structure. */
28491116Smsmith	nvout = nv_alloc();
28591116Smsmith	if (nvout == NULL) {
28667754Smsmith		pjdlog_error("Unable to allocate header for control response.");
28791116Smsmith		error = EHAST_NOMEMORY;
28867754Smsmith		goto close;
28967754Smsmith	}
29091116Smsmith
29187031Smsmith	error = 0;
292193267Sjkim
293193267Sjkim	str = nv_get_string(nvin, "resource0");
294193267Sjkim	if (str == NULL) {
295193267Sjkim		pjdlog_error("Control header is missing 'resource0' field.");
296193267Sjkim		error = EHAST_INVALID;
29767754Smsmith		goto fail;
298151937Sjkim	}
299138287Smarks	if (cmd == HASTCTL_SET_ROLE) {
30091116Smsmith		role = nv_get_uint8(nvin, "role");
30167754Smsmith		switch (role) {
30267754Smsmith		case HAST_ROLE_INIT:	/* Is that valid to set, hmm? */
30367754Smsmith		case HAST_ROLE_PRIMARY:
304		case HAST_ROLE_SECONDARY:
305			break;
306		default:
307			pjdlog_error("Invalid role received (%hhu).", role);
308			error = EHAST_INVALID;
309			goto fail;
310		}
311	}
312	if (strcmp(str, "all") == 0) {
313		struct hast_resource *res;
314
315		/* All configured resources. */
316
317		ii = 0;
318		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
319			switch (cmd) {
320			case HASTCTL_SET_ROLE:
321				control_set_role_common(cfg, nvout, role, res,
322				    res->hr_name, ii++);
323				break;
324			case HASTCTL_STATUS:
325				control_status(cfg, nvout, res, res->hr_name,
326				    ii++);
327				break;
328			default:
329				pjdlog_error("Invalid command received (%hhu).",
330				    cmd);
331				error = EHAST_UNIMPLEMENTED;
332				goto fail;
333			}
334		}
335	} else {
336		/* Only selected resources. */
337
338		for (ii = 0; ; ii++) {
339			str = nv_get_string(nvin, "resource%u", ii);
340			if (str == NULL)
341				break;
342			switch (cmd) {
343			case HASTCTL_SET_ROLE:
344				control_set_role_common(cfg, nvout, role, NULL,
345				    str, ii);
346				break;
347			case HASTCTL_STATUS:
348				control_status(cfg, nvout, NULL, str, ii);
349				break;
350			default:
351				pjdlog_error("Invalid command received (%hhu).",
352				    cmd);
353				error = EHAST_UNIMPLEMENTED;
354				goto fail;
355			}
356		}
357	}
358	if (nv_error(nvout) != 0)
359		goto close;
360fail:
361	if (error != 0)
362		nv_add_int16(nvout, error, "error");
363
364	if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
365		pjdlog_errno(LOG_ERR, "Unable to send control response");
366close:
367	if (nvin != NULL)
368		nv_free(nvin);
369	if (nvout != NULL)
370		nv_free(nvout);
371	proto_close(conn);
372}
373
374/*
375 * Thread handles control requests from the parent.
376 */
377void *
378ctrl_thread(void *arg)
379{
380	struct hast_resource *res = arg;
381	struct nv *nvin, *nvout;
382	uint8_t cmd;
383
384	for (;;) {
385		if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
386			if (sigexit_received)
387				pthread_exit(NULL);
388			pjdlog_errno(LOG_ERR,
389			    "Unable to receive control message");
390			continue;
391		}
392		cmd = nv_get_uint8(nvin, "cmd");
393		if (cmd == 0) {
394			pjdlog_error("Control message is missing 'cmd' field.");
395			nv_free(nvin);
396			continue;
397		}
398		nv_free(nvin);
399		nvout = nv_alloc();
400		switch (cmd) {
401		case HASTCTL_STATUS:
402			if (res->hr_remotein != NULL &&
403			    res->hr_remoteout != NULL) {
404				nv_add_string(nvout, "complete", "status");
405			} else {
406				nv_add_string(nvout, "degraded", "status");
407			}
408			nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
409			    "extentsize");
410			if (res->hr_role == HAST_ROLE_PRIMARY) {
411				nv_add_uint32(nvout,
412				    (uint32_t)res->hr_keepdirty, "keepdirty");
413				nv_add_uint64(nvout,
414				    (uint64_t)(activemap_ndirty(res->hr_amp) *
415				    res->hr_extentsize), "dirty");
416			} else {
417				nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
418				nv_add_uint64(nvout, (uint64_t)0, "dirty");
419			}
420			break;
421		default:
422			nv_add_int16(nvout, EINVAL, "error");
423			break;
424		}
425		if (nv_error(nvout) != 0) {
426			pjdlog_error("Unable to create answer on control message.");
427			nv_free(nvout);
428			continue;
429		}
430		if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
431			pjdlog_errno(LOG_ERR,
432			    "Unable to send reply to control message");
433		}
434		nv_free(nvout);
435	}
436	/* NOTREACHED */
437	return (NULL);
438}
439