control.c revision 213003
167754Smsmith/*- 267754Smsmith * Copyright (c) 2009-2010 The FreeBSD Foundation 367754Smsmith * All rights reserved. 467754Smsmith * 567754Smsmith * This software was developed by Pawel Jakub Dawidek under sponsorship from 667754Smsmith * the FreeBSD Foundation. 7217365Sjkim * 8217365Sjkim * Redistribution and use in source and binary forms, with or without 970243Smsmith * modification, are permitted provided that the following conditions 1067754Smsmith * are met: 11217365Sjkim * 1. Redistributions of source code must retain the above copyright 12217365Sjkim * notice, this list of conditions and the following disclaimer. 13217365Sjkim * 2. Redistributions in binary form must reproduce the above copyright 14217365Sjkim * notice, this list of conditions and the following disclaimer in the 15217365Sjkim * documentation and/or other materials provided with the distribution. 16217365Sjkim * 17217365Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18217365Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19217365Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20217365Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21217365Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22217365Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23217365Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24217365Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2567754Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26217365Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27217365Sjkim * SUCH DAMAGE. 28217365Sjkim */ 2967754Smsmith 30217365Sjkim#include <sys/cdefs.h> 31217365Sjkim__FBSDID("$FreeBSD: head/sbin/hastd/control.c 213003 2010-09-22 18:38:02Z pjd $"); 32217365Sjkim 33217365Sjkim#include <sys/types.h> 34217365Sjkim#include <sys/wait.h> 35217365Sjkim 36217365Sjkim#include <assert.h> 37217365Sjkim#include <errno.h> 38217365Sjkim#include <pthread.h> 39217365Sjkim#include <signal.h> 40217365Sjkim#include <stdio.h> 41217365Sjkim#include <string.h> 42217365Sjkim 4367754Smsmith#include "hast.h" 4467754Smsmith#include "hastd.h" 4567754Smsmith#include "hast_proto.h" 46193341Sjkim#include "hooks.h" 47193341Sjkim#include "nv.h" 48193341Sjkim#include "pjdlog.h" 49193341Sjkim#include "proto.h" 5067754Smsmith#include "subr.h" 5167754Smsmith 5277424Smsmith#include "control.h" 5391116Smsmith 5467754Smsmithstatic void 5567754Smsmithcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 5667754Smsmith uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 5767754Smsmith{ 5891116Smsmith int oldrole; 5967754Smsmith 6091116Smsmith /* Name is always needed. */ 6191116Smsmith if (name != NULL) 6291116Smsmith nv_add_string(nvout, name, "resource%u", no); 6367754Smsmith 64193267Sjkim if (res == NULL) { 65193267Sjkim assert(cfg != NULL); 6691116Smsmith assert(name != NULL); 6767754Smsmith 6891116Smsmith TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 6967754Smsmith if (strcmp(res->hr_name, name) == 0) 7067754Smsmith break; 7167754Smsmith } 72193267Sjkim if (res == NULL) { 7391116Smsmith nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 7491116Smsmith return; 7591116Smsmith } 76114237Snjl } 7767754Smsmith assert(res != NULL); 7899679Siwasaki 7967754Smsmith /* Send previous role back. */ 8067754Smsmith nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 8167754Smsmith 82167802Sjkim /* Nothing changed, return here. */ 8367754Smsmith if (role == res->hr_role) 8467754Smsmith return; 8591116Smsmith 8691116Smsmith pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 8791116Smsmith pjdlog_info("Role changed to %s.", role2str(role)); 8891116Smsmith 8967754Smsmith /* Change role to the new one. */ 9091116Smsmith oldrole = res->hr_role; 9191116Smsmith res->hr_role = role; 92193267Sjkim pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 9367754Smsmith 9467754Smsmith /* 9591116Smsmith * If previous role was primary or secondary we have to kill process 9667754Smsmith * doing that work. 9791116Smsmith */ 9891116Smsmith if (res->hr_workerpid != 0) { 9967754Smsmith if (kill(res->hr_workerpid, SIGTERM) < 0) { 10091116Smsmith pjdlog_errno(LOG_WARNING, 10191116Smsmith "Unable to kill worker process %u", 10291116Smsmith (unsigned int)res->hr_workerpid); 10367754Smsmith } else if (waitpid(res->hr_workerpid, NULL, 0) != 10491116Smsmith res->hr_workerpid) { 10591116Smsmith pjdlog_errno(LOG_WARNING, 106117521Snjl "Error while waiting for worker process %u", 107209746Sjkim (unsigned int)res->hr_workerpid); 10891116Smsmith } else { 10991116Smsmith pjdlog_debug(1, "Worker process %u stopped.", 11091116Smsmith (unsigned int)res->hr_workerpid); 11191116Smsmith } 112114237Snjl res->hr_workerpid = 0; 11367754Smsmith } 11467754Smsmith 11591116Smsmith /* Start worker process if we are changing to primary. */ 11667754Smsmith if (role == HAST_ROLE_PRIMARY) 11791116Smsmith hastd_primary(res); 11867754Smsmith pjdlog_prefix_set("%s", ""); 11991116Smsmith hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 12067754Smsmith role2str(res->hr_role), NULL); 121167802Sjkim} 122204773Sjkim 12399679Siwasakivoid 124193267Sjkimcontrol_set_role(struct hast_resource *res, uint8_t role) 125193267Sjkim{ 12667754Smsmith 12767754Smsmith control_set_role_common(NULL, NULL, role, res, NULL, 0); 128193267Sjkim} 12991116Smsmith 13067754Smsmithstatic void 13167754Smsmithcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 13291116Smsmith unsigned int no) 13391116Smsmith{ 13491116Smsmith struct nv *cnvin, *cnvout; 13591116Smsmith const char *str; 136151937Sjkim int error; 13791116Smsmith 13891116Smsmith cnvin = cnvout = NULL; 13991116Smsmith error = 0; 14091116Smsmith 14191116Smsmith /* 142193267Sjkim * Prepare and send command to worker process. 143193267Sjkim */ 14491116Smsmith cnvout = nv_alloc(); 14591116Smsmith nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 14667754Smsmith error = nv_error(cnvout); 147114237Snjl if (error != 0) { 14891116Smsmith /* LOG */ 14991116Smsmith goto end; 15091116Smsmith } 151193267Sjkim if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 152114237Snjl error = errno; 15391116Smsmith /* LOG */ 15467754Smsmith goto end; 15567754Smsmith } 156167802Sjkim 15767754Smsmith /* 15891116Smsmith * Receive response. 15991116Smsmith */ 16091116Smsmith if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 16191116Smsmith error = errno; 162193267Sjkim /* LOG */ 163193267Sjkim goto end; 164193267Sjkim } 165193267Sjkim 16691116Smsmith error = nv_get_int64(cnvin, "error"); 16791116Smsmith if (error != 0) 16891116Smsmith goto end; 169167802Sjkim 17091116Smsmith if ((str = nv_get_string(cnvin, "status")) == NULL) { 17167754Smsmith error = ENOENT; 172193267Sjkim /* LOG */ 17391116Smsmith goto end; 17467754Smsmith } 17567754Smsmith nv_add_string(nvout, str, "status%u", no); 17691116Smsmith nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 17791116Smsmith nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 178193267Sjkim "extentsize%u", no); 179193267Sjkim nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 180193267Sjkim "keepdirty%u", no); 181193267Sjkimend: 182193267Sjkim if (cnvin != NULL) 183193267Sjkim nv_free(cnvin); 184193267Sjkim if (cnvout != NULL) 18567754Smsmith nv_free(cnvout); 18667754Smsmith if (error != 0) 18767754Smsmith nv_add_int16(nvout, error, "error"); 18867754Smsmith} 18967754Smsmith 19067754Smsmithstatic void 19173561Smsmithcontrol_status(struct hastd_config *cfg, struct nv *nvout, 19273561Smsmith struct hast_resource *res, const char *name, unsigned int no) 19373561Smsmith{ 19473561Smsmith 19573561Smsmith assert(cfg != NULL); 19673561Smsmith assert(nvout != NULL); 19773561Smsmith assert(name != NULL); 19873561Smsmith 19973561Smsmith /* Name is always needed. */ 20073561Smsmith nv_add_string(nvout, name, "resource%u", no); 20191116Smsmith 20273561Smsmith if (res == NULL) { 20373561Smsmith TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 20473561Smsmith if (strcmp(res->hr_name, name) == 0) 20591116Smsmith break; 20673561Smsmith } 20773561Smsmith if (res == NULL) { 20883174Smsmith nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 20991116Smsmith return; 21083174Smsmith } 21183174Smsmith } 21273561Smsmith assert(res != NULL); 21373561Smsmith nv_add_string(nvout, res->hr_provname, "provname%u", no); 21473561Smsmith nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 21573561Smsmith nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 21691116Smsmith switch (res->hr_replication) { 21791116Smsmith case HAST_REPLICATION_FULLSYNC: 21891116Smsmith nv_add_string(nvout, "fullsync", "replication%u", no); 219100966Siwasaki break; 22073561Smsmith case HAST_REPLICATION_MEMSYNC: 221193267Sjkim nv_add_string(nvout, "memsync", "replication%u", no); 222193267Sjkim break; 223193267Sjkim case HAST_REPLICATION_ASYNC: 224193267Sjkim nv_add_string(nvout, "async", "replication%u", no); 225193267Sjkim break; 226193267Sjkim default: 227193267Sjkim nv_add_string(nvout, "unknown", "replication%u", no); 228114237Snjl break; 229209746Sjkim } 23073561Smsmith nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 23173561Smsmith 232138287Smarks switch (res->hr_role) { 233138287Smarks case HAST_ROLE_PRIMARY: 234193267Sjkim assert(res->hr_workerpid != 0); 235138287Smarks /* FALLTHROUGH */ 236138287Smarks case HAST_ROLE_SECONDARY: 237138287Smarks if (res->hr_workerpid != 0) 23873561Smsmith break; 23973561Smsmith /* FALLTHROUGH */ 24073561Smsmith default: 24173561Smsmith return; 24273561Smsmith } 24367754Smsmith 24467754Smsmith /* 24567754Smsmith * If we are here, it means that we have a worker process, which we 24667754Smsmith * want to ask some questions. 24791116Smsmith */ 24867754Smsmith control_status_worker(res, nvout, no); 24967754Smsmith} 25067754Smsmith 25167754Smsmithvoid 25267754Smsmithcontrol_handle(struct hastd_config *cfg) 25367754Smsmith{ 25467754Smsmith struct proto_conn *conn; 25567754Smsmith struct nv *nvin, *nvout; 25667754Smsmith unsigned int ii; 25767754Smsmith const char *str; 25891116Smsmith uint8_t cmd, role; 25967754Smsmith int error; 26091116Smsmith 26167754Smsmith if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 26291116Smsmith pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 26367754Smsmith return; 26483174Smsmith } 265167802Sjkim 26667754Smsmith nvin = nvout = NULL; 26767754Smsmith role = HAST_ROLE_UNDEF; 268200553Sjkim 26967754Smsmith if (hast_proto_recv_hdr(conn, &nvin) < 0) { 27067754Smsmith pjdlog_errno(LOG_ERR, "Unable to receive control header"); 27167754Smsmith nvin = NULL; 27267754Smsmith goto close; 27367754Smsmith } 27491116Smsmith 27567754Smsmith /* Obtain command code. 0 means that nv_get_uint8() failed. */ 27691116Smsmith cmd = nv_get_uint8(nvin, "cmd"); 277193267Sjkim if (cmd == 0) { 278193267Sjkim pjdlog_error("Control header is missing 'cmd' field."); 279193267Sjkim error = EHAST_INVALID; 280193267Sjkim goto close; 28169746Smsmith } 28291116Smsmith 28369746Smsmith /* Allocate outgoing nv structure. */ 28491116Smsmith nvout = nv_alloc(); 28591116Smsmith if (nvout == NULL) { 28667754Smsmith pjdlog_error("Unable to allocate header for control response."); 28791116Smsmith error = EHAST_NOMEMORY; 28867754Smsmith goto close; 28967754Smsmith } 29091116Smsmith 29187031Smsmith error = 0; 292193267Sjkim 293193267Sjkim str = nv_get_string(nvin, "resource0"); 294193267Sjkim if (str == NULL) { 295193267Sjkim pjdlog_error("Control header is missing 'resource0' field."); 296193267Sjkim error = EHAST_INVALID; 29767754Smsmith goto fail; 298151937Sjkim } 299138287Smarks if (cmd == HASTCTL_SET_ROLE) { 30091116Smsmith role = nv_get_uint8(nvin, "role"); 30167754Smsmith switch (role) { 30267754Smsmith case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 30367754Smsmith case HAST_ROLE_PRIMARY: 304 case HAST_ROLE_SECONDARY: 305 break; 306 default: 307 pjdlog_error("Invalid role received (%hhu).", role); 308 error = EHAST_INVALID; 309 goto fail; 310 } 311 } 312 if (strcmp(str, "all") == 0) { 313 struct hast_resource *res; 314 315 /* All configured resources. */ 316 317 ii = 0; 318 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 319 switch (cmd) { 320 case HASTCTL_SET_ROLE: 321 control_set_role_common(cfg, nvout, role, res, 322 res->hr_name, ii++); 323 break; 324 case HASTCTL_STATUS: 325 control_status(cfg, nvout, res, res->hr_name, 326 ii++); 327 break; 328 default: 329 pjdlog_error("Invalid command received (%hhu).", 330 cmd); 331 error = EHAST_UNIMPLEMENTED; 332 goto fail; 333 } 334 } 335 } else { 336 /* Only selected resources. */ 337 338 for (ii = 0; ; ii++) { 339 str = nv_get_string(nvin, "resource%u", ii); 340 if (str == NULL) 341 break; 342 switch (cmd) { 343 case HASTCTL_SET_ROLE: 344 control_set_role_common(cfg, nvout, role, NULL, 345 str, ii); 346 break; 347 case HASTCTL_STATUS: 348 control_status(cfg, nvout, NULL, str, ii); 349 break; 350 default: 351 pjdlog_error("Invalid command received (%hhu).", 352 cmd); 353 error = EHAST_UNIMPLEMENTED; 354 goto fail; 355 } 356 } 357 } 358 if (nv_error(nvout) != 0) 359 goto close; 360fail: 361 if (error != 0) 362 nv_add_int16(nvout, error, "error"); 363 364 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 365 pjdlog_errno(LOG_ERR, "Unable to send control response"); 366close: 367 if (nvin != NULL) 368 nv_free(nvin); 369 if (nvout != NULL) 370 nv_free(nvout); 371 proto_close(conn); 372} 373 374/* 375 * Thread handles control requests from the parent. 376 */ 377void * 378ctrl_thread(void *arg) 379{ 380 struct hast_resource *res = arg; 381 struct nv *nvin, *nvout; 382 uint8_t cmd; 383 384 for (;;) { 385 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 386 if (sigexit_received) 387 pthread_exit(NULL); 388 pjdlog_errno(LOG_ERR, 389 "Unable to receive control message"); 390 continue; 391 } 392 cmd = nv_get_uint8(nvin, "cmd"); 393 if (cmd == 0) { 394 pjdlog_error("Control message is missing 'cmd' field."); 395 nv_free(nvin); 396 continue; 397 } 398 nv_free(nvin); 399 nvout = nv_alloc(); 400 switch (cmd) { 401 case HASTCTL_STATUS: 402 if (res->hr_remotein != NULL && 403 res->hr_remoteout != NULL) { 404 nv_add_string(nvout, "complete", "status"); 405 } else { 406 nv_add_string(nvout, "degraded", "status"); 407 } 408 nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 409 "extentsize"); 410 if (res->hr_role == HAST_ROLE_PRIMARY) { 411 nv_add_uint32(nvout, 412 (uint32_t)res->hr_keepdirty, "keepdirty"); 413 nv_add_uint64(nvout, 414 (uint64_t)(activemap_ndirty(res->hr_amp) * 415 res->hr_extentsize), "dirty"); 416 } else { 417 nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 418 nv_add_uint64(nvout, (uint64_t)0, "dirty"); 419 } 420 break; 421 default: 422 nv_add_int16(nvout, EINVAL, "error"); 423 break; 424 } 425 if (nv_error(nvout) != 0) { 426 pjdlog_error("Unable to create answer on control message."); 427 nv_free(nvout); 428 continue; 429 } 430 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 431 pjdlog_errno(LOG_ERR, 432 "Unable to send reply to control message"); 433 } 434 nv_free(nvout); 435 } 436 /* NOTREACHED */ 437 return (NULL); 438} 439