1233176Sae/*- 2233176Sae * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org> 3233176Sae * All rights reserved. 4233176Sae * 5233176Sae * Redistribution and use in source and binary forms, with or without 6233176Sae * modification, are permitted provided that the following conditions 7233176Sae * are met: 8233176Sae * 9233176Sae * 1. Redistributions of source code must retain the above copyright 10233176Sae * notice, this list of conditions and the following disclaimer. 11233176Sae * 2. Redistributions in binary form must reproduce the above copyright 12233176Sae * notice, this list of conditions and the following disclaimer in the 13233176Sae * documentation and/or other materials provided with the distribution. 14233176Sae * 15233176Sae * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16233176Sae * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17233176Sae * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18233176Sae * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19233176Sae * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20233176Sae * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21233176Sae * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22233176Sae * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23233176Sae * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24233176Sae * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25233176Sae */ 26233176Sae 27233176Sae#include <sys/cdefs.h> 28233176Sae__FBSDID("$FreeBSD$"); 29233176Sae 30233176Sae#include <sys/param.h> 31233176Sae#include <sys/bio.h> 32233176Sae#include <sys/diskmbr.h> 33233176Sae#include <sys/endian.h> 34233176Sae#include <sys/gpt.h> 35233176Sae#include <sys/kernel.h> 36233176Sae#include <sys/kobj.h> 37233176Sae#include <sys/limits.h> 38233176Sae#include <sys/lock.h> 39233176Sae#include <sys/malloc.h> 40233176Sae#include <sys/mutex.h> 41233176Sae#include <sys/queue.h> 42233176Sae#include <sys/sbuf.h> 43233176Sae#include <sys/systm.h> 44233176Sae#include <sys/sysctl.h> 45233176Sae#include <sys/uuid.h> 46233176Sae#include <geom/geom.h> 47233176Sae#include <geom/part/g_part.h> 48233176Sae 49233176Sae#include "g_part_if.h" 50233176Sae 51233176SaeFEATURE(geom_part_ldm, "GEOM partitioning class for LDM support"); 52233176Sae 53233176SaeSYSCTL_DECL(_kern_geom_part); 54233176Saestatic SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0, 55233176Sae "GEOM_PART_LDM Logical Disk Manager"); 56233176Sae 57233176Saestatic u_int ldm_debug = 0; 58233176SaeTUNABLE_INT("kern.geom.part.ldm.debug", &ldm_debug); 59233181SaeSYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug, 60233181Sae CTLFLAG_RW | CTLFLAG_TUN, &ldm_debug, 0, "Debug level"); 61233176Sae 62233176Sae/* 63233176Sae * This allows access to mirrored LDM volumes. Since we do not 64233176Sae * doing mirroring here, it is not enabled by default. 65233176Sae */ 66233176Saestatic u_int show_mirrors = 0; 67233176SaeTUNABLE_INT("kern.geom.part.ldm.show_mirrors", &show_mirrors); 68233181SaeSYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors, 69233181Sae CTLFLAG_RW | CTLFLAG_TUN, &show_mirrors, 0, "Show mirrored volumes"); 70233176Sae 71233176Sae#define LDM_DEBUG(lvl, fmt, ...) 
do { \ 72233176Sae if (ldm_debug >= (lvl)) { \ 73233176Sae printf("GEOM_PART: " fmt "\n", __VA_ARGS__); \ 74233176Sae } \ 75233176Sae} while (0) 76233176Sae#define LDM_DUMP(buf, size) do { \ 77233176Sae if (ldm_debug > 1) { \ 78233176Sae hexdump(buf, size, NULL, 0); \ 79233176Sae } \ 80233176Sae} while (0) 81233176Sae 82233176Sae/* 83233176Sae * There are internal representations of LDM structures. 84233176Sae * 85233176Sae * We do not keep all fields of on-disk structures, only most useful. 86233176Sae * All numbers in an on-disk structures are in big-endian format. 87233176Sae */ 88233176Sae 89233176Sae/* 90233176Sae * Private header is 512 bytes long. There are three copies on each disk. 91233176Sae * Offset and sizes are in sectors. Location of each copy: 92233176Sae * - the first offset is relative to the disk start; 93233176Sae * - the second and third offset are relative to the LDM database start. 94233176Sae * 95233176Sae * On a disk partitioned with GPT, the LDM has not first private header. 
96233176Sae */ 97233176Sae#define LDM_PH_MBRINDEX 0 98233176Sae#define LDM_PH_GPTINDEX 2 99233176Saestatic const uint64_t ldm_ph_off[] = {6, 1856, 2047}; 100233176Sae#define LDM_VERSION_2K 0x2000b 101233176Sae#define LDM_VERSION_VISTA 0x2000c 102233176Sae#define LDM_PH_VERSION_OFF 0x00c 103233176Sae#define LDM_PH_DISKGUID_OFF 0x030 104233176Sae#define LDM_PH_DGGUID_OFF 0x0b0 105233176Sae#define LDM_PH_DGNAME_OFF 0x0f0 106233176Sae#define LDM_PH_START_OFF 0x11b 107233176Sae#define LDM_PH_SIZE_OFF 0x123 108233176Sae#define LDM_PH_DB_OFF 0x12b 109233176Sae#define LDM_PH_DBSIZE_OFF 0x133 110233176Sae#define LDM_PH_TH1_OFF 0x13b 111233176Sae#define LDM_PH_TH2_OFF 0x143 112233176Sae#define LDM_PH_CONFSIZE_OFF 0x153 113233176Sae#define LDM_PH_LOGSIZE_OFF 0x15b 114233176Sae#define LDM_PH_SIGN "PRIVHEAD" 115233176Saestruct ldm_privhdr { 116233176Sae struct uuid disk_guid; 117233176Sae struct uuid dg_guid; 118233176Sae u_char dg_name[32]; 119233176Sae uint64_t start; /* logical disk start */ 120233176Sae uint64_t size; /* logical disk size */ 121233176Sae uint64_t db_offset; /* LDM database start */ 122233176Sae#define LDM_DB_SIZE 2048 123233176Sae uint64_t db_size; /* LDM database size */ 124233176Sae#define LDM_TH_COUNT 2 125233176Sae uint64_t th_offset[LDM_TH_COUNT]; /* TOC header offsets */ 126233176Sae uint64_t conf_size; /* configuration size */ 127233176Sae uint64_t log_size; /* size of log */ 128233176Sae}; 129233176Sae 130233176Sae/* 131233176Sae * Table of contents header is 512 bytes long. 132233176Sae * There are two identical copies at offsets from the private header. 133233176Sae * Offsets are relative to the LDM database start. 
/*
 * Table of contents (TOC) header is 512 bytes long.
 * There are two identical copies, located at the offsets recorded in the
 * private header.  Offsets are relative to the LDM database start.
 */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};

/*
 * LDM database (VMDB) header is 512 bytes long.
 */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields inside the on-disk VMDB header. */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
178233176Sae * 179233176Sae * Also each component can contain several partitions (spanned or 180233176Sae * striped volumes). 181233176Sae */ 182233176Sae 183233176Saestruct ldm_component { 184233176Sae uint64_t id; /* object id */ 185233176Sae uint64_t vol_id; /* parent volume object id */ 186233176Sae 187233176Sae int count; 188233176Sae LIST_HEAD(, ldm_partition) partitions; 189233176Sae LIST_ENTRY(ldm_component) entry; 190233176Sae}; 191233176Sae 192233176Saestruct ldm_volume { 193233176Sae uint64_t id; /* object id */ 194233176Sae uint64_t size; /* volume size */ 195233176Sae uint8_t number; /* used for ordering */ 196233176Sae uint8_t part_type; /* partition type */ 197233176Sae 198233176Sae int count; 199233176Sae LIST_HEAD(, ldm_component) components; 200233176Sae LIST_ENTRY(ldm_volume) entry; 201233176Sae}; 202233176Sae 203233176Saestruct ldm_disk { 204233176Sae uint64_t id; /* object id */ 205233176Sae struct uuid guid; /* disk guid */ 206233176Sae 207233176Sae LIST_ENTRY(ldm_disk) entry; 208233176Sae}; 209233176Sae 210233176Sae#if 0 211233176Saestruct ldm_disk_group { 212233176Sae uint64_t id; /* object id */ 213233176Sae struct uuid guid; /* disk group guid */ 214233176Sae u_char name[32]; /* disk group name */ 215233176Sae 216233176Sae LIST_ENTRY(ldm_disk_group) entry; 217233176Sae}; 218233176Sae#endif 219233176Sae 220233176Saestruct ldm_partition { 221233176Sae uint64_t id; /* object id */ 222233176Sae uint64_t disk_id; /* disk object id */ 223233176Sae uint64_t comp_id; /* parent component object id */ 224233176Sae uint64_t start; /* offset relative to disk start */ 225233176Sae uint64_t offset; /* offset for spanned volumes */ 226233176Sae uint64_t size; /* partition size */ 227233176Sae 228233176Sae LIST_ENTRY(ldm_partition) entry; 229233176Sae}; 230233176Sae 231233176Sae/* 232233176Sae * Each VBLK is 128 bytes long and has standard 16 bytes header. 233233176Sae * Some of VBLK's fields are fixed size, but others has variable size. 
234233176Sae * Fields with variable size are prefixed with one byte length marker. 235233176Sae * Some fields are strings and also can have fixed size and variable. 236233176Sae * Strings with fixed size are NULL-terminated, others are not. 237233176Sae * All VBLKs have same several first fields: 238233176Sae * Offset Size Description 239233176Sae * ---------------+---------------+-------------------------- 240233176Sae * 0x00 16 standard VBLK header 241233176Sae * 0x10 2 update status 242233176Sae * 0x13 1 VBLK type 243233176Sae * 0x18 PS object id 244233176Sae * 0x18+ PN object name 245233176Sae * 246233176Sae * o Offset 0x18+ means '0x18 + length of all variable-width fields' 247233176Sae * o 'P' in size column means 'prefixed' (variable-width), 248233176Sae * 'S' - string, 'N' - number. 249233176Sae */ 250233176Sae#define LDM_VBLK_SIGN "VBLK" 251233176Sae#define LDM_VBLK_SEQ_OFF 0x04 252233176Sae#define LDM_VBLK_GROUP_OFF 0x08 253233176Sae#define LDM_VBLK_INDEX_OFF 0x0c 254233176Sae#define LDM_VBLK_COUNT_OFF 0x0e 255233176Sae#define LDM_VBLK_TYPE_OFF 0x13 256233176Sae#define LDM_VBLK_OID_OFF 0x18 257233176Saestruct ldm_vblkhdr { 258233176Sae uint32_t seq; /* sequence number */ 259233176Sae uint32_t group; /* group number */ 260233176Sae uint16_t index; /* index in the group */ 261233176Sae uint16_t count; /* number of entries in the group */ 262233176Sae}; 263233176Sae 264233176Sae#define LDM_VBLK_T_COMPONENT 0x32 265233176Sae#define LDM_VBLK_T_PARTITION 0x33 266233176Sae#define LDM_VBLK_T_DISK 0x34 267233176Sae#define LDM_VBLK_T_DISKGROUP 0x35 268233176Sae#define LDM_VBLK_T_DISK4 0x44 269233176Sae#define LDM_VBLK_T_DISKGROUP4 0x45 270233176Sae#define LDM_VBLK_T_VOLUME 0x51 271233176Saestruct ldm_vblk { 272233176Sae uint8_t type; /* VBLK type */ 273233176Sae union { 274233176Sae uint64_t id; 275233176Sae struct ldm_volume vol; 276233176Sae struct ldm_component comp; 277233176Sae struct ldm_disk disk; 278233176Sae struct ldm_partition part; 279233176Sae#if 0 
280233176Sae struct ldm_disk_group disk_group; 281233176Sae#endif 282233176Sae } u; 283233176Sae LIST_ENTRY(ldm_vblk) entry; 284233176Sae}; 285233176Sae 286233176Sae/* 287233176Sae * Some VBLKs contains a bit more data than can fit into 128 bytes. These 288233176Sae * VBLKs are called eXtended VBLK. Before parsing, the data from these VBLK 289233176Sae * should be placed into continuous memory buffer. We can determine xVBLK 290233176Sae * by the count field in the standard VBLK header (count > 1). 291233176Sae */ 292233176Saestruct ldm_xvblk { 293233176Sae uint32_t group; /* xVBLK group number */ 294233176Sae uint32_t size; /* the total size of xVBLK */ 295233176Sae uint8_t map; /* bitmask of currently saved VBLKs */ 296233176Sae u_char *data; /* xVBLK data */ 297233176Sae 298233176Sae LIST_ENTRY(ldm_xvblk) entry; 299233176Sae}; 300233176Sae 301233176Sae/* The internal representation of LDM database. */ 302233176Saestruct ldm_db { 303233176Sae struct ldm_privhdr ph; /* private header */ 304233176Sae struct ldm_tochdr th; /* TOC header */ 305233176Sae struct ldm_vmdbhdr dh; /* VMDB header */ 306233176Sae 307233176Sae LIST_HEAD(, ldm_volume) volumes; 308233176Sae LIST_HEAD(, ldm_disk) disks; 309233176Sae LIST_HEAD(, ldm_vblk) vblks; 310233176Sae LIST_HEAD(, ldm_xvblk) xvblks; 311233176Sae}; 312233176Sae 313233176Saestatic struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA; 314233176Sae 315233176Saestruct g_part_ldm_table { 316233176Sae struct g_part_table base; 317233176Sae uint64_t db_offset; 318233176Sae int is_gpt; 319233176Sae}; 320233176Saestruct g_part_ldm_entry { 321233176Sae struct g_part_entry base; 322233176Sae uint8_t type; 323233176Sae}; 324233176Sae 325233176Saestatic int g_part_ldm_add(struct g_part_table *, struct g_part_entry *, 326233176Sae struct g_part_parms *); 327233176Saestatic int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *); 328233176Saestatic int g_part_ldm_create(struct g_part_table *, struct 
g_part_parms *); 329233176Saestatic int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *); 330233176Saestatic void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *, 331233176Sae struct sbuf *, const char *); 332233176Saestatic int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *); 333233176Saestatic int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *, 334233176Sae struct g_part_parms *); 335233176Saestatic const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *, 336233176Sae char *, size_t); 337233176Saestatic int g_part_ldm_probe(struct g_part_table *, struct g_consumer *); 338233176Saestatic int g_part_ldm_read(struct g_part_table *, struct g_consumer *); 339233176Saestatic const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *, 340233176Sae char *, size_t); 341233176Saestatic int g_part_ldm_write(struct g_part_table *, struct g_consumer *); 342233176Sae 343233176Saestatic kobj_method_t g_part_ldm_methods[] = { 344233176Sae KOBJMETHOD(g_part_add, g_part_ldm_add), 345233176Sae KOBJMETHOD(g_part_bootcode, g_part_ldm_bootcode), 346233176Sae KOBJMETHOD(g_part_create, g_part_ldm_create), 347233176Sae KOBJMETHOD(g_part_destroy, g_part_ldm_destroy), 348233176Sae KOBJMETHOD(g_part_dumpconf, g_part_ldm_dumpconf), 349233176Sae KOBJMETHOD(g_part_dumpto, g_part_ldm_dumpto), 350233176Sae KOBJMETHOD(g_part_modify, g_part_ldm_modify), 351233176Sae KOBJMETHOD(g_part_name, g_part_ldm_name), 352233176Sae KOBJMETHOD(g_part_probe, g_part_ldm_probe), 353233176Sae KOBJMETHOD(g_part_read, g_part_ldm_read), 354233176Sae KOBJMETHOD(g_part_type, g_part_ldm_type), 355233176Sae KOBJMETHOD(g_part_write, g_part_ldm_write), 356233176Sae { 0, 0 } 357233176Sae}; 358233176Sae 359233176Saestatic struct g_part_scheme g_part_ldm_scheme = { 360233176Sae "LDM", 361233176Sae g_part_ldm_methods, 362233176Sae sizeof(struct g_part_ldm_table), 363233176Sae .gps_entrysz = sizeof(struct g_part_ldm_entry) 
364233176Sae}; 365233176SaeG_PART_SCHEME_DECLARE(g_part_ldm); 366233176Sae 367233176Saestatic struct g_part_ldm_alias { 368233176Sae u_char typ; 369233176Sae int alias; 370233176Sae} ldm_alias_match[] = { 371233176Sae { DOSPTYP_NTFS, G_PART_ALIAS_MS_NTFS }, 372233176Sae { DOSPTYP_FAT32, G_PART_ALIAS_MS_FAT32 }, 373233176Sae { DOSPTYP_386BSD, G_PART_ALIAS_FREEBSD }, 374233176Sae { DOSPTYP_LDM, G_PART_ALIAS_MS_LDM_DATA }, 375233176Sae { DOSPTYP_LINSWP, G_PART_ALIAS_LINUX_SWAP }, 376233176Sae { DOSPTYP_LINUX, G_PART_ALIAS_LINUX_DATA }, 377233176Sae { DOSPTYP_LINLVM, G_PART_ALIAS_LINUX_LVM }, 378233176Sae { DOSPTYP_LINRAID, G_PART_ALIAS_LINUX_RAID }, 379233176Sae}; 380233176Sae 381233176Saestatic u_char* 382233176Saeldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error) 383233176Sae{ 384233176Sae struct g_provider *pp; 385233176Sae u_char *buf; 386233176Sae 387233176Sae pp = cp->provider; 388233176Sae buf = g_read_data(cp, off, pp->sectorsize, error); 389233176Sae if (buf == NULL) 390233176Sae return (NULL); 391233176Sae 392233176Sae if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) { 393233176Sae LDM_DEBUG(1, "%s: invalid LDM private header signature", 394233176Sae pp->name); 395233176Sae g_free(buf); 396233176Sae buf = NULL; 397233176Sae *error = EINVAL; 398233176Sae } 399233176Sae return (buf); 400233176Sae} 401233176Sae 402233176Saestatic int 403233176Saeldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr, 404233176Sae const u_char *buf) 405233176Sae{ 406233176Sae uint32_t version; 407233176Sae int error; 408233176Sae 409233176Sae memset(hdr, 0, sizeof(*hdr)); 410233176Sae version = be32dec(buf + LDM_PH_VERSION_OFF); 411233176Sae if (version != LDM_VERSION_2K && 412233176Sae version != LDM_VERSION_VISTA) { 413233176Sae LDM_DEBUG(0, "%s: unsupported LDM version %u.%u", 414233176Sae cp->provider->name, version >> 16, 415233176Sae version & 0xFFFF); 416233176Sae return (ENXIO); 417233176Sae } 418233176Sae error = parse_uuid(buf + 
LDM_PH_DISKGUID_OFF, &hdr->disk_guid); 419233176Sae if (error != 0) 420233176Sae return (error); 421233176Sae error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid); 422233176Sae if (error != 0) 423233176Sae return (error); 424233176Sae strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name)); 425233176Sae hdr->start = be64dec(buf + LDM_PH_START_OFF); 426233176Sae hdr->size = be64dec(buf + LDM_PH_SIZE_OFF); 427233176Sae hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF); 428233176Sae hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF); 429233176Sae hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF); 430233176Sae hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF); 431233176Sae hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF); 432233176Sae hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF); 433233176Sae return (0); 434233176Sae} 435233176Sae 436233176Saestatic int 437233176Saeldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt) 438233176Sae{ 439233176Sae struct g_consumer *cp2; 440233176Sae struct g_provider *pp; 441233176Sae struct ldm_privhdr hdr; 442233176Sae uint64_t offset, last; 443233176Sae int error, found, i; 444233176Sae u_char *buf; 445233176Sae 446233176Sae pp = cp->provider; 447233176Sae if (is_gpt) { 448233176Sae /* 449233176Sae * The last LBA is used in several checks below, for the 450233176Sae * GPT case it should be calculated relative to the whole 451233176Sae * disk. 452233176Sae */ 453233176Sae cp2 = LIST_FIRST(&pp->geom->consumer); 454233176Sae last = 455233176Sae cp2->provider->mediasize / cp2->provider->sectorsize - 1; 456233176Sae } else 457233176Sae last = pp->mediasize / pp->sectorsize - 1; 458233176Sae for (found = 0, i = is_gpt; 459233176Sae i < sizeof(ldm_ph_off) / sizeof(ldm_ph_off[0]); i++) { 460233176Sae offset = ldm_ph_off[i]; 461233176Sae /* 462233176Sae * In the GPT case consumer is attached to the LDM metadata 463233176Sae * partition and we don't need add db_offset. 
464233176Sae */ 465233176Sae if (!is_gpt) 466233176Sae offset += db->ph.db_offset; 467233176Sae if (i == LDM_PH_MBRINDEX) { 468233176Sae /* 469233176Sae * Prepare to errors and setup new base offset 470233176Sae * to read backup private headers. Assume that LDM 471233176Sae * database is in the last 1Mbyte area. 472233176Sae */ 473233176Sae db->ph.db_offset = last - LDM_DB_SIZE; 474233176Sae } 475233176Sae buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error); 476233176Sae if (buf == NULL) { 477233176Sae LDM_DEBUG(1, "%s: failed to read private header " 478233176Sae "%d at LBA %ju", pp->name, i, (uintmax_t)offset); 479233176Sae continue; 480233176Sae } 481233176Sae error = ldm_privhdr_parse(cp, &hdr, buf); 482233176Sae if (error != 0) { 483233176Sae LDM_DEBUG(1, "%s: failed to parse private " 484233176Sae "header %d", pp->name, i); 485233176Sae LDM_DUMP(buf, pp->sectorsize); 486233176Sae g_free(buf); 487233176Sae continue; 488233176Sae } 489233176Sae g_free(buf); 490233176Sae if (hdr.start > last || 491233176Sae hdr.start + hdr.size - 1 > last || 492233652Sae (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) || 493233176Sae hdr.db_size != LDM_DB_SIZE || 494233176Sae hdr.db_offset + LDM_DB_SIZE - 1 > last || 495233176Sae hdr.th_offset[0] >= LDM_DB_SIZE || 496233176Sae hdr.th_offset[1] >= LDM_DB_SIZE || 497233176Sae hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) { 498233176Sae LDM_DEBUG(1, "%s: invalid values in the " 499233176Sae "private header %d", pp->name, i); 500233176Sae LDM_DEBUG(2, "%s: start: %jd, size: %jd, " 501233176Sae "db_offset: %jd, db_size: %jd, th_offset0: %jd, " 502233176Sae "th_offset1: %jd, conf_size: %jd, log_size: %jd, " 503233176Sae "last: %jd", pp->name, hdr.start, hdr.size, 504233176Sae hdr.db_offset, hdr.db_size, hdr.th_offset[0], 505233176Sae hdr.th_offset[1], hdr.conf_size, hdr.log_size, 506233176Sae last); 507233176Sae continue; 508233176Sae } 509233176Sae if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) { 510233176Sae 
LDM_DEBUG(0, "%s: private headers are not equal", 511233176Sae pp->name); 512233176Sae if (i > 1) { 513233176Sae /* 514233176Sae * We have different headers in the LDM. 515233176Sae * We can not trust this metadata. 516233176Sae */ 517233176Sae LDM_DEBUG(0, "%s: refuse LDM metadata", 518233176Sae pp->name); 519233176Sae return (EINVAL); 520233176Sae } 521233176Sae /* 522233176Sae * We already have read primary private header 523233176Sae * and it differs from this backup one. 524233176Sae * Prefer the backup header and save it. 525233176Sae */ 526233176Sae found = 0; 527233176Sae } 528233176Sae if (found == 0) 529233176Sae memcpy(&db->ph, &hdr, sizeof(hdr)); 530233176Sae found = 1; 531233176Sae } 532233176Sae if (found == 0) { 533233176Sae LDM_DEBUG(1, "%s: valid LDM private header not found", 534233176Sae pp->name); 535233176Sae return (ENXIO); 536233176Sae } 537233176Sae return (0); 538233176Sae} 539233176Sae 540233176Saestatic int 541233176Saeldm_gpt_check(struct ldm_db *db, struct g_consumer *cp) 542233176Sae{ 543233176Sae struct g_part_table *gpt; 544233176Sae struct g_part_entry *e; 545233176Sae struct g_consumer *cp2; 546233176Sae int error; 547233176Sae 548233176Sae cp2 = LIST_NEXT(cp, consumer); 549233176Sae g_topology_lock(); 550233176Sae gpt = cp->provider->geom->softc; 551233176Sae error = 0; 552233176Sae LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) { 553233176Sae if (cp->provider == e->gpe_pp) { 554233176Sae /* ms-ldm-metadata partition */ 555233176Sae if (e->gpe_start != db->ph.db_offset || 556233176Sae e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1) 557233176Sae error++; 558233176Sae } else if (cp2->provider == e->gpe_pp) { 559233176Sae /* ms-ldm-data partition */ 560233176Sae if (e->gpe_start != db->ph.start || 561233176Sae e->gpe_end != db->ph.start + db->ph.size - 1) 562233176Sae error++; 563233176Sae } 564233176Sae if (error != 0) { 565233176Sae LDM_DEBUG(0, "%s: GPT partition %d boundaries " 566233176Sae "do not match with the LDM metadata", 
567233176Sae e->gpe_pp->name, e->gpe_index); 568233176Sae error = ENXIO; 569233176Sae break; 570233176Sae } 571233176Sae } 572233176Sae g_topology_unlock(); 573233176Sae return (error); 574233176Sae} 575233176Sae 576233176Saestatic int 577233176Saeldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp) 578233176Sae{ 579233176Sae struct g_provider *pp; 580233176Sae struct ldm_tochdr hdr; 581233176Sae uint64_t offset, conf_size, log_size; 582233176Sae int error, found, i; 583233176Sae u_char *buf; 584233176Sae 585233176Sae pp = cp->provider; 586233176Sae for (i = 0, found = 0; i < LDM_TH_COUNT; i++) { 587233176Sae offset = db->ph.db_offset + db->ph.th_offset[i]; 588233176Sae buf = g_read_data(cp, 589233176Sae offset * pp->sectorsize, pp->sectorsize, &error); 590233176Sae if (buf == NULL) { 591233176Sae LDM_DEBUG(1, "%s: failed to read TOC header " 592233176Sae "at LBA %ju", pp->name, (uintmax_t)offset); 593233176Sae continue; 594233176Sae } 595233176Sae if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 || 596233176Sae memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1, 597233176Sae strlen(LDM_TH_NAME1)) != 0 || 598233176Sae memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2, 599233176Sae strlen(LDM_TH_NAME2)) != 0) { 600233176Sae LDM_DEBUG(1, "%s: failed to parse TOC header " 601233176Sae "at LBA %ju", pp->name, (uintmax_t)offset); 602233176Sae LDM_DUMP(buf, pp->sectorsize); 603233176Sae g_free(buf); 604233176Sae continue; 605233176Sae } 606233176Sae hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF); 607233176Sae hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF); 608233176Sae conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF); 609233176Sae log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF); 610233176Sae if (conf_size != db->ph.conf_size || 611233176Sae hdr.conf_offset + conf_size >= LDM_DB_SIZE || 612233176Sae log_size != db->ph.log_size || 613233176Sae hdr.log_offset + log_size >= LDM_DB_SIZE) { 614233176Sae LDM_DEBUG(1, "%s: invalid values in the " 615233176Sae "TOC header at LBA 
%ju", pp->name, 616233176Sae (uintmax_t)offset); 617233176Sae LDM_DUMP(buf, pp->sectorsize); 618233176Sae g_free(buf); 619233176Sae continue; 620233176Sae } 621233176Sae g_free(buf); 622233176Sae if (found == 0) 623233176Sae memcpy(&db->th, &hdr, sizeof(hdr)); 624233176Sae found = 1; 625233176Sae } 626233176Sae if (found == 0) { 627233176Sae LDM_DEBUG(0, "%s: valid LDM TOC header not found.", 628233176Sae pp->name); 629233176Sae return (ENXIO); 630233176Sae } 631233176Sae return (0); 632233176Sae} 633233176Sae 634233176Saestatic int 635233176Saeldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp) 636233176Sae{ 637233176Sae struct g_provider *pp; 638233176Sae struct uuid dg_guid; 639233176Sae uint64_t offset; 640233176Sae uint32_t version; 641233176Sae int error; 642233176Sae u_char *buf; 643233176Sae 644233176Sae pp = cp->provider; 645233176Sae offset = db->ph.db_offset + db->th.conf_offset; 646233176Sae buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize, 647233176Sae &error); 648233176Sae if (buf == NULL) { 649233176Sae LDM_DEBUG(0, "%s: failed to read VMDB header at " 650233176Sae "LBA %ju", pp->name, (uintmax_t)offset); 651233176Sae return (error); 652233176Sae } 653233176Sae if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) { 654233176Sae g_free(buf); 655233176Sae LDM_DEBUG(0, "%s: failed to parse VMDB header at " 656233176Sae "LBA %ju", pp->name, (uintmax_t)offset); 657233176Sae return (ENXIO); 658233176Sae } 659233176Sae /* Check version. 
*/ 660233176Sae version = be32dec(buf + LDM_DB_VERSION_OFF); 661233176Sae if (version != 0x4000A) { 662233176Sae g_free(buf); 663233176Sae LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u", 664233176Sae pp->name, version >> 16, version & 0xFFFF); 665233176Sae return (ENXIO); 666233176Sae } 667233176Sae /* 668233176Sae * Check VMDB update status: 669233176Sae * 1 - in a consistent state; 670233176Sae * 2 - in a creation phase; 671233176Sae * 3 - in a deletion phase; 672233176Sae */ 673233176Sae if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) { 674233176Sae g_free(buf); 675233176Sae LDM_DEBUG(0, "%s: VMDB is not in a consistent state", 676233176Sae pp->name); 677233176Sae return (ENXIO); 678233176Sae } 679233176Sae db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF); 680233176Sae db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF); 681233176Sae error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid); 682233176Sae /* Compare disk group name and guid from VMDB and private headers */ 683233176Sae if (error != 0 || db->dh.size == 0 || 684233176Sae pp->sectorsize % db->dh.size != 0 || 685233176Sae strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 || 686233176Sae memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 || 687233176Sae db->dh.size * db->dh.last_seq > 688233176Sae db->ph.conf_size * pp->sectorsize) { 689233176Sae LDM_DEBUG(0, "%s: invalid values in the VMDB header", 690233176Sae pp->name); 691233176Sae LDM_DUMP(buf, pp->sectorsize); 692233176Sae g_free(buf); 693233176Sae return (EINVAL); 694233176Sae } 695233176Sae g_free(buf); 696233176Sae return (0); 697233176Sae} 698233176Sae 699233176Saestatic int 700233176Saeldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p) 701233176Sae{ 702233176Sae struct ldm_xvblk *blk; 703233176Sae size_t size; 704233176Sae 705233176Sae size = db->dh.size - 16; 706233176Sae LIST_FOREACH(blk, &db->xvblks, entry) 707233176Sae if (blk->group == vh->group) 708233176Sae break; 709233176Sae if (blk == NULL) { 
/*
 * Read the variable-width numeric field at 'offset' and return the new
 * offset.
 *
 * The field is a one-byte length followed by that many bytes holding a
 * big-endian number.  'range' is the number of valid bytes in 'buf'.
 * Returns -1 (leaving *result untouched) when 'offset' is outside of the
 * buffer, the length exceeds 8 bytes, or the field would reach 'range'.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Validate the offset before touching the length byte. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}

/*
 * Read the variable-width string at 'offset' and return the new offset.
 *
 * The field is a one-byte length followed by that many characters, with
 * no terminator.  The string is copied to 'result' (of 'maxlen' bytes)
 * and NUL-terminated.  Returns -1 when 'offset' is outside of the
 * buffer, the string does not fit into 'result', or the field would
 * reach 'range'.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Validate the offset before touching the length byte. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}

/*
 * Skip the variable-width field at 'offset' and return the new offset,
 * or -1 when 'offset' is outside of the buffer or the field would reach
 * 'range'.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Validate the offset before touching the length byte. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
* Partition VBLK fields: 822233176Sae * Offset Size Description 823233176Sae * ------------+-------+------------------------ 824233176Sae * 0x18+12 8 partition start offset 825233176Sae * 0x18+20 8 volume offset 826233176Sae * 0x18+28 PN partition size 827233176Sae * 0x34+ PN parent's component object id 828233176Sae * 0x34+ PN disk's object id 829233176Sae */ 830233176Sae case LDM_VBLK_T_PARTITION: 831233176Sae if (offset + 28 >= size) { 832233176Sae errstr = "too small buffer"; 833233176Sae goto fail; 834233176Sae } 835233176Sae blk->u.part.start = be64dec(p + offset + 12); 836233176Sae blk->u.part.offset = be64dec(p + offset + 20); 837233176Sae offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size); 838233176Sae if (offset < 0) { 839233176Sae errstr = "partition size"; 840233176Sae goto fail; 841233176Sae } 842233176Sae offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size); 843233176Sae if (offset < 0) { 844233176Sae errstr = "component id"; 845233176Sae goto fail; 846233176Sae } 847233176Sae offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size); 848233176Sae if (offset < 0) { 849233176Sae errstr = "disk id"; 850233176Sae goto fail; 851233176Sae } 852233176Sae break; 853233176Sae /* 854233176Sae * Disk VBLK fields: 855233176Sae * Offset Size Description 856233176Sae * ------------+-------+------------------------ 857233176Sae * 0x18+ PS disk GUID 858233176Sae */ 859233176Sae case LDM_VBLK_T_DISK: 860233176Sae errstr = "disk guid"; 861233176Sae offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size); 862233176Sae if (offset < 0) 863233176Sae goto fail; 864233176Sae error = parse_uuid(vstr, &blk->u.disk.guid); 865233176Sae if (error != 0) 866233176Sae goto fail; 867233176Sae LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry); 868233176Sae break; 869233176Sae /* 870233176Sae * Disk group VBLK fields: 871233176Sae * Offset Size Description 872233176Sae * ------------+-------+------------------------ 873233176Sae * 0x18+ PS disk group GUID 
874233176Sae */ 875233176Sae case LDM_VBLK_T_DISKGROUP: 876233176Sae#if 0 877233176Sae strncpy(blk->u.disk_group.name, vstr, 878233176Sae sizeof(blk->u.disk_group.name)); 879233176Sae offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size); 880233176Sae if (offset < 0) { 881233176Sae errstr = "disk group guid"; 882233176Sae goto fail; 883233176Sae } 884233176Sae error = parse_uuid(name, &blk->u.disk_group.guid); 885233176Sae if (error != 0) { 886233176Sae errstr = "disk group guid"; 887233176Sae goto fail; 888233176Sae } 889233176Sae LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry); 890233176Sae#endif 891233176Sae break; 892233176Sae /* 893233176Sae * Disk VBLK fields: 894233176Sae * Offset Size Description 895233176Sae * ------------+-------+------------------------ 896233176Sae * 0x18+ 16 disk GUID 897233176Sae */ 898233176Sae case LDM_VBLK_T_DISK4: 899233176Sae be_uuid_dec(p + offset, &blk->u.disk.guid); 900233176Sae LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry); 901233176Sae break; 902233176Sae /* 903233176Sae * Disk group VBLK fields: 904233176Sae * Offset Size Description 905233176Sae * ------------+-------+------------------------ 906233176Sae * 0x18+ 16 disk GUID 907233176Sae */ 908233176Sae case LDM_VBLK_T_DISKGROUP4: 909233176Sae#if 0 910233176Sae strncpy(blk->u.disk_group.name, vstr, 911233176Sae sizeof(blk->u.disk_group.name)); 912233176Sae be_uuid_dec(p + offset, &blk->u.disk.guid); 913233176Sae LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry); 914233176Sae#endif 915233176Sae break; 916233176Sae /* 917233176Sae * Volume VBLK fields: 918233176Sae * Offset Size Description 919233176Sae * ------------+-------+------------------------ 920233176Sae * 0x18+ PS volume type 921233176Sae * 0x18+ PS unknown 922233176Sae * 0x18+ 14(S) volume state 923233176Sae * 0x18+16 1 volume number 924233176Sae * 0x18+21 PN volume children count 925233176Sae * 0x2D+16 PN volume size 926233176Sae * 0x3D+4 1 partition type 927233176Sae */ 928233176Sae 
case LDM_VBLK_T_VOLUME: 929233176Sae offset = ldm_vparm_skip(p, offset, size); 930233176Sae if (offset < 0) { 931233176Sae errstr = "volume type"; 932233176Sae goto fail; 933233176Sae } 934233176Sae offset = ldm_vparm_skip(p, offset, size); 935233176Sae if (offset < 0) { 936233176Sae errstr = "unknown param"; 937233176Sae goto fail; 938233176Sae } 939233176Sae if (offset + 21 >= size) { 940233176Sae errstr = "too small buffer"; 941233176Sae goto fail; 942233176Sae } 943233176Sae blk->u.vol.number = p[offset + 16]; 944233176Sae offset = ldm_vparm_skip(p, offset + 21, size); 945233176Sae if (offset < 0) { 946233176Sae errstr = "children count"; 947233176Sae goto fail; 948233176Sae } 949233176Sae offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size); 950233176Sae if (offset < 0) { 951233176Sae errstr = "volume size"; 952233176Sae goto fail; 953233176Sae } 954233176Sae if (offset + 4 >= size) { 955233176Sae errstr = "too small buffer"; 956233176Sae goto fail; 957233176Sae } 958233176Sae blk->u.vol.part_type = p[offset + 4]; 959233176Sae /* keep volumes ordered by volume number */ 960233176Sae last = NULL; 961233176Sae LIST_FOREACH(volume, &db->volumes, entry) { 962233176Sae if (volume->number > blk->u.vol.number) 963233176Sae break; 964233176Sae last = volume; 965233176Sae } 966233176Sae if (last != NULL) 967233176Sae LIST_INSERT_AFTER(last, &blk->u.vol, entry); 968233176Sae else 969233176Sae LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry); 970233176Sae break; 971233176Sae default: 972233176Sae LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type); 973233176Sae LDM_DUMP(p, size); 974233176Sae } 975233176Sae LIST_INSERT_HEAD(&db->vblks, blk, entry); 976233176Sae return (0); 977233176Saefail: 978233176Sae LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n", 979233176Sae errstr, blk->type); 980233176Sae LDM_DUMP(p, size); 981233176Sae g_free(blk); 982233176Sae return (EINVAL); 983233176Sae} 984233176Sae 985233176Saestatic void 
986233176Saeldm_vmdb_free(struct ldm_db *db) 987233176Sae{ 988233176Sae struct ldm_vblk *vblk; 989233176Sae struct ldm_xvblk *xvblk; 990233176Sae 991233176Sae while (!LIST_EMPTY(&db->xvblks)) { 992233176Sae xvblk = LIST_FIRST(&db->xvblks); 993233176Sae LIST_REMOVE(xvblk, entry); 994233176Sae g_free(xvblk->data); 995233176Sae g_free(xvblk); 996233176Sae } 997233176Sae while (!LIST_EMPTY(&db->vblks)) { 998233176Sae vblk = LIST_FIRST(&db->vblks); 999233176Sae LIST_REMOVE(vblk, entry); 1000233176Sae g_free(vblk); 1001233176Sae } 1002233176Sae} 1003233176Sae 1004233176Saestatic int 1005233176Saeldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp) 1006233176Sae{ 1007233176Sae struct g_provider *pp; 1008233176Sae struct ldm_vblk *vblk; 1009233176Sae struct ldm_xvblk *xvblk; 1010233176Sae struct ldm_volume *volume; 1011233176Sae struct ldm_component *comp; 1012233176Sae struct ldm_vblkhdr vh; 1013233176Sae u_char *buf, *p; 1014233176Sae size_t size, n, sectors; 1015233176Sae uint64_t offset; 1016233176Sae int error; 1017233176Sae 1018233176Sae pp = cp->provider; 1019233176Sae size = (db->dh.last_seq * db->dh.size + 1020233176Sae pp->sectorsize - 1) / pp->sectorsize; 1021233176Sae size -= 1; /* one sector takes vmdb header */ 1022233176Sae for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) { 1023233176Sae offset = db->ph.db_offset + db->th.conf_offset + n + 1; 1024233176Sae sectors = (size - n) > (MAXPHYS / pp->sectorsize) ? 
1025233176Sae MAXPHYS / pp->sectorsize: size - n; 1026233176Sae /* read VBLKs */ 1027233176Sae buf = g_read_data(cp, offset * pp->sectorsize, 1028233176Sae sectors * pp->sectorsize, &error); 1029233176Sae if (buf == NULL) { 1030233176Sae LDM_DEBUG(0, "%s: failed to read VBLK\n", 1031233176Sae pp->name); 1032233176Sae goto fail; 1033233176Sae } 1034233176Sae for (p = buf; p < buf + sectors * pp->sectorsize; 1035233176Sae p += db->dh.size) { 1036233176Sae if (memcmp(p, LDM_VBLK_SIGN, 1037233176Sae strlen(LDM_VBLK_SIGN)) != 0) { 1038233176Sae LDM_DEBUG(0, "%s: no VBLK signature\n", 1039233176Sae pp->name); 1040233176Sae LDM_DUMP(p, db->dh.size); 1041233176Sae goto fail; 1042233176Sae } 1043233176Sae vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF); 1044233176Sae vh.group = be32dec(p + LDM_VBLK_GROUP_OFF); 1045233176Sae /* skip empty blocks */ 1046233176Sae if (vh.seq == 0 || vh.group == 0) 1047233176Sae continue; 1048233176Sae vh.index = be16dec(p + LDM_VBLK_INDEX_OFF); 1049233176Sae vh.count = be16dec(p + LDM_VBLK_COUNT_OFF); 1050233176Sae if (vh.count == 0 || vh.count > 4 || 1051233176Sae vh.seq > db->dh.last_seq) { 1052233176Sae LDM_DEBUG(0, "%s: invalid values " 1053233176Sae "in the VBLK header\n", pp->name); 1054233176Sae LDM_DUMP(p, db->dh.size); 1055233176Sae goto fail; 1056233176Sae } 1057233176Sae if (vh.count > 1) { 1058233176Sae error = ldm_xvblk_handle(db, &vh, p); 1059233176Sae if (error != 0) { 1060233176Sae LDM_DEBUG(0, "%s: xVBLK " 1061233176Sae "is corrupted\n", pp->name); 1062233176Sae LDM_DUMP(p, db->dh.size); 1063233176Sae goto fail; 1064233176Sae } 1065233176Sae continue; 1066233176Sae } 1067233176Sae if (be16dec(p + 16) != 0) 1068233176Sae LDM_DEBUG(1, "%s: VBLK update" 1069233176Sae " status is %u\n", pp->name, 1070233176Sae be16dec(p + 16)); 1071233176Sae error = ldm_vblk_handle(db, p, db->dh.size); 1072233176Sae if (error != 0) 1073233176Sae goto fail; 1074233176Sae } 1075233176Sae g_free(buf); 1076233176Sae buf = NULL; 1077233176Sae } 1078233176Sae /* 
Parse xVBLKs */ 1079233176Sae while (!LIST_EMPTY(&db->xvblks)) { 1080233176Sae xvblk = LIST_FIRST(&db->xvblks); 1081233176Sae if (xvblk->map == 0xFF) { 1082233176Sae error = ldm_vblk_handle(db, xvblk->data, xvblk->size); 1083233176Sae if (error != 0) 1084233176Sae goto fail; 1085233176Sae } else { 1086233176Sae LDM_DEBUG(0, "%s: incomplete or corrupt " 1087233176Sae "xVBLK found\n", pp->name); 1088233176Sae goto fail; 1089233176Sae } 1090233176Sae LIST_REMOVE(xvblk, entry); 1091233176Sae g_free(xvblk->data); 1092233176Sae g_free(xvblk); 1093233176Sae } 1094233176Sae /* construct all VBLKs relations */ 1095233176Sae LIST_FOREACH(volume, &db->volumes, entry) { 1096233176Sae LIST_FOREACH(vblk, &db->vblks, entry) 1097233176Sae if (vblk->type == LDM_VBLK_T_COMPONENT && 1098233176Sae vblk->u.comp.vol_id == volume->id) { 1099233176Sae LIST_INSERT_HEAD(&volume->components, 1100233176Sae &vblk->u.comp, entry); 1101233176Sae volume->count++; 1102233176Sae } 1103233176Sae LIST_FOREACH(comp, &volume->components, entry) 1104233176Sae LIST_FOREACH(vblk, &db->vblks, entry) 1105233176Sae if (vblk->type == LDM_VBLK_T_PARTITION && 1106233176Sae vblk->u.part.comp_id == comp->id) { 1107233176Sae LIST_INSERT_HEAD(&comp->partitions, 1108233176Sae &vblk->u.part, entry); 1109233176Sae comp->count++; 1110233176Sae } 1111233176Sae } 1112233176Sae return (0); 1113233176Saefail: 1114233176Sae ldm_vmdb_free(db); 1115233176Sae g_free(buf); 1116233176Sae return (ENXIO); 1117233176Sae} 1118233176Sae 1119233176Saestatic int 1120233176Saeg_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry, 1121233176Sae struct g_part_parms *gpp) 1122233176Sae{ 1123233176Sae 1124233176Sae return (ENOSYS); 1125233176Sae} 1126233176Sae 1127233176Saestatic int 1128233176Saeg_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) 1129233176Sae{ 1130233176Sae 1131233176Sae return (ENOSYS); 1132233176Sae} 1133233176Sae 1134233176Saestatic int 
1135233176Saeg_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp) 1136233176Sae{ 1137233176Sae 1138233176Sae return (ENOSYS); 1139233176Sae} 1140233176Sae 1141233176Saestatic int 1142233176Saeg_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) 1143233176Sae{ 1144233176Sae struct g_part_ldm_table *table; 1145233176Sae struct g_provider *pp; 1146233176Sae 1147233176Sae table = (struct g_part_ldm_table *)basetable; 1148233176Sae /* 1149233176Sae * To destroy LDM on a disk partitioned with GPT we should delete 1150233176Sae * ms-ldm-metadata partition, but we can't do this via standard 1151233176Sae * GEOM_PART method. 1152233176Sae */ 1153233176Sae if (table->is_gpt) 1154233176Sae return (ENOSYS); 1155233176Sae pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; 1156233176Sae /* 1157233176Sae * To destroy LDM we should wipe MBR, first private header and 1158233176Sae * backup private headers. 1159233176Sae */ 1160233176Sae basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1; 1161233176Sae /* 1162233176Sae * Don't touch last backup private header when LDM database is 1163233176Sae * not located in the last 1MByte area. 1164233176Sae * XXX: can't remove all blocks. 
1165233176Sae */ 1166233176Sae if (table->db_offset + LDM_DB_SIZE == 1167233176Sae pp->mediasize / pp->sectorsize) 1168233176Sae basetable->gpt_smtail = 1; 1169233176Sae return (0); 1170233176Sae} 1171233176Sae 1172233176Saestatic void 1173233176Saeg_part_ldm_dumpconf(struct g_part_table *basetable, 1174233176Sae struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) 1175233176Sae{ 1176233176Sae struct g_part_ldm_entry *entry; 1177233176Sae 1178233176Sae entry = (struct g_part_ldm_entry *)baseentry; 1179233176Sae if (indent == NULL) { 1180233176Sae /* conftxt: libdisk compatibility */ 1181233176Sae sbuf_printf(sb, " xs LDM xt %u", entry->type); 1182233176Sae } else if (entry != NULL) { 1183233176Sae /* confxml: partition entry information */ 1184233176Sae sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent, 1185233176Sae entry->type); 1186233176Sae } else { 1187233176Sae /* confxml: scheme information */ 1188233176Sae } 1189233176Sae} 1190233176Sae 1191233176Saestatic int 1192233176Saeg_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) 1193233176Sae{ 1194233176Sae 1195233176Sae return (0); 1196233176Sae} 1197233176Sae 1198233176Saestatic int 1199233176Saeg_part_ldm_modify(struct g_part_table *basetable, 1200233176Sae struct g_part_entry *baseentry, struct g_part_parms *gpp) 1201233176Sae{ 1202233176Sae 1203233176Sae return (ENOSYS); 1204233176Sae} 1205233176Sae 1206233176Saestatic const char * 1207233176Saeg_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry, 1208233176Sae char *buf, size_t bufsz) 1209233176Sae{ 1210233176Sae 1211233176Sae snprintf(buf, bufsz, "s%d", baseentry->gpe_index); 1212233176Sae return (buf); 1213233176Sae} 1214233176Sae 1215233176Saestatic int 1216233176Saeldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp) 1217233176Sae{ 1218233176Sae struct g_part_ldm_table *table; 1219233176Sae struct g_part_table *gpt; 1220233176Sae struct g_part_entry *entry; 1221233176Sae 
struct g_consumer *cp2; 1222233176Sae struct gpt_ent *part; 1223233176Sae u_char *buf; 1224233176Sae int error; 1225233176Sae 1226233176Sae /* 1227233176Sae * XXX: We use some knowlege about GEOM_PART_GPT internal 1228233176Sae * structures, but it is easier than parse GPT by himself. 1229233176Sae */ 1230233176Sae g_topology_lock(); 1231233176Sae gpt = cp->provider->geom->softc; 1232233176Sae LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) { 1233233176Sae part = (struct gpt_ent *)(entry + 1); 1234233176Sae /* Search ms-ldm-metadata partition */ 1235233176Sae if (memcmp(&part->ent_type, 1236233176Sae &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 || 1237233176Sae entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1) 1238233176Sae continue; 1239233176Sae 1240233176Sae /* Create new consumer and attach it to metadata partition */ 1241233176Sae cp2 = g_new_consumer(cp->geom); 1242233176Sae error = g_attach(cp2, entry->gpe_pp); 1243233176Sae if (error != 0) { 1244233176Sae g_destroy_consumer(cp2); 1245233176Sae g_topology_unlock(); 1246233176Sae return (ENXIO); 1247233176Sae } 1248233176Sae error = g_access(cp2, 1, 0, 0); 1249233176Sae if (error != 0) { 1250233176Sae g_detach(cp2); 1251233176Sae g_destroy_consumer(cp2); 1252233176Sae g_topology_unlock(); 1253233176Sae return (ENXIO); 1254233176Sae } 1255233176Sae g_topology_unlock(); 1256233176Sae 1257233176Sae LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT", 1258233176Sae cp->provider->name, cp2->provider->name); 1259233176Sae /* Read the LDM private header */ 1260233176Sae buf = ldm_privhdr_read(cp2, 1261233176Sae ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize, 1262233176Sae &error); 1263233176Sae if (buf != NULL) { 1264233176Sae table = (struct g_part_ldm_table *)basetable; 1265233176Sae table->is_gpt = 1; 1266233176Sae g_free(buf); 1267233176Sae return (G_PART_PROBE_PRI_HIGH); 1268233176Sae } 1269233176Sae 1270233176Sae /* second consumer is no longer needed. 
*/ 1271233176Sae g_topology_lock(); 1272233176Sae g_access(cp2, -1, 0, 0); 1273233176Sae g_detach(cp2); 1274233176Sae g_destroy_consumer(cp2); 1275233176Sae break; 1276233176Sae } 1277233176Sae g_topology_unlock(); 1278233176Sae return (ENXIO); 1279233176Sae} 1280233176Sae 1281233176Saestatic int 1282233176Saeg_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp) 1283233176Sae{ 1284233176Sae struct g_provider *pp; 1285233176Sae u_char *buf, type[64]; 1286233176Sae int error, idx; 1287233176Sae 1288233176Sae 1289233176Sae pp = cp->provider; 1290233176Sae if (pp->sectorsize != 512) 1291233176Sae return (ENXIO); 1292233176Sae 1293233176Sae error = g_getattr("PART::scheme", cp, &type); 1294233176Sae if (error == 0 && strcmp(type, "GPT") == 0) { 1295233176Sae if (g_getattr("PART::type", cp, &type) != 0 || 1296233176Sae strcmp(type, "ms-ldm-data") != 0) 1297233176Sae return (ENXIO); 1298233176Sae error = ldm_gpt_probe(basetable, cp); 1299233176Sae return (error); 1300233176Sae } 1301233176Sae 1302233176Sae if (basetable->gpt_depth != 0) 1303233176Sae return (ENXIO); 1304233176Sae 1305233176Sae /* LDM has 1M metadata area */ 1306233176Sae if (pp->mediasize <= 1024 * 1024) 1307233176Sae return (ENOSPC); 1308233176Sae 1309233176Sae /* Check that there's a MBR */ 1310233176Sae buf = g_read_data(cp, 0, pp->sectorsize, &error); 1311233176Sae if (buf == NULL) 1312233176Sae return (error); 1313233176Sae 1314233176Sae if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) { 1315233176Sae g_free(buf); 1316233176Sae return (ENXIO); 1317233176Sae } 1318233176Sae error = ENXIO; 1319233176Sae /* Check that we have LDM partitions in the MBR */ 1320233176Sae for (idx = 0; idx < NDOSPART && error != 0; idx++) { 1321233176Sae if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM) 1322233176Sae error = 0; 1323233176Sae } 1324233176Sae g_free(buf); 1325233176Sae if (error == 0) { 1326233176Sae LDM_DEBUG(2, "%s: LDM data partitions found in MBR", 1327233176Sae pp->name); 
1328233176Sae /* Read the LDM private header */ 1329233176Sae buf = ldm_privhdr_read(cp, 1330233176Sae ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error); 1331233176Sae if (buf == NULL) 1332233176Sae return (error); 1333233176Sae g_free(buf); 1334233176Sae return (G_PART_PROBE_PRI_HIGH); 1335233176Sae } 1336233176Sae return (error); 1337233176Sae} 1338233176Sae 1339233176Saestatic int 1340233176Saeg_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp) 1341233176Sae{ 1342233176Sae struct g_part_ldm_table *table; 1343233176Sae struct g_part_ldm_entry *entry; 1344233176Sae struct g_consumer *cp2; 1345233176Sae struct ldm_component *comp; 1346233176Sae struct ldm_partition *part; 1347233176Sae struct ldm_volume *vol; 1348233176Sae struct ldm_disk *disk; 1349233176Sae struct ldm_db db; 1350233176Sae int error, index, skipped; 1351233176Sae 1352233176Sae table = (struct g_part_ldm_table *)basetable; 1353233176Sae memset(&db, 0, sizeof(db)); 1354233176Sae cp2 = cp; /* ms-ldm-data */ 1355233176Sae if (table->is_gpt) 1356233176Sae cp = LIST_FIRST(&cp->geom->consumer); /* ms-ldm-metadata */ 1357233176Sae /* Read and parse LDM private headers. */ 1358233176Sae error = ldm_privhdr_check(&db, cp, table->is_gpt); 1359233176Sae if (error != 0) 1360233651Sae goto gpt_cleanup; 1361233176Sae basetable->gpt_first = table->is_gpt ? 0: db.ph.start; 1362233176Sae basetable->gpt_last = basetable->gpt_first + db.ph.size - 1; 1363233176Sae table->db_offset = db.ph.db_offset; 1364233176Sae /* Make additional checks for GPT */ 1365233176Sae if (table->is_gpt) { 1366233651Sae error = ldm_gpt_check(&db, cp); 1367233651Sae if (error != 0) 1368233651Sae goto gpt_cleanup; 1369233176Sae /* 1370233176Sae * Now we should reset database offset to zero, because our 1371233176Sae * consumer cp is attached to the ms-ldm-metadata partition 1372233176Sae * and we don't need add db_offset to read from it. 
1373233176Sae */ 1374233176Sae db.ph.db_offset = 0; 1375233176Sae } 1376233176Sae /* Read and parse LDM TOC headers. */ 1377233176Sae error = ldm_tochdr_check(&db, cp); 1378233176Sae if (error != 0) 1379233651Sae goto gpt_cleanup; 1380233176Sae /* Read and parse LDM VMDB header. */ 1381233176Sae error = ldm_vmdbhdr_check(&db, cp); 1382233176Sae if (error != 0) 1383233651Sae goto gpt_cleanup; 1384233176Sae error = ldm_vmdb_parse(&db, cp); 1385233651Sae /* 1386233651Sae * For the GPT case we must detach and destroy 1387233651Sae * second consumer before return. 1388233651Sae */ 1389233651Saegpt_cleanup: 1390233651Sae if (table->is_gpt) { 1391233651Sae g_topology_lock(); 1392233651Sae g_access(cp, -1, 0, 0); 1393233651Sae g_detach(cp); 1394233651Sae g_destroy_consumer(cp); 1395233651Sae g_topology_unlock(); 1396233651Sae cp = cp2; 1397233651Sae } 1398233176Sae if (error != 0) 1399233176Sae return (error); 1400233176Sae /* Search current disk in the disk list. */ 1401233176Sae LIST_FOREACH(disk, &db.disks, entry) 1402233176Sae if (memcmp(&disk->guid, &db.ph.disk_guid, 1403233176Sae sizeof(struct uuid)) == 0) 1404233176Sae break; 1405233176Sae if (disk == NULL) { 1406233176Sae LDM_DEBUG(1, "%s: no LDM volumes on this disk", 1407233176Sae cp->provider->name); 1408233176Sae ldm_vmdb_free(&db); 1409233176Sae return (ENXIO); 1410233176Sae } 1411233176Sae index = 1; 1412233176Sae LIST_FOREACH(vol, &db.volumes, entry) { 1413233176Sae LIST_FOREACH(comp, &vol->components, entry) { 1414233176Sae /* Skip volumes from different disks. */ 1415233176Sae part = LIST_FIRST(&comp->partitions); 1416233176Sae if (part->disk_id != disk->id) 1417233176Sae continue; 1418233176Sae skipped = 0; 1419233176Sae /* We don't support spanned and striped volumes. */ 1420233176Sae if (comp->count > 1 || part->offset != 0) { 1421233176Sae LDM_DEBUG(1, "%s: LDM volume component " 1422233176Sae "%ju has %u partitions. 
Skipped", 1423233176Sae cp->provider->name, (uintmax_t)comp->id, 1424233176Sae comp->count); 1425233176Sae skipped = 1; 1426233176Sae } 1427233176Sae /* 1428233176Sae * Allow mirrored volumes only when they are explicitly 1429233176Sae * allowed with kern.geom.part.ldm.show_mirrors=1. 1430233176Sae */ 1431233176Sae if (vol->count > 1 && show_mirrors == 0) { 1432233176Sae LDM_DEBUG(1, "%s: LDM volume %ju has %u " 1433233176Sae "components. Skipped", 1434233176Sae cp->provider->name, (uintmax_t)vol->id, 1435233176Sae vol->count); 1436233176Sae skipped = 1; 1437233176Sae } 1438233176Sae entry = (struct g_part_ldm_entry *)g_part_new_entry( 1439233176Sae basetable, index++, 1440233176Sae basetable->gpt_first + part->start, 1441233176Sae basetable->gpt_first + part->start + 1442233176Sae part->size - 1); 1443233176Sae /* 1444233176Sae * Mark skipped partition as ms-ldm-data partition. 1445233176Sae * We do not support them, but it is better to show 1446233176Sae * that we have something there, than just show 1447233176Sae * free space. 
1448233176Sae */ 1449233176Sae if (skipped == 0) 1450233176Sae entry->type = vol->part_type; 1451233176Sae else 1452233176Sae entry->type = DOSPTYP_LDM; 1453233176Sae LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju," 1454233176Sae " end: %ju, type: 0x%02x\n", cp->provider->name, 1455233176Sae (uintmax_t)part->id,(uintmax_t)part->start + 1456233176Sae basetable->gpt_first, (uintmax_t)part->start + 1457233176Sae part->size + basetable->gpt_first - 1, 1458233176Sae vol->part_type); 1459233176Sae } 1460233176Sae } 1461233176Sae ldm_vmdb_free(&db); 1462233176Sae return (error); 1463233176Sae} 1464233176Sae 1465233176Saestatic const char * 1466233176Saeg_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry, 1467233176Sae char *buf, size_t bufsz) 1468233176Sae{ 1469233176Sae struct g_part_ldm_entry *entry; 1470233176Sae int i; 1471233176Sae 1472233176Sae entry = (struct g_part_ldm_entry *)baseentry; 1473233176Sae for (i = 0; 1474233176Sae i < sizeof(ldm_alias_match) / sizeof(ldm_alias_match[0]); i++) { 1475233176Sae if (ldm_alias_match[i].typ == entry->type) 1476233176Sae return (g_part_alias_name(ldm_alias_match[i].alias)); 1477233176Sae } 1478233176Sae snprintf(buf, bufsz, "!%d", entry->type); 1479233176Sae return (buf); 1480233176Sae} 1481233176Sae 1482233176Saestatic int 1483233176Saeg_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp) 1484233176Sae{ 1485233176Sae 1486233176Sae return (ENOSYS); 1487233176Sae} 1488