1/*-
2 * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/diskmbr.h>
33#include <sys/endian.h>
34#include <sys/gpt.h>
35#include <sys/kernel.h>
36#include <sys/kobj.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mutex.h>
41#include <sys/queue.h>
42#include <sys/sbuf.h>
43#include <sys/systm.h>
44#include <sys/sysctl.h>
45#include <sys/uuid.h>
46#include <geom/geom.h>
47#include <geom/part/g_part.h>
48
49#include "g_part_if.h"
50
/* Advertise the presence of LDM support as a kernel feature. */
FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");

/* kern.geom.part is declared elsewhere; hang the "ldm" node below it. */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/*
 * Verbosity of the LDM_DEBUG()/LDM_DUMP() macros. It can be set both as
 * a loader tunable and as a read-write sysctl under the same name.
 */
static u_int ldm_debug = 0;
TUNABLE_INT("kern.geom.part.ldm.debug", &ldm_debug);
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_TUN, &ldm_debug, 0, "Debug level");
61
/*
 * This allows access to mirrored LDM volumes. Since we do not do any
 * mirroring here, it is not enabled by default. The knob is available
 * both as a loader tunable and as a read-write sysctl.
 */
static u_int show_mirrors = 0;
TUNABLE_INT("kern.geom.part.ldm.show_mirrors", &show_mirrors);
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RW | CTLFLAG_TUN, &show_mirrors, 0, "Show mirrored volumes");
70
/*
 * Print a "GEOM_PART: "-prefixed message when ldm_debug is at least 'lvl'.
 * The macro appends the trailing newline itself and requires at least one
 * argument after the format string.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/* Hex-dump 'size' bytes of 'buf' when ldm_debug is greater than 1. */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
81
/*
 * These are internal representations of LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are in big-endian
 * format.
 */
88
/*
 * Private header is 512 bytes long. There are three copies on each disk.
 * Offsets and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offsets are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, the LDM does not have the first
 * private header.
 */
/*
 * Indexes into ldm_ph_off[]. LDM_PH_MBRINDEX selects the copy located
 * relative to the disk start; LDM_PH_GPTINDEX presumably selects the copy
 * used to probe GPT disks (its user is not in this part of the file).
 */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies. */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
/* Private header versions accepted by ldm_privhdr_parse(). */
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
/* Signature found at the very beginning of a private header. */
#define	LDM_PH_SIGN		"PRIVHEAD"
/* In-memory copy of the private header fields that are used here. */
struct ldm_privhdr {
	struct uuid	disk_guid;	/* GUID of this disk */
	struct uuid	dg_guid;	/* GUID of the disk group */
	u_char		dg_name[32];	/* disk group name */
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
129
130/*
131 * Table of contents header is 512 bytes long.
132 * There are two identical copies at offsets from the private header.
133 * Offsets are relative to the LDM database start.
134 */
/* Signature and section names that a valid TOC header must contain. */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/* In-memory copy of the TOC header fields that are used here. */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
148
149/*
150 * LDM database header is 512 bytes long.
151 */
/* Signature found at the beginning of the VMDB header. */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields inside the on-disk VMDB header. */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* In-memory copy of the VMDB header fields that are used here. */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
163
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk, partition,
 * component or volume.
 *
 * The most interesting for us are volumes: they represent partitions
 * in the GEOM_PART meaning. But a volume VBLK does not contain all the
 * information needed to create a GEOM provider, so we have to get the
 * rest from the related VBLKs. This is how VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * does mirroring of volume data to each component.
 *
 * Also each component can contain several partitions (spanned or
 * striped volumes).
 */
182
/*
 * Component object. It refers to its parent volume by object id and owns
 * the list of partitions it is built from.
 */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	/* presumably the number of entries in 'partitions' -- TODO confirm */
	int		count;
	LIST_HEAD(, ldm_partition) partitions;
	LIST_ENTRY(ldm_component) entry;
};
191
/*
 * Volume object. Volumes are kept in ldm_db.volumes ordered by 'number'
 * and own the list of their components.
 */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	/* presumably the number of entries in 'components' -- TODO confirm */
	int		count;
	LIST_HEAD(, ldm_component) components;
	LIST_ENTRY(ldm_volume)	entry;
};
202
/* Disk object; entries are linked into ldm_db.disks. */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;
};

/* Disk group objects are currently not tracked; the code is compiled out. */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
219
/*
 * Partition object. It refers both to the disk it lives on and to the
 * component it is a part of by their object ids.
 */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;
};
230
/*
 * Each VBLK is 128 bytes long and has a standard 16-byte header.
 * Some of the VBLK fields have a fixed size, but others have a variable
 * size. Fields with a variable size are prefixed with a one-byte length
 * marker. Some fields are strings and can also be either fixed or
 * variable in size. Strings with a fixed size are NUL-terminated, others
 * are not.
 * All VBLKs have the same first several fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PS		object id
 *	0x18+		PN		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
/* Signature found at the beginning of every VBLK. */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fields inside the on-disk VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* Decoded fields of the standard VBLK header. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
263
/* Values of the VBLK type byte recognized by ldm_vblk_handle(). */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * Parsed VBLK. 'type' selects the valid union member. Every member
 * starts with the 64-bit object id, so 'u.id' can be used to access the
 * id regardless of the type.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;
};
285
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs. Before parsing, the data from these
 * VBLKs should be placed into a contiguous memory buffer. We can detect
 * an xVBLK by the count field in the standard VBLK header (count > 1).
 */
/* Reassembly state of one extended VBLK, keyed by its group number. */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;
};
300
/*
 * The internal representation of LDM database.
 *
 * The elements of 'volumes' and 'disks' are embedded in the ldm_vblk
 * objects linked into 'vblks'; they are not separate allocations.
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;	/* ordered by number */
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;		/* all parsed VBLKs */
	LIST_HEAD(, ldm_xvblk)		xvblks;		/* xVBLK reassembly */
};
312
/* GPT partition type of the LDM metadata partition. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Per-table state: the generic g_part table comes first. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;	/* presumably the LDM database start -- TODO confirm */
	int			is_gpt;		/* presumably non-zero when LDM lives on a GPT disk -- TODO confirm */
};
/* Per-entry state: the generic g_part entry comes first. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;		/* presumably the partition type byte -- TODO confirm */
};
324
/*
 * Forward declarations of the g_part method implementations that are
 * registered in g_part_ldm_methods[].
 */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
342
/* Binds each g_part interface method to its LDM implementation. */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }	/* terminator */
};
358
/*
 * Scheme descriptor: scheme name, its method table and the sizes of the
 * LDM-specific table and entry structures.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
G_PART_SCHEME_DECLARE(g_part_ldm);
366
/* Pairs a one-byte partition type with the matching G_PART_ALIAS_* value. */
static struct g_part_ldm_alias {
	u_char		typ;	/* DOSPTYP_* partition type */
	int		alias;	/* G_PART_ALIAS_* value */
} ldm_alias_match[] = {
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
};
380
381static u_char*
382ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
383{
384	struct g_provider *pp;
385	u_char *buf;
386
387	pp = cp->provider;
388	buf = g_read_data(cp, off, pp->sectorsize, error);
389	if (buf == NULL)
390		return (NULL);
391
392	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
393		LDM_DEBUG(1, "%s: invalid LDM private header signature",
394		    pp->name);
395		g_free(buf);
396		buf = NULL;
397		*error = EINVAL;
398	}
399	return (buf);
400}
401
402static int
403ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
404    const u_char *buf)
405{
406	uint32_t version;
407	int error;
408
409	memset(hdr, 0, sizeof(*hdr));
410	version = be32dec(buf + LDM_PH_VERSION_OFF);
411	if (version != LDM_VERSION_2K &&
412	    version != LDM_VERSION_VISTA) {
413		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
414		    cp->provider->name, version >> 16,
415		    version & 0xFFFF);
416		return (ENXIO);
417	}
418	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
419	if (error != 0)
420		return (error);
421	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
422	if (error != 0)
423		return (error);
424	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
425	hdr->start = be64dec(buf + LDM_PH_START_OFF);
426	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
427	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
428	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
429	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
430	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
431	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
432	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
433	return (0);
434}
435
436static int
437ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
438{
439	struct g_consumer *cp2;
440	struct g_provider *pp;
441	struct ldm_privhdr hdr;
442	uint64_t offset, last;
443	int error, found, i;
444	u_char *buf;
445
446	pp = cp->provider;
447	if (is_gpt) {
448		/*
449		 * The last LBA is used in several checks below, for the
450		 * GPT case it should be calculated relative to the whole
451		 * disk.
452		 */
453		cp2 = LIST_FIRST(&pp->geom->consumer);
454		last =
455		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
456	} else
457		last = pp->mediasize / pp->sectorsize - 1;
458	for (found = 0, i = is_gpt;
459	    i < sizeof(ldm_ph_off) / sizeof(ldm_ph_off[0]); i++) {
460		offset = ldm_ph_off[i];
461		/*
462		 * In the GPT case consumer is attached to the LDM metadata
463		 * partition and we don't need add db_offset.
464		 */
465		if (!is_gpt)
466			offset += db->ph.db_offset;
467		if (i == LDM_PH_MBRINDEX) {
468			/*
469			 * Prepare to errors and setup new base offset
470			 * to read backup private headers. Assume that LDM
471			 * database is in the last 1Mbyte area.
472			 */
473			db->ph.db_offset = last - LDM_DB_SIZE;
474		}
475		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
476		if (buf == NULL) {
477			LDM_DEBUG(1, "%s: failed to read private header "
478			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
479			continue;
480		}
481		error = ldm_privhdr_parse(cp, &hdr, buf);
482		if (error != 0) {
483			LDM_DEBUG(1, "%s: failed to parse private "
484			    "header %d", pp->name, i);
485			LDM_DUMP(buf, pp->sectorsize);
486			g_free(buf);
487			continue;
488		}
489		g_free(buf);
490		if (hdr.start > last ||
491		    hdr.start + hdr.size - 1 > last ||
492		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
493		    hdr.db_size != LDM_DB_SIZE ||
494		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
495		    hdr.th_offset[0] >= LDM_DB_SIZE ||
496		    hdr.th_offset[1] >= LDM_DB_SIZE ||
497		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
498			LDM_DEBUG(1, "%s: invalid values in the "
499			    "private header %d", pp->name, i);
500			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
501			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
502			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
503			    "last: %jd", pp->name, hdr.start, hdr.size,
504			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
505			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
506			    last);
507			continue;
508		}
509		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
510			LDM_DEBUG(0, "%s: private headers are not equal",
511			    pp->name);
512			if (i > 1) {
513				/*
514				 * We have different headers in the LDM.
515				 * We can not trust this metadata.
516				 */
517				LDM_DEBUG(0, "%s: refuse LDM metadata",
518				    pp->name);
519				return (EINVAL);
520			}
521			/*
522			 * We already have read primary private header
523			 * and it differs from this backup one.
524			 * Prefer the backup header and save it.
525			 */
526			found = 0;
527		}
528		if (found == 0)
529			memcpy(&db->ph, &hdr, sizeof(hdr));
530		found = 1;
531	}
532	if (found == 0) {
533		LDM_DEBUG(1, "%s: valid LDM private header not found",
534		    pp->name);
535		return (ENXIO);
536	}
537	return (0);
538}
539
540static int
541ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
542{
543	struct g_part_table *gpt;
544	struct g_part_entry *e;
545	struct g_consumer *cp2;
546	int error;
547
548	cp2 = LIST_NEXT(cp, consumer);
549	g_topology_lock();
550	gpt = cp->provider->geom->softc;
551	error = 0;
552	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
553		if (cp->provider == e->gpe_pp) {
554			/* ms-ldm-metadata partition */
555			if (e->gpe_start != db->ph.db_offset ||
556			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
557				error++;
558		} else if (cp2->provider == e->gpe_pp) {
559			/* ms-ldm-data partition */
560			if (e->gpe_start != db->ph.start ||
561			    e->gpe_end != db->ph.start + db->ph.size - 1)
562				error++;
563		}
564		if (error != 0) {
565			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
566			    "do not match with the LDM metadata",
567			    e->gpe_pp->name, e->gpe_index);
568			error = ENXIO;
569			break;
570		}
571	}
572	g_topology_unlock();
573	return (error);
574}
575
576static int
577ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
578{
579	struct g_provider *pp;
580	struct ldm_tochdr hdr;
581	uint64_t offset, conf_size, log_size;
582	int error, found, i;
583	u_char *buf;
584
585	pp = cp->provider;
586	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
587		offset = db->ph.db_offset + db->ph.th_offset[i];
588		buf = g_read_data(cp,
589		    offset * pp->sectorsize, pp->sectorsize, &error);
590		if (buf == NULL) {
591			LDM_DEBUG(1, "%s: failed to read TOC header "
592			    "at LBA %ju", pp->name, (uintmax_t)offset);
593			continue;
594		}
595		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
596		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
597		    strlen(LDM_TH_NAME1)) != 0 ||
598		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
599		    strlen(LDM_TH_NAME2)) != 0) {
600			LDM_DEBUG(1, "%s: failed to parse TOC header "
601			    "at LBA %ju", pp->name, (uintmax_t)offset);
602			LDM_DUMP(buf, pp->sectorsize);
603			g_free(buf);
604			continue;
605		}
606		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
607		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
608		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
609		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
610		if (conf_size != db->ph.conf_size ||
611		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
612		    log_size != db->ph.log_size ||
613		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
614			LDM_DEBUG(1, "%s: invalid values in the "
615			    "TOC header at LBA %ju", pp->name,
616			    (uintmax_t)offset);
617			LDM_DUMP(buf, pp->sectorsize);
618			g_free(buf);
619			continue;
620		}
621		g_free(buf);
622		if (found == 0)
623			memcpy(&db->th, &hdr, sizeof(hdr));
624		found = 1;
625	}
626	if (found == 0) {
627		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
628		    pp->name);
629		return (ENXIO);
630	}
631	return (0);
632}
633
634static int
635ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
636{
637	struct g_provider *pp;
638	struct uuid dg_guid;
639	uint64_t offset;
640	uint32_t version;
641	int error;
642	u_char *buf;
643
644	pp = cp->provider;
645	offset = db->ph.db_offset + db->th.conf_offset;
646	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
647	    &error);
648	if (buf == NULL) {
649		LDM_DEBUG(0, "%s: failed to read VMDB header at "
650		    "LBA %ju", pp->name, (uintmax_t)offset);
651		return (error);
652	}
653	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
654		g_free(buf);
655		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
656		    "LBA %ju", pp->name, (uintmax_t)offset);
657		return (ENXIO);
658	}
659	/* Check version. */
660	version = be32dec(buf + LDM_DB_VERSION_OFF);
661	if (version != 0x4000A) {
662		g_free(buf);
663		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
664		    pp->name, version >> 16, version & 0xFFFF);
665		return (ENXIO);
666	}
667	/*
668	 * Check VMDB update status:
669	 *	1 - in a consistent state;
670	 *	2 - in a creation phase;
671	 *	3 - in a deletion phase;
672	 */
673	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
674		g_free(buf);
675		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
676		    pp->name);
677		return (ENXIO);
678	}
679	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
680	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
681	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
682	/* Compare disk group name and guid from VMDB and private headers */
683	if (error != 0 || db->dh.size == 0 ||
684	    pp->sectorsize % db->dh.size != 0 ||
685	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
686	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
687	    db->dh.size * db->dh.last_seq >
688	    db->ph.conf_size * pp->sectorsize) {
689		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
690		    pp->name);
691		LDM_DUMP(buf, pp->sectorsize);
692		g_free(buf);
693		return (EINVAL);
694	}
695	g_free(buf);
696	return (0);
697}
698
699static int
700ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
701{
702	struct ldm_xvblk *blk;
703	size_t size;
704
705	size = db->dh.size - 16;
706	LIST_FOREACH(blk, &db->xvblks, entry)
707		if (blk->group == vh->group)
708			break;
709	if (blk == NULL) {
710		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
711		blk->group = vh->group;
712		blk->size = size * vh->count + 16;
713		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
714		blk->map = 0xFF << vh->count;
715		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
716	}
717	if ((blk->map & (1 << vh->index)) != 0) {
718		/* Block with given index has been already saved. */
719		return (EINVAL);
720	}
721	/* Copy the data block to the place related to index. */
722	memcpy(blk->data + size * vh->index + 16, p + 16, size);
723	blk->map |= 1 << vh->index;
724	return (0);
725}
726
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many bytes of a big-endian number. 'range' is the number of
 * valid bytes in 'buf'.
 *
 * Returns the offset of the byte that follows the field, or -1 when the
 * length byte lies outside of 'range', when the number is wider than 64
 * bits, or when the field is not followed by at least one more byte
 * inside of 'range'. '*result' is written only on success.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
742
/*
 * Read the variable-width string and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many characters without a terminator. The characters are copied
 * into 'result', which has room for 'maxlen' bytes, and a NUL terminator
 * is appended. 'range' is the number of valid bytes in 'buf'.
 *
 * Returns the offset of the byte that follows the field, or -1 when the
 * length byte lies outside of 'range', when the string and its terminator
 * do not fit into 'result', or when the field is not followed by at least
 * one more byte inside of 'range'.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
757
/*
 * Just skip the variable-width variable and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many bytes of data. 'range' is the number of valid bytes in
 * 'buf'.
 *
 * Returns the offset of the byte that follows the field, or -1 when the
 * length byte lies outside of 'range' or when the field is not followed
 * by at least one more byte inside of 'range'.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
770
771static int
772ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
773{
774	struct ldm_vblk *blk;
775	struct ldm_volume *volume, *last;
776	const char *errstr;
777	u_char vstr[64];
778	int error, offset;
779
780	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
781	blk->type = p[LDM_VBLK_TYPE_OFF];
782	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
783	if (offset < 0) {
784		errstr = "object id";
785		goto fail;
786	}
787	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
788	if (offset < 0) {
789		errstr = "object name";
790		goto fail;
791	}
792	switch (blk->type) {
793	/*
794	 * Component VBLK fields:
795	 * Offset	Size	Description
796	 * ------------+-------+------------------------
797	 *  0x18+	PS	volume state
798	 *  0x18+5	PN	component children count
799	 *  0x1D+16	PN	parent's volume object id
800	 *  0x2D+1	PN	stripe size
801	 */
802	case LDM_VBLK_T_COMPONENT:
803		offset = ldm_vparm_skip(p, offset, size);
804		if (offset < 0) {
805			errstr = "volume state";
806			goto fail;
807		}
808		offset = ldm_vparm_skip(p, offset + 5, size);
809		if (offset < 0) {
810			errstr = "children count";
811			goto fail;
812		}
813		offset = ldm_vnum_get(p, offset + 16,
814		    &blk->u.comp.vol_id, size);
815		if (offset < 0) {
816			errstr = "volume id";
817			goto fail;
818		}
819		break;
820	/*
821	 * Partition VBLK fields:
822	 * Offset	Size	Description
823	 * ------------+-------+------------------------
824	 *  0x18+12	8	partition start offset
825	 *  0x18+20	8	volume offset
826	 *  0x18+28	PN	partition size
827	 *  0x34+	PN	parent's component object id
828	 *  0x34+	PN	disk's object id
829	 */
830	case LDM_VBLK_T_PARTITION:
831		if (offset + 28 >= size) {
832			errstr = "too small buffer";
833			goto fail;
834		}
835		blk->u.part.start = be64dec(p + offset + 12);
836		blk->u.part.offset = be64dec(p + offset + 20);
837		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
838		if (offset < 0) {
839			errstr = "partition size";
840			goto fail;
841		}
842		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
843		if (offset < 0) {
844			errstr = "component id";
845			goto fail;
846		}
847		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
848		if (offset < 0) {
849			errstr = "disk id";
850			goto fail;
851		}
852		break;
853	/*
854	 * Disk VBLK fields:
855	 * Offset	Size	Description
856	 * ------------+-------+------------------------
857	 *  0x18+	PS	disk GUID
858	 */
859	case LDM_VBLK_T_DISK:
860		errstr = "disk guid";
861		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
862		if (offset < 0)
863			goto fail;
864		error = parse_uuid(vstr, &blk->u.disk.guid);
865		if (error != 0)
866			goto fail;
867		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
868		break;
869	/*
870	 * Disk group VBLK fields:
871	 * Offset	Size	Description
872	 * ------------+-------+------------------------
873	 *  0x18+	PS	disk group GUID
874	 */
875	case LDM_VBLK_T_DISKGROUP:
876#if 0
877		strncpy(blk->u.disk_group.name, vstr,
878		    sizeof(blk->u.disk_group.name));
879		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
880		if (offset < 0) {
881			errstr = "disk group guid";
882			goto fail;
883		}
884		error = parse_uuid(name, &blk->u.disk_group.guid);
885		if (error != 0) {
886			errstr = "disk group guid";
887			goto fail;
888		}
889		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
890#endif
891		break;
892	/*
893	 * Disk VBLK fields:
894	 * Offset	Size	Description
895	 * ------------+-------+------------------------
896	 *  0x18+	16	disk GUID
897	 */
898	case LDM_VBLK_T_DISK4:
899		be_uuid_dec(p + offset, &blk->u.disk.guid);
900		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
901		break;
902	/*
903	 * Disk group VBLK fields:
904	 * Offset	Size	Description
905	 * ------------+-------+------------------------
906	 *  0x18+	16	disk GUID
907	 */
908	case LDM_VBLK_T_DISKGROUP4:
909#if 0
910		strncpy(blk->u.disk_group.name, vstr,
911		    sizeof(blk->u.disk_group.name));
912		be_uuid_dec(p + offset, &blk->u.disk.guid);
913		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
914#endif
915		break;
916	/*
917	 * Volume VBLK fields:
918	 * Offset	Size	Description
919	 * ------------+-------+------------------------
920	 *  0x18+	PS	volume type
921	 *  0x18+	PS	unknown
922	 *  0x18+	14(S)	volume state
923	 *  0x18+16	1	volume number
924	 *  0x18+21	PN	volume children count
925	 *  0x2D+16	PN	volume size
926	 *  0x3D+4	1	partition type
927	 */
928	case LDM_VBLK_T_VOLUME:
929		offset = ldm_vparm_skip(p, offset, size);
930		if (offset < 0) {
931			errstr = "volume type";
932			goto fail;
933		}
934		offset = ldm_vparm_skip(p, offset, size);
935		if (offset < 0) {
936			errstr = "unknown param";
937			goto fail;
938		}
939		if (offset + 21 >= size) {
940			errstr = "too small buffer";
941			goto fail;
942		}
943		blk->u.vol.number = p[offset + 16];
944		offset = ldm_vparm_skip(p, offset + 21, size);
945		if (offset < 0) {
946			errstr = "children count";
947			goto fail;
948		}
949		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
950		if (offset < 0) {
951			errstr = "volume size";
952			goto fail;
953		}
954		if (offset + 4 >= size) {
955			errstr = "too small buffer";
956			goto fail;
957		}
958		blk->u.vol.part_type = p[offset + 4];
959		/* keep volumes ordered by volume number */
960		last = NULL;
961		LIST_FOREACH(volume, &db->volumes, entry) {
962			if (volume->number > blk->u.vol.number)
963				break;
964			last = volume;
965		}
966		if (last != NULL)
967			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
968		else
969			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
970		break;
971	default:
972		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
973		LDM_DUMP(p, size);
974	}
975	LIST_INSERT_HEAD(&db->vblks, blk, entry);
976	return (0);
977fail:
978	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
979	    errstr, blk->type);
980	LDM_DUMP(p, size);
981	g_free(blk);
982	return (EINVAL);
983}
984
985static void
986ldm_vmdb_free(struct ldm_db *db)
987{
988	struct ldm_vblk *vblk;
989	struct ldm_xvblk *xvblk;
990
991	while (!LIST_EMPTY(&db->xvblks)) {
992		xvblk = LIST_FIRST(&db->xvblks);
993		LIST_REMOVE(xvblk, entry);
994		g_free(xvblk->data);
995		g_free(xvblk);
996	}
997	while (!LIST_EMPTY(&db->vblks)) {
998		vblk = LIST_FIRST(&db->vblks);
999		LIST_REMOVE(vblk, entry);
1000		g_free(vblk);
1001	}
1002}
1003
1004static int
1005ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1006{
1007	struct g_provider *pp;
1008	struct ldm_vblk *vblk;
1009	struct ldm_xvblk *xvblk;
1010	struct ldm_volume *volume;
1011	struct ldm_component *comp;
1012	struct ldm_vblkhdr vh;
1013	u_char *buf, *p;
1014	size_t size, n, sectors;
1015	uint64_t offset;
1016	int error;
1017
1018	pp = cp->provider;
1019	size = (db->dh.last_seq * db->dh.size +
1020	    pp->sectorsize - 1) / pp->sectorsize;
1021	size -= 1; /* one sector takes vmdb header */
1022	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
1023		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1024		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
1025		    MAXPHYS / pp->sectorsize: size - n;
1026		/* read VBLKs */
1027		buf = g_read_data(cp, offset * pp->sectorsize,
1028		    sectors * pp->sectorsize, &error);
1029		if (buf == NULL) {
1030			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1031			    pp->name);
1032			goto fail;
1033		}
1034		for (p = buf; p < buf + sectors * pp->sectorsize;
1035		    p += db->dh.size) {
1036			if (memcmp(p, LDM_VBLK_SIGN,
1037			    strlen(LDM_VBLK_SIGN)) != 0) {
1038				LDM_DEBUG(0, "%s: no VBLK signature\n",
1039				    pp->name);
1040				LDM_DUMP(p, db->dh.size);
1041				goto fail;
1042			}
1043			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1044			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1045			/* skip empty blocks */
1046			if (vh.seq == 0 || vh.group == 0)
1047				continue;
1048			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1049			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1050			if (vh.count == 0 || vh.count > 4 ||
1051			    vh.seq > db->dh.last_seq) {
1052				LDM_DEBUG(0, "%s: invalid values "
1053				    "in the VBLK header\n", pp->name);
1054				LDM_DUMP(p, db->dh.size);
1055				goto fail;
1056			}
1057			if (vh.count > 1) {
1058				error = ldm_xvblk_handle(db, &vh, p);
1059				if (error != 0) {
1060					LDM_DEBUG(0, "%s: xVBLK "
1061					    "is corrupted\n", pp->name);
1062					LDM_DUMP(p, db->dh.size);
1063					goto fail;
1064				}
1065				continue;
1066			}
1067			if (be16dec(p + 16) != 0)
1068				LDM_DEBUG(1, "%s: VBLK update"
1069				    " status is %u\n", pp->name,
1070				    be16dec(p + 16));
1071			error = ldm_vblk_handle(db, p, db->dh.size);
1072			if (error != 0)
1073				goto fail;
1074		}
1075		g_free(buf);
1076		buf = NULL;
1077	}
1078	/* Parse xVBLKs */
1079	while (!LIST_EMPTY(&db->xvblks)) {
1080		xvblk = LIST_FIRST(&db->xvblks);
1081		if (xvblk->map == 0xFF) {
1082			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1083			if (error != 0)
1084				goto fail;
1085		} else {
1086			LDM_DEBUG(0, "%s: incomplete or corrupt "
1087			    "xVBLK found\n", pp->name);
1088			goto fail;
1089		}
1090		LIST_REMOVE(xvblk, entry);
1091		g_free(xvblk->data);
1092		g_free(xvblk);
1093	}
1094	/* construct all VBLKs relations */
1095	LIST_FOREACH(volume, &db->volumes, entry) {
1096		LIST_FOREACH(vblk, &db->vblks, entry)
1097			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1098			    vblk->u.comp.vol_id == volume->id) {
1099				LIST_INSERT_HEAD(&volume->components,
1100				    &vblk->u.comp, entry);
1101				volume->count++;
1102			}
1103		LIST_FOREACH(comp, &volume->components, entry)
1104			LIST_FOREACH(vblk, &db->vblks, entry)
1105				if (vblk->type == LDM_VBLK_T_PARTITION &&
1106				    vblk->u.part.comp_id == comp->id) {
1107					LIST_INSERT_HEAD(&comp->partitions,
1108					    &vblk->u.part, entry);
1109					comp->count++;
1110				}
1111	}
1112	return (0);
1113fail:
1114	ldm_vmdb_free(db);
1115	g_free(buf);
1116	return (ENXIO);
1117}
1118
/*
 * Adding a partition is not implemented for this scheme:
 * the request is always rejected with ENOSYS.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{

	/* Not supported. */
	return (ENOSYS);
}
1126
/*
 * Installing boot code is not implemented for this scheme:
 * the request is always rejected with ENOSYS.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	/* Not supported. */
	return (ENOSYS);
}
1133
/*
 * Creating a new table is not implemented for this scheme:
 * the request is always rejected with ENOSYS.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	/* Not supported. */
	return (ENOSYS);
}
1140
1141static int
1142g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1143{
1144	struct g_part_ldm_table *table;
1145	struct g_provider *pp;
1146
1147	table = (struct g_part_ldm_table *)basetable;
1148	/*
1149	 * To destroy LDM on a disk partitioned with GPT we should delete
1150	 * ms-ldm-metadata partition, but we can't do this via standard
1151	 * GEOM_PART method.
1152	 */
1153	if (table->is_gpt)
1154		return (ENOSYS);
1155	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1156	/*
1157	 * To destroy LDM we should wipe MBR, first private header and
1158	 * backup private headers.
1159	 */
1160	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1161	/*
1162	 * Don't touch last backup private header when LDM database is
1163	 * not located in the last 1MByte area.
1164	 * XXX: can't remove all blocks.
1165	 */
1166	if (table->db_offset + LDM_DB_SIZE ==
1167	    pp->mediasize / pp->sectorsize)
1168		basetable->gpt_smtail = 1;
1169	return (0);
1170}
1171
1172static void
1173g_part_ldm_dumpconf(struct g_part_table *basetable,
1174    struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1175{
1176	struct g_part_ldm_entry *entry;
1177
1178	entry = (struct g_part_ldm_entry *)baseentry;
1179	if (indent == NULL) {
1180		/* conftxt: libdisk compatibility */
1181		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1182	} else if (entry != NULL) {
1183		/* confxml: partition entry information */
1184		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1185		    entry->type);
1186	} else {
1187		/* confxml: scheme information */
1188	}
1189}
1190
/*
 * Unconditionally returns 0 for every entry of this scheme.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{

	/* Same answer regardless of the entry. */
	return (0);
}
1197
/*
 * Modifying a partition is not implemented for this scheme:
 * the request is always rejected with ENOSYS.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{

	/* Not supported. */
	return (ENOSYS);
}
1205
1206static const char *
1207g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1208    char *buf, size_t bufsz)
1209{
1210
1211	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1212	return (buf);
1213}
1214
1215static int
1216ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1217{
1218	struct g_part_ldm_table *table;
1219	struct g_part_table *gpt;
1220	struct g_part_entry *entry;
1221	struct g_consumer *cp2;
1222	struct gpt_ent *part;
1223	u_char *buf;
1224	int error;
1225
1226	/*
1227	 * XXX: We use some knowlege about GEOM_PART_GPT internal
1228	 * structures, but it is easier than parse GPT by himself.
1229	 */
1230	g_topology_lock();
1231	gpt = cp->provider->geom->softc;
1232	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1233		part = (struct gpt_ent *)(entry + 1);
1234		/* Search ms-ldm-metadata partition */
1235		if (memcmp(&part->ent_type,
1236		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1237		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1238			continue;
1239
1240		/* Create new consumer and attach it to metadata partition */
1241		cp2 = g_new_consumer(cp->geom);
1242		error = g_attach(cp2, entry->gpe_pp);
1243		if (error != 0) {
1244			g_destroy_consumer(cp2);
1245			g_topology_unlock();
1246			return (ENXIO);
1247		}
1248		error = g_access(cp2, 1, 0, 0);
1249		if (error != 0) {
1250			g_detach(cp2);
1251			g_destroy_consumer(cp2);
1252			g_topology_unlock();
1253			return (ENXIO);
1254		}
1255		g_topology_unlock();
1256
1257		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1258		    cp->provider->name, cp2->provider->name);
1259		/* Read the LDM private header */
1260		buf = ldm_privhdr_read(cp2,
1261		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1262		    &error);
1263		if (buf != NULL) {
1264			table = (struct g_part_ldm_table *)basetable;
1265			table->is_gpt = 1;
1266			g_free(buf);
1267			return (G_PART_PROBE_PRI_HIGH);
1268		}
1269
1270		/* second consumer is no longer needed. */
1271		g_topology_lock();
1272		g_access(cp2, -1, 0, 0);
1273		g_detach(cp2);
1274		g_destroy_consumer(cp2);
1275		break;
1276	}
1277	g_topology_unlock();
1278	return (ENXIO);
1279}
1280
1281static int
1282g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1283{
1284	struct g_provider *pp;
1285	u_char *buf, type[64];
1286	int error, idx;
1287
1288
1289	pp = cp->provider;
1290	if (pp->sectorsize != 512)
1291		return (ENXIO);
1292
1293	error = g_getattr("PART::scheme", cp, &type);
1294	if (error == 0 && strcmp(type, "GPT") == 0) {
1295		if (g_getattr("PART::type", cp, &type) != 0 ||
1296		    strcmp(type, "ms-ldm-data") != 0)
1297			return (ENXIO);
1298		error = ldm_gpt_probe(basetable, cp);
1299		return (error);
1300	}
1301
1302	if (basetable->gpt_depth != 0)
1303		return (ENXIO);
1304
1305	/* LDM has 1M metadata area */
1306	if (pp->mediasize <= 1024 * 1024)
1307		return (ENOSPC);
1308
1309	/* Check that there's a MBR */
1310	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1311	if (buf == NULL)
1312		return (error);
1313
1314	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1315		g_free(buf);
1316		return (ENXIO);
1317	}
1318	error = ENXIO;
1319	/* Check that we have LDM partitions in the MBR */
1320	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1321		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1322			error = 0;
1323	}
1324	g_free(buf);
1325	if (error == 0) {
1326		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1327		    pp->name);
1328		/* Read the LDM private header */
1329		buf = ldm_privhdr_read(cp,
1330		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1331		if (buf == NULL)
1332			return (error);
1333		g_free(buf);
1334		return (G_PART_PROBE_PRI_HIGH);
1335	}
1336	return (error);
1337}
1338
1339static int
1340g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1341{
1342	struct g_part_ldm_table *table;
1343	struct g_part_ldm_entry *entry;
1344	struct g_consumer *cp2;
1345	struct ldm_component *comp;
1346	struct ldm_partition *part;
1347	struct ldm_volume *vol;
1348	struct ldm_disk *disk;
1349	struct ldm_db db;
1350	int error, index, skipped;
1351
1352	table = (struct g_part_ldm_table *)basetable;
1353	memset(&db, 0, sizeof(db));
1354	cp2 = cp;					/* ms-ldm-data */
1355	if (table->is_gpt)
1356		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1357	/* Read and parse LDM private headers. */
1358	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1359	if (error != 0)
1360		goto gpt_cleanup;
1361	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1362	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1363	table->db_offset = db.ph.db_offset;
1364	/* Make additional checks for GPT */
1365	if (table->is_gpt) {
1366		error = ldm_gpt_check(&db, cp);
1367		if (error != 0)
1368			goto gpt_cleanup;
1369		/*
1370		 * Now we should reset database offset to zero, because our
1371		 * consumer cp is attached to the ms-ldm-metadata partition
1372		 * and we don't need add db_offset to read from it.
1373		 */
1374		db.ph.db_offset = 0;
1375	}
1376	/* Read and parse LDM TOC headers. */
1377	error = ldm_tochdr_check(&db, cp);
1378	if (error != 0)
1379		goto gpt_cleanup;
1380	/* Read and parse LDM VMDB header. */
1381	error = ldm_vmdbhdr_check(&db, cp);
1382	if (error != 0)
1383		goto gpt_cleanup;
1384	error = ldm_vmdb_parse(&db, cp);
1385	/*
1386	 * For the GPT case we must detach and destroy
1387	 * second consumer before return.
1388	 */
1389gpt_cleanup:
1390	if (table->is_gpt) {
1391		g_topology_lock();
1392		g_access(cp, -1, 0, 0);
1393		g_detach(cp);
1394		g_destroy_consumer(cp);
1395		g_topology_unlock();
1396		cp = cp2;
1397	}
1398	if (error != 0)
1399		return (error);
1400	/* Search current disk in the disk list. */
1401	LIST_FOREACH(disk, &db.disks, entry)
1402	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1403		sizeof(struct uuid)) == 0)
1404		    break;
1405	if (disk == NULL) {
1406		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1407		    cp->provider->name);
1408		ldm_vmdb_free(&db);
1409		return (ENXIO);
1410	}
1411	index = 1;
1412	LIST_FOREACH(vol, &db.volumes, entry) {
1413		LIST_FOREACH(comp, &vol->components, entry) {
1414			/* Skip volumes from different disks. */
1415			part = LIST_FIRST(&comp->partitions);
1416			if (part->disk_id != disk->id)
1417				continue;
1418			skipped = 0;
1419			/* We don't support spanned and striped volumes. */
1420			if (comp->count > 1 || part->offset != 0) {
1421				LDM_DEBUG(1, "%s: LDM volume component "
1422				    "%ju has %u partitions. Skipped",
1423				    cp->provider->name, (uintmax_t)comp->id,
1424				    comp->count);
1425				skipped = 1;
1426			}
1427			/*
1428			 * Allow mirrored volumes only when they are explicitly
1429			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1430			 */
1431			if (vol->count > 1 && show_mirrors == 0) {
1432				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1433				    "components. Skipped",
1434				    cp->provider->name, (uintmax_t)vol->id,
1435				    vol->count);
1436				skipped = 1;
1437			}
1438			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1439			    basetable, index++,
1440			    basetable->gpt_first + part->start,
1441			    basetable->gpt_first + part->start +
1442			    part->size - 1);
1443			/*
1444			 * Mark skipped partition as ms-ldm-data partition.
1445			 * We do not support them, but it is better to show
1446			 * that we have something there, than just show
1447			 * free space.
1448			 */
1449			if (skipped == 0)
1450				entry->type = vol->part_type;
1451			else
1452				entry->type = DOSPTYP_LDM;
1453			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1454			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1455			    (uintmax_t)part->id,(uintmax_t)part->start +
1456			    basetable->gpt_first, (uintmax_t)part->start +
1457			    part->size + basetable->gpt_first - 1,
1458			    vol->part_type);
1459		}
1460	}
1461	ldm_vmdb_free(&db);
1462	return (error);
1463}
1464
1465static const char *
1466g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1467    char *buf, size_t bufsz)
1468{
1469	struct g_part_ldm_entry *entry;
1470	int i;
1471
1472	entry = (struct g_part_ldm_entry *)baseentry;
1473	for (i = 0;
1474	    i < sizeof(ldm_alias_match) / sizeof(ldm_alias_match[0]); i++) {
1475		if (ldm_alias_match[i].typ == entry->type)
1476			return (g_part_alias_name(ldm_alias_match[i].alias));
1477	}
1478	snprintf(buf, bufsz, "!%d", entry->type);
1479	return (buf);
1480}
1481
/*
 * Writing the table back to disk is not implemented for this scheme:
 * the request is always rejected with ENOSYS.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{

	/* Not supported. */
	return (ENOSYS);
}
1488