1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23290765Smav * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 24168404Spjd */ 25168404Spjd 26168404Spjd/* 27168404Spjd * The 512-byte leaf is broken into 32 16-byte chunks. 28168404Spjd * chunk number n means l_chunk[n], even though the header precedes it. 29168404Spjd * the names are stored null-terminated. 30168404Spjd */ 31168404Spjd 32219089Spjd#include <sys/zio.h> 33219089Spjd#include <sys/spa.h> 34219089Spjd#include <sys/dmu.h> 35168404Spjd#include <sys/zfs_context.h> 36219089Spjd#include <sys/fs/zfs.h> 37168404Spjd#include <sys/zap.h> 38168404Spjd#include <sys/zap_impl.h> 39168404Spjd#include <sys/zap_leaf.h> 40219089Spjd#include <sys/arc.h> 41168404Spjd 42185029Spjdstatic uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); 43185029Spjd 44168404Spjd#define CHAIN_END 0xffff /* end of the chunk chain */ 45168404Spjd 46168404Spjd/* half the (current) minimum block size */ 47168404Spjd#define MAX_ARRAY_BYTES (8<<10) 48168404Spjd 49168404Spjd#define LEAF_HASH(l, h) \ 50168404Spjd ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ 51277585Sdelphij ((h) >> \ 52277585Sdelphij (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) 53168404Spjd 54277585Sdelphij#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) 55168404Spjd 56277585Sdelphijextern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l); 57168404Spjd 58168404Spjdstatic void 59168404Spjdzap_memset(void *a, int c, size_t n) 60168404Spjd{ 61168404Spjd char *cp = a; 62168404Spjd char *cpend = cp + n; 63168404Spjd 64168404Spjd while (cp < cpend) 65168404Spjd *cp++ = c; 66168404Spjd} 67168404Spjd 68168404Spjdstatic void 69168404Spjdstv(int len, void *addr, uint64_t value) 70168404Spjd{ 71168404Spjd switch (len) { 72168404Spjd case 1: 73168404Spjd *(uint8_t *)addr = value; 74168404Spjd return; 75168404Spjd case 2: 76168404Spjd *(uint16_t *)addr = value; 77168404Spjd return; 78168404Spjd case 4: 79168404Spjd *(uint32_t *)addr = value; 80168404Spjd return; 81168404Spjd case 8: 82168404Spjd *(uint64_t *)addr = value; 83168404Spjd return; 84168404Spjd } 85168404Spjd ASSERT(!"bad int len"); 86168404Spjd} 87168404Spjd 88168404Spjdstatic uint64_t 89168404Spjdldv(int len, const void *addr) 90168404Spjd{ 91168404Spjd switch (len) { 92168404Spjd case 1: 93168404Spjd return (*(uint8_t *)addr); 94168404Spjd case 2: 95168404Spjd return (*(uint16_t *)addr); 96168404Spjd case 4: 97168404Spjd return (*(uint32_t *)addr); 98168404Spjd case 8: 99168404Spjd return (*(uint64_t *)addr); 100168404Spjd } 101168404Spjd ASSERT(!"bad int len"); 102168404Spjd return (0xFEEDFACEDEADBEEFULL); 103168404Spjd} 104168404Spjd 105168404Spjdvoid 106168404Spjdzap_leaf_byteswap(zap_leaf_phys_t *buf, int size) 107168404Spjd{ 108168404Spjd int i; 109168404Spjd zap_leaf_t l; 110277585Sdelphij dmu_buf_t l_dbuf; 111277585Sdelphij 112277585Sdelphij l_dbuf.db_data = buf; 113265740Sdelphij l.l_bs = highbit64(size) - 1; 114277585Sdelphij l.l_dbuf = &l_dbuf; 115168404Spjd 116265740Sdelphij buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); 117265740Sdelphij buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); 118265740Sdelphij buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic); 119265740Sdelphij buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree); 120265740Sdelphij buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries); 121265740Sdelphij buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); 122265740Sdelphij buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 123168404Spjd 124168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) 125168404Spjd buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 126168404Spjd 127168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 128168404Spjd zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); 129168404Spjd struct zap_leaf_entry *le; 130168404Spjd 131168404Spjd switch (lc->l_free.lf_type) { 132168404Spjd case ZAP_CHUNK_ENTRY: 133168404Spjd le = &lc->l_entry; 134168404Spjd 135168404Spjd le->le_type = BSWAP_8(le->le_type); 136219089Spjd le->le_value_intlen = BSWAP_8(le->le_value_intlen); 137168404Spjd le->le_next = BSWAP_16(le->le_next); 138168404Spjd le->le_name_chunk = BSWAP_16(le->le_name_chunk); 139219089Spjd le->le_name_numints = BSWAP_16(le->le_name_numints); 140168404Spjd le->le_value_chunk = BSWAP_16(le->le_value_chunk); 141219089Spjd le->le_value_numints = BSWAP_16(le->le_value_numints); 142168404Spjd le->le_cd = BSWAP_32(le->le_cd); 143168404Spjd le->le_hash = BSWAP_64(le->le_hash); 144168404Spjd break; 145168404Spjd case ZAP_CHUNK_FREE: 146168404Spjd lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type); 147168404Spjd lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next); 148168404Spjd break; 149168404Spjd case ZAP_CHUNK_ARRAY: 150168404Spjd lc->l_array.la_type = BSWAP_8(lc->l_array.la_type); 151168404Spjd lc->l_array.la_next = BSWAP_16(lc->l_array.la_next); 152168404Spjd /* la_array doesn't need swapping */ 153168404Spjd break; 154168404Spjd default: 155168404Spjd ASSERT(!"bad leaf type"); 156168404Spjd } 157168404Spjd } 158168404Spjd} 159168404Spjd 160168404Spjdvoid 161185029Spjdzap_leaf_init(zap_leaf_t *l, boolean_t sort) 162168404Spjd{ 163168404Spjd int i; 164168404Spjd 165265740Sdelphij l->l_bs = highbit64(l->l_dbuf->db_size) - 1; 166277585Sdelphij zap_memset(&zap_leaf_phys(l)->l_hdr, 0, 167277585Sdelphij sizeof (struct zap_leaf_header)); 168277585Sdelphij zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, 169277585Sdelphij 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 170168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 171168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; 172168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; 173168404Spjd } 174168404Spjd ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; 175277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF; 176277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC; 177277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); 178185029Spjd if (sort) 179277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 180168404Spjd} 181168404Spjd 182168404Spjd/* 183168404Spjd * Routines which manipulate leaf chunks (l_chunk[]). 184168404Spjd */ 185168404Spjd 186168404Spjdstatic uint16_t 187168404Spjdzap_leaf_chunk_alloc(zap_leaf_t *l) 188168404Spjd{ 189168404Spjd int chunk; 190168404Spjd 191277585Sdelphij ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0); 192168404Spjd 193277585Sdelphij chunk = zap_leaf_phys(l)->l_hdr.lh_freelist; 194168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 195168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); 196168404Spjd 197277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_freelist = 198277585Sdelphij ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; 199168404Spjd 200277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nfree--; 201168404Spjd 202168404Spjd return (chunk); 203168404Spjd} 204168404Spjd 205168404Spjdstatic void 206168404Spjdzap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 207168404Spjd{ 208168404Spjd struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; 209277585Sdelphij ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); 210168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 211168404Spjd ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); 212168404Spjd 213168404Spjd zlf->lf_type = ZAP_CHUNK_FREE; 214277585Sdelphij zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist; 215168404Spjd bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 216277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_freelist = chunk; 217168404Spjd 218277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nfree++; 219168404Spjd} 220168404Spjd 221168404Spjd/* 222168404Spjd * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 223168404Spjd */ 224168404Spjd 225168404Spjdstatic uint16_t 226168404Spjdzap_leaf_array_create(zap_leaf_t *l, const char *buf, 227219089Spjd int integer_size, int num_integers) 228168404Spjd{ 229168404Spjd uint16_t chunk_head; 230168404Spjd uint16_t *chunkp = &chunk_head; 231168404Spjd int byten = 0; 232247187Smm uint64_t value = 0; 233168404Spjd int shift = (integer_size-1)*8; 234168404Spjd int len = num_integers; 235168404Spjd 236168404Spjd ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 237168404Spjd 238168404Spjd while (len > 0) { 239168404Spjd uint16_t chunk = zap_leaf_chunk_alloc(l); 240168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 241168404Spjd int i; 242168404Spjd 243168404Spjd la->la_type = ZAP_CHUNK_ARRAY; 244168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 245168404Spjd if (byten == 0) 246168404Spjd value = ldv(integer_size, buf); 247168404Spjd la->la_array[i] = value >> shift; 248168404Spjd value <<= 8; 249168404Spjd if (++byten == integer_size) { 250168404Spjd byten = 0; 251168404Spjd buf += integer_size; 252168404Spjd if (--len == 0) 253168404Spjd break; 254168404Spjd } 255168404Spjd } 256168404Spjd 257168404Spjd *chunkp = chunk; 258168404Spjd chunkp = &la->la_next; 259168404Spjd } 260168404Spjd *chunkp = CHAIN_END; 261168404Spjd 262168404Spjd return (chunk_head); 263168404Spjd} 264168404Spjd 265168404Spjdstatic void 266168404Spjdzap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) 267168404Spjd{ 268168404Spjd uint16_t chunk = *chunkp; 269168404Spjd 270168404Spjd *chunkp = CHAIN_END; 271168404Spjd 272168404Spjd while (chunk != CHAIN_END) { 273168404Spjd int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; 274168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, 275168404Spjd ZAP_CHUNK_ARRAY); 276168404Spjd zap_leaf_chunk_free(l, chunk); 277168404Spjd chunk = nextchunk; 278168404Spjd } 279168404Spjd} 280168404Spjd 281168404Spjd/* array_len and buf_len are in integers, not bytes */ 282168404Spjdstatic void 283168404Spjdzap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, 284168404Spjd int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 285219089Spjd void *buf) 286168404Spjd{ 287168404Spjd int len = MIN(array_len, buf_len); 288168404Spjd int byten = 0; 289168404Spjd uint64_t value = 0; 290219089Spjd char *p = buf; 291168404Spjd 292168404Spjd ASSERT3U(array_int_len, <=, buf_int_len); 293168404Spjd 294168404Spjd /* Fast path for one 8-byte integer */ 295168404Spjd if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 296168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 297168404Spjd uint8_t *ip = la->la_array; 298219089Spjd uint64_t *buf64 = buf; 299168404Spjd 300168404Spjd *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 301168404Spjd (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 302168404Spjd (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 303168404Spjd (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 304168404Spjd return; 305168404Spjd } 306168404Spjd 307168404Spjd /* Fast path for an array of 1-byte integers (eg. the entry name) */ 308168404Spjd if (array_int_len == 1 && buf_int_len == 1 && 309168404Spjd buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 310168404Spjd while (chunk != CHAIN_END) { 311168404Spjd struct zap_leaf_array *la = 312168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 313219089Spjd bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); 314219089Spjd p += ZAP_LEAF_ARRAY_BYTES; 315168404Spjd chunk = la->la_next; 316168404Spjd } 317168404Spjd return; 318168404Spjd } 319168404Spjd 320168404Spjd while (len > 0) { 321168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 322168404Spjd int i; 323168404Spjd 324168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 325168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 326168404Spjd value = (value << 8) | la->la_array[i]; 327168404Spjd byten++; 328168404Spjd if (byten == array_int_len) { 329219089Spjd stv(buf_int_len, p, value); 330168404Spjd byten = 0; 331168404Spjd len--; 332168404Spjd if (len == 0) 333168404Spjd return; 334219089Spjd p += buf_int_len; 335168404Spjd } 336168404Spjd } 337168404Spjd chunk = la->la_next; 338168404Spjd } 339168404Spjd} 340168404Spjd 341185029Spjdstatic boolean_t 342219089Spjdzap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, 343219089Spjd int chunk, int array_numints) 344168404Spjd{ 345168404Spjd int bseen = 0; 346168404Spjd 347219089Spjd if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { 348219089Spjd uint64_t *thiskey; 349219089Spjd boolean_t match; 350219089Spjd 351219089Spjd ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); 352219089Spjd thiskey = kmem_alloc(array_numints * sizeof (*thiskey), 353219089Spjd KM_SLEEP); 354219089Spjd 355219089Spjd zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, 356219089Spjd sizeof (*thiskey), array_numints, thiskey); 357219089Spjd match = bcmp(thiskey, zn->zn_key_orig, 358219089Spjd array_numints * sizeof (*thiskey)) == 0; 359219089Spjd kmem_free(thiskey, array_numints * sizeof (*thiskey)); 360219089Spjd return (match); 361219089Spjd } 362219089Spjd 363219089Spjd ASSERT(zn->zn_key_intlen == 1); 364185029Spjd if (zn->zn_matchtype == MT_FIRST) { 365219089Spjd char *thisname = kmem_alloc(array_numints, KM_SLEEP); 366185029Spjd boolean_t match; 367185029Spjd 368219089Spjd zap_leaf_array_read(l, chunk, sizeof (char), array_numints, 369219089Spjd sizeof (char), array_numints, thisname); 370185029Spjd match = zap_match(zn, thisname); 371219089Spjd kmem_free(thisname, array_numints); 372185029Spjd return (match); 373185029Spjd } 374185029Spjd 375219089Spjd /* 376219089Spjd * Fast path for exact matching. 377219089Spjd * First check that the lengths match, so that we don't read 378219089Spjd * past the end of the zn_key_orig array. 379219089Spjd */ 380219089Spjd if (array_numints != zn->zn_key_orig_numints) 381219089Spjd return (B_FALSE); 382219089Spjd while (bseen < array_numints) { 383168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 384219089Spjd int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); 385168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 386219089Spjd if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) 387168404Spjd break; 388168404Spjd chunk = la->la_next; 389168404Spjd bseen += toread; 390168404Spjd } 391219089Spjd return (bseen == array_numints); 392168404Spjd} 393168404Spjd 394168404Spjd/* 395168404Spjd * Routines which manipulate leaf entries. 396168404Spjd */ 397168404Spjd 398168404Spjdint 399185029Spjdzap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) 400168404Spjd{ 401168404Spjd uint16_t *chunkp; 402168404Spjd struct zap_leaf_entry *le; 403168404Spjd 404277585Sdelphij ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 405168404Spjd 406185029Spjdagain: 407185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); 408168404Spjd *chunkp != CHAIN_END; chunkp = &le->le_next) { 409168404Spjd uint16_t chunk = *chunkp; 410168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 411168404Spjd 412168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 413168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 414168404Spjd 415185029Spjd if (le->le_hash != zn->zn_hash) 416168404Spjd continue; 417168404Spjd 418185029Spjd /* 419185029Spjd * NB: the entry chain is always sorted by cd on 420185029Spjd * normalized zap objects, so this will find the 421185029Spjd * lowest-cd match for MT_FIRST. 422185029Spjd */ 423185029Spjd ASSERT(zn->zn_matchtype == MT_EXACT || 424277585Sdelphij (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); 425185029Spjd if (zap_leaf_array_match(l, zn, le->le_name_chunk, 426219089Spjd le->le_name_numints)) { 427219089Spjd zeh->zeh_num_integers = le->le_value_numints; 428219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 429168404Spjd zeh->zeh_cd = le->le_cd; 430168404Spjd zeh->zeh_hash = le->le_hash; 431168404Spjd zeh->zeh_chunkp = chunkp; 432168404Spjd zeh->zeh_leaf = l; 433168404Spjd return (0); 434168404Spjd } 435168404Spjd } 436168404Spjd 437185029Spjd /* 438185029Spjd * NB: we could of course do this in one pass, but that would be 439185029Spjd * a pain. We'll see if MT_BEST is even used much. 440185029Spjd */ 441185029Spjd if (zn->zn_matchtype == MT_BEST) { 442185029Spjd zn->zn_matchtype = MT_FIRST; 443185029Spjd goto again; 444185029Spjd } 445185029Spjd 446249195Smm return (SET_ERROR(ENOENT)); 447168404Spjd} 448168404Spjd 449168404Spjd/* Return (h1,cd1 >= h2,cd2) */ 450168404Spjd#define HCD_GTEQ(h1, cd1, h2, cd2) \ 451168404Spjd ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) 452168404Spjd 453168404Spjdint 454168404Spjdzap_leaf_lookup_closest(zap_leaf_t *l, 455168404Spjd uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 456168404Spjd{ 457168404Spjd uint16_t chunk; 458168404Spjd uint64_t besth = -1ULL; 459219089Spjd uint32_t bestcd = -1U; 460168404Spjd uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; 461168404Spjd uint16_t lh; 462168404Spjd struct zap_leaf_entry *le; 463168404Spjd 464277585Sdelphij ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 465168404Spjd 466168404Spjd for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 467277585Sdelphij for (chunk = zap_leaf_phys(l)->l_hash[lh]; 468168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 469168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 470168404Spjd 471168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 472168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 473168404Spjd 474168404Spjd if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && 475168404Spjd HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { 476168404Spjd ASSERT3U(bestlh, >=, lh); 477168404Spjd bestlh = lh; 478168404Spjd besth = le->le_hash; 479168404Spjd bestcd = le->le_cd; 480168404Spjd 481219089Spjd zeh->zeh_num_integers = le->le_value_numints; 482219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 483168404Spjd zeh->zeh_cd = le->le_cd; 484168404Spjd zeh->zeh_hash = le->le_hash; 485168404Spjd zeh->zeh_fakechunk = chunk; 486168404Spjd zeh->zeh_chunkp = &zeh->zeh_fakechunk; 487168404Spjd zeh->zeh_leaf = l; 488168404Spjd } 489168404Spjd } 490168404Spjd } 491168404Spjd 492219089Spjd return (bestcd == -1U ? ENOENT : 0); 493168404Spjd} 494168404Spjd 495168404Spjdint 496168404Spjdzap_entry_read(const zap_entry_handle_t *zeh, 497168404Spjd uint8_t integer_size, uint64_t num_integers, void *buf) 498168404Spjd{ 499168404Spjd struct zap_leaf_entry *le = 500168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 501168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 502168404Spjd 503219089Spjd if (le->le_value_intlen > integer_size) 504249195Smm return (SET_ERROR(EINVAL)); 505168404Spjd 506219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, 507219089Spjd le->le_value_intlen, le->le_value_numints, 508219089Spjd integer_size, num_integers, buf); 509168404Spjd 510168404Spjd if (zeh->zeh_num_integers > num_integers) 511249195Smm return (SET_ERROR(EOVERFLOW)); 512168404Spjd return (0); 513168404Spjd 514168404Spjd} 515168404Spjd 516168404Spjdint 517219089Spjdzap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, 518219089Spjd char *buf) 519168404Spjd{ 520168404Spjd struct zap_leaf_entry *le = 521168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 522168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 523168404Spjd 524219089Spjd if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 525219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, 526219089Spjd le->le_name_numints, 8, buflen / 8, buf); 527219089Spjd } else { 528219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, 529219089Spjd le->le_name_numints, 1, buflen, buf); 530219089Spjd } 531219089Spjd if (le->le_name_numints > buflen) 532249195Smm return (SET_ERROR(EOVERFLOW)); 533168404Spjd return (0); 534168404Spjd} 535168404Spjd 536168404Spjdint 537168404Spjdzap_entry_update(zap_entry_handle_t *zeh, 538290765Smav uint8_t integer_size, uint64_t num_integers, const void *buf) 539168404Spjd{ 540168404Spjd int delta_chunks; 541168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 542168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); 543168404Spjd 544168404Spjd delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - 545219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); 546168404Spjd 547277585Sdelphij if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks) 548249195Smm return (SET_ERROR(EAGAIN)); 549168404Spjd 550168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 551168404Spjd le->le_value_chunk = 552168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 553219089Spjd le->le_value_numints = num_integers; 554219089Spjd le->le_value_intlen = integer_size; 555168404Spjd return (0); 556168404Spjd} 557168404Spjd 558168404Spjdvoid 559168404Spjdzap_entry_remove(zap_entry_handle_t *zeh) 560168404Spjd{ 561168404Spjd uint16_t entry_chunk; 562168404Spjd struct zap_leaf_entry *le; 563168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 564168404Spjd 565168404Spjd ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 566168404Spjd 567168404Spjd entry_chunk = *zeh->zeh_chunkp; 568168404Spjd le = ZAP_LEAF_ENTRY(l, entry_chunk); 569168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 570168404Spjd 571168404Spjd zap_leaf_array_free(l, &le->le_name_chunk); 572168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 573168404Spjd 574168404Spjd *zeh->zeh_chunkp = le->le_next; 575168404Spjd zap_leaf_chunk_free(l, entry_chunk); 576168404Spjd 577277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nentries--; 578168404Spjd} 579168404Spjd 580168404Spjdint 581219089Spjdzap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, 582168404Spjd uint8_t integer_size, uint64_t num_integers, const void *buf, 583168404Spjd zap_entry_handle_t *zeh) 584168404Spjd{ 585168404Spjd uint16_t chunk; 586168404Spjd uint16_t *chunkp; 587168404Spjd struct zap_leaf_entry *le; 588219089Spjd uint64_t valuelen; 589168404Spjd int numchunks; 590219089Spjd uint64_t h = zn->zn_hash; 591168404Spjd 592168404Spjd valuelen = integer_size * num_integers; 593168404Spjd 594219089Spjd numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * 595219089Spjd zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); 596168404Spjd if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) 597168404Spjd return (E2BIG); 598168404Spjd 599219089Spjd if (cd == ZAP_NEED_CD) { 600185029Spjd /* find the lowest unused cd */ 601277585Sdelphij if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { 602185029Spjd cd = 0; 603185029Spjd 604168404Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 605168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 606168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 607185029Spjd if (le->le_cd > cd) 608168404Spjd break; 609185029Spjd if (le->le_hash == h) { 610185029Spjd ASSERT3U(cd, ==, le->le_cd); 611185029Spjd cd++; 612168404Spjd } 613168404Spjd } 614185029Spjd } else { 615185029Spjd /* old unsorted format; do it the O(n^2) way */ 616219089Spjd for (cd = 0; ; cd++) { 617185029Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 618185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 619185029Spjd le = ZAP_LEAF_ENTRY(l, chunk); 620185029Spjd if (le->le_hash == h && 621185029Spjd le->le_cd == cd) { 622185029Spjd break; 623185029Spjd } 624185029Spjd } 625185029Spjd /* If this cd is not in use, we are good. */ 626185029Spjd if (chunk == CHAIN_END) 627185029Spjd break; 628185029Spjd } 629168404Spjd } 630185029Spjd /* 631219089Spjd * We would run out of space in a block before we could 632219089Spjd * store enough entries to run out of CD values. 633185029Spjd */ 634219089Spjd ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); 635168404Spjd } 636168404Spjd 637277585Sdelphij if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks) 638249195Smm return (SET_ERROR(EAGAIN)); 639168404Spjd 640168404Spjd /* make the entry */ 641168404Spjd chunk = zap_leaf_chunk_alloc(l); 642168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 643168404Spjd le->le_type = ZAP_CHUNK_ENTRY; 644219089Spjd le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, 645219089Spjd zn->zn_key_intlen, zn->zn_key_orig_numints); 646219089Spjd le->le_name_numints = zn->zn_key_orig_numints; 647168404Spjd le->le_value_chunk = 648168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 649219089Spjd le->le_value_numints = num_integers; 650219089Spjd le->le_value_intlen = integer_size; 651168404Spjd le->le_hash = h; 652168404Spjd le->le_cd = cd; 653168404Spjd 654168404Spjd /* link it into the hash chain */ 655185029Spjd /* XXX if we did the search above, we could just use that */ 656185029Spjd chunkp = zap_leaf_rehash_entry(l, chunk); 657168404Spjd 658277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nentries++; 659168404Spjd 660168404Spjd zeh->zeh_leaf = l; 661168404Spjd zeh->zeh_num_integers = num_integers; 662219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 663168404Spjd zeh->zeh_cd = le->le_cd; 664168404Spjd zeh->zeh_hash = le->le_hash; 665168404Spjd zeh->zeh_chunkp = chunkp; 666168404Spjd 667168404Spjd return (0); 668168404Spjd} 669168404Spjd 670168404Spjd/* 671185029Spjd * Determine if there is another entry with the same normalized form. 672185029Spjd * For performance purposes, either zn or name must be provided (the 673185029Spjd * other can be NULL). Note, there usually won't be any hash 674185029Spjd * conflicts, in which case we don't need the concatenated/normalized 675185029Spjd * form of the name. But all callers have one of these on hand anyway, 676185029Spjd * so might as well take advantage. A cleaner but slower interface 677185029Spjd * would accept neither argument, and compute the normalized name as 678185029Spjd * needed (using zap_name_alloc(zap_entry_read_name(zeh))). 679185029Spjd */ 680185029Spjdboolean_t 681185029Spjdzap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, 682185029Spjd const char *name, zap_t *zap) 683185029Spjd{ 684185029Spjd uint64_t chunk; 685185029Spjd struct zap_leaf_entry *le; 686185029Spjd boolean_t allocdzn = B_FALSE; 687185029Spjd 688185029Spjd if (zap->zap_normflags == 0) 689185029Spjd return (B_FALSE); 690185029Spjd 691185029Spjd for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); 692185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 693185029Spjd le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); 694185029Spjd if (le->le_hash != zeh->zeh_hash) 695185029Spjd continue; 696185029Spjd if (le->le_cd == zeh->zeh_cd) 697185029Spjd continue; 698185029Spjd 699185029Spjd if (zn == NULL) { 700185029Spjd zn = zap_name_alloc(zap, name, MT_FIRST); 701185029Spjd allocdzn = B_TRUE; 702185029Spjd } 703185029Spjd if (zap_leaf_array_match(zeh->zeh_leaf, zn, 704219089Spjd le->le_name_chunk, le->le_name_numints)) { 705185029Spjd if (allocdzn) 706185029Spjd zap_name_free(zn); 707185029Spjd return (B_TRUE); 708185029Spjd } 709185029Spjd } 710185029Spjd if (allocdzn) 711185029Spjd zap_name_free(zn); 712185029Spjd return (B_FALSE); 713185029Spjd} 714185029Spjd 715185029Spjd/* 716168404Spjd * Routines for transferring entries between leafs. 717168404Spjd */ 718168404Spjd 719185029Spjdstatic uint16_t * 720168404Spjdzap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) 721168404Spjd{ 722168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); 723185029Spjd struct zap_leaf_entry *le2; 724185029Spjd uint16_t *chunkp; 725185029Spjd 726185029Spjd /* 727185029Spjd * keep the entry chain sorted by cd 728185029Spjd * NB: this will not cause problems for unsorted leafs, though 729185029Spjd * it is unnecessary there. 730185029Spjd */ 731185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); 732185029Spjd *chunkp != CHAIN_END; chunkp = &le2->le_next) { 733185029Spjd le2 = ZAP_LEAF_ENTRY(l, *chunkp); 734185029Spjd if (le2->le_cd > le->le_cd) 735185029Spjd break; 736185029Spjd } 737185029Spjd 738185029Spjd le->le_next = *chunkp; 739185029Spjd *chunkp = entry; 740185029Spjd return (chunkp); 741168404Spjd} 742168404Spjd 743168404Spjdstatic uint16_t 744168404Spjdzap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) 745168404Spjd{ 746168404Spjd uint16_t new_chunk; 747168404Spjd uint16_t *nchunkp = &new_chunk; 748168404Spjd 749168404Spjd while (chunk != CHAIN_END) { 750168404Spjd uint16_t nchunk = zap_leaf_chunk_alloc(nl); 751168404Spjd struct zap_leaf_array *nla = 752168404Spjd &ZAP_LEAF_CHUNK(nl, nchunk).l_array; 753168404Spjd struct zap_leaf_array *la = 754168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 755168404Spjd int nextchunk = la->la_next; 756168404Spjd 757168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 758168404Spjd ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); 759168404Spjd 760168404Spjd *nla = *la; /* structure assignment */ 761168404Spjd 762168404Spjd zap_leaf_chunk_free(l, chunk); 763168404Spjd chunk = nextchunk; 764168404Spjd *nchunkp = nchunk; 765168404Spjd nchunkp = &nla->la_next; 766168404Spjd } 767168404Spjd *nchunkp = CHAIN_END; 768168404Spjd return (new_chunk); 769168404Spjd} 770168404Spjd 771168404Spjdstatic void 772168404Spjdzap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) 773168404Spjd{ 774168404Spjd struct zap_leaf_entry *le, *nle; 775168404Spjd uint16_t chunk; 776168404Spjd 777168404Spjd le = ZAP_LEAF_ENTRY(l, entry); 778168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 779168404Spjd 780168404Spjd chunk = zap_leaf_chunk_alloc(nl); 781168404Spjd nle = ZAP_LEAF_ENTRY(nl, chunk); 782168404Spjd *nle = *le; /* structure assignment */ 783168404Spjd 784185029Spjd (void) zap_leaf_rehash_entry(nl, chunk); 785168404Spjd 786168404Spjd nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); 787168404Spjd nle->le_value_chunk = 788168404Spjd zap_leaf_transfer_array(l, le->le_value_chunk, nl); 789168404Spjd 790168404Spjd zap_leaf_chunk_free(l, entry); 791168404Spjd 792277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nentries--; 793277585Sdelphij zap_leaf_phys(nl)->l_hdr.lh_nentries++; 794168404Spjd} 795168404Spjd 796168404Spjd/* 797168404Spjd * Transfer the entries whose hash prefix ends in 1 to the new leaf. 798168404Spjd */ 799168404Spjdvoid 800185029Spjdzap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) 801168404Spjd{ 802168404Spjd int i; 803277585Sdelphij int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len; 804168404Spjd 805168404Spjd /* set new prefix and prefix_len */ 806277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1; 807277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_prefix_len++; 808277585Sdelphij zap_leaf_phys(nl)->l_hdr.lh_prefix = 809277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_prefix | 1; 810277585Sdelphij zap_leaf_phys(nl)->l_hdr.lh_prefix_len = 811277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_prefix_len; 812168404Spjd 813168404Spjd /* break existing hash chains */ 814277585Sdelphij zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, 815277585Sdelphij 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 816168404Spjd 817185029Spjd if (sort) 818277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 819185029Spjd 820168404Spjd /* 821168404Spjd * Transfer entries whose hash bit 'bit' is set to nl; rehash 822168404Spjd * the remaining entries 823168404Spjd * 824168404Spjd * NB: We could find entries via the hashtable instead. That 825168404Spjd * would be O(hashents+numents) rather than O(numblks+numents), 826168404Spjd * but this accesses memory more sequentially, and when we're 827168404Spjd * called, the block is usually pretty full. 828168404Spjd */ 829168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 830168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); 831168404Spjd if (le->le_type != ZAP_CHUNK_ENTRY) 832168404Spjd continue; 833168404Spjd 834168404Spjd if (le->le_hash & (1ULL << bit)) 835168404Spjd zap_leaf_transfer_entry(l, i, nl); 836168404Spjd else 837185029Spjd (void) zap_leaf_rehash_entry(l, i); 838168404Spjd } 839168404Spjd} 840168404Spjd 841168404Spjdvoid 842168404Spjdzap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) 843168404Spjd{ 844168404Spjd int i, n; 845168404Spjd 846277585Sdelphij n = zap_f_phys(zap)->zap_ptrtbl.zt_shift - 847277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_prefix_len; 848168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 849168404Spjd zs->zs_leafs_with_2n_pointers[n]++; 850168404Spjd 851168404Spjd 852277585Sdelphij n = zap_leaf_phys(l)->l_hdr.lh_nentries/5; 853168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 854168404Spjd zs->zs_blocks_with_n5_entries[n]++; 855168404Spjd 856168404Spjd n = ((1<<FZAP_BLOCK_SHIFT(zap)) - 857277585Sdelphij zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / 858168404Spjd (1<<FZAP_BLOCK_SHIFT(zap)); 859168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 860168404Spjd zs->zs_blocks_n_tenths_full[n]++; 861168404Spjd 862168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { 863168404Spjd int nentries = 0; 864277585Sdelphij int chunk = zap_leaf_phys(l)->l_hash[i]; 865168404Spjd 866168404Spjd while (chunk != CHAIN_END) { 867168404Spjd struct zap_leaf_entry *le = 868168404Spjd ZAP_LEAF_ENTRY(l, chunk); 869168404Spjd 870219089Spjd n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + 871219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * 872219089Spjd le->le_value_intlen); 873168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 874168404Spjd zs->zs_entries_using_n_chunks[n]++; 875168404Spjd 876168404Spjd chunk = le->le_next; 877168404Spjd nentries++; 878168404Spjd } 879168404Spjd 880168404Spjd n = nentries; 881168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 882168404Spjd zs->zs_buckets_with_n_entries[n]++; 883168404Spjd } 884168404Spjd} 885