1131846Stjr/*- 2131846Stjr * Copyright (c) 2004 Tim J. Robbins. 3131846Stjr * All rights reserved. 4131846Stjr * 5131846Stjr * Redistribution and use in source and binary forms, with or without 6131846Stjr * modification, are permitted provided that the following conditions 7131846Stjr * are met: 8131846Stjr * 1. Redistributions of source code must retain the above copyright 9131846Stjr * notice, this list of conditions and the following disclaimer. 10131846Stjr * 2. Redistributions in binary form must reproduce the above copyright 11131846Stjr * notice, this list of conditions and the following disclaimer in the 12131846Stjr * documentation and/or other materials provided with the distribution. 13131846Stjr * 14131846Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15131846Stjr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16131846Stjr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17131846Stjr * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18131846Stjr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19131846Stjr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20131846Stjr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21131846Stjr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22131846Stjr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23131846Stjr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24131846Stjr * SUCH DAMAGE. 25131846Stjr */ 26131846Stjr/* 27131846Stjr * "Set of characters" ADT implemented as a splay tree of extents, with 28131846Stjr * a lookup table cache to simplify looking up the first bunch of 29131846Stjr * characters (which are presumably more common than others). 30131846Stjr */ 31131846Stjr 32131846Stjr#include <sys/cdefs.h> 33131846Stjr__FBSDID("$FreeBSD$"); 34131846Stjr 35131846Stjr#include <assert.h> 36200462Sdelphij#include <stdbool.h> 37131846Stjr#include <stdlib.h> 38200462Sdelphij#include <wchar.h> 39200462Sdelphij#include <wctype.h> 40131846Stjr#include "cset.h" 41131846Stjr 42131846Stjrstatic struct csnode * cset_delete(struct csnode *, wchar_t); 43131846Stjrstatic __inline int cset_rangecmp(struct csnode *, wchar_t); 44131846Stjrstatic struct csnode * cset_splay(struct csnode *, wchar_t); 45131846Stjr 46131846Stjr/* 47131846Stjr * cset_alloc -- 48131846Stjr * Allocate a set of characters. 49131846Stjr */ 50131846Stjrstruct cset * 51131846Stjrcset_alloc(void) 52131846Stjr{ 53131846Stjr struct cset *cs; 54131846Stjr 55131846Stjr if ((cs = malloc(sizeof(*cs))) == NULL) 56131846Stjr return (NULL); 57131846Stjr cs->cs_root = NULL; 58131846Stjr cs->cs_classes = NULL; 59131846Stjr cs->cs_havecache = false; 60131891Stjr cs->cs_invert = false; 61131846Stjr return (cs); 62131846Stjr} 63131846Stjr 64131846Stjr/* 65131846Stjr * cset_add -- 66131846Stjr * Add a character to the set. 67131846Stjr */ 68131846Stjrbool 69131846Stjrcset_add(struct cset *cs, wchar_t ch) 70131846Stjr{ 71131846Stjr struct csnode *csn, *ncsn; 72131846Stjr wchar_t oval; 73131846Stjr 74131846Stjr cs->cs_havecache = false; 75131846Stjr 76131846Stjr /* 77131846Stjr * Inserting into empty tree; new item becomes the root. 78131846Stjr */ 79131846Stjr if (cs->cs_root == NULL) { 80131846Stjr csn = malloc(sizeof(*cs->cs_root)); 81131846Stjr if (csn == NULL) 82131846Stjr return (false); 83131846Stjr csn->csn_left = csn->csn_right = NULL; 84131846Stjr csn->csn_min = csn->csn_max = ch; 85131846Stjr cs->cs_root = csn; 86131846Stjr return (true); 87131846Stjr } 88131846Stjr 89131846Stjr /* 90131846Stjr * Splay to check whether the item already exists, and otherwise, 91131846Stjr * where we should put it. 92131846Stjr */ 93131846Stjr csn = cs->cs_root = cset_splay(cs->cs_root, ch); 94131846Stjr 95131846Stjr /* 96132142Stjr * Avoid adding duplicate nodes. 97131846Stjr */ 98131846Stjr if (cset_rangecmp(csn, ch) == 0) 99131846Stjr return (true); 100131846Stjr 101131846Stjr /* 102132142Stjr * Allocate a new node and make it the new root. 103131846Stjr */ 104131846Stjr ncsn = malloc(sizeof(*ncsn)); 105131846Stjr if (ncsn == NULL) 106131846Stjr return (false); 107131846Stjr ncsn->csn_min = ncsn->csn_max = ch; 108131846Stjr if (cset_rangecmp(csn, ch) < 0) { 109131846Stjr ncsn->csn_left = csn->csn_left; 110131846Stjr ncsn->csn_right = csn; 111131846Stjr csn->csn_left = NULL; 112131846Stjr } else { 113131846Stjr ncsn->csn_right = csn->csn_right; 114131846Stjr ncsn->csn_left = csn; 115131846Stjr csn->csn_right = NULL; 116131846Stjr } 117131846Stjr cs->cs_root = ncsn; 118131846Stjr 119131846Stjr /* 120132142Stjr * Coalesce with left and right neighbours if possible. 121131846Stjr */ 122132142Stjr if (ncsn->csn_left != NULL) { 123132142Stjr ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); 124132142Stjr if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { 125132142Stjr oval = ncsn->csn_left->csn_min; 126132142Stjr ncsn->csn_left = cset_delete(ncsn->csn_left, 127132142Stjr ncsn->csn_left->csn_min); 128132142Stjr ncsn->csn_min = oval; 129132142Stjr } 130131846Stjr } 131132142Stjr if (ncsn->csn_right != NULL) { 132132142Stjr ncsn->csn_right = cset_splay(ncsn->csn_right, 133132142Stjr ncsn->csn_max + 1); 134132142Stjr if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { 135132142Stjr oval = ncsn->csn_right->csn_max; 136132142Stjr ncsn->csn_right = cset_delete(ncsn->csn_right, 137132142Stjr ncsn->csn_right->csn_min); 138132142Stjr ncsn->csn_max = oval; 139132142Stjr } 140131846Stjr } 141131846Stjr 142131846Stjr return (true); 143131846Stjr} 144131846Stjr 145131846Stjr/* 146131846Stjr * cset_in_hard -- 147131846Stjr * Determine whether a character is in the set without using 148131846Stjr * the cache. 149131846Stjr */ 150131846Stjrbool 151131846Stjrcset_in_hard(struct cset *cs, wchar_t ch) 152131846Stjr{ 153131846Stjr struct csclass *csc; 154131846Stjr 155131846Stjr for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) 156226363Sed if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0)) 157131846Stjr return (cs->cs_invert ^ true); 158131846Stjr if (cs->cs_root != NULL) { 159131846Stjr cs->cs_root = cset_splay(cs->cs_root, ch); 160226363Sed return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0)); 161131846Stjr } 162131846Stjr return (cs->cs_invert ^ false); 163131846Stjr} 164131846Stjr 165131846Stjr/* 166131846Stjr * cset_cache -- 167131846Stjr * Update the cache. 168131846Stjr */ 169131846Stjrvoid 170131846Stjrcset_cache(struct cset *cs) 171131846Stjr{ 172131846Stjr wchar_t i; 173131846Stjr 174131846Stjr for (i = 0; i < CS_CACHE_SIZE; i++) 175131846Stjr cs->cs_cache[i] = cset_in_hard(cs, i); 176131846Stjr 177131846Stjr cs->cs_havecache = true; 178131846Stjr} 179131846Stjr 180131846Stjr/* 181131846Stjr * cset_invert -- 182131846Stjr * Invert the character set. 183131846Stjr */ 184131846Stjrvoid 185131846Stjrcset_invert(struct cset *cs) 186131846Stjr{ 187131846Stjr 188131846Stjr cs->cs_invert ^= true; 189131846Stjr cs->cs_havecache = false; 190131846Stjr} 191131846Stjr 192131846Stjr/* 193131846Stjr * cset_addclass -- 194131846Stjr * Add a wctype()-style character class to the set, optionally 195131846Stjr * inverting it. 196131846Stjr */ 197131846Stjrbool 198131846Stjrcset_addclass(struct cset *cs, wctype_t type, bool invert) 199131846Stjr{ 200131846Stjr struct csclass *csc; 201131846Stjr 202131846Stjr csc = malloc(sizeof(*csc)); 203131846Stjr if (csc == NULL) 204131846Stjr return (false); 205131846Stjr csc->csc_type = type; 206131846Stjr csc->csc_invert = invert; 207131846Stjr csc->csc_next = cs->cs_classes; 208131846Stjr cs->cs_classes = csc; 209131846Stjr cs->cs_havecache = false; 210131846Stjr return (true); 211131846Stjr} 212131846Stjr 213131846Stjrstatic __inline int 214131846Stjrcset_rangecmp(struct csnode *t, wchar_t ch) 215131846Stjr{ 216131846Stjr 217131846Stjr if (ch < t->csn_min) 218131846Stjr return (-1); 219131846Stjr if (ch > t->csn_max) 220131846Stjr return (1); 221131846Stjr return (0); 222131846Stjr} 223131846Stjr 224131846Stjrstatic struct csnode * 225131846Stjrcset_splay(struct csnode *t, wchar_t ch) 226131846Stjr{ 227131846Stjr struct csnode N, *l, *r, *y; 228131846Stjr 229131846Stjr /* 230131846Stjr * Based on public domain code from Sleator. 231131846Stjr */ 232131846Stjr 233131846Stjr assert(t != NULL); 234131846Stjr 235131846Stjr N.csn_left = N.csn_right = NULL; 236131846Stjr l = r = &N; 237131846Stjr for (;;) { 238131846Stjr if (cset_rangecmp(t, ch) < 0) { 239131846Stjr if (t->csn_left != NULL && 240131846Stjr cset_rangecmp(t->csn_left, ch) < 0) { 241131846Stjr y = t->csn_left; 242131846Stjr t->csn_left = y->csn_right; 243131846Stjr y->csn_right = t; 244131846Stjr t = y; 245131846Stjr } 246131846Stjr if (t->csn_left == NULL) 247131846Stjr break; 248131846Stjr r->csn_left = t; 249131846Stjr r = t; 250131846Stjr t = t->csn_left; 251131846Stjr } else if (cset_rangecmp(t, ch) > 0) { 252131846Stjr if (t->csn_right != NULL && 253131846Stjr cset_rangecmp(t->csn_right, ch) > 0) { 254131846Stjr y = t->csn_right; 255131846Stjr t->csn_right = y->csn_left; 256131846Stjr y->csn_left = t; 257131846Stjr t = y; 258131846Stjr } 259131846Stjr if (t->csn_right == NULL) 260131846Stjr break; 261131846Stjr l->csn_right = t; 262131846Stjr l = t; 263131846Stjr t = t->csn_right; 264131846Stjr } else 265131846Stjr break; 266131846Stjr } 267131846Stjr l->csn_right = t->csn_left; 268131846Stjr r->csn_left = t->csn_right; 269131846Stjr t->csn_left = N.csn_right; 270131846Stjr t->csn_right = N.csn_left; 271131846Stjr return (t); 272131846Stjr} 273131846Stjr 274131846Stjrstatic struct csnode * 275131846Stjrcset_delete(struct csnode *t, wchar_t ch) 276131846Stjr{ 277131846Stjr struct csnode *x; 278131846Stjr 279131846Stjr assert(t != NULL); 280131846Stjr t = cset_splay(t, ch); 281131846Stjr assert(cset_rangecmp(t, ch) == 0); 282131846Stjr if (t->csn_left == NULL) 283131846Stjr x = t->csn_right; 284131846Stjr else { 285131846Stjr x = cset_splay(t->csn_left, ch); 286131846Stjr x->csn_right = t->csn_right; 287131846Stjr } 288131846Stjr free(t); 289131846Stjr return x; 290131846Stjr} 291