CodeRangeBuffer.java revision 953:221a84ef44c0
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 23import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 24 25public final class CodeRangeBuffer implements Cloneable { 26 private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; 27 private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff; 28 29 int[] p; 30 int used; 31 32 33 public CodeRangeBuffer() { 34 p = new int[INIT_MULTI_BYTE_RANGE_SIZE]; 35 writeCodePoint(0, 0); 36 } 37 38 // CodeRange.isInCodeRange 39 public boolean isInCodeRange(final int code) { 40 int low = 0; 41 final int n = p[0]; 42 int high = n; 43 44 while (low < high) { 45 final int x = (low + high) >> 1; 46 if (code > p[(x << 1) + 2]) { 47 low = x + 1; 48 } else { 49 high = x; 50 } 51 } 52 return low < n && code >= p[(low << 1) + 1]; 53 } 54 55 private CodeRangeBuffer(final CodeRangeBuffer orig) { 56 p = new int[orig.p.length]; 57 System.arraycopy(orig.p, 0, p, 0, p.length); 58 used = orig.used; 59 } 60 61 @Override 62 public String toString() { 63 final StringBuilder buf = new StringBuilder(); 64 buf.append("CodeRange"); 65 buf.append("\n used: ").append(used); 66 buf.append("\n code point: ").append(p[0]); 67 buf.append("\n ranges: "); 68 69 for (int i=0; i<p[0]; i++) { 70 buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]"); 71 if (i > 0 && i % 6 == 0) buf.append("\n "); 72 } 73 74 return buf.toString(); 75 } 76 77 private static String rangeNumToString(final int num){ 78 return "0x" + Integer.toString(num, 16); 79 } 80 81 public void expand(final int low) { 82 int length = p.length; 83 do { length <<= 1; } while (length < low); 84 final int[]tmp = new int[length]; 85 System.arraycopy(p, 0, tmp, 0, used); 86 p = tmp; 87 } 88 89 public void ensureSize(final int size) { 90 int length = p.length; 91 while (length < size ) { length <<= 1; } 92 if (p.length != length) { 93 final int[]tmp = new int[length]; 94 System.arraycopy(p, 0, tmp, 0, used); 95 p = tmp; 96 } 97 } 98 99 private void moveRight(final int from, final int to, final int n) { 100 if (to + n > p.length) expand(to + n); 101 System.arraycopy(p, from, p, to, n); 102 if (to + n > used) used = to + n; 103 } 104 105 protected void moveLeft(final int from, final int to, final int n) { 106 System.arraycopy(p, from, p, to, n); 107 } 108 109 private void moveLeftAndReduce(final int from, final int to) { 110 System.arraycopy(p, from, p, to, used - from); 111 used -= from - to; 112 } 113 114 public void writeCodePoint(final int pos, final int b) { 115 final int u = pos + 1; 116 if (p.length < u) expand(u); 117 p[pos] = b; 118 if (used < u) used = u; 119 } 120 121 @Override 122 public CodeRangeBuffer clone() { 123 return new CodeRangeBuffer(this); 124 } 125 126 // ugly part: these methods should be made OO 127 // add_code_range_to_buf 128 public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) { 129 if (from > to) { 130 final int n = from; 131 from = to; 132 to = n; 133 } 134 135 if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode 136 137 final int[]p = pbuf.p; 138 int n = p[0]; 139 140 int low = 0; 141 int bound = n; 142 143 while (low < bound) { 144 final int x = (low + bound) >>> 1; 145 if (from > p[x * 2 + 2]) { 146 low = x + 1; 147 } else { 148 bound = x; 149 } 150 } 151 152 int high = low; 153 bound = n; 154 155 while (high < bound) { 156 final int x = (high + bound) >>> 1; 157 if (to >= p[x * 2 + 1] - 1) { 158 high = x + 1; 159 } else { 160 bound = x; 161 } 162 } 163 164 final int incN = low + 1 - high; 165 166 if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES); 167 168 if (incN != 1) { 169 if (from > p[low * 2 + 1]) from = p[low * 2 + 1]; 170 if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2]; 171 } 172 173 if (incN != 0 && high < n) { 174 final int fromPos = 1 + high * 2; 175 final int toPos = 1 + (low + 1) * 2; 176 final int size = (n - high) * 2; 177 178 if (incN > 0) { 179 pbuf.moveRight(fromPos, toPos, size); 180 } else { 181 pbuf.moveLeftAndReduce(fromPos, toPos); 182 } 183 } 184 185 final int pos = 1 + low * 2; 186 // pbuf.ensureSize(pos + 2); 187 pbuf.writeCodePoint(pos, from); 188 pbuf.writeCodePoint(pos + 1, to); 189 n += incN; 190 pbuf.writeCodePoint(0, n); 191 192 return pbuf; 193 } 194 195 // add_code_range, be aware of it returning null! 196 public static CodeRangeBuffer addCodeRange(final CodeRangeBuffer pbuf, final ScanEnvironment env, final int from, final int to) { 197 if (from > to) { 198 if (env.syntax.allowEmptyRangeInCC()) { 199 return pbuf; 200 } else { 201 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 202 } 203 } 204 return addCodeRangeToBuff(pbuf, from, to); 205 } 206 207 // SET_ALL_MULTI_BYTE_RANGE 208 protected static CodeRangeBuffer setAllMultiByteRange(final CodeRangeBuffer pbuf) { 209 return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE); 210 } 211 212 // ADD_ALL_MULTI_BYTE_RANGE 213 public static CodeRangeBuffer addAllMultiByteRange(final CodeRangeBuffer pbuf) { 214 return setAllMultiByteRange(pbuf); 215 } 216 217 // not_code_range_buf 218 public static CodeRangeBuffer notCodeRangeBuff(final CodeRangeBuffer bbuf) { 219 CodeRangeBuffer pbuf = null; 220 221 if (bbuf == null) return setAllMultiByteRange(pbuf); 222 223 final int[]p = bbuf.p; 224 final int n = p[0]; 225 226 if (n <= 0) return setAllMultiByteRange(pbuf); 227 228 int pre = EncodingHelper.mbcodeStartPosition(); 229 230 int from; 231 int to = 0; 232 for (int i=0; i<n; i++) { 233 from = p[i * 2 + 1]; 234 to = p[i * 2 + 2]; 235 if (pre <= from - 1) { 236 pbuf = addCodeRangeToBuff(pbuf, pre, from - 1); 237 } 238 if (to == ALL_MULTI_BYTE_RANGE) break; 239 pre = to + 1; 240 } 241 242 if (to < ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE); 243 return pbuf; 244 } 245 246 // or_code_range_buf 247 public static CodeRangeBuffer orCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1, 248 CodeRangeBuffer bbuf2, boolean not2) { 249 CodeRangeBuffer pbuf = null; 250 251 if (bbuf1 == null && bbuf2 == null) { 252 if (not1 || not2) { 253 return setAllMultiByteRange(pbuf); 254 } 255 return null; 256 } 257 258 if (bbuf2 == null) { 259 CodeRangeBuffer tbuf; 260 boolean tnot; 261 // swap 262 tnot = not1; not1 = not2; not2 = tnot; 263 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 264 } 265 266 if (bbuf1 == null) { 267 if (not1) { 268 return setAllMultiByteRange(pbuf); 269 } else { 270 if (!not2) { 271 return bbuf2.clone(); 272 } else { 273 return notCodeRangeBuff(bbuf2); 274 } 275 } 276 } 277 278 if (not1) { 279 CodeRangeBuffer tbuf; 280 boolean tnot; 281 // swap 282 tnot = not1; not1 = not2; not2 = tnot; 283 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 284 } 285 286 if (!not2 && !not1) { /* 1 OR 2 */ 287 pbuf = bbuf2.clone(); 288 } else if (!not1) { /* 1 OR (not 2) */ 289 pbuf = notCodeRangeBuff(bbuf2); 290 } 291 292 final int[]p1 = bbuf1.p; 293 final int n1 = p1[0]; 294 295 for (int i=0; i<n1; i++) { 296 final int from = p1[i * 2 + 1]; 297 final int to = p1[i * 2 + 2]; 298 pbuf = addCodeRangeToBuff(pbuf, from, to); 299 } 300 301 return pbuf; 302 } 303 304 // and_code_range1 305 public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, final int[]data, final int n) { 306 for (int i=0; i<n; i++) { 307 final int from2 = data[i * 2 + 1]; 308 final int to2 = data[i * 2 + 2]; 309 if (from2 < from1) { 310 if (to2 < from1) { 311 continue; 312 } else { 313 from1 = to2 + 1; 314 } 315 } else if (from2 <= to1) { 316 if (to2 < to1) { 317 if (from1 <= from2 - 1) { 318 pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1); 319 } 320 from1 = to2 + 1; 321 } else { 322 to1 = from2 - 1; 323 } 324 } else { 325 from1 = from2; 326 } 327 if (from1 > to1) break; 328 } 329 330 if (from1 <= to1) { 331 pbuf = addCodeRangeToBuff(pbuf, from1, to1); 332 } 333 334 return pbuf; 335 } 336 337 // and_code_range_buf 338 public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1, 339 CodeRangeBuffer bbuf2, boolean not2) { 340 CodeRangeBuffer pbuf = null; 341 342 if (bbuf1 == null) { 343 if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */ 344 return null; 345 } else if (bbuf2 == null) { 346 if (not2) return bbuf1.clone(); 347 return null; 348 } 349 350 if (not1) { 351 CodeRangeBuffer tbuf; 352 boolean tnot; 353 // swap 354 tnot = not1; not1 = not2; not2 = tnot; 355 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 356 } 357 358 final int[]p1 = bbuf1.p; 359 final int n1 = p1[0]; 360 final int[]p2 = bbuf2.p; 361 final int n2 = p2[0]; 362 363 if (!not2 && !not1) { /* 1 AND 2 */ 364 for (int i=0; i<n1; i++) { 365 final int from1 = p1[i * 2 + 1]; 366 final int to1 = p1[i * 2 + 2]; 367 368 for (int j=0; j<n2; j++) { 369 final int from2 = p2[j * 2 + 1]; 370 final int to2 = p2[j * 2 + 2]; 371 372 if (from2 > to1) break; 373 if (to2 < from1) continue; 374 final int from = from1 > from2 ? from1 : from2; 375 final int to = to1 < to2 ? to1 : to2; 376 pbuf = addCodeRangeToBuff(pbuf, from, to); 377 } 378 } 379 } else if (!not1) { /* 1 AND (not 2) */ 380 for (int i=0; i<n1; i++) { 381 final int from1 = p1[i * 2 + 1]; 382 final int to1 = p1[i * 2 + 2]; 383 pbuf = andCodeRange1(pbuf, from1, to1, p2, n2); 384 } 385 } 386 387 return pbuf; 388 } 389} 390